Skip to content
Cade Smith Fastball
Cade Smith Fastball
All data as of 08.21.2025
All of Cade Smith's Pitches
library(readr)
library(dplyr)
library(stringr)
library(ggplot2)
# Load the pitch-by-pitch export for Cade Smith and derive the helper columns
# used by the summaries and plots below.
# `details`, `description` and `events` are read explicitly as character.
All_Cade_Smith <- read_csv(
  "All_Cade_Smith.csv",
  col_types = cols(
    details = col_character(),
    description = col_character(),
    events = col_character()
  )
) %>%
  select(
    pitch_type, game_date, release_speed, batter, events, description, zone,
    des, stand, type, hit_location, bb_type, balls, strikes, game_year,
    on_3b, on_2b, on_1b, outs_when_up, inning, inning_topbot,
    hit_distance_sc, launch_speed, launch_angle, effective_speed,
    release_spin_rate, estimated_ba_using_speedangle,
    estimated_woba_using_speedangle, woba_value, home_score, away_score,
    delta_run_exp, estimated_slg_using_speedangle, delta_pitcher_run_exp
  ) %>%
  # Row-context columns. lead() reads the following row, lag() the previous one.
  # NOTE(review): `next_pitch` / `count_after_pitch` use lag(), so the file is
  # presumably sorted newest-first -- confirm against the export.
  mutate(
    des_date = paste(des, game_date, sep = " - "),
    lead_batter = lead(batter),
    pitchtype_zone = paste(pitch_type, zone, sep = " "),
    next_pitch = lag(pitchtype_zone),
    count = paste(balls, strikes, sep = " - "),
    count_after_pitch = lag(count)
  ) %>%
  mutate(
    # "S" for fouls / swinging strikes / foul tips / foul bunts in any zone, and
    # for called strikes or balls located in zones 1-9. Otherwise "X" when
    # `type` is "X", else "B". A missing `type` stays NA.
    in_out_zone = case_when(
      description %in% c(
        "foul", "swinging_strike", "swinging_strike_blocked",
        "foul_tip", "foul_bunt"
      ) ~ "S",
      zone %in% 1:9 & description %in% c("called_strike", "ball") ~ "S",
      is.na(type) ~ NA_character_,
      type == "X" ~ "X",
      TRUE ~ "B"
    ),
    # Does the location-based label agree with the recorded `type`?
    called_right = ifelse(in_out_zone == type, "Yes", "No")
  ) %>%
  # Zone number translated into a location relative to the batter. The "R"
  # labels mirror the "L" labels with inside/outside swapped. Any other
  # stand/zone combination (including missing values) yields NA.
  mutate(
    pitch_relative_location = case_when(
      stand == "L" & zone == 1 ~ "high-inside",
      stand == "L" & zone == 2 ~ "high-middle",
      stand == "L" & zone == 3 ~ "high-outside",
      stand == "L" & zone == 4 ~ "middle-inside",
      stand == "L" & zone == 5 ~ "middle-middle",
      stand == "L" & zone == 6 ~ "middle-outside",
      stand == "L" & zone == 7 ~ "low-inside",
      stand == "L" & zone == 8 ~ "low-middle",
      stand == "L" & zone == 9 ~ "low-outside",
      stand == "L" & zone == 11 ~ "high-inside-ball",
      stand == "L" & zone == 12 ~ "high-outside-ball",
      stand == "L" & zone == 13 ~ "low-inside-ball",
      stand == "L" & zone == 14 ~ "low-outside-ball",
      stand == "R" & zone == 1 ~ "high-outside",
      stand == "R" & zone == 2 ~ "high-middle",
      stand == "R" & zone == 3 ~ "high-inside",
      stand == "R" & zone == 4 ~ "middle-outside",
      stand == "R" & zone == 5 ~ "middle-middle",
      stand == "R" & zone == 6 ~ "middle-inside",
      stand == "R" & zone == 7 ~ "low-outside",
      stand == "R" & zone == 8 ~ "low-middle",
      stand == "R" & zone == 9 ~ "low-inside",
      stand == "R" & zone == 11 ~ "high-outside-ball",
      stand == "R" & zone == 12 ~ "high-inside-ball",
      stand == "R" & zone == 13 ~ "low-outside-ball",
      stand == "R" & zone == 14 ~ "low-inside-ball",
      TRUE ~ NA_character_
    ),
    # Every label above contains "inside", "outside" or "middle", so no other
    # fallback is needed. A missing location stays NA.
    inside_out = case_when(
      str_detect(pitch_relative_location, "inside") ~ "inside",
      str_detect(pitch_relative_location, "outside") ~ "outside",
      str_detect(pitch_relative_location, "middle") ~ "middle",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    # Bases credited for slugging. Rows without an event and walks are NA;
    # every other event counts as 0.
    slugging_value = case_when(
      is.na(events) ~ NA_real_,
      events == "single" ~ 1,
      events == "double" ~ 2,
      events == "triple" ~ 3,
      events == "home_run" ~ 4,
      events == "walk" ~ NA_real_,
      TRUE ~ 0
    ),
    # 1 when the event put the batter on base (hit, walk or hit by pitch),
    # 0 for any other event, NA when the row has no event.
    on_base_value = case_when(
      is.na(events) ~ NA_real_,
      events %in% c(
        "single", "double", "triple", "home_run", "walk", "hit_by_pitch"
      ) ~ 1,
      TRUE ~ 0
    ),
    # Readable pitch name. Any other pitch code keeps the literal string "NA"
    # (not a missing value) so downstream counts are unchanged.
    pitch = case_when(
      is.na(pitch_type) ~ NA_character_,
      pitch_type == "FF" ~ "Fastball",
      pitch_type == "ST" ~ "Sweeper",
      pitch_type == "FS" ~ "Splitter",
      TRUE ~ "NA"
    )
  )
# Usage table per pitch type: number of pitches and mean release speed,
# followed by each pitch type's share of all pitches.
summary_stats_all_pitches <- All_Cade_Smith %>%
  group_by(pitch_type) %>%
  summarise(
    count = n(),
    average_speed = mean(release_speed, na.rm = TRUE)
  )
summary_stats_all_pitches <- summary_stats_all_pitches %>%
  mutate(percentage = count / sum(count))
summary_stats_all_pitches
# Pitch usage split by batter stand: count and mean release speed for each
# stand / pitch-type pair, plus each pitch type's share within that stand.
left_right_stats <- All_Cade_Smith %>%
  group_by(stand, pitch_type) %>%
  summarize(
    count = n(),
    average_speed = mean(release_speed, na.rm = TRUE),
    # Keep the `stand` grouping on purpose so the share below is per stand.
    .groups = "drop_last"
  ) %>%
  mutate(percentage = count / sum(count)) %>%
  # Do not leak the grouping into later steps.
  ungroup()
left_right_stats
All_Cade_Smith

library(ggplot2)
library(dplyr)

# Team colour palette (hex codes) used by the charts.
guardians_blue <- "#002A5B"
guardians_red <- "#CF132D"
guardians_white <- "#FEFFFD"
guardians_city_red <- "#7F0D1C"
guardians_city_blue <- "#264383"
guardians_city_white <- "#DBD5A7"

# Calculate percentages: share of all pitches per pitch name, on a 0-100 scale
pitch_data <- All_Cade_Smith %>%
  count(pitch) %>%
  mutate(percentage = n / sum(n) * 100)

# Define custom colors for each pitch type.
# Both vectors are unnamed, so a manual fill scale assigns them by position.
custom_colors <- c(guardians_blue, guardians_red, guardians_city_white)
alternate_colors <- c(guardians_city_red, guardians_city_blue)
# Create pie chart with labels: one slice per pitch name, sized by its share
ggplot(pitch_data, aes(x = "", y = percentage, fill = pitch)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y") +
  labs(
    title = "Pitch Type Distribution",
    x = "",
    y = "Percentage",
    fill = "Pitch Type"
  ) +
  theme_void() +
  theme(
    legend.title = element_text(size = 12), # Legend title size
    plot.title = element_text(size = 20), # Large text for title
    text = element_text(size = 12) # Base size for other theme text
  ) +
  # Percentage label centred in each slice
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = guardians_white # Set text color to guardians white
  ) +
  scale_fill_manual(values = custom_colors)

# Side-by-side bars: pitch count per pitch type, one bar per batter stand
ggplot(left_right_stats, aes(x = pitch_type, y = count, fill = stand)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Pitch Type Count by Batter Stand",
    x = "Pitch Type",
    y = "Count",
    fill = "Batter Stand"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  ) +
  scale_fill_manual(values = alternate_colors)

# Observations:
- Cade Smith rarely throws the sweeper against lefties (3.89%).
- Cade Smith throws almost exclusively Fastballs and Splitters against lefties.
- Cade Smith faces slightly more righties than lefties.
library(dplyr)
library(readxl)
library(readr)
library(tidyr)
library(stringr)
library(ggplot2)
library(lubridate)
# Load the fastball pitch log from the first sheet of the workbook.
cade_smith_fastball <- read_excel("Favorite Pitches.xlsx", sheet = 1)

# Tidy the log: normalise placeholders, split handedness out of the batter
# name, add calendar fields, classify the plate-appearance outcome and count
# the runs mentioned in the play description.
cade_smith_fastball <- cade_smith_fastball %>%
  # str_replace_all() returns character vectors, so every column is character
  # after this step. Numeric columns are converted back with as.numeric() later.
  mutate(across(everything(), ~str_replace_all(., "Invalid Number", "N/A"))) %>%
  mutate(
    # Handedness is the "(L)", "(R)" or "(S)" tag inside the batter name.
    Handedness = case_when(
      str_detect(Batter, "\\(L\\)") ~ "L",
      str_detect(Batter, "\\(R\\)") ~ "R",
      str_detect(Batter, "\\(S\\)") ~ "S",
      TRUE ~ NA_character_
    ),
    # Drop the tag, leaving "Last, First".
    Batter = str_remove_all(Batter, "\\s*\\(L\\)|\\s*\\(R\\)|\\s*\\(S\\)")
  ) %>%
  # Rebuild the name as "First Last".
  separate(Batter, into = c("Last", "First"), sep = ", ") %>%
  mutate(Batter = paste(First, Last, sep = " ")) %>%
  select(-First, -Last) %>%
  select(Date, MPH, "Spin Rate", Batter, everything()) %>%
  # Date is text at this point. Characters 1-4 are read as the year and 6-7 as
  # the month number.
  # NOTE(review): presumably formatted "YYYY-MM-DD" -- confirm in the workbook.
  mutate(
    Year = substr(Date, 1, 4),
    Month = month.name[as.numeric(substr(Date, 6, 7))],
    Month_Year = paste(Month, Year, sep = " ")
  ) %>%
  # The first matching phrase in the play description wins. Anything
  # unmatched is NA.
  mutate(
    `Outcome of AB` = case_when(
      str_detect(Details, "strikes out") ~ "Strikeout",
      str_detect(Details, "lines out") ~ "Lineout",
      str_detect(Details, "flies out") ~ "Flyout",
      str_detect(Details, "grounds out") ~ "Groundout",
      str_detect(Details, "singles") ~ "Single",
      str_detect(Details, "doubles") ~ "Double",
      str_detect(Details, "triples") ~ "Triple",
      str_detect(Details, "homers") ~ "Home Run",
      str_detect(Details, "pops out") ~ "Popout",
      str_detect(Details, "walks") ~ "Walk",
      str_detect(Details, "called out on strikes") ~ "Strikeout",
      str_detect(Details, "grounds into a force out") ~ "Ground into Force Out",
      str_detect(Details, "grounds into a double play") ~ "GIDP",
      str_detect(Details, "a ground-rule double") ~ "Ground-Rule Double",
      TRUE ~ NA_character_
    )
  ) %>%
  # Number of times "scores" appears in the play description.
  mutate(Runs = str_count(Details, "scores"))

# Overall fastball averages. "N/A" placeholders become NA in as.numeric() and
# are skipped by na.rm = TRUE.
cade_smith_fastball %>%
  summarize(
    avg_spin_rate = mean(as.numeric(`Spin Rate`), na.rm = TRUE),
    avg_mph = mean(as.numeric(MPH), na.rm = TRUE)
  )

# Monthly Splits
# Monthly fastball averages (spin rate and MPH), ordered chronologically, with
# the change from the previous month in the table.
average_stats <- cade_smith_fastball %>%
  mutate(across(c(MPH, `Spin Rate`), as.numeric)) %>%
  group_by(Month_Year) %>%
  summarise(
    avg_spin_rate = mean(`Spin Rate`, na.rm = TRUE),
    avg_mph = mean(MPH, na.rm = TRUE)
  ) %>%
  mutate(
    # Month_Year is "<month name> <yyyy>": everything but the last five
    # characters is the month name, and the last four are the year.
    Month = match(str_sub(Month_Year, 1, -6), month.name),
    Year = str_sub(Month_Year, -4, -1),
    # First day of the month, used as the x position in the plots.
    Date_Format = as.Date(
      paste(Year, Month, "01", sep = "-"),
      format = "%Y-%m-%d"
    )
  ) %>%
  arrange(Year, Month) %>%
  mutate(
    change_MPH = avg_mph - lag(avg_mph),
    change_spin_rate = avg_spin_rate - lag(avg_spin_rate)
  )
average_stats

library(ggplot2)
library(gridExtra)

# Define custom colors
# NOTE: guardians_city_blue is reassigned here to a different hex code than the
# one set earlier in the file (#264383). From this point the name holds #0C2340.
guardians_city_red <- "#7F0D1C"
guardians_city_blue <- "#0C2340"

# Filter data for 2024 and 2025
average_stats_2024 <- average_stats %>% filter(Year == "2024")
average_stats_2025 <- average_stats %>% filter(Year == "2025")

# Define the full range of months for the x-axis
full_months <- seq.Date(
  from = as.Date("2024-01-01"),
  to = as.Date("2025-12-01"),
  by = "month"
)

# Line chart of one monthly average over time.
#   monthly_stats: data with a Date_Format column and the column named below
#   stat_column:   name (string) of the column drawn on the y-axis
#   line_color:    colour of the line
#   plot_title:    chart title
#   y_label:       y-axis label
#   month_breaks:  dates used as x-axis breaks
# Returns the ggplot object.
plot_monthly_average <- function(monthly_stats, stat_column, line_color,
                                 plot_title, y_label,
                                 month_breaks = full_months) {
  ggplot(monthly_stats, aes(x = Date_Format, y = .data[[stat_column]])) +
    geom_line(color = line_color) +
    scale_x_date(breaks = month_breaks, date_labels = "%b %Y") +
    labs(title = plot_title, x = "Date", y = y_label) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Plot for avg_mph in 2024
p1 <- plot_monthly_average(
  average_stats_2024, "avg_mph", guardians_city_red,
  "Average MPH Over Time (2024)", "Average MPH"
)
# Plot for avg_spin_rate in 2024
p2 <- plot_monthly_average(
  average_stats_2024, "avg_spin_rate", guardians_city_blue,
  "Average Spin Rate Over Time (2024)", "Average Spin Rate"
)
# Plot for avg_mph in 2025
p3 <- plot_monthly_average(
  average_stats_2025, "avg_mph", guardians_city_red,
  "Average MPH Over Time (2025)", "Average MPH"
)
# Plot for avg_spin_rate in 2025
p4 <- plot_monthly_average(
  average_stats_2025, "avg_spin_rate", guardians_city_blue,
  "Average Spin Rate Over Time (2025)", "Average Spin Rate"
)

# Arrange the plots in a 2x2 grid
all_averages <- grid.arrange(p1, p2, p3, p4, ncol = 2)
all_averages

# Observations:
- July 2025 was the month with both the highest average fastball velocity (MPH) and the highest average fastball spin rate.
Zone versus Batter Hand
library(dplyr)
# Count the pitches in `zone_pitches` per batter handedness and add each
# handedness' share of that zone's pitches.
# Returns one row per Handedness value with columns `count` and `percent`.
summarise_handedness <- function(zone_pitches) {
  zone_pitches %>%
    group_by(Handedness) %>%
    summarise(count = n()) %>%
    mutate(percent = count / sum(count))
}

# One filtered table and one handedness split per zone (1-9 and 11-14).
zone_one <- cade_smith_fastball %>% filter(Zone == 1)
handedness_count_1 <- summarise_handedness(zone_one)
handedness_count_1

zone_two <- cade_smith_fastball %>% filter(Zone == 2)
handedness_count_2 <- summarise_handedness(zone_two)
handedness_count_2

zone_three <- cade_smith_fastball %>% filter(Zone == 3)
handedness_count_3 <- summarise_handedness(zone_three)
handedness_count_3

zone_four <- cade_smith_fastball %>% filter(Zone == 4)
handedness_count_4 <- summarise_handedness(zone_four)
handedness_count_4

zone_five <- cade_smith_fastball %>% filter(Zone == 5)
handedness_count_5 <- summarise_handedness(zone_five)
handedness_count_5

zone_six <- cade_smith_fastball %>% filter(Zone == 6)
handedness_count_6 <- summarise_handedness(zone_six)
handedness_count_6

zone_seven <- cade_smith_fastball %>% filter(Zone == 7)
handedness_count_7 <- summarise_handedness(zone_seven)
handedness_count_7

zone_eight <- cade_smith_fastball %>% filter(Zone == 8)
handedness_count_8 <- summarise_handedness(zone_eight)
handedness_count_8

zone_nine <- cade_smith_fastball %>% filter(Zone == 9)
handedness_count_9 <- summarise_handedness(zone_nine)
handedness_count_9

zone_eleven <- cade_smith_fastball %>% filter(Zone == 11)
handedness_count_11 <- summarise_handedness(zone_eleven)
handedness_count_11

zone_twelve <- cade_smith_fastball %>% filter(Zone == 12)
handedness_count_12 <- summarise_handedness(zone_twelve)
handedness_count_12

zone_thirteen <- cade_smith_fastball %>% filter(Zone == 13)
handedness_count_13 <- summarise_handedness(zone_thirteen)
handedness_count_13

zone_fourteen <- cade_smith_fastball %>% filter(Zone == 14)
handedness_count_14 <- summarise_handedness(zone_fourteen)
handedness_count_14

# Observations:
- Cade Smith's biggest zone discrepancy is in zone three. When he throws to this zone, 74.65% of the time it is against a right-handed batter. This is an up-and-in fastball for righties.
- In zone four, which is the middle-inside part of the zone for a left-hander, Cade Smith is facing a left-hander 64.21% of the time. This is striking because he faces lefties only 46% of the time overall, so this zone is prioritized against left-handers.