Skip to content

Cade Smith Fastball

All data as of 08.21.2025

All of Cade Smith's Pitches

library(readr)
library(dplyr)
library(stringr) 
library(ggplot2)

# ---- Lookup tables used to derive columns of All_Cade_Smith ----

# `zone` ids treated as being inside the strike zone.
strike_zone_ids <- 1:9

# `description` values graded as a strike regardless of pitch location.
always_strike_descriptions <- c(
  "foul", "swinging_strike", "swinging_strike_blocked", "foul_tip", "foul_bunt"
)

# Batter-relative label for every zone id, keyed by as.character(zone).
# NOTE(review): Statcast documents `zone` from the catcher's perspective;
# confirm that zones 1/4/7 really are inside to left-handed batters. The two
# tables below reproduce the original mapping unchanged.
zone_labels_vs_lhb <- c(
  "1" = "high-inside", "2" = "high-middle", "3" = "high-outside",
  "4" = "middle-inside", "5" = "middle-middle", "6" = "middle-outside",
  "7" = "low-inside", "8" = "low-middle", "9" = "low-outside",
  "11" = "high-inside-ball", "12" = "high-outside-ball",
  "13" = "low-inside-ball", "14" = "low-outside-ball"
)
zone_labels_vs_rhb <- c(
  "1" = "high-outside", "2" = "high-middle", "3" = "high-inside",
  "4" = "middle-outside", "5" = "middle-middle", "6" = "middle-inside",
  "7" = "low-outside", "8" = "low-middle", "9" = "low-inside",
  "11" = "high-outside-ball", "12" = "high-inside-ball",
  "13" = "low-outside-ball", "14" = "low-inside-ball"
)

# Display names for the pitch-type codes handled by this analysis.
pitch_names <- c(FF = "Fastball", ST = "Sweeper", FS = "Splitter")

# `events` values that count as the batter reaching base.
on_base_events <- c(
  "single", "double", "triple", "home_run", "walk", "hit_by_pitch"
)

# Pitch-by-pitch data read from All_Cade_Smith.csv, reduced to the columns
# used in this analysis and extended with derived columns (sequencing, count,
# zone grading, batter-relative location, slugging / on-base values, and a
# readable pitch name).
# NOTE(review): `details` is given a column type but is never selected below;
# confirm the CSV actually has that column.
All_Cade_Smith <- read_csv("All_Cade_Smith.csv", col_types = cols(
  details = col_character(),
  description = col_character(),
  events = col_character()
)) %>%
  select(
    pitch_type, game_date, release_speed, batter, events, description, zone,
    des, stand, type, hit_location, bb_type, balls, strikes, game_year,
    on_3b, on_2b, on_1b, outs_when_up, inning, inning_topbot,
    hit_distance_sc, launch_speed, launch_angle, effective_speed,
    release_spin_rate, estimated_ba_using_speedangle,
    estimated_woba_using_speedangle, woba_value, home_score, away_score,
    delta_run_exp, estimated_slg_using_speedangle, delta_pitcher_run_exp
  ) %>%
  # Sequencing columns. `lead()` / `lag()` look one row down / up, so which
  # one is "next" depends on the row order of the CSV.
  # NOTE(review): `next_pitch` and `count_after_pitch` use lag(), which is
  # only "next" if the file is sorted newest-first — confirm.
  mutate(
    des_date = paste(des, game_date, sep = " - "),
    lead_batter = lead(batter),
    pitchtype_zone = paste(pitch_type, zone, sep = " "),
    next_pitch = lag(pitchtype_zone),
    count = paste(balls, strikes, sep = " - "),
    count_after_pitch = lag(count)
  ) %>%
  mutate(
    # What the pitch "should" have been: "S" for the always-strike
    # descriptions or for a called strike / ball located in zones 1-9,
    # otherwise "X" when `type` is "X", otherwise "B". A missing `type`
    # stays missing, exactly as the original nested ifelse() behaved.
    in_out_zone = case_when(
      description %in% always_strike_descriptions ~ "S",
      zone %in% strike_zone_ids &
        description %in% c("called_strike", "ball") ~ "S",
      is.na(type) ~ NA_character_,
      type == "X" ~ "X",
      TRUE ~ "B"
    ),
    # "Yes" when the recorded `type` agrees with the location-based grade.
    called_right = if_else(in_out_zone == type, "Yes", "No"),
    # Table lookup replaces the 26-level nested ifelse(); any stand / zone
    # combination not listed in the tables (including NA) yields NA.
    pitch_relative_location = case_when(
      stand == "L" ~ unname(zone_labels_vs_lhb[as.character(zone)]),
      stand == "R" ~ unname(zone_labels_vs_rhb[as.character(zone)]),
      TRUE ~ NA_character_
    ),
    # Horizontal part of the label only; a real NA (not the text "NA") is
    # used when there is no label to inspect.
    inside_out = case_when(
      is.na(pitch_relative_location) ~ NA_character_,
      str_detect(pitch_relative_location, "inside") ~ "inside",
      str_detect(pitch_relative_location, "outside") ~ "outside",
      str_detect(pitch_relative_location, "middle") ~ "middle",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    # Bases credited for slugging. Walks are NA, as are rows with no event.
    # The original produced the walk NA by coercing the text "null", which
    # raised a coercion warning on every run.
    slugging_value = case_when(
      is.na(events) ~ NA_real_,
      events == "single" ~ 1,
      events == "double" ~ 2,
      events == "triple" ~ 3,
      events == "home_run" ~ 4,
      events == "walk" ~ NA_real_,
      TRUE ~ 0
    ),
    # 1 when the event put the batter on base, 0 for any other event,
    # NA for rows with no event.
    on_base_value = case_when(
      is.na(events) ~ NA_real_,
      events %in% on_base_events ~ 1,
      TRUE ~ 0
    ),
    # Readable pitch name. Codes outside `pitch_names` become a real NA
    # instead of the literal string "NA", so they cannot appear as an extra
    # category in counts or plot legends.
    pitch = unname(pitch_names[pitch_type])
  )

# Pitch count, mean release speed and usage share for each combination of the
# grouping columns passed through `...`. With several grouping columns the
# result stays grouped by all but the last one, so `percentage` is the share
# within that remaining group.
summarise_pitch_usage <- function(pitches, ...) {
  usage <- pitches %>%
    group_by(...) %>%
    summarize(count = n(), average_speed = mean(release_speed, na.rm = TRUE))
  usage %>%
    mutate(percentage = count / sum(count, na.rm = TRUE))
}

# Usage by pitch type across all batters.
summary_stats_all_pitches <- summarise_pitch_usage(All_Cade_Smith, pitch_type)
summary_stats_all_pitches

# Usage by pitch type within each batter stand (shares sum to 1 per stand).
left_right_stats <- summarise_pitch_usage(All_Cade_Smith, stand, pitch_type)
left_right_stats

# Print the full pitch table for inspection.
All_Cade_Smith
library(ggplot2)
library(dplyr)

# Colour constants: the three primary values first, then the "city" variants.
guardians_blue       <- "#002A5B"
guardians_red        <- "#CF132D"
guardians_white      <- "#FEFFFD"
guardians_city_red   <- "#7F0D1C"
guardians_city_blue  <- "#264383"
guardians_city_white <- "#DBD5A7"

# Share of all pitches (in percent) for each readable pitch name.
pitch_data <- All_Cade_Smith %>%
  group_by(pitch) %>%
  summarise(n = n()) %>%
  mutate(percentage = n / sum(n) * 100)

# Fill palettes: one colour per pitch for the pie chart, and a two-colour
# palette for the batter-stand chart.
custom_colors <- c(guardians_blue, guardians_red, guardians_city_white)
alternate_colors <- c(guardians_city_red, guardians_city_blue)

# Pie chart of pitch usage: a single stacked bar wrapped onto polar
# coordinates, with each slice labelled by its rounded percentage.
ggplot(pitch_data, aes(x = "", y = percentage, fill = pitch)) +
  geom_col(width = 1) +
  # Labels sit at the midpoint of each stacked segment, in guardians white.
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = guardians_white
  ) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Pitch Type Distribution",
    x = "",
    y = "Percentage",
    fill = "Pitch Type"
  ) +
  theme_void() +
  theme(
    legend.title = element_text(size = 12), # legend heading
    plot.title = element_text(size = 20),   # large chart title
    text = element_text(size = 12)          # medium text everywhere else
  )
# Side-by-side bars of pitch counts per pitch type, one bar per batter stand.
ggplot(left_right_stats, aes(x = pitch_type, y = count, fill = stand)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = alternate_colors) +
  labs(
    title = "Pitch Type Count by Batter Stand",
    x = "Pitch Type",
    y = "Count",
    fill = "Batter Stand"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )

Observations:

  1. Cade Smith rarely throws the sweeper against lefties (3.89%).
  2. Cade Smith throws almost exclusively fastballs and splitters against lefties.
  3. Cade Smith faces slightly more righties than lefties.
library(dplyr)
library(readxl)
library(readr)
library(tidyr)
library(stringr)
library(ggplot2)
library(lubridate)

# Replace the spreadsheet's "Invalid Number" placeholder. Cells that consist
# only of the placeholder become a real NA, so later as.numeric() calls no
# longer raise "NAs introduced by coercion" warnings; a placeholder embedded
# in longer text is still rewritten to "N/A" as before. Like the original
# str_replace_all() step, this returns every column as character.
invalid_number_to_na <- function(x) {
  na_if(str_replace_all(x, "Invalid Number", "N/A"), "N/A")
}

# Pitch log read from the first sheet of "Favorite Pitches.xlsx", cleaned and
# extended with batter handedness, a "First Last" batter name, year / month
# labels, the at-bat outcome parsed from `Details`, and a count of "scores"
# mentions in `Details`.
cade_smith_fastball <- read_excel("Favorite Pitches.xlsx", sheet = 1) %>%
  mutate(across(everything(), invalid_number_to_na)) %>%
  # Handedness is taken from the "(L)" / "(R)" / "(S)" marker in `Batter`.
  mutate(Handedness = case_when(
    str_detect(Batter, "\\(L\\)") ~ "L",
    str_detect(Batter, "\\(R\\)") ~ "R",
    str_detect(Batter, "\\(S\\)") ~ "S",
    TRUE ~ NA_character_
  )) %>%
  # Drop the marker, then turn "Last, First" into "First Last".
  mutate(
    Batter = str_remove_all(Batter, "\\s*\\(L\\)|\\s*\\(R\\)|\\s*\\(S\\)")
  ) %>%
  separate(Batter, into = c("Last", "First"), sep = ", ") %>%
  mutate(Batter = paste(First, Last, sep = " ")) %>%
  select(-First, -Last) %>%
  select(Date, MPH, `Spin Rate`, Batter, everything()) %>%
  # `Date` is text at this point; characters 1-4 are read as the year and
  # 6-7 as the month number.
  # NOTE(review): assumes the text looks like "YYYY-MM-DD" — confirm.
  mutate(
    Year = substr(Date, 1, 4),
    Month = month.name[as.numeric(substr(Date, 6, 7))],
    Month_Year = paste(Month, Year, sep = " ")
  ) %>%
  # The first matching phrase wins; rows matching none of them get NA.
  mutate(`Outcome of AB` = case_when(
    str_detect(Details, "strikes out") ~ "Strikeout",
    str_detect(Details, "lines out") ~ "Lineout",
    str_detect(Details, "flies out") ~ "Flyout",
    str_detect(Details, "grounds out") ~ "Groundout",
    str_detect(Details, "singles") ~ "Single",
    str_detect(Details, "doubles") ~ "Double",
    str_detect(Details, "triples") ~ "Triple",
    str_detect(Details, "homers") ~ "Home Run",
    str_detect(Details, "pops out") ~ "Popout",
    str_detect(Details, "walks") ~ "Walk",
    str_detect(Details, "called out on strikes") ~ "Strikeout",
    str_detect(Details, "grounds into a force out") ~ "Ground into Force Out",
    str_detect(Details, "grounds into a double play") ~ "GIDP",
    str_detect(Details, "a ground-rule double") ~ "Ground-Rule Double",
    TRUE ~ NA_character_
  )) %>%
  # Number of times the word "scores" appears in the play description.
  mutate(Runs = str_count(Details, "scores"))
# Overall mean spin rate and velocity. Both columns are stored as text, so
# they are converted to numbers first and missing values are ignored.
cade_smith_fastball %>%
  transmute(
    spin = as.numeric(`Spin Rate`),
    velo = as.numeric(MPH)
  ) %>%
  summarize(
    avg_spin_rate = mean(spin, na.rm = TRUE),
    avg_mph = mean(velo, na.rm = TRUE)
  )

Monthly Splits

# Mean spin rate and velocity per "Month Year" label, ordered chronologically,
# with the change from the previous row's averages.
average_stats <- cade_smith_fastball %>%
  mutate(across(c(MPH, `Spin Rate`), as.numeric)) %>%
  group_by(Month_Year) %>%
  summarise(
    avg_spin_rate = mean(`Spin Rate`, na.rm = TRUE),
    avg_mph = mean(MPH, na.rm = TRUE)
  ) %>%
  mutate(
    # Split the label back apart: everything before the trailing " YYYY" is
    # the month name (converted to its number), the last four characters are
    # the year.
    Month = match(str_sub(Month_Year, 1, -6), month.name),
    Year = str_sub(Month_Year, -4, -1),
    # First day of the month, used as the x position in the plots.
    Date_Format = as.Date(
      paste(Year, Month, "01", sep = "-"),
      format = "%Y-%m-%d"
    )
  ) %>%
  arrange(Year, Month) %>%
  mutate(
    change_MPH = avg_mph - lag(avg_mph),
    change_spin_rate = avg_spin_rate - lag(avg_spin_rate)
  )

average_stats
library(ggplot2)
library(gridExtra)

# Line colours for the monthly trend plots.
# NOTE(review): guardians_city_blue is assigned "#264383" earlier in the file
# and "#0C2340" here — confirm which value is intended.
guardians_city_red <- "#7F0D1C"
guardians_city_blue <- "#0C2340"

# One season of monthly averages per data frame.
average_stats_2024 <- average_stats %>% filter(Year == "2024")
average_stats_2025 <- average_stats %>% filter(Year == "2025")

# Month starts from January 2024 through December 2025, used as axis breaks.
full_months <- seq.Date(
  from = as.Date("2024-01-01"),
  to = as.Date("2025-12-01"),
  by = "month"
)

# Line chart of one monthly-average column for one season.
#   monthly_stats: rows of average_stats for the season
#   metric:        name of the column to plot on the y axis
#   metric_label:  y-axis label, also used as the start of the title
#   season:        year shown in the title
#   line_color:    colour of the line
monthly_trend_plot <- function(monthly_stats, metric, metric_label, season,
                               line_color) {
  ggplot(monthly_stats, aes(x = Date_Format, y = .data[[metric]])) +
    geom_line(color = line_color) +
    scale_x_date(breaks = full_months, date_labels = "%b %Y") +
    labs(
      title = paste0(metric_label, " Over Time (", season, ")"),
      x = "Date",
      y = metric_label
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# 2024: velocity, then spin rate.
p1 <- monthly_trend_plot(
  average_stats_2024, "avg_mph", "Average MPH", 2024, guardians_city_red
)
p2 <- monthly_trend_plot(
  average_stats_2024, "avg_spin_rate", "Average Spin Rate", 2024,
  guardians_city_blue
)

# 2025: velocity, then spin rate.
p3 <- monthly_trend_plot(
  average_stats_2025, "avg_mph", "Average MPH", 2025, guardians_city_red
)
p4 <- monthly_trend_plot(
  average_stats_2025, "avg_spin_rate", "Average Spin Rate", 2025,
  guardians_city_blue
)

# Draw the four plots in a 2x2 grid (seasons as rows, metrics as columns).
all_averages <- grid.arrange(p1, p2, p3, p4, ncol = 2)
all_averages

Observations:

  1. July 2025 had both the highest average fastball velocity (MPH) and the highest average fastball spin rate of any month.

Zone versus Batter Hand

library(dplyr)

# Fastballs thrown to a single zone. `Zone` is compared against a number with
# `==`, exactly as in the original per-zone filters.
pitches_in_zone <- function(zone_id) {
  cade_smith_fastball %>% filter(Zone == zone_id)
}

# Number of pitches per batter handedness and each handedness's share of the
# total for the pitches supplied.
handedness_share <- function(zone_pitches) {
  zone_pitches %>%
    count(Handedness, name = "count") %>%
    mutate(percent = count / sum(count))
}

# The helpers above replace thirteen copy-pasted filter / summarise stanzas.
# Every per-zone object keeps its original name and each table is still
# printed in zone order.

zone_one <- pitches_in_zone(1)
handedness_count_1 <- handedness_share(zone_one)
handedness_count_1

zone_two <- pitches_in_zone(2)
handedness_count_2 <- handedness_share(zone_two)
handedness_count_2

zone_three <- pitches_in_zone(3)
handedness_count_3 <- handedness_share(zone_three)
handedness_count_3

zone_four <- pitches_in_zone(4)
handedness_count_4 <- handedness_share(zone_four)
handedness_count_4

zone_five <- pitches_in_zone(5)
handedness_count_5 <- handedness_share(zone_five)
handedness_count_5

zone_six <- pitches_in_zone(6)
handedness_count_6 <- handedness_share(zone_six)
handedness_count_6

zone_seven <- pitches_in_zone(7)
handedness_count_7 <- handedness_share(zone_seven)
handedness_count_7

zone_eight <- pitches_in_zone(8)
handedness_count_8 <- handedness_share(zone_eight)
handedness_count_8

zone_nine <- pitches_in_zone(9)
handedness_count_9 <- handedness_share(zone_nine)
handedness_count_9

zone_eleven <- pitches_in_zone(11)
handedness_count_11 <- handedness_share(zone_eleven)
handedness_count_11

zone_twelve <- pitches_in_zone(12)
handedness_count_12 <- handedness_share(zone_twelve)
handedness_count_12

zone_thirteen <- pitches_in_zone(13)
handedness_count_13 <- handedness_share(zone_thirteen)
handedness_count_13

zone_fourteen <- pitches_in_zone(14)
handedness_count_14 <- handedness_share(zone_fourteen)
handedness_count_14

Observations:

  1. Cade Smith's biggest zone discrepancy is zone three: when he throws to this zone, 74.65% of the time it is against a right-handed batter. This is an up-and-in fastball for righties.
  2. In zone four, which is the inside, middle-height part of the zone, Cade Smith is facing a left-hander 64.21% of the time. This is striking because he faces lefties only 46% of the time overall, so this zone is disproportionately targeted against left-handers.