# Skip to content (page-navigation text captured with the script; not R code)
suppressPackageStartupMessages(library(tidyverse))
library(readr)
library(data.table)
install.packages("readxl")
library(readxl)
library(dplyr)
library(ggplot2)
install.packages("geomtextpath")
library(geomtextpath)
install.packages("ggrepel")
library(ggrepel)

# Load worksheets 1-6 of the MLB workbook, one data frame per sheet.
# Sheets are addressed by position; the first call originally relied on
# read_excel()'s default of reading the first sheet.
mlb_player_stats.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 1
)
savant_stats.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 2
)
defense_and_running <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 3
)
fangraphs.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 4
)
mlb_teams.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 5
)
mlb_standings.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 6
)

# WAR rate per player: WAR divided by games played, then scaled to a
# 162-game season. Rows are sorted from the highest 162-game rate down.
# NOTE(review): a player with G == 0 would produce Inf/NaN here - confirm the
# sheet never contains such rows.
war_per_game.df <- mlb_player_stats.df %>%
  select(NAME, Pos, WAR, G, Team) %>%
  mutate(
    war_per_game = WAR / G,
    war_per_162 = war_per_game * 162
  ) %>%
  arrange(desc(war_per_162))

# The 50 largest war_per_162 values (slice_max() keeps ties by default, so
# more than 50 rows can come back).
top50warper162 <- slice_max(war_per_game.df, order_by = war_per_162, n = 50)
top50warper162

# Scatter plot of WAR against games played for the rows in top50warper162.
# Only players with a WAR of at least 5.5 get a repelled name label.
ggplot(top50warper162, aes(x = G, y = WAR)) +
  geom_point(size = 2, color = "#2e2375") +
  # geom_text_repel() draws bare text and has no fill setting (that belongs to
  # geom_label_repel()), so the stray `fill = "White"` argument was removed;
  # it only produced an "unknown parameters" warning.
  geom_text_repel(
    # Players below the 5.5 WAR cut-off get an NA label, i.e. no text.
    aes(label = ifelse(WAR < 5.5, NA, NAME)),
    min.segment.length = 0,
    size = 5,
    nudge_x = -0.5,
    nudge_y = 0.5,
    force = 50,
    max.iter = 5000,
    color = "#2e2375"
  ) +
  labs(
    title = "War versus Games Played",
    x = "Games Played",
    y = "WAR",
    caption = "Data Collected from Baseball Reference"
  ) +
  theme(
    # Line widths use `linewidth`; `size` for lines and rectangle borders is
    # deprecated in current ggplot2.
    panel.background = element_rect(
      fill = "#89869c",
      colour = "#89869c",
      linewidth = 0.5,
      linetype = "solid"
    ),
    panel.grid.major = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "white"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.25,
      linetype = "solid",
      colour = "white"
    ),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, vjust = 2.0, size = 20)
  )
suppressPackageStartupMessages(library(tidyverse))
library(readr)
library(data.table)
install.packages("readxl")
library(readxl)
library(dplyr)
library(ggplot2)
install.packages("geomtextpath")
library(geomtextpath)
install.packages("ggrepel")
library(ggrepel)
library(tibble)

# Read one player's worksheet from the splits workbook and tag every row with
# the player's full name and initials.
#
# @param sheet Worksheet position in "Fantasy Baseball Team Splits.xlsx".
# @param player_name Value written to the added `name` column.
# @param player_short Value written to the added `short_name` column.
# @return The sheet contents with `name` and `short_name` appended as the
#   last two columns.
read_player_splits <- function(sheet, player_name, player_short) {
  read_excel("Fantasy Baseball Team Splits.xlsx", sheet = sheet) %>%
    # .env$ forces the function arguments to be used even if a worksheet
    # happens to contain a column with the same name.
    mutate(name = .env$player_name, short_name = .env$player_short)
}

# One data frame per rostered hitter; each player has a dedicated sheet.
william_contreras_splits <- read_player_splits(1, "William Contreras", "WC")
pete_alonso_splits <- read_player_splits(2, "Pete Alonso", "PA")
gleyber_torres_splits <- read_player_splits(3, "Gleyber Torres", "GT")
austin_riley_splits <- read_player_splits(4, "Austin Riley", "AR")
geraldo_perdomo_splits <- read_player_splits(5, "Geraldo Perdomo", "GP")
trent_grisham_splits <- read_player_splits(6, "Trent Grisham", "TG")
fernando_tatis_jr_splits <- read_player_splits(7, "Fernando Tatis Jr.", "FTJ")
james_wood_splits <- read_player_splits(8, "James Wood", "JW")
ben_rice_splits <- read_player_splits(9, "Ben Rice", "BR")
elly_de_la_cruz_splits <- read_player_splits(10, "Elly De La Cruz", "EDLC")
austin_hays_splits <- read_player_splits(11, "Austin Hays", "AH")
gunnar_henderson_splits <- read_player_splits(12, "Gunnar Henderson", "GH")
ryan_ohearn_splits <- read_player_splits(13, "Ryan O'Hearn", "ROH")

# Stack the thirteen per-player tables row-wise into one data frame,
# William Contreras first, then the rest in the order listed.
fantasy_splits_df <- rbind(
  william_contreras_splits,
  pete_alonso_splits,
  gleyber_torres_splits,
  austin_riley_splits,
  geraldo_perdomo_splits,
  trent_grisham_splits,
  fernando_tatis_jr_splits,
  james_wood_splits,
  ben_rice_splits,
  elly_de_la_cruz_splits,
  austin_hays_splits,
  gunnar_henderson_splits,
  ryan_ohearn_splits
)

# Show the combined column names (the positional reorder below relies on
# `name` being column 23 and `short_name` column 24).
colnames(fantasy_splits_df)

# Clean the stacked splits and derive the fantasy scoring columns.
fantasy_splits_df <- fantasy_splits_df[, c(23, 1:22, 24)] %>%
  # Drop every row that has an NA in any column.
  na.omit() %>%
  # Rows whose Season cell literally reads "Season" are removed - presumably
  # header rows repeated inside the sheets; verify against the workbook.
  filter(!is.na(Season), Season != "Season") %>%
  # "Total" rows are relabelled "Career"; other seasons lose a ".0" suffix.
  mutate(
    Season = ifelse(Season == "Total", "Career", str_remove(Season, '\\.0'))
  ) %>%
  # Discard the four "vs <pitcher hand> as <batter side>" split rows.
  filter(
    !Handedness %in% c("vs L as R", "vs L as L", "vs R as R", "vs R as L")
  ) %>%
  # Counting stats must be numeric before any arithmetic.
  mutate(
    across(
      c(
        G, AB, H, `1B`, `2B`, `3B`, HR, R, RBI, BB, IBB, SO, HBP, SF, SH,
        GDP, SB, CS
      ),
      as.numeric
    )
  ) %>%
  mutate(
    # Total bases: singles + 2 * doubles + 3 * triples + 4 * home runs.
    Bases = `1B` + (2 * `2B`) + (3 * `3B`) + (4 * HR),
    SLG = round(Bases / AB, 3),
    OBP = round((H + BB + HBP) / (AB + BB + HBP + SF), 3),
    OPS = SLG + OBP,
    # League scoring as encoded here: +1 per base, walk, run, RBI, steal,
    # intentional walk and hit-by-pitch; -1 per strikeout and caught
    # stealing; -2 per double play grounded into.
    fantasy_points = Bases + BB + R + RBI + SB - SO - CS + IBB + HBP -
      (2 * GDP),
    fantasy_ppg = fantasy_points / G,
    fantasy_ppab = fantasy_points / AB
  )

# One data frame per split type, selected on the Handedness label
# (that column also carries the Home/Away split labels).
vsLeft <- filter(fantasy_splits_df, Handedness == "vs L")
vsRight <- filter(fantasy_splits_df, Handedness == "vs R")
Home <- filter(fantasy_splits_df, Handedness == "Home")
Away <- filter(fantasy_splits_df, Handedness == "Away")
HomevsLeft <- filter(fantasy_splits_df, Handedness == "Home vs L")
HomevsRight <- filter(fantasy_splits_df, Handedness == "Home vs R")
AwayvsRight <- filter(fantasy_splits_df, Handedness == "Away vs R")
AwayvsLeft <- filter(fantasy_splits_df, Handedness == "Away vs L")

# All 2025 split rows, best fantasy points per at-bat first.
StatsThisYear <- fantasy_splits_df %>%
  filter(Season == 2025) %>%
  arrange(desc(fantasy_ppab))

StatsThisYear

# Position groups, picked out of the 2025 rows by player name.
Outfielders <- filter(
  StatsThisYear,
  name %in% c("Trent Grisham", "Fernando Tatis Jr.", "Austin Hays", "James Wood")
)

Shortstops <- filter(
  StatsThisYear,
  name %in% c("Geraldo Perdomo", "Elly De La Cruz", "Gunnar Henderson")
)

Firstbasemen <- filter(
  StatsThisYear,
  name %in% c("Pete Alonso", "Ben Rice", "Ryan O'Hearn")
)

# Fill colour per player, shared by both split charts below.
# The vector is named so each colour stays attached to its player even when a
# player is absent from a subset; an unnamed palette is matched purely by
# position and would silently shift colours onto the wrong players.
# The pairing reproduces what the positional palette gave with all thirteen
# players present (names in sorted order).
# NOTE(review): the Geraldo Perdomo / Gleyber Torres colours look like they may
# be swapped relative to the rest of the palette - confirm the intended pair.
player_fill_colours <- c(
  "Austin Hays" = "#e70019",
  "Austin Riley" = "#13234b",
  "Ben Rice" = "#ccced4",
  "Elly De La Cruz" = "#e70019",
  "Fernando Tatis Jr." = "#fcc424",
  "Geraldo Perdomo" = "#df4601",
  "Gleyber Torres" = "#a71930",
  "Gunnar Henderson" = "#df4601",
  "James Wood" = "#091a5c",
  "Pete Alonso" = "#ff5910",
  "Ryan O'Hearn" = "#df4601",
  "Trent Grisham" = "#ccced4",
  "William Contreras" = "#f8c610"
)

# Fantasy points per at-bat against right-handed pitching, one bar per player
# (a player's rows are stacked by geom_col()).
ggplot(vsRight, aes(short_name, fantasy_ppab, fill = name)) +
  geom_col() +
  scale_fill_manual(values = player_fill_colours)

# Same chart for the splits against left-handed pitching.
ggplot(vsLeft, aes(short_name, fantasy_ppab, fill = name)) +
  geom_col() +
  scale_fill_manual(values = player_fill_colours)

# Pitcher counting stats: rename the raw CSV columns to short stat names,
# coerce them to numeric, and score the fantasy points each pitcher allows.
PitcherStats <- read.csv("PitcherStats.csv") %>%
  mutate(
    name = as.character(last_name..first_name),
    K = as.numeric(strikeout),
    BB = as.numeric(walk),
    SLG = round(as.numeric(slg_percent), 3),
    R = as.numeric(p_run),
    SB = as.numeric(p_total_stolen_base),
    RBI = as.numeric(p_rbi),
    GIDP = as.numeric(p_gnd_into_dp),
    IBB = as.numeric(p_intent_walk),
    TB = as.numeric(p_total_bases),
    CS = as.numeric(p_total_caught_stealing),
    # Mirror of the hitter scoring: strikeouts and caught-stealing count -1,
    # double plays -2, everything else +1.
    fantasy_points_allowed = (-1 * (K + CS)) + (-2 * GIDP) + SB + RBI + IBB +
      BB + TB + R
  ) %>%
  # Keep only the renamed columns, in this order.
  select(
    name, K, BB, SLG, R, SB, RBI, GIDP, IBB, TB, CS, fantasy_points_allowed
  )

SavantStatsPitcher <- read.csv("Savant_Stats.csv")
PitcherStats
# Rate stats from the second CSV, rounded to three decimals; the raw columns
# are kept alongside the renamed ones.
SavantStatsPitcher <- mutate(
  SavantStatsPitcher,
  name = as.character(last_name..first_name),
  G = as.numeric(p_game),
  AVG = round(as.numeric(batting_avg), 3),
  SLG = round(as.numeric(slg_percent), 3),
  OBP = round(as.numeric(on_base_percent), 3),
  OPS = round(as.numeric(on_base_plus_slg), 3),
  xBA = round(as.numeric(xba), 3),
  xSLG = round(as.numeric(xslg), 3),
  wOBA = round(as.numeric(woba), 3),
  xwOBA = round(as.numeric(xwoba), 3)
)

elly_de_la_cruz_splits
# Ensure that the necessary libraries are loaded
suppressPackageStartupMessages(library(tidyverse))
library(readr)
library(data.table)
install.packages("readxl")
library(readxl)
library(dplyr)
library(ggplot2)
install.packages("geomtextpath")
library(geomtextpath)
install.packages("ggrepel")
library(ggrepel)
library(tibble)

# Pitcher counting stats: rename the raw CSV columns to short stat names,
# coerce them to numeric, and score the fantasy points each pitcher allows.
PitcherStats <- read.csv("PitcherStats.csv") %>%
  mutate(
    name = as.character(last_name..first_name),
    K = as.numeric(strikeout),
    BB = as.numeric(walk),
    SLG = round(as.numeric(slg_percent), 3),
    R = as.numeric(p_run),
    SB = as.numeric(p_total_stolen_base),
    RBI = as.numeric(p_rbi),
    GIDP = as.numeric(p_gnd_into_dp),
    IBB = as.numeric(p_intent_walk),
    TB = as.numeric(p_total_bases),
    CS = as.numeric(p_total_caught_stealing),
    # Strikeouts and caught-stealing count -1, double plays -2, the rest +1.
    fantasy_points_allowed = (-1 * (K + CS)) + (-2 * GIDP) + SB + RBI + IBB +
      BB + TB + R
  ) %>%
  # Keep only the renamed columns, in this order.
  select(
    name, K, BB, SLG, R, SB, RBI, GIDP, IBB, TB, CS, fantasy_points_allowed
  )

# Rate stats from the second CSV, rounded to three decimals; the raw columns
# are kept alongside the renamed ones.
SavantStatsPitcher <- read.csv("Savant_Stats.csv") %>%
  mutate(
    name = as.character(last_name..first_name),
    G = as.numeric(p_game),
    AVG = round(as.numeric(batting_avg), 3),
    SLG = round(as.numeric(slg_percent), 3),
    OBP = round(as.numeric(on_base_percent), 3),
    OPS = round(as.numeric(on_base_plus_slg), 3),
    xBA = round(as.numeric(xba), 3),
    xSLG = round(as.numeric(xslg), 3),
    wOBA = round(as.numeric(woba), 3),
    xwOBA = round(as.numeric(xwoba), 3)
  )

# Join the two pitcher tables on `name` and derive actual and "expected"
# fantasy points allowed per game, plus two-letter initials for plotting.
all_pitcher_stats <- SavantStatsPitcher %>%
  left_join(PitcherStats, by = "name") %>%
  # Both inputs carry an SLG column; keep the left-hand table's (SLG.x) under
  # the plain name.
  select(
    name, G, AVG, SLG = SLG.x, OBP, OPS, xBA, xSLG, wOBA, xwOBA,
    K, BB, R, SB, RBI, GIDP, IBB, TB, CS, fantasy_points_allowed
  ) %>%
  mutate(
    fantasy_ppg = fantasy_points_allowed / G,
    # At-bats are back-calculated from total bases and (rounded) SLG.
    AB = TB / SLG,
    # Expected total bases: the same at-bats at the expected slugging rate.
    xBases = AB * xSLG,
    # Same scoring formula as fantasy_points_allowed with xBases in place
    # of TB.
    xFantasyPPA = (-1 * (K + CS)) + (-2 * GIDP) + SB + RBI + IBB + BB +
      xBases + R,
    xFantasyPPAPG = xFantasyPPA / G,
    `xFanPPAPG - FPApg` = xFantasyPPAPG - fantasy_ppg,
    short_name = name
  ) %>%
  # `name` comes from the "last_name, first_name" column, so split on the
  # comma. Splitting on a single space broke multi-word surnames (the second
  # surname word was taken as the first name) and raised an "additional
  # pieces discarded" warning. extra = "merge" keeps any further commas
  # inside the first-name part.
  separate(
    col = short_name,
    into = c("Last", "First"),
    sep = ",\\s*",
    extra = "merge"
  ) %>%
  # Reduce both parts to their first letter and glue them as First+Last.
  mutate(
    First = substr(First, 1, 1),
    Last = substr(Last, 1, 1),
    short_name = paste0(First, Last)
  )

all_pitcher_stats

# The ten pitchers with the LOWEST expected fantasy points allowed per game.
# slice_min() orders its own result, so the former arrange(desc(...)) in front
# of it was dead code. with_ties = FALSE caps the result at exactly ten rows,
# which the ten-colour manual palette used for the bar chart depends on.
top10pitchers <- all_pitcher_stats %>%
  select(
    name, short_name, G, AVG, SLG, xBA, xSLG, wOBA, xwOBA,
    xFantasyPPAPG, fantasy_ppg
  ) %>%
  slice_min(order_by = xFantasyPPAPG, n = 10, with_ties = FALSE)
top10pitchers

# The ten pitchers with the HIGHEST expected fantasy points allowed per game,
# capped at exactly ten rows for the same reason.
worst10pitchers <- all_pitcher_stats %>%
  select(
    name, short_name, G, AVG, SLG, xBA, xSLG, wOBA, xwOBA,
    xFantasyPPAPG, fantasy_ppg
  ) %>%
  slice_max(order_by = xFantasyPPAPG, n = 10, with_ties = FALSE)
worst10pitchers

# Bar chart of the pitchers in top10pitchers: one bar per set of initials,
# with the rounded value printed above each bar.
# The ten fill colours are unnamed, so they are handed out in the order of
# the `name` fill levels.
top10pitchersBar <- ggplot(
  data = top10pitchers,
  mapping = aes(x = short_name, y = xFantasyPPAPG, fill = name)
) +
  geom_col() +
  geom_text(
    aes(y = xFantasyPPAPG, label = round(xFantasyPPAPG, digits = 3)),
    vjust = -0.5,
    check_overlap = TRUE
  ) +
  scale_fill_manual(
    values = c(
      "#e57424", "#1334a4", "#e70019", "#1334a4", "#eb1b2b",
      "#fc5a12", "#0b2a53", "#fc5b04", "#eb1b2b", "#045b9c"
    )
  ) +
  labs(
    title = "Hardest Pitchers To Face",
    subtitle = "In Fantasy Baseball",
    caption = "(based on data from Fangraphs as of 5/17/2025)",
    x = "Pitcher",
    y = "Fantasy Points Allowed to Batters Per Game"
  ) +
  theme_bw()

top10pitchersBar
library(readxl)
library(dplyr)
library(stringr)
library(tidyr)  # Added tidyr library

# Pitcher table from worksheet 7 of the MLB workbook.
# A "*" in Name is turned into a " - Left" suffix and split off into a Throw
# column; every name without the marker ends up with Throw == "Right".
pitchers_BBR <- read_excel("MLB Database(AutoRecovered).xlsx", sheet = 7) %>%
  mutate(Name = str_replace(Name, "\\*", " - Left")) %>%
  # fill = "right" declares that a missing second piece is expected (names
  # without the marker); without it separate() warned about those rows on
  # every run.
  separate(
    col = Name,
    into = c("Name", "Throw"),
    sep = " - ",
    fill = "right"
  ) %>%
  mutate(Throw = replace_na(Throw, "Right"))
pitchers_BBR

# Narrow view: name, throwing hand, and the Age and Tm columns.
pitchers_BBR_limited <- select(pitchers_BBR, Name, Throw, Age, Tm)
pitchers_BBR_limited