# Baseball Research
suppressPackageStartupMessages(library(tidyverse))
library(readr)
library(data.table)
install.packages("readxl")
library(readxl)
library(dplyr)
library(ggplot2)
install.packages("geomtextpath")
library(geomtextpath)
install.packages("ggrepel")
library(ggrepel)
# Read the first six worksheets of the MLB workbook, one data frame per sheet.
# Sheets are addressed by position; sheet 1 is what read_excel() reads when
# no sheet is given.
mlb_player_stats.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 1
)
savant_stats.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 2
)
defense_and_running <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 3
)
fangraphs.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 4
)
mlb_teams.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 5
)
mlb_standings.df <- read_excel(
  path = "MLB Database(AutoRecovered).xlsx",
  sheet = 6
)
# Express each player's WAR as a per-game rate and as a 162-game pace, then
# order the table from the highest pace to the lowest.
war_per_game.df <- mlb_player_stats.df %>%
  select(NAME, Pos, WAR, G, Team) %>%
  mutate(
    war_per_game = WAR / G,
    war_per_162 = war_per_game * 162
  ) %>%
  arrange(desc(war_per_162))

# Keep the 50 highest paces. slice_max() keeps ties by default, so the result
# can hold more than 50 rows when the cut-off value is shared.
top50warper162 <- slice_max(war_per_game.df, order_by = war_per_162, n = 50)
top50warper162
# Scatter plot of WAR against games played for the top-50 table.
# Only players with WAR of at least 5.5 get a (repelled) name label; the
# others receive an NA label, which draws nothing.
ggplot(top50warper162, aes(x = G, y = WAR)) +
  geom_point(size = 2, color = "#2e2375") +
  # geom_text_repel() has no `fill` parameter (that belongs to
  # geom_label_repel()), so the former `fill = "White"` was ignored with a
  # warning and has been dropped.
  geom_text_repel(
    aes(label = ifelse(WAR < 5.5, NA, NAME)),
    min.segment.length = 0,
    size = 5,
    nudge_x = -0.5,
    nudge_y = 0.5,
    force = 50,
    max.iter = 5000,
    color = "#2e2375"
  ) +
  labs(
    title = "War versus Games Played",
    x = "Games Played",
    y = "WAR",
    caption = "Data Collected from Baseball Reference"
  ) +
  # Line widths of theme elements are set with `linewidth`; the `size`
  # argument is deprecated for element_rect()/element_line() since
  # ggplot2 3.4.0.
  theme(
    panel.background = element_rect(
      fill = "#89869c",
      colour = "#89869c",
      linewidth = 0.5,
      linetype = "solid"
    ),
    panel.grid.major = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "white"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.25,
      linetype = "solid",
      colour = "white"
    ),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, vjust = 2.0, size = 20)
  )
suppressPackageStartupMessages(library(tidyverse))
library(readr)
library(data.table)
install.packages("readxl")
library(readxl)
library(dplyr)
library(ggplot2)
install.packages("geomtextpath")
library(geomtextpath)
install.packages("ggrepel")
library(ggrepel)
library(tibble)
# Read one worksheet of the splits workbook and tag every row with the
# player's full name and initials.
#
# sheet    Position of the player's worksheet in the workbook.
# player   Value stored in the `name` column.
# initials Value stored in the `short_name` column.
# path     Workbook to read from.
#
# Returns the worksheet as a tibble with `name` and `short_name` added.
read_player_splits <- function(sheet, player, initials,
                               path = "Fantasy Baseball Team Splits.xlsx") {
  read_excel(path, sheet = sheet) %>%
    # .env$ makes sure the function arguments are used even if the sheet
    # happens to contain columns called `player` or `initials`.
    mutate(name = .env$player, short_name = .env$initials)
}

william_contreras_splits <- read_player_splits(1, "William Contreras", "WC")
pete_alonso_splits <- read_player_splits(2, "Pete Alonso", "PA")
gleyber_torres_splits <- read_player_splits(3, "Gleyber Torres", "GT")
austin_riley_splits <- read_player_splits(4, "Austin Riley", "AR")
geraldo_perdomo_splits <- read_player_splits(5, "Geraldo Perdomo", "GP")
trent_grisham_splits <- read_player_splits(6, "Trent Grisham", "TG")
fernando_tatis_jr_splits <- read_player_splits(7, "Fernando Tatis Jr.", "FTJ")
james_wood_splits <- read_player_splits(8, "James Wood", "JW")
ben_rice_splits <- read_player_splits(9, "Ben Rice", "BR")
elly_de_la_cruz_splits <- read_player_splits(10, "Elly De La Cruz", "EDLC")
austin_hays_splits <- read_player_splits(11, "Austin Hays", "AH")
gunnar_henderson_splits <- read_player_splits(12, "Gunnar Henderson", "GH")
ryan_ohearn_splits <- read_player_splits(13, "Ryan O'Hearn", "ROH")

# Stack the per-player tables in roster order. rbind() is kept so that a
# sheet whose columns differ from the others still fails loudly.
fantasy_splits_df <- rbind(
  william_contreras_splits,
  pete_alonso_splits,
  gleyber_torres_splits,
  austin_riley_splits,
  geraldo_perdomo_splits,
  trent_grisham_splits,
  fernando_tatis_jr_splits,
  james_wood_splits,
  ben_rice_splits,
  elly_de_la_cruz_splits,
  austin_hays_splits,
  gunnar_henderson_splits,
  ryan_ohearn_splits
)
# Print the combined column names for inspection.
colnames(fantasy_splits_df)

# Clean the stacked split tables and derive the fantasy scoring columns.
fantasy_splits_df <- fantasy_splits_df %>%
  # Move `name` to the front by name instead of by hard-coded position
  # (previously c(23, 1:22, 24)), so the step no longer depends on the
  # sheets having exactly 22 columns.
  relocate(name) %>%
  # Drop rows with any missing value, then repeated header rows
  # (Season == "Season"). na.omit() has already removed NA seasons.
  na.omit() %>%
  filter(Season != "Season") %>%
  # "Total" rows become "Career"; other seasons lose a ".0" suffix.
  mutate(
    Season = ifelse(Season == "Total", "Career", str_remove(Season, "\\.0"))
  ) %>%
  filter(
    !Handedness %in% c("vs L as R", "vs L as L", "vs R as R", "vs R as L")
  ) %>%
  # Convert the counting-stat columns to numeric.
  mutate(
    across(
      c(
        G, AB, H, `1B`, `2B`, `3B`, HR, R, RBI, BB, IBB, SO, HBP, SF, SH,
        GDP, SB, CS
      ),
      as.numeric
    )
  ) %>%
  mutate(
    # Total bases from the hit breakdown.
    Bases = `1B` + (2 * `2B`) + (3 * `3B`) + (4 * HR),
    SLG = round(Bases / AB, 3),
    OBP = round((H + BB + HBP) / (AB + BB + HBP + SF), 3),
    OPS = SLG + OBP,
    fantasy_points = Bases + BB + R + RBI + SB - SO - CS + IBB + HBP -
      (2 * GDP),
    # NOTE(review): rows with G == 0 or AB == 0 yield Inf/NaN rates here;
    # confirm whether such rows can occur in the workbook.
    fantasy_ppg = fantasy_points / G,
    fantasy_ppab = fantasy_points / AB
  )
# One table per split type, selected by the value of `Handedness`.

# Splits by pitcher handedness
vsLeft <- filter(fantasy_splits_df, Handedness == "vs L")
vsRight <- filter(fantasy_splits_df, Handedness == "vs R")

# Splits by venue
Home <- filter(fantasy_splits_df, Handedness == "Home")
Away <- filter(fantasy_splits_df, Handedness == "Away")

# Splits by venue and pitcher handedness combined
HomevsLeft <- filter(fantasy_splits_df, Handedness == "Home vs L")
HomevsRight <- filter(fantasy_splits_df, Handedness == "Home vs R")
AwayvsRight <- filter(fantasy_splits_df, Handedness == "Away vs R")
AwayvsLeft <- filter(fantasy_splits_df, Handedness == "Away vs L")
# All 2025 rows (every split type), best fantasy points per at-bat first.
StatsThisYear <- fantasy_splits_df %>%
  filter(Season == 2025) %>%
  arrange(desc(fantasy_ppab))
StatsThisYear

# Position groups taken from the 2025 table.
Outfielders <- filter(
  StatsThisYear,
  name %in% c(
    "Trent Grisham",
    "Fernando Tatis Jr.",
    "Austin Hays",
    "James Wood"
  )
)
Shortstops <- filter(
  StatsThisYear,
  name %in% c("Geraldo Perdomo", "Elly De La Cruz", "Gunnar Henderson")
)
Firstbasemen <- filter(
  StatsThisYear,
  name %in% c("Pete Alonso", "Ben Rice", "Ryan O'Hearn")
)
# Fill colour per player, keyed by `name` so a colour always follows its
# player regardless of factor-level order or of which players are present.
#
# NOTE(review): the original unnamed 13-colour vector was matched to the
# alphabetical order of `name`, but its entries line up with the alphabetical
# order of `short_name` (the x axis): AH, AR, BR, EDLC, FTJ, GH, GP, GT, ...
# The two orders differ only for Henderson/Perdomo/Torres, so the plot drew
# Perdomo with "#df4601" and Torres with "#a71930". The mapping below follows
# the x-axis order; confirm the Perdomo/Torres colours are as intended.
player_fill_colors <- c(
  "Austin Hays" = "#e70019",
  "Austin Riley" = "#13234b",
  "Ben Rice" = "#ccced4",
  "Elly De La Cruz" = "#e70019",
  "Fernando Tatis Jr." = "#fcc424",
  "Gunnar Henderson" = "#df4601",
  "Geraldo Perdomo" = "#a71930",
  "Gleyber Torres" = "#df4601",
  "James Wood" = "#091a5c",
  "Pete Alonso" = "#ff5910",
  "Ryan O'Hearn" = "#df4601",
  "Trent Grisham" = "#ccced4",
  "William Contreras" = "#f8c610"
)

# Fantasy points per at-bat against right-handed pitching, one bar per player.
# NOTE(review): vsRight is filtered on Handedness only, so it holds one row per
# season plus the "Career" row; geom_col() stacks those rows in each bar.
# Confirm that summing across seasons is intended.
ggplot(vsRight, aes(short_name, fantasy_ppab, fill = name)) +
  geom_col() +
  scale_fill_manual(values = player_fill_colors)

# Same chart against left-handed pitching.
ggplot(vsLeft, aes(short_name, fantasy_ppab, fill = name)) +
  geom_col() +
  scale_fill_manual(values = player_fill_colors)
# Load the pitcher counting stats, give the columns short names and compute
# the fantasy points each pitcher has allowed to batters.
PitcherStats <- read.csv("PitcherStats.csv") %>%
  mutate(
    name = as.character(last_name..first_name),
    K = as.numeric(strikeout),
    BB = as.numeric(walk),
    SLG = round(as.numeric(slg_percent), 3),
    R = as.numeric(p_run),
    SB = as.numeric(p_total_stolen_base),
    RBI = as.numeric(p_rbi),
    GIDP = as.numeric(p_gnd_into_dp),
    IBB = as.numeric(p_intent_walk),
    TB = as.numeric(p_total_bases),
    CS = as.numeric(p_total_caught_stealing),
    # Strikeouts and caught-stealing count -1, double plays -2, the rest +1.
    fantasy_points_allowed = (-1 * (K + CS)) + (-2 * GIDP) + SB + RBI + IBB +
      BB + TB + R
  ) %>%
  # Keep only the renamed columns and the score.
  select(
    name, K, BB, SLG, R, SB, RBI, GIDP, IBB, TB, CS, fantasy_points_allowed
  )

SavantStatsPitcher <- read.csv("Savant_Stats.csv")
PitcherStats

# Add short-named, rounded rate columns; the raw columns are kept as well.
SavantStatsPitcher <- mutate(
  SavantStatsPitcher,
  name = as.character(last_name..first_name),
  G = as.numeric(p_game),
  AVG = round(as.numeric(batting_avg), 3),
  SLG = round(as.numeric(slg_percent), 3),
  OBP = round(as.numeric(on_base_percent), 3),
  OPS = round(as.numeric(on_base_plus_slg), 3),
  xBA = round(as.numeric(xba), 3),
  xSLG = round(as.numeric(xslg), 3),
  wOBA = round(as.numeric(woba), 3),
  xwOBA = round(as.numeric(xwoba), 3)
)

# Print one player's raw splits sheet for inspection.
elly_de_la_cruz_splits
# Ensure that the necessary libraries are loaded
suppressPackageStartupMessages(library(tidyverse))
library(readr)
library(data.table)
install.packages("readxl")
library(readxl)
library(dplyr)
library(ggplot2)
install.packages("geomtextpath")
library(geomtextpath)
install.packages("ggrepel")
library(ggrepel)
library(tibble)
# Pitcher counting stats: short column names plus the fantasy points each
# pitcher has allowed to batters.
pitcher_raw <- read.csv("PitcherStats.csv")
PitcherStats <- mutate(
  pitcher_raw,
  name = as.character(last_name..first_name),
  K = as.numeric(strikeout),
  BB = as.numeric(walk),
  SLG = round(as.numeric(slg_percent), 3),
  R = as.numeric(p_run),
  SB = as.numeric(p_total_stolen_base),
  RBI = as.numeric(p_rbi),
  GIDP = as.numeric(p_gnd_into_dp),
  IBB = as.numeric(p_intent_walk),
  TB = as.numeric(p_total_bases),
  CS = as.numeric(p_total_caught_stealing)
)
# Strikeouts and caught-stealing count -1, double plays -2, the rest +1.
PitcherStats <- mutate(
  PitcherStats,
  fantasy_points_allowed = (-1 * (K + CS)) + (-2 * GIDP) + SB + RBI + IBB +
    BB + TB + R
)
# Keep only the renamed columns and the score.
PitcherStats <- PitcherStats[
  ,
  c(
    "name", "K", "BB", "SLG", "R", "SB", "RBI", "GIDP", "IBB", "TB", "CS",
    "fantasy_points_allowed"
  )
]
rm(pitcher_raw)

# Savant rate stats: add short-named, rounded columns next to the raw ones.
SavantStatsPitcher <- read.csv("Savant_Stats.csv") %>%
  mutate(
    name = as.character(last_name..first_name),
    G = as.numeric(p_game),
    AVG = round(as.numeric(batting_avg), 3),
    SLG = round(as.numeric(slg_percent), 3),
    OBP = round(as.numeric(on_base_percent), 3),
    OPS = round(as.numeric(on_base_plus_slg), 3),
    xBA = round(as.numeric(xba), 3),
    xSLG = round(as.numeric(xslg), 3),
    wOBA = round(as.numeric(woba), 3),
    xwOBA = round(as.numeric(xwoba), 3)
  )
# Combine the Savant rate stats with the counting stats (matched on `name`)
# and derive actual and expected fantasy points allowed per game.
all_pitcher_stats <- SavantStatsPitcher %>%
  left_join(PitcherStats, by = "name") %>%
  # Both inputs carry an SLG column, so the join suffixes them; keep the
  # Savant one (SLG.x) under its plain name.
  select(
    name, G, AVG, SLG = SLG.x, OBP, OPS, xBA, xSLG, wOBA, xwOBA,
    K, BB, R, SB, RBI, GIDP, IBB, TB, CS, fantasy_points_allowed
  ) %>%
  mutate(
    fantasy_ppg = fantasy_points_allowed / G,
    # At-bats are backed out of total bases and slugging.
    # NOTE(review): SLG == 0 gives NaN/Inf here; confirm this cannot occur.
    AB = TB / SLG,
    # Total bases implied by expected slugging.
    xBases = AB * xSLG,
    # Same scoring as fantasy_points_allowed with xBases in place of TB.
    xFantasyPPA = (-1 * (K + CS)) + (-2 * GIDP) + SB + RBI + IBB + BB +
      xBases + R,
    xFantasyPPAPG = xFantasyPPA / G,
    `xFanPPAPG - FPApg` = xFantasyPPAPG - fantasy_ppg,
    short_name = name
  ) %>%
  # `name` comes from the "last_name, first_name" column, so split on the
  # comma. Splitting on a single space (as before) broke multi-word surnames
  # into the wrong pieces, giving wrong initials and "additional pieces
  # discarded" warnings.
  separate(col = short_name, into = c("Last", "First"), sep = ",\\s*") %>%
  # Reduce both parts to their initial and build "<first><last>" initials.
  mutate(
    First = substr(First, 1, 1),
    Last = substr(Last, 1, 1),
    short_name = paste0(First, Last)
  )
all_pitcher_stats
# The ten pitchers with the lowest expected fantasy points allowed per game.
top10pitchers <- all_pitcher_stats %>%
  select(
    name, short_name, G,
    AVG, SLG, xBA, xSLG, wOBA, xwOBA,
    xFantasyPPAPG, fantasy_ppg
  ) %>%
  arrange(desc(xFantasyPPAPG)) %>%
  slice_min(xFantasyPPAPG, n = 10)
top10pitchers

# The ten pitchers with the highest expected fantasy points allowed per game.
worst10pitchers <- all_pitcher_stats %>%
  select(
    name, short_name, G,
    AVG, SLG, xBA, xSLG, wOBA, xwOBA,
    xFantasyPPAPG, fantasy_ppg
  ) %>%
  arrange(desc(xFantasyPPAPG)) %>%
  slice_max(xFantasyPPAPG, n = 10)
worst10pitchers
# Bar chart of expected fantasy points allowed per game for the pitchers in
# top10pitchers, one bar per pitcher (x axis shows the initials), with the
# value printed above each bar.
# The ten fill colours are unnamed, so they are assigned in the level order
# of `name`.
top10pitchersBar <- ggplot(
  top10pitchers,
  aes(short_name, xFantasyPPAPG, fill = name)
) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "#e57424",
      "#1334a4",
      "#e70019",
      "#1334a4",
      "#eb1b2b",
      "#fc5a12",
      "#0b2a53",
      "#fc5b04",
      "#eb1b2b",
      "#045b9c"
    )
  ) +
  labs(
    title = "Hardest Pitchers To Face",
    subtitle = "In Fantasy Baseball",
    caption = "(based on data from Fangraphs as of 5/17/2025)",
    x = "Pitcher",
    y = "Fantasy Points Allowed to Batters Per Game"
  ) +
  theme_bw() +
  geom_text(
    aes(y = xFantasyPPAPG, label = round(xFantasyPPAPG, digits = 3)),
    vjust = -0.5,
    check_overlap = TRUE
  )
# Display the chart. This line was previously fused with the following
# library() call into `top10pitchersBarlibrary(readxl)`, which is a call to
# a function that does not exist.
top10pitchersBar
library(readxl)
library(dplyr)
library(stringr)
library(tidyr) # Added tidyr library
# Read sheet 7 of the MLB workbook and split the throwing hand out of `Name`.
# A "*" in the name is turned into a " - Left" marker and split off into
# `Throw`; names without the marker get "Right".
pitchers_BBR <- read_excel("MLB Database(AutoRecovered).xlsx", sheet = 7) %>%
  mutate(Name = str_replace(Name, "\\*", " - Left")) %>%
  # fill = "right" pads names without a marker with NA silently; the default
  # does the same but warns once for every such row.
  separate(
    col = Name,
    into = c("Name", "Throw"),
    sep = " - ",
    fill = "right"
  ) %>%
  mutate(Throw = replace_na(Throw, "Right"))
pitchers_BBR

# Narrow view with name, throwing hand, age and team only.
pitchers_BBR_limited <- pitchers_BBR[, c("Name", "Throw", "Age", "Tm")]
pitchers_BBR_limited