# ATP/WTA Player Rankings
install.packages("rvest")
install.packages("tidyverse")
install.packages("openxlsx")
library(rvest)
library(dplyr)
library(openxlsx) # URL for men's rankings
# Pages on espn.com that this script scrapes for ranking tables.
# Men's rankings page
men_url <- "https://www.espn.com/tennis/rankings"
# URL for women's rankings (same page with the "/_/type/wta" path suffix)
women_url <- "https://www.espn.com/tennis/rankings/_/type/wta"
# Scrape the first HTML table on a web page.
#
# Downloads and parses the page at `url`, selects every <table> element and
# converts the first one (in document order) with html_table().
#
# Args:
#   url: URL of the page to scrape (passed straight to read_html()).
#
# Returns:
#   The first table on the page, as produced by html_table().
#
# Raises:
#   An error if the page cannot be read, or if it contains no <table> element.
scrape_table <- function(url) {
  page <- read_html(url)
  tables <- html_elements(page, "table")

  # Fail with an actionable message rather than a bare "subscript out of
  # bounds" when the page has no table (e.g. after a site layout change).
  if (length(tables) == 0) {
    stop("No <table> element found at ", url, call. = FALSE)
  }

  # `fill = TRUE` is kept so ragged rows are padded on older rvest releases;
  # rvest >= 1.0 pads automatically and still accepts the argument.
  html_table(tables[[1]], fill = TRUE)
}
# Scrape the men's table
# scrape_table() already returns the first table on the page, so its result
# is used directly: indexing it again with [[1]] would keep only the table's
# first column instead of the whole table.
men_table <- scrape_table(men_url)
# Scrape the women's table
women_table <- scrape_table(women_url)
# Show both scraped tables for a quick visual check; print() is explicit so
# the output also appears when the script is run through source().
print(men_table)
print(women_table)
# Clean up the tables
#men_table <- men_table[-1, ] # Remove the first row (headers) as we'll add our own later
#colnames(men_table) <- c("RK", "NAME", "AGE", "PTS") # Rename columns to match our desired output
#men_table <- men_table[, c(1,2,4,3)] # Reorder columns to match our desired output
#women_table <- women_table[-1, ] # Remove the first row (headers) as we'll add our own later
#colnames(women_table) <- c("RK", "NAME", "AGE", "PTS") # Rename columns to match our desired output
#women_table <- women_table[, c(1,2,4,3)] # Reorder columns to match our desired output
# Combine the tables
#combined_table <- bind_rows(men_table, women_table)
# Export to Excel
#write.xlsx(combined_table, "2023 ATP/WTA Player Rankings.xlsx")
# Export to Excel
#write.xlsx(combined_table, "2023 ATP/WTA Player Rankings")