# Scrape the men's and women's tennis rankings tables from ESPN.
install.packages("rvest")
install.packages("tidyverse")
install.packages("openxlsx")
library(rvest) 
library(dplyr) 
library(openxlsx)
# URL for men's rankings
# Both rankings pages share the same ESPN path; the women's page is the
# base rankings URL followed by a type suffix.
espn_rankings_url <- "https://www.espn.com/tennis/rankings"
# URL for men's rankings (the base page itself)
men_url <- espn_rankings_url
# URL for women's rankings
women_url <- paste0(espn_rankings_url, "/_/type/wta")
#' Scrape the first HTML table found on a page.
#'
#' Stops with a descriptive error when the page contains no `<table>`
#' element.
#'
#' @param url Location of the page; passed unchanged to `read_html()`.
#' @return The first `<table>` element of the page, converted to a data
#'   frame by `html_table()`.
scrape_table <- function(url) {
  page <- read_html(url)
  tables <- html_nodes(page, "table")

  # Fail with a clear message instead of the bare "subscript out of bounds"
  # that `[[1]]` raises on an empty node set.
  if (length(tables) == 0L) {
    page_label <- if (is.character(url)) url[[1]] else "<non-character input>"
    stop("No <table> element found on page: ", page_label, call. = FALSE)
  }

  # Only the first table is parsed. `fill = TRUE` is retained from the
  # original call; NOTE(review): recent rvest releases document `fill` as
  # deprecated (rows are always filled) -- confirm before dropping it.
  html_table(tables[[1]], fill = TRUE)
}

# Scrape the men's table.
# scrape_table() already returns the first table on the page as a data
# frame, so the result must not be indexed with [[1]] again: doing so would
# keep only the table's first column instead of the whole table.
men_table <- scrape_table(men_url)
# Scrape the women's table (same handling as the men's table).
women_table <- scrape_table(women_url)
# Display both scraped tables.
men_table
women_table
# Clean up the tables 
#men_table <- men_table[-1, ] # Remove the first row (headers) as we'll add our own later
#colnames(men_table) <- c("RK", "NAME", "AGE", "PTS") # Rename columns to match our desired output 
#men_table <- men_table[, c(1,2,4,3)] # Reorder columns to match our desired output 
#women_table <- women_table[-1, ] # Remove the first row (headers) as we'll add our own later 
#colnames(women_table) <- c("RK", "NAME", "AGE", "PTS") # Rename columns to match our desired output 
#women_table <- women_table[, c(1,2,4,3)] # Reorder columns to match our desired output
# Combine the tables 
#combined_table <- bind_rows(men_table, women_table)
# Export to Excel
#write.xlsx(combined_table, "2023 ATP/WTA Player Rankings.xlsx")

# Export to Excel
#write.xlsx(combined_table, "2023 ATP/WTA Player Rankings")