# Skip to content (stray web-page navigation text; commented out because it is not R code)
install.packages('openxlsx')
library(openxlsx)
library(readxl)
library(ggplot2)
library(dplyr)
# Read the workbook once without a sheet argument (readxl then reads the
# first sheet) and collect the names of every sheet it contains.
stlsc <- read_excel("MW2.xlsx")
sheet_name <- excel_sheets("MW2.xlsx")

# Read every sheet into its own data frame. Each sheet name is handed to
# read_excel() as its `sheet` argument; the result is an unnamed list in
# workbook order.
sheet_list <- lapply(sheet_name, read_excel, path = "MW2.xlsx")

# Stack all sheets into one data frame. `sheet_list` carries no names, so the
# `sheet` id column holds each sheet's position in the list, not its name.
combined <- bind_rows(sheet_list, .id = "sheet")
combined
# Rows whose TeamType is 0 or 1, sorted ascending by team, then jersey
# number, then frame.
both <- combined %>%
  filter(TeamType %in% c(0, 1)) %>%
  arrange(TeamType, JerseyNumber, FrameCount)

# The same rows split into one data frame per team, each sorted ascending by
# jersey number and then frame: TeamType 1 goes to `stl`, TeamType 0 to `char`.
stl <- combined %>%
  filter(TeamType == 1) %>%
  arrange(JerseyNumber, FrameCount)

char <- combined %>%
  filter(TeamType == 0) %>%
  arrange(JerseyNumber, FrameCount)
# Split each team's rows into one data frame per jersey number.

# Build a named list with one data frame per distinct JerseyNumber found in
# `team_df`.
#
# Args:
#   team_df: data frame with a JerseyNumber column.
#   prefix:  string prepended to each jersey number to form the element name.
#
# Returns:
#   A list of data frames named paste0(prefix, jersey), in the order the
#   jersey numbers first appear in `team_df`.
split_by_jersey <- function(team_df, prefix) {
  jerseys <- unique(team_df$JerseyNumber)
  per_player <- lapply(jerseys, function(jersey) {
    filter(team_df, JerseyNumber == jersey)
  })
  names(per_player) <- paste0(prefix, jerseys)
  per_player
}

stl_list <- unique(stl$JerseyNumber) # Distinct jersey numbers in `stl`
stl_df_list <- split_by_jersey(stl, "stl2_")

char_list <- unique(char$JerseyNumber) # Distinct jersey numbers in `char`
char_df_list <- split_by_jersey(char, "char_")

# Every per-player data frame in a single list: `stl` players first, then
# `char` players.
player_list <- c(stl_df_list, char_df_list)
# Mean PlayerSpeed for each TeamType across all rows of `both`. mean() is
# called without na.rm, so a missing speed makes that team's mean NA.
avg <- summarise(group_by(both, TeamType), speed = mean(PlayerSpeed))
avg
# Calculate the distance covered by one player, normalised per frame.
#
# Args:
#   df: data frame with numeric columns XPosition, YPosition and FrameCount,
#       with rows in the order the movement should be accumulated.
#
# Returns:
#   A single number: the sum, over consecutive row pairs, of the straight-line
#   distance between the two positions divided by the FrameCount difference
#   between them. Pairs whose ratio is NA/NaN are skipped. Returns 0 when `df`
#   has fewer than two rows.
calculate_distance <- function(df) {
  # Straight-line distance between each pair of consecutive rows (length n - 1).
  step <- sqrt(diff(df$XPosition)^2 + diff(df$YPosition)^2)
  # Frames elapsed across the same pairs. Both vectors must stay at length
  # n - 1: padding the distances with a leading 0 would pair every step with
  # the wrong frame gap and force R to recycle the shorter vector.
  frame_gap <- diff(df$FrameCount)
  sum(step / frame_gap, na.rm = TRUE)
}
# One calculate_distance() result per element of `player_list`; the list
# names are carried over.
distance_covered <- lapply(player_list, calculate_distance)

# For every player, keep only the rows with PlayerSpeed >= 7 that belong to an
# unbroken run of at least 30 such rows (presumably 7 m/s held for one second
# at 30 frames per second -- confirm units and frame rate).
sprint_1sec <- lapply(player_list, function(player_df) {
  fast_rows <- filter(player_df, PlayerSpeed >= 7)
  # Run id: goes up by one each time FrameCount jumps by more than 1 from the
  # previous remaining row, so rows from back-to-back frames share an id.
  by_run <- group_by(fast_rows, grp = cumsum(c(1, diff(FrameCount)) > 1))
  long_runs <- filter(by_run, n() >= 30)
  select(ungroup(long_runs), -grp)
})
# Break each player's sprint rows into individual sprints and measure each one.
# The result has one row per sprint, so its row count is the sprint count.
sprint_segment <- lapply(sprint_1sec, function(sprint_rows) {
  # A new segment starts wherever FrameCount jumps by more than 1.
  by_segment <- group_by(
    sprint_rows,
    segment = cumsum(c(1, diff(FrameCount)) > 1)
  )
  per_sprint <- summarize(
    by_segment,
    TeamType = first(TeamType),
    JerseyNumber = first(JerseyNumber),
    # Path length: sum of straight-line steps between consecutive rows.
    distance = sum(sqrt(diff(XPosition)^2 + diff(YPosition)^2))
  )
  ungroup(per_sprint)
})
sprint_segment
# Collapse each player's per-sprint table into a single row that carries the
# player's TeamType, JerseyNumber and the sum of all sprint distances.
sprint_covered <- lapply(sprint_segment, function(segments) {
  summarize(
    segments,
    TeamType = first(TeamType),
    JerseyNumber = first(JerseyNumber),
    sprint_covered = sum(distance)
  )
})
# For every player, keep only the rows with PlayerSpeed >= 5 that belong to an
# unbroken run of at least 60 such rows (presumably 5 m/s held for two seconds
# at 30 frames per second -- confirm units and frame rate).
highspeed_2sec <- lapply(player_list, function(player_df) {
  quick_rows <- filter(player_df, PlayerSpeed >= 5)
  # Run id: goes up by one each time FrameCount jumps by more than 1 from the
  # previous remaining row, so rows from back-to-back frames share an id.
  by_run <- group_by(quick_rows, grp = cumsum(c(1, diff(FrameCount)) > 1))
  long_runs <- filter(by_run, n() >= 60)
  select(ungroup(long_runs), -grp)
})