# MW2
install.packages('openxlsx')
library(openxlsx)
library(readxl)
library(ggplot2)
library(dplyr)
# Read the metadata workbook (read_excel's default sheet selection)
workbook_path <- "MW2.xlsx"
stlsc <- read_excel(workbook_path)
# Names of every sheet in the workbook
sheet_name <- excel_sheets(workbook_path)
# Read each sheet into its own dataframe, collected in a list
sheet_list <- lapply(sheet_name, function(one_sheet) {
  read_excel(workbook_path, sheet = one_sheet)
})
# Stack all sheets into a single dataframe; the "sheet" column identifies
# which element of sheet_list each row came from
combined <- bind_rows(sheet_list, .id = "sheet")
combined
# Rows whose TeamType is 0 or 1 (the two teams), ordered by team,
# then jersey number, then frame counter (all ascending)
both <- arrange(
  filter(combined, TeamType %in% c(0, 1)),
  TeamType, JerseyNumber, FrameCount
)
# One dataframe per team: TeamType 1 is kept as `stl`, TeamType 0 as `char`,
# each ordered by jersey number and then frame counter
stl <- arrange(filter(combined, TeamType == 1), JerseyNumber, FrameCount)
char <- arrange(filter(combined, TeamType == 0), JerseyNumber, FrameCount)
# Split the stl rows into one dataframe per JerseyNumber,
# stored under the name "stl2_<JerseyNumber>"
stl_list <- unique(stl$JerseyNumber)
stl_df_list <- lapply(stl_list, function(jersey_id) {
  filter(stl, JerseyNumber == jersey_id)
})
names(stl_df_list) <- paste0("stl2_", stl_list)
# Same split for the char rows, stored under "char_<JerseyNumber>"
char_list <- unique(char$JerseyNumber)
char_df_list <- lapply(char_list, function(jersey_id) {
  filter(char, JerseyNumber == jersey_id)
})
names(char_df_list) <- paste0("char_", char_list)
# Single named list holding every player's dataframe, stl players first
player_list <- c(stl_df_list, char_df_list)
# Mean PlayerSpeed per TeamType, returned as one row per team in column `speed`
avg <- summarise(group_by(both, TeamType), speed = mean(PlayerSpeed))
avg
# Calculate the distance covered by each individual player
#
# df: dataframe with numeric columns XPosition, YPosition and FrameCount,
#     with rows in the order the positions were recorded.
# Returns a single number: the sum, over consecutive row pairs, of the
# straight-line step length divided by the FrameCount gap of that same pair.
# A dataframe with fewer than two rows yields 0.
calculate_distance <- function(df) {
  # Step length between consecutive rows (one value per row pair)
  step_length <- sqrt(diff(df$XPosition)^2 + diff(df$YPosition)^2)
  # Frame gap for the same row pairs. Both vectors have length nrow - 1, so
  # each step is divided by its own gap; the previous version padded the step
  # vector with a leading 0, which misaligned it against the gaps and relied
  # on silent recycling.
  frame_gap <- diff(df$FrameCount)
  # NOTE(review): when every gap is 1 this equals the total path length; for
  # larger gaps the step is scaled down by the gap - confirm that is intended.
  sum(step_length / frame_gap, na.rm = TRUE)
}
# Apply calculate_distance to every player's dataframe; the result is a list
# carrying the same names as player_list
distance_covered <- lapply(player_list, function(player_df) {
  calculate_distance(player_df)
})
# For every player keep only the rows that belong to a sustained sprint:
# PlayerSpeed of at least 7 (m/s per the original note) held over a run of
# 30 or more consecutive frames (the original note equates this with
# 1 second - presumably 30 frames per second, TODO confirm)
sprint_1sec <- lapply(player_list, function(player_df) {
  fast_frames <- filter(player_df, PlayerSpeed >= 7)
  # A new run starts wherever FrameCount jumps by more than 1 from the
  # previous remaining row; the running count of such jumps labels each run
  fast_frames %>%
    mutate(run_id = cumsum(c(FALSE, diff(FrameCount) > 1))) %>%
    group_by(run_id) %>%
    filter(n() >= 30) %>%
    ungroup() %>%
    select(-run_id)
})
# Summarise each player's sprints: one row per uninterrupted run of frames,
# holding the run label (`segment`), the player's TeamType and JerseyNumber,
# and the path length travelled within that run (`distance`)
sprint_segment <- lapply(sprint_1sec, function(sprint_frames) {
  sprint_frames %>%
    # Label runs: the label increases each time FrameCount jumps by more than 1
    mutate(segment = cumsum(c(FALSE, diff(FrameCount) > 1))) %>%
    group_by(segment) %>%
    summarise(
      TeamType = first(TeamType),
      JerseyNumber = first(JerseyNumber),
      # Sum of straight-line steps between consecutive rows of the run
      distance = sum(sqrt(diff(XPosition)^2 + diff(YPosition)^2))
    ) %>%
    ungroup()
})
sprint_segment
# Collapse each player's sprint segments into a single row: the player's
# TeamType and JerseyNumber plus the summed segment distances
sprint_covered <- lapply(sprint_segment, function(segments) {
  summarise(
    segments,
    TeamType = first(TeamType),
    JerseyNumber = first(JerseyNumber),
    sprint_covered = sum(distance)
  )
})
# For every player keep only the rows that belong to a sustained high-speed
# spell: PlayerSpeed of at least 5 (m/s per the original note) held over a run
# of 60 or more consecutive frames (the original note equates this with
# 2 seconds - presumably 30 frames per second, TODO confirm)
highspeed_2sec <- lapply(player_list, function(player_df) {
  quick_frames <- filter(player_df, PlayerSpeed >= 5)
  # A new run starts wherever FrameCount jumps by more than 1 from the
  # previous remaining row; the running count of such jumps labels each run
  quick_frames %>%
    mutate(run_id = cumsum(c(FALSE, diff(FrameCount) > 1))) %>%
    group_by(run_id) %>%
    filter(n() >= 60) %>%
    ungroup() %>%
    select(-run_id)
})