# Import required packages
library(dplyr)
library(readr)
# Load the data
flights <- read_csv("flights2022-h2.csv")
airlines <- read_csv("airlines.csv")
airports <- read_csv("airports.csv")
# Question 1: Which airline and airport pair receives the most flights from NYC and what is the average duration of the flights?
# Selecting relevant columns from the flights dataframe
df1 <- flights %>% select(carrier, origin, dest, air_time)
# Joining df1 and the airlines dataframe
df2 <- df1 %>% full_join(airlines, by = "carrier")
# Renaming the column "name" in df2 to "airline_name"
df3 <- df2 %>%
rename(airline_name = name)
# Selecting relevant columns from the airports dataframe
df4 <- airports %>% select(faa, name)
# Joining df3 and df4
df5 <- df3 %>%
full_join(df4, by = c("dest" = "faa"))
# Renaming the column "name" in df5 to "airport_name"
df6 <- df5 %>% rename(airport_name = name)
# Selecting relevant columns from df7
df7 <- df6 %>% select(airline_name, airport_name, air_time)
# Finding the number of flights and the average durations of the flight for pairs of airlines and destination airports
df8 <- df7 %>%
group_by(airline_name, airport_name) %>%
summarise(number_flights = n(), average_duration = mean(air_time, na.rm = TRUE), .groups = "drop")
# Ordering the results from the highest number of flights to the lowest
df9 <- df8 %>% arrange(desc(number_flights))
# Extracting the pair with the highest number of flights
frequent <- head(df9, n=1)
print(frequent)
# Question 2: Find the airport that has the longest average flight duration (in hours) from NYC. What is the name of this airport?
# Ordering from the longest average flight duration to the shortest
df10 <- df9 %>% arrange(desc(average_duration))
# Extracting the answer
longest <- head(df10, n=1)
print(longest)
# Question 3: Which airport is the least frequented destination for flights departing from JFK? Save answer as a character string called least
# Selecting relevant columns from df6
df11 <- df6 %>% select(airline_name, origin, airport_name, air_time)
# Filtering for JFK as the origin airport
df12 <- df11 %>% filter(origin == "JFK")
# Finding the number of flights for each destination airport
df13 <- df12 %>% group_by(airport_name) %>% summarise(number_flights = n(), .groups = "drop")
# Arrange the results from the highest number of flights to the lowest
df14 <- df13 %>% arrange(number_flights)
# Extracting the answer and saving it as a character string
df15 <- head(df14, n=1)
print(df15[1,1])
least <- "Eagle County Regional Airport"
print(least)