Skip to content
CFB_analysis
A data analysis of the modern historical success and failures of the Miami Hurricanes.
DataFrameas
df
variable
--Label all home wins and loses
WITH home AS(
SELECT
game_id,
CASE WHEN home_points::integer > away_points::integer THEN 'win'
ELSE 'loss' end as home_record
FROM 'cfb_games_in_data_repo.csv'
WHERE home_team = 'Miami'),
--Label all away wins and loses
away AS(
SELECT
game_id,
CASE WHEN home_points::integer < away_points::integer THEN 'win'
ELSE 'loss' end as away_record
FROM 'cfb_games_in_data_repo.csv'
WHERE away_team = 'Miami')
-- Select all info about every miami game in data base
SELECT
g.season,
g.home_team,
g.away_team,
home.home_record,
away.away_record
FROM 'cfb_games_in_data_repo.csv' as g
LEFT JOIN away
ON g.game_id = away.game_id
LEFT JOIN home
ON home.game_id = g.game_id
WHERE (g.home_team = 'Miami'
OR g.away_team = 'Miami')
ORDER BY start_date
;
DataFrameas
df2
variable
-- Lets look at record by season
SELECT
season,
sum(CASE WHEN home_team = 'Miami' AND home_points::integer > away_points::integer THEN 1
WHEN away_team = 'Miami' AND away_points::integer > home_points::integer THEN 1
ELSE 0 end) as wins,
sum(CASE WHEN home_team = 'Miami' AND home_points::integer < away_points::integer THEN 1
WHEN away_team = 'Miami' AND away_points::integer < home_points::integer THEN 1
ELSE 0 end) as loses
FROM 'cfb_games_in_data_repo.csv'
WHERE season <> 2002
GROUP BY season
ORDER BY season
-- Miami has not had consitent great seasons in the past 20 years.
DataFrameas
df3
variable
-- Average and Median Wins
WITH totalwins AS (SELECT
season,
sum(CASE WHEN home_team = 'Miami' AND home_points::integer > away_points::integer THEN 1
WHEN away_team = 'Miami' AND away_points::integer > home_points::integer THEN 1
ELSE 0 end) as wins
FROM 'cfb_games_in_data_repo.csv'
WHERE season <> 2002
GROUP BY season
ORDER BY season)
SELECT
round(avg(wins),2) as Average_Wins,
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY wins) as Median_wins
FROM totalwins
-- With an average win total of 7.61 and a median of 7, the Hurricanes are not an elite team
DataFrameas
df1
variable
-- Lets compare those totals to a couple of the best teams in the past 20 years
--Select Alabamas wins
WITH totalwinsAlabama AS (SELECT
season,
sum(CASE WHEN home_team = 'Alabama' AND home_points::integer > away_points::integer THEN 1
WHEN away_team = 'Alabama' AND away_points::integer > home_points::integer THEN 1
ELSE 0 end) as wins
FROM 'cfb_games_in_data_repo.csv'
WHERE season <> 2002
AND home_points <> 'NA'
AND away_points <> 'NA'
GROUP BY season
ORDER BY season),
--Select Ohio States wins
totalwinsOSU AS (SELECT
season,
sum(CASE WHEN home_team = 'Ohio State' AND home_points::integer > away_points::integer THEN 1
WHEN away_team = 'Ohio State' AND away_points::integer > home_points::integer THEN 1
ELSE 0 end) as wins
FROM 'cfb_games_in_data_repo.csv'
WHERE season <> 2002
AND home_points <> 'NA'
AND away_points <> 'NA'
GROUP BY season
ORDER BY season),
-- Select Oregons wins
totalwinsOregon AS (SELECT
season,
sum(CASE WHEN home_team = 'Oregon' AND home_points::integer > away_points::integer THEN 1
WHEN away_team = 'Oregon' AND away_points::integer > home_points::integer THEN 1
ELSE 0 end) as wins
FROM 'cfb_games_in_data_repo.csv'
WHERE season <> 2002
AND home_points <> 'NA'
AND away_points <> 'NA'
GROUP BY season
ORDER BY season)
--Put them all together
SELECT
'Alabama' as team,
round(avg(wins),2) as Average_Wins,
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY wins) as Median_wins
FROM totalwinsAlabama
UNION
SELECT
'Ohio State' as team,
round(avg(wins),2) as Average_Wins,
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY wins) as Median_wins
FROM totalwinsOSU
UNION
SELECT
'Oregon' as team,
round(avg(wins),2) as Average_Wins,
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY wins) as Median_wins
FROM totalwinsOregon
-- When comparing the most consistent teams of the past 20 years, it is clear that Miami has been a step behind them.