Skip to content
Intermediate SQL
Data Manipulation in SQL
Here you can access every table used in the course. To access each table, you will need to specify the soccer
schema in your queries (e.g., soccer.match
for the match
table, and soccer.league
for the league
table).
Note: When using sample integrations such as those that contain course data, you have read-only access. You can run queries, but cannot make any changes such as adding, deleting, or modifying the data (e.g., creating tables, views, etc.).
Take Notes
Add notes about the concepts you've learned and SQL cells with queries you want to keep.
Add your notes here
Explore Datasets
Use the match
, league
, and country
tables to explore the data and practice your skills!
- Use the
match
,league
, andcountry
tables to return the number of matches played in Great Britain versus elsewhere in the world.- "England", "Scotland", and "Wales" should be categorized as "Great Britain"
- All other leagues will need to be categorized as "World".
- Use the
match
andcountry
tables to return the countries in which the average number of goals (home and away goals) scored are greater than the average number of goals of all matches. - In a soccer league, points are assigned to teams based on the result of a game. Here, let's assume that 3 points are awarded for a win, 1 for a tie, and 0 for a defeat. Use the
match
table to calculate the running total of points earned by the team "Chelsea" (team id 8455) in the season "2014/2015".- The final output should have the match date, the points earned by Chelsea, and the running total.
DataFrameas
df1
variable
SELECT
c.name AS country,
-- Sum the total records in each season where the home team won
SUM(CASE WHEN m.season = '2012/2013' AND m.home_goal > m.away_goal
THEN 1 ELSE 0 END) AS matches_2012_2013,
SUM(CASE WHEN m.season = '2013/2014' AND m.home_goal > m.away_goal
THEN 1 ELSE 0 END) AS matches_2013_2014,
SUM(CASE WHEN m.season = '2014/2015' AND m.home_goal > m.away_goal
THEN 1 ELSE 0 END) AS matches_2014_2015
FROM country AS c
LEFT JOIN match AS m
ON c.id = m.country_id
-- Group by country name alias
GROUP BY country;
Hidden output
DataFrameas
df
variable
SELECT
-- Select the team long and short names
team_long_name,
team_short_name
FROM team
-- Filter for teams with 8 or more home goals
WHERE team_api_id IN
(SELECT hometeam_ID
FROM match
WHERE home_goal >= 8);
DataFrameas
df2
variable
SELECT
-- Select country name and the count match IDs
c.name AS country_name,
COUNT(sub.id) AS matches
FROM country AS c
-- Inner join the subquery onto country
-- Select the country id and match id columns
INNER JOIN (SELECT country_id, id
FROM match
-- Filter the subquery by matches with 10+ goals
WHERE (home_goal + away_goal) >= 10) AS sub
ON c.id = sub.country_id
GROUP BY country_name;
DataFrameas
df3
variable
SELECT
-- Select the league name and average goals scored
l.name AS league,
ROUND(AVG(m.home_goal + m.away_goal),2) AS avg_goals,
-- Subtract the overall average from the league average
ROUND(AVG(m.home_goal + m.away_goal) -
(SELECT AVG(home_goal + away_goal)
FROM match
WHERE season = '2013/2014'),2) AS diff
FROM league AS l
LEFT JOIN match AS m
ON l.country_id = m.country_id
-- Only include 2013/2014 results
WHERE m.season = '2013/2014'
GROUP BY l.name;
DataFrameas
df4
variable
SELECT
-- Select country ID, date, home, and away goals from match
main.country_id,
main.date,
main.home_goal,
main.away_goal
FROM match AS main
WHERE
-- Filter for matches with the highest number of goals scored
(home_goal + away_goal) =
(SELECT MAX(sub.home_goal + sub.away_goal)
FROM match AS sub
WHERE main.country_id = sub.country_id
AND main.season = sub.season);
DataFrameas
df5
variable
-- Set up your CTE
WITH match_list AS (
SELECT
country_id,
(home_goal + away_goal) AS goals
FROM match
-- Create a list of match IDs to filter data in the CTE
WHERE id IN (
SELECT id
FROM match
WHERE season = '2013/2014' AND EXTRACT(MONTH FROM date) = 08))
-- Select the league name and average of goals in the CTE
SELECT
l.name,
AVG(match_list.goals)
FROM league AS l
-- Join the CTE onto the league table
LEFT JOIN match_list ON l.id = match_list.country_id
GROUP BY l.name;
DataFrameas
df6
variable
SELECT
-- Select the league name and average goals scored
l.name AS league,
AVG(m.home_goal + m.away_goal) AS avg_goals,
-- Rank leagues in descending order by average goals
RANK() OVER(ORDER BY AVG(m.home_goal + m.away_goal) DESC) AS league_rank
FROM league AS l
LEFT JOIN match AS m
ON l.id = m.country_id
WHERE m.season = '2011/2012'
GROUP BY l.name
-- Order the query by the rank you created
ORDER BY league_rank;
DataFrameas
df7
variable
SELECT
date,
season,
home_goal,
away_goal,
CASE WHEN hometeam_id = 8673 THEN 'home'
ELSE 'away' END AS warsaw_location,
-- Calculate the average goals scored partitioned by season
AVG(home_goal) OVER(PARTITION BY season) AS season_homeavg,
AVG(away_goal) OVER(PARTITION BY season) AS season_awayavg
FROM match
-- Filter the data set for Legia Warszawa matches only
WHERE
hometeam_id = 8673
OR awayteam_id = 8673
ORDER BY (home_goal + away_goal) DESC;
DataFrameas
df8
variable
SELECT
-- Select the date, home goal, and away goals
date,
home_goal,
away_goal,
-- Create a running total and running average of home goals
SUM(home_goal) OVER(ORDER BY date DESC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS running_total,
AVG(home_goal) OVER(ORDER BY date DESC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS running_avg
FROM match
WHERE
awayteam_id = 9908
AND season = '2011/2012';
DataFrameas
df9
variable
-- Set up the home team CTE
WITH home AS (
SELECT m.id, t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
WHEN m.home_goal < m.away_goal THEN 'MU Loss'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.hometeam_id = t.team_api_id),
-- Set up the away team CTE
away AS (
SELECT m.id, t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Loss'
WHEN m.home_goal < m.away_goal THEN 'MU Win'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.awayteam_id = t.team_api_id)
-- Select team names, the date and goals
SELECT DISTINCT
m.date,
home.team_long_name AS home_team,
away.team_long_name AS away_team,
m.home_goal, m.away_goal
-- Join the CTEs onto the match table
FROM match AS m
LEFT JOIN home ON m.id = home.id
LEFT JOIN away ON m.id = away.id
WHERE m.season = '2014/2015'
AND (home.team_long_name = 'Manchester United'
OR away.team_long_name = 'Manchester United');
DataFrameas
df10
variable
--Überbrutál mindenazegyben
-- Set up the home team CTE
WITH home AS (
SELECT m.id, t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Win'
WHEN m.home_goal < m.away_goal THEN 'MU Loss'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.hometeam_id = t.team_api_id),
-- Set up the away team CTE
away AS (
SELECT m.id, t.team_long_name,
CASE WHEN m.home_goal > m.away_goal THEN 'MU Loss'
WHEN m.home_goal < m.away_goal THEN 'MU Win'
ELSE 'Tie' END AS outcome
FROM match AS m
LEFT JOIN team AS t ON m.awayteam_id = t.team_api_id)
-- Select columns and and rank the matches by goal difference
SELECT DISTINCT
date,
home.team_long_name AS home_team,
away.team_long_name AS away_team,
m.home_goal, m.away_goal,
RANK() OVER(ORDER BY ABS(home_goal - away_goal) DESC) as match_rank
-- Join the CTEs onto the match table
FROM match AS m
LEFT JOIN home ON m.id = home.id
LEFT JOIN away ON m.id = away.id
WHERE m.season = '2014/2015'
AND ((home.team_long_name = 'Manchester United' AND home.outcome = 'MU Loss')
OR (away.team_long_name = 'Manchester United' AND away.outcome = 'MU Loss'));