Skip to content
This project analyses and compares the weekly hours viewed of different shows on Netflix. The table used for the analysis all_weeks_global
with the week_commencing_date
, show_title
, season_title
and weekly_hours
viewed as the main fields used for the project. Three analysis were performed:
- Netflix's Top Christmas English Films on first week of December 2021 ordered by weekly hours viewed
- Comparing the weekly hours viewed for the Top 10 shows that appeared in all four weeks of December 2021, figures rounded to the nearest million
- Comparing hours viewed of 5 seasons of Money Heist in the dataset in September 2021, figures rounded to the nearest million
DataFrameavailable as
df4
variable
-- Netflix's Top Christmas English Films on first week of December 2021
SELECT
RANK() OVER(ORDER BY weekly_hours_viewed DESC) AS rank,
EXTRACT(month FROM week::TIMESTAMP) AS month,
show_title,
weekly_hours_viewed
FROM all_weeks_global
WHERE category IN ('Films (English)')
AND show_title LIKE '%Christmas%'
AND EXTRACT(day FROM week::TIMESTAMP) = 5
ORDER BY weekly_hours_viewed DESC;
DataFrameavailable as
df2
variable
-- Comparing the weekly hours viewed for the Top 10 shows that appeared in all four weeks of December 2021, figures rounded to the nearest million
-- Computing first to fourth week hours
WITH first_week AS (
SELECT
show_title,
season_title,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed
FROM all_weeks_global
WHERE EXTRACT(day FROM week::TIMESTAMP) = 5
AND EXTRACT(month FROM week::TIMESTAMP) = 12
AND EXTRACT(year FROM week::TIMESTAMP) = 2021),
second_week AS (
SELECT
show_title,
season_title,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed
FROM all_weeks_global
WHERE EXTRACT(day FROM week::TIMESTAMP) = 12
AND EXTRACT(month FROM week::TIMESTAMP) = 12
AND EXTRACT(year FROM week::TIMESTAMP) = 2021),
third_week AS (
SELECT
show_title,
season_title,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed
FROM all_weeks_global
WHERE EXTRACT(day FROM week::TIMESTAMP) = 19
AND EXTRACT(month FROM week::TIMESTAMP) = 12
AND EXTRACT(year FROM week::TIMESTAMP) = 2021),
fourth_week AS (
SELECT
show_title,
season_title,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed
FROM all_weeks_global
WHERE EXTRACT(day FROM week::TIMESTAMP) = 26
AND EXTRACT(month FROM week::TIMESTAMP) = 12
AND EXTRACT(year FROM week::TIMESTAMP) = 2021)
-- Return rank, show title, season, title, and weekly hours from week 1-4
SELECT
RANK() OVER(ORDER BY f.weekly_hours_viewed + s.weekly_hours_viewed + t.weekly_hours_viewed + fo.weekly_hours_viewed DESC) AS rank,
f.show_title,
f.season_title,
f.weekly_hours_viewed/1000000 AS first_week_hours,
s.weekly_hours_viewed/1000000 AS second_week_hours,
t.weekly_hours_viewed/1000000 AS third_week_hours,
fo.weekly_hours_viewed/1000000 AS fourth_week_hours,
ROUND((f.weekly_hours_viewed + s.weekly_hours_viewed + t.weekly_hours_viewed + fo.weekly_hours_viewed)/1000000,2) AS total_hours_december,
ROUND(((f.weekly_hours_viewed + s.weekly_hours_viewed + t.weekly_hours_viewed + fo.weekly_hours_viewed)/4)/1000000,2) AS avg_hours_per_week
FROM first_week AS f
INNER JOIN second_week AS s -- JOIN only if title appeared in the first week of December
ON f.show_title = s.show_title AND f.season_title = s.season_title
INNER JOIN third_week AS t
ON s.show_title = t.show_title AND s.season_title = t.season_title
INNER JOIN fourth_week AS fo
ON t.show_title = fo.show_title AND t.season_title = fo.season_title
ORDER BY total_hours_december DESC
LIMIT 10;
DataFrameavailable as
df5
variable
-- Comparing hours viewed of 5 seasons of Money Heist in the dataset in September 2021 (values in million)
-- Computing the weekly views for each part of Money Heist
WITH part_1_views AS (
SELECT
show_title,
season_title,
EXTRACT(year FROM week::TIMESTAMP) AS year,
EXTRACT(month FROM week::TIMESTAMP) AS month,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed AS weekly_views
FROM all_weeks_global
WHERE show_title IN ('Money Heist')
AND season_title IN ('Money Heist: Part 1')
AND EXTRACT(year FROM week::TIMESTAMP) = 2021
AND EXTRACT(month FROM week::TIMESTAMP) = 9),
part_2_views AS (
SELECT
show_title,
season_title,
EXTRACT(year FROM week::TIMESTAMP) AS year,
EXTRACT(month FROM week::TIMESTAMP) AS month,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed AS weekly_views
FROM all_weeks_global
WHERE show_title IN ('Money Heist')
AND season_title IN ('Money Heist: Part 2')
AND EXTRACT(year FROM week::TIMESTAMP) = 2021
AND EXTRACT(month FROM week::TIMESTAMP) = 9),
part_3_views AS (
SELECT
show_title,
season_title,
EXTRACT(year FROM week::TIMESTAMP) AS year,
EXTRACT(month FROM week::TIMESTAMP) AS month,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed AS weekly_views
FROM all_weeks_global
WHERE show_title IN ('Money Heist')
AND season_title IN ('Money Heist: Part 3')
AND EXTRACT(year FROM week::TIMESTAMP) = 2021
AND EXTRACT(month FROM week::TIMESTAMP) = 9),
part_4_views AS (
SELECT
show_title,
season_title,
EXTRACT(year FROM week::TIMESTAMP) AS year,
EXTRACT(month FROM week::TIMESTAMP) AS month,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed AS weekly_views
FROM all_weeks_global
WHERE show_title IN ('Money Heist')
AND season_title IN ('Money Heist: Part 4')
AND EXTRACT(year FROM week::TIMESTAMP) = 2021
AND EXTRACT(month FROM week::TIMESTAMP) = 9),
part_5_views AS (
SELECT
show_title,
season_title,
EXTRACT(year FROM week::TIMESTAMP) AS year,
EXTRACT(month FROM week::TIMESTAMP) AS month,
EXTRACT(day FROM week::TIMESTAMP) AS day,
weekly_hours_viewed AS weekly_views
FROM all_weeks_global
WHERE show_title IN ('Money Heist')
AND season_title IN ('Money Heist: Part 5')
AND EXTRACT(year FROM week::TIMESTAMP) = 2021
AND EXTRACT(month FROM week::TIMESTAMP) = 9),
views AS(
SELECT
(CASE WHEN p1.day = 5 THEN '1'
WHEN p2.day = 12 THEN '2'
WHEN p3.day = 19 THEN '3'
WHEN p4.day = 26 THEN '4' END) AS week,
ROUND(p1.weekly_views/1000000,2) AS part_1_weekly_views,
ROUND(p2.weekly_views/1000000,2) AS part_2_weekly_views,
ROUND(p3.weekly_views/1000000,2) AS part_3_weekly_views,
ROUND(p4.weekly_views/1000000,2) AS part_4_weekly_views,
ROUND(p5.weekly_views/1000000,2) AS part_5_weekly_views
FROM part_1_views AS p1
LEFT JOIN part_2_views AS p2
ON p1.day = p2.day
LEFT JOIN part_3_views AS p3
ON p1.day = p3.day
LEFT JOIN part_4_views AS p4
ON p1.day = p4.day
LEFT JOIN part_5_views AS p5
ON p1.day = p5.day
GROUP BY week, p1.weekly_views, p2.weekly_views, p3.weekly_views, p4.weekly_views, p5.weekly_views
ORDER BY week)
SELECT *
FROM views
UNION ALL
SELECT
'Total' AS week,
SUM(part_1_weekly_views) AS part_1_weekly_views,
SUM(part_2_weekly_views) AS part_2_weekly_views,
SUM(part_3_weekly_views) AS part_3_weekly_views,
SUM(part_4_weekly_views) AS part_4_weekly_views,
SUM(part_5_weekly_views) AS part_5_weekly_views
FROM views
DataFrameavailable as
df1
variable
SELECT *
FROM all_weeks_global;