Skip to content
SQL practice
# Start coding here...
DataFrameas
df9
variable
SELECT *
FROM information_schema.tables
WHERE table_schema = 'movies';
DataFrameas
df8
variable
SELECT *
FROM actors
LIMIT 100
DataFrameas
df
variable
SELECT *
FROM movies.movies
OFFSET 10
FETCH FIRST 10 ROWS ONLY
Queryas
query1
variable
SELECT * FROM movies.actors
LIMIT 10
Queryas
query2
variable
SELECT *
FROM cinema.roles
OFFSET 200
Queryas
query3
variable
SELECT *
FROM movies.movies
ORDER BY year_of_release DESC
Queryas
query4
variable
SELECT *
FROM movies.renting
LIMIT 10
Queryas
query5
variable
SELECT m.title, string_agg(a.name, ', ') AS cast
FROM movies.actsin as ai
JOIN movies.movies AS m
ON m.movie_id = ai.movie_id
JOIN movies.actors AS a
on ai.actor_id = a.actor_id
WHERE EXISTS (
SELECT *
FROM movies.movies AS sub
WHERE genre = 'Drama'
AND sub.movie_id = m.movie_id
)
GROUP BY m.title
Analyzing the dvdrentals database
Determining which actor/actress duos appear most frequently
DataFrameas
df3
variable
-- Same actor_id for both, but different names?? This should be impossible
WITH film_duos AS (
SELECT
title,
actor1.actor_id AS actor1_id, actor1.first_name AS actor1_first_name, actor1.last_name AS actor1_last_name,
actor2.actor_id AS actor2_id, actor2.first_name AS actor2_first_name, actor2.last_name AS actor2_last_name--,
FROM dvdrentals.film_actor AS fa1
JOIN dvdrentals.film_actor AS fa2
ON fa1.film_id = fa2.film_id
AND fa1.actor_id < fa2.actor_id
JOIN dvdrentals.actor AS actor1
ON fa1.actor_id = actor1.actor_id
JOIN dvdrentals.actor AS actor2
ON fa2.actor_id = actor2.actor_id
JOIN dvdrentals.film f1
ON fa1.film_id = f1.film_id
)
SELECT
actor1_id, actor1_first_name || actor1_last_name AS actor1_name,
actor2_id, actor2_first_name || actor2_last_name AS actor2_name,
COUNT(*) AS times_acted_together
FROM film_duos
GROUP BY actor1_id, actor1_name, actor2_id, actor2_name
ORDER BY times_acted_together DESC
DataFrameas
df2
variable
Run cancelled
-- Potiential bug? Query works properly when aliases are used for fa1.actor_id and fa2.actor_id
-- If not, duplicated actor_id's are returned, when this is impossible based on the join criteria of fa1.actor_id < fa2.actor_id
SELECT fa1.film_id AS film1, fa2.film_id AS film2, fa1.actor_id AS actor1, fa2.actor_id AS actor2
FROM dvdrentals.film_actor AS fa1
JOIN dvdrentals.film_actor AS fa2
ON fa1.film_id = fa2.film_id
AND fa1.actor_id < fa2.actor_id
DataFrameas
df4
variable
WITH actor_duos AS (
SELECT fa1.actor_id AS actor1_id, fa1.actor_name AS actor1, fa2.actor_id AS actor2_id, fa2.actor_name AS actor2, fa1.film_id
FROM (
SELECT fa1.actor_id, a1.first_name || ' ' || a1.last_name AS actor_name, fa1.film_id
FROM dvdrentals.film_actor AS fa1
JOIN dvdrentals.actor AS a1 ON fa1.actor_id = a1.actor_id
) fa1
JOIN (
SELECT fa2.actor_id, a2.first_name || ' ' || a2.last_name AS actor_name, fa2.film_id
FROM dvdrentals.film_actor AS fa2
JOIN dvdrentals.actor AS a2 ON fa2.actor_id = a2.actor_id
) fa2 ON fa1.film_id = fa2.film_id AND fa1.actor_id < fa2.actor_id
),
film_cast AS (
SELECT f.film_id, ARRAY_AGG(a.actor_id ORDER BY a.actor_id) AS cast
FROM dvdrentals.film AS f
JOIN dvdrentals.film_actor AS fa ON f.film_id = fa.film_id
JOIN dvdrentals.actor AS a ON fa.actor_id = a.actor_id
GROUP BY f.film_id
)
SELECT ad.actor1, ad.actor2, fc.film_id
FROM actor_duos AS ad
JOIN film_cast AS fc ON ad.film_id = fc.film_id
WHERE ad.actor1_id = ANY(fc.cast) AND ad.actor2_id = ANY(fc.cast);