Skip to content

Joining Data with SQL

Here you can access every table used in the course. To access each table, you will need to specify the world schema in your queries (e.g., world.countries for the countries table, and world.languages for the languages table).


Note: When using sample databases such as those that contain course data, you have read-only access. You can run queries, but cannot make any changes such as adding, deleting, or modifying the data (e.g., creating tables, views, etc.).

Spinner
DataFrame as
df
variable
-- Which business category has the most businesses founded before 1924?
--
-- businesses and new_businesses are stacked with UNION ALL *before* joining
-- to categories. Joining both tables to categories in one chain pairs every
-- old business with every new business that shares a category_code, so
-- COUNT(*) returns the product of the two per-category counts and silently
-- drops categories that are missing from either table.
WITH all_businesses AS (
    -- One row per business from either source table.
    -- NOTE(review): assumes the two tables hold disjoint sets of businesses;
    -- switch to UNION if the same business can appear in both.
    SELECT
        category_code,
        year_founded
    FROM public.businesses
    UNION ALL
    SELECT
        category_code,
        year_founded
    FROM public.new_businesses
),

businesses_over_century AS (
    -- Businesses founded before 1924, labelled with their category name.
    SELECT
        cat.category,
        ab.year_founded
    FROM public.categories AS cat
    INNER JOIN all_businesses AS ab
        ON cat.category_code = ab.category_code
    -- NOTE(review): the literal is kept quoted as in the original so it
    -- resolves to the column's own type; if year_founded is numeric, prefer
    -- the bare integer 1924 — confirm the column type.
    WHERE ab.year_founded < '1924'
)

SELECT
    category,
    COUNT(*) AS business_count
FROM businesses_over_century
GROUP BY category
-- category is a tie-breaker so the single returned row is deterministic.
ORDER BY business_count DESC, category
LIMIT 1;
Spinner
DataFrame as
df1
variable
-- Per continent, count the countries with no matching row in businesses
-- (old_count) and with no matching row in new_businesses (new_count).
WITH old_businesses_count AS (
    -- Anti-join: after the LEFT JOIN, rows where ob.business IS NULL are
    -- countries with no match in businesses (or a match whose business
    -- value is itself NULL).
    SELECT
        c.continent,
        COUNT(*) AS old_count
    FROM public.countries AS c
    LEFT JOIN public.businesses AS ob
        ON c.country_code = ob.country_code
    WHERE ob.business IS NULL
    GROUP BY c.continent
),

new_businesses_count AS (
    -- Same anti-join against new_businesses.
    SELECT
        c.continent,
        COUNT(*) AS new_count
    FROM public.countries AS c
    LEFT JOIN public.new_businesses AS nb
        ON c.country_code = nb.country_code
    WHERE nb.business IS NULL
    GROUP BY c.continent
)

SELECT
    -- A FULL OUTER JOIN leaves one side NULL when a continent appears in
    -- only one CTE, so take the continent from whichever side has it and
    -- treat a missing count as 0. Without this, the continent label and
    -- the total both come back NULL for such rows.
    COALESCE(nbc.continent, obc.continent) AS continent,
    COALESCE(nbc.new_count, 0) AS new_count,
    COALESCE(obc.old_count, 0) AS old_count,
    COALESCE(nbc.new_count, 0) + COALESCE(obc.old_count, 0) AS total
FROM new_businesses_count AS nbc
FULL OUTER JOIN old_businesses_count AS obc
    ON nbc.continent = obc.continent
ORDER BY continent;
Spinner
DataFrame as
df2
variable
-- Oldest business on each continent: find the minimum year_founded per
-- continent, then return every business whose founding year matches it
-- (ties on the minimum year are all returned).
WITH business_locations AS (
    -- Each business paired with its country and continent.
    SELECT
        business,
        year_founded,
        country,
        continent
    FROM businesses
    INNER JOIN countries
        USING (country_code)
),

earliest_by_continent AS (
    -- Earliest founding year seen on each continent.
    SELECT
        continent,
        MIN(year_founded) AS year_founded
    FROM business_locations
    GROUP BY continent
)

SELECT
    bl.continent,
    bl.business,
    bl.year_founded,
    bl.country
FROM business_locations AS bl
INNER JOIN earliest_by_continent AS ebc
    ON bl.continent = ebc.continent
    AND bl.year_founded = ebc.year_founded;
Spinner
DataFrame as
theory
variable
-- LEFT JOIN with an aggregate function and GROUP BY:
-- average GDP per capita by region for 2010.
--
-- The year filter lives in the ON clause rather than WHERE. A WHERE filter
-- on a column of the right-hand table rejects the NULL-extended rows the
-- LEFT JOIN produces, which silently turns it into an INNER JOIN. With the
-- filter in ON, every country is kept, and a region with no 2010 economy
-- rows appears with a NULL avg_gdp.
SELECT
    region,
    AVG(gdp_percapita) AS avg_gdp
FROM countries AS c
LEFT JOIN economies AS e
    ON c.code = e.code
    AND e.year = 2010
GROUP BY region;
Spinner
DataFrame as
theory
variable
-- LEFT JOIN with an aggregate function, GROUP BY, ORDER BY and LIMIT:
-- the ten regions with the highest average GDP per capita in 2010.
--
-- The year filter lives in the ON clause rather than WHERE. A WHERE filter
-- on a column of the right-hand table rejects the NULL-extended rows the
-- LEFT JOIN produces, which silently turns it into an INNER JOIN.
SELECT
    region,
    AVG(gdp_percapita) AS avg_gdp
FROM countries AS c
LEFT JOIN economies AS e
    ON c.code = e.code
    AND e.year = 2010
GROUP BY region
-- Regions without 2010 data have a NULL average. NULLS LAST keeps them from
-- sorting ahead of real values in a descending sort; region breaks ties so
-- the LIMIT is deterministic.
ORDER BY avg_gdp DESC NULLS LAST, region
LIMIT 10;
Spinner
DataFrame as
theory
variable
-- Chaining joins: country name with fertility and unemployment rates for
-- each year that has both a population row and an economy row.
--
-- The base table is countries; the original read "customers", which does
-- not match the c.code / economies join used by the other queries here.
-- NOTE(review): the table name "population" is kept as written — confirm it
-- is not meant to be "populations".
SELECT
    name,
    e.year,
    fertility_rate,
    unemployment_rate
FROM countries AS c
INNER JOIN population AS p
    ON c.code = p.country_code
INNER JOIN economies AS e
    ON c.code = e.code
    -- Match on year as well, so each population row pairs only with the
    -- economy row for the same country and year.
    AND p.year = e.year;