Skip to content
Joining Data in SQL
Joining Data with SQL
Here you can access every table used in the course. To access each table, you will need to specify the world schema in your queries (e.g., world.countries for the countries table, and world.languages for the languages table).
Note: When using sample databases such as those that contain course data, you have read-only access. You can run queries, but cannot make any changes such as adding, deleting, or modifying the data (e.g., creating tables, views, etc.).
DataFrame as df variable
-- Business category with the most businesses founded before 1924
-- (more than a century old, taking 2024 as the reference year).
--
-- businesses and new_businesses are stacked with UNION ALL so that every
-- business is counted exactly once. Joining both tables to categories on
-- category_code alone pairs each old row with each new row of the same
-- category, which multiplies the counts and drops categories that appear in
-- only one of the two tables.
WITH all_businesses AS (
    SELECT
        category_code,
        year_founded
    FROM public.businesses
    UNION ALL
    SELECT
        category_code,
        year_founded
    FROM public.new_businesses
),
businesses_over_century AS (
    SELECT cat.category
    FROM public.categories AS cat
    INNER JOIN all_businesses AS ab
        ON cat.category_code = ab.category_code
    -- Numeric literal instead of the string '1924':
    -- assumes year_founded is an integer column -- TODO confirm.
    WHERE ab.year_founded < 1924
)
SELECT
    category,
    COUNT(*) AS business_count
FROM businesses_over_century
GROUP BY category
-- category is a tie-break so LIMIT 1 is deterministic when counts are equal.
ORDER BY
    business_count DESC,
    category
LIMIT 1;

-- DataFrame as df1 variable
-- Per continent: number of countries with no row in businesses (old_count),
-- number of countries with no row in new_businesses (new_count), and the sum
-- of the two counts (total).
WITH old_businesses_count AS (
    -- Anti-join: keep only countries for which the LEFT JOIN found no match.
    SELECT
        c.continent,
        COUNT(*) AS old_count
    FROM public.countries AS c
    LEFT JOIN public.businesses AS ob
        ON c.country_code = ob.country_code
    -- assumes business is never NULL on a matched row -- TODO confirm
    WHERE ob.business IS NULL
    GROUP BY c.continent
),
new_businesses_count AS (
    -- Same anti-join against new_businesses.
    SELECT
        c.continent,
        COUNT(*) AS new_count
    FROM public.countries AS c
    LEFT JOIN public.new_businesses AS nb
        ON c.country_code = nb.country_code
    -- assumes business is never NULL on a matched row -- TODO confirm
    WHERE nb.business IS NULL
    GROUP BY c.continent
)
-- A FULL OUTER JOIN leaves one side NULL when a continent appears in only one
-- CTE, so the continent and both counts are coalesced: the continent is taken
-- from whichever side has it, and a missing count means zero countries.
SELECT
    COALESCE(nbc.continent, obc.continent) AS continent,
    COALESCE(nbc.new_count, 0) AS new_count,
    COALESCE(obc.old_count, 0) AS old_count,
    COALESCE(nbc.new_count, 0) + COALESCE(obc.old_count, 0) AS total
FROM new_businesses_count AS nbc
FULL OUTER JOIN old_businesses_count AS obc
    ON nbc.continent = obc.continent;

-- DataFrame as df2 variable
-- Oldest business on each continent: every business whose founding year
-- equals the minimum founding year of its continent (ties are all returned).
WITH located_businesses AS (
    -- Each business together with the country/continent it is joined to.
    SELECT
        business,
        year_founded,
        country,
        continent
    FROM businesses
    INNER JOIN countries
        USING (country_code)
),
earliest_by_continent AS (
    -- Smallest founding year found on each continent.
    SELECT
        continent,
        MIN(year_founded) AS year_founded
    FROM located_businesses
    GROUP BY continent
)
SELECT
    lb.continent,
    lb.business,
    lb.year_founded,
    lb.country
FROM located_businesses AS lb
INNER JOIN earliest_by_continent AS ebc
    ON lb.continent = ebc.continent
    AND lb.year_founded = ebc.year_founded;

-- DataFrame as theory variable
-- LEFT JOIN WITH AGGREGATE FUNCTION AND GROUP BY --
-- Average 2010 gdp_percapita for each region.
--
-- The year filter lives in the ON clause. Filtering the economies column in
-- WHERE rejects the NULL-extended rows produced by the outer join and silently
-- turns the LEFT JOIN into an INNER JOIN. With the filter in ON, a region with
-- no 2010 economies rows is still returned, with a NULL avg_gdp.
SELECT
    region,
    AVG(gdp_percapita) AS avg_gdp
FROM countries AS c
LEFT JOIN economies AS e
    ON c.code = e.code
    AND e.year = 2010
GROUP BY region;

-- DataFrame as theory variable
-- left join with agg function, group by, order by and limit --
-- Ten regions with the highest average 2010 gdp_percapita.
--
-- The year filter lives in the ON clause: filtering the economies column in
-- WHERE rejects the NULL-extended rows and silently turns the LEFT JOIN into
-- an INNER JOIN.
SELECT
    region,
    AVG(gdp_percapita) AS avg_gdp
FROM countries AS c
LEFT JOIN economies AS e
    ON c.code = e.code
    AND e.year = 2010
GROUP BY region
-- NULLS LAST: regions without 2010 data have a NULL average, which would
-- otherwise sort first under DESC and take the top slots.
-- region is a tie-break so the LIMIT is deterministic.
ORDER BY
    avg_gdp DESC NULLS LAST,
    region
LIMIT 10;

-- DataFrame as theory variable
-- chaining joins
-- Two INNER JOINs chained off the same base table: rows are returned only
-- when c has a match in both population (on code = country_code) and
-- economies (on code). The second join also requires p.year = e.year, so the
-- joined p and e rows refer to the same year.
-- NOTE(review): the neighbouring queries alias `countries` as c and join it on
-- code; `customers` here may be a typo for `countries` -- confirm against the
-- schema before changing it.
SELECT
name, e.year,
fertility_rate,
unemployment_rate
FROM customers AS c
INNER JOIN population AS p
ON c.code = p.country_code
INNER JOIN economies AS e
ON e.code = c.code
AND p.year = e.year