Skip to content
Project portfolio - World Nations SQL Analysis
World Nations
Dataset nation consists of seven tables:
- continents
- countries
- country_languages
- country_stats
- languages
- region_areas
- regions
Dataset insights to find:
- Continents area, population and population per 1km2.
- Top ten countries in GDP groth per capita between year 1990 and 2018.
- Top five languages used in the world - nr of countries that uses this language.
- List of countries, where Polish language is not official language.
- List of languages, which are not official in any country.
- Population groth/decline in every country between year 2000 and 2018.
Dataset problems:
- Russian Federation is matched to Europe, although most of its teritory belongs to Asia. That's why continent area data will be incorrect in that matter.
DataFrameas
df
variable
-- table using CTE for continent area
WITH area AS (
-- Select each continent area
SELECT continent, SUM(region_area) AS continent_area
-- Join continents, regions and region_areas tables to find area for each continent
FROM (
SELECT cont.continent_id, reg.region_id, cont.name AS continent, reg.name AS region, reg_a.region_area AS region_area
FROM continents AS cont
INNER JOIN regions AS reg ON cont.continent_id = reg.continent_id
INNER JOIN region_areas AS reg_a ON reg.name = reg_a.region_name
) AS area
GROUP BY continent
),
-- table using CTE for continent population
population AS (
-- Select each continent population
SELECT continents.name AS continent, SUM(country_stats.population) AS continent_population
-- Join continents regions, countries and countr_stats tables to find population for each continent
FROM continents
INNER JOIN regions ON continents.continent_id = regions.continent_id
INNER JOIN countries ON regions.region_id = countries.region_id
INNER JOIN country_stats ON countries.country_id = country_stats.country_id
WHERE year = '2016'
GROUP BY continent
)
-- final query to find how much people per 1 km2 is in each continent
SELECT area.continent, ROUND(continent_area/1000000, 2) as area_mln_km2, ROUND(continent_population/1000000, 2) AS population_mln_people, ROUND(continent_population/continent_area,0) AS people_per_1km2
FROM area
INNER JOIN population ON area.continent = population.continent
ORDER BY people_per_1km2 DESC;DataFrameas
df1
variable
-- define gdp and population per country in 1990
WITH 1990_countries AS (
SELECT countries.country_id, countries.name AS country, year, gdp AS 1990_gdp, population AS 1990_population
FROM country_stats
INNER JOIN countries ON country_stats.country_id = countries.country_id
WHERE year = 1990
),
-- define gdp and population per country in 2018
2018_countries AS (
SELECT countries.country_id, countries.name AS country, year, gdp AS 2018_gdp, population AS 2018_population
FROM country_stats
INNER JOIN countries ON country_stats.country_id = countries.country_id
WHERE year = 2018
)
-- find gdp growth per capita from 1990 to 2018
SELECT 1990_countries.country, (2018_countries.2018_gdp/2018_countries.2018_population - 1990_countries.1990_gdp/1990_countries.1990_population) AS gdp_per_capita_groth
FROM 1990_countries
INNER JOIN 2018_countries ON 1990_countries.country_id = 2018_countries.country_id
ORDER BY gdp_per_capita_groth DESC
LIMIT 10
;
DataFrameas
df2
variable
-- top 5 official languages
SELECT language, COUNT(*) AS count_of_countries
FROM country_languages AS cl
INNER JOIN languages AS l ON cl.language_id = l.language_id
WHERE official = 1
GROUP BY language
ORDER BY count_of_countries DESC
LIMIT 5
;DataFrameas
df3
variable
-- list of countries, where Polish language is not official country language
SELECT countries.name AS country
FROM countries
INNER JOIN country_languages AS cl ON countries.country_id = cl.country_id
INNER JOIN languages AS l ON cl.language_id = l.language_id
WHERE official = 0 AND language = 'Polish';
DataFrameas
df4
variable
-- list of languages, which are not official in any country ordered by number of countries using that language
SELECT language, COUNT(country_id) AS number_of_countries
FROM country_languages AS cl
JOIN languages AS l ON cl.language_id = l.language_id
WHERE official = 0 AND cl.language_id NOT IN (
SELECT language_id
FROM country_languages
WHERE official = 1)
GROUP BY language
ORDER BY number_of_countries DESC;DataFrameas
df5
variable
-- 2018 population in every country
WITH 2018_CTE AS (
SELECT c.country_id, name, population
FROM countries AS c
INNER JOIN country_stats AS cs ON c.country_id = cs.country_id
WHERE year = 2018
),
-- 2000 population in every country
2000_CTE AS (
SELECT c.country_id, name, population
FROM countries AS c
INNER JOIN country_stats AS cs ON c.country_id = cs.country_id
WHERE year = 2000
)
-- final query to find population growth or decline between year 2000 and 2018
SELECT 2018_CTE.name AS country,
CASE WHEN 2018_CTE.population - 2000_CTE.population > 0 THEN 'growth'
ELSE 'decline' END AS population_growth_decline
FROM 2018_CTE
INNER JOIN 2000_CTE ON 2018_CTE.country_id = 2000_CTE.country_id;
DataFrameas
events_per_city
variable
SELECT v.venuecity, COUNT(e.eventid) AS event_count
FROM event e
JOIN venue v ON e.venueid = v.venueid
GROUP BY v.venuecity
ORDER BY event_count DESC
LIMIT 20;