Skip to content

World Nations

Dataset nation consists of seven tables:

  • continents
  • countries
  • country_languages
  • country_stats
  • languages
  • region_areas
  • regions

Dataset insights to find:

  • Continents area, population and population per 1km2.
  • Top ten countries in GDP groth per capita between year 1990 and 2018.
  • Top five languages used in the world - nr of countries that uses this language.
  • List of countries, where Polish language is not official language.
  • List of languages, which are not official in any country.
  • Population groth/decline in every country between year 2000 and 2018.

Dataset problems:

  • Russian Federation is matched to Europe, although most of its teritory belongs to Asia. That's why continent area data will be incorrect in that matter.
Spinner
DataFrameas
df
variable
-- table using CTE for continent area
WITH area AS (
	-- Select each continent area
	SELECT continent, SUM(region_area) AS continent_area
	-- Join continents, regions and region_areas tables to find area for each continent
	FROM (
		SELECT cont.continent_id, reg.region_id, cont.name AS continent, reg.name AS region, reg_a.region_area AS region_area
		FROM continents AS cont 
		INNER JOIN regions AS reg ON cont.continent_id = reg.continent_id
		INNER JOIN region_areas AS reg_a ON reg.name = reg_a.region_name
	) AS area
	GROUP BY continent 
),

-- table using CTE for continent population
population AS (
	-- Select each continent population
	SELECT continents.name AS continent, SUM(country_stats.population) AS continent_population
	-- Join continents regions, countries and countr_stats tables to find population for each continent
	FROM continents 
	INNER JOIN regions ON continents.continent_id = regions.continent_id
	INNER JOIN countries ON regions.region_id = countries.region_id
	INNER JOIN country_stats ON countries.country_id = country_stats.country_id
	WHERE year = '2016'
	GROUP BY continent
)

-- final query to find how much people per 1 km2 is in each continent
SELECT area.continent, ROUND(continent_area/1000000, 2) as area_mln_km2, ROUND(continent_population/1000000, 2) AS population_mln_people, ROUND(continent_population/continent_area,0) AS people_per_1km2
FROM area 
INNER JOIN population ON area.continent = population.continent
ORDER BY people_per_1km2 DESC;
Spinner
DataFrameas
df1
variable
-- define gdp and population per country in 1990
WITH 1990_countries AS (
	SELECT countries.country_id, countries.name AS country, year, gdp AS 1990_gdp, population AS 1990_population
	FROM country_stats
	INNER JOIN countries ON country_stats.country_id = countries.country_id
	WHERE year = 1990
),

-- define gdp and population per country in 2018
2018_countries AS (
	SELECT countries.country_id, countries.name AS country, year, gdp AS 2018_gdp, population AS 2018_population
	FROM country_stats
	INNER JOIN countries ON country_stats.country_id = countries.country_id
	WHERE year = 2018
)

-- find gdp growth per capita from 1990 to 2018
SELECT 1990_countries.country, (2018_countries.2018_gdp/2018_countries.2018_population - 1990_countries.1990_gdp/1990_countries.1990_population) AS gdp_per_capita_groth
FROM 1990_countries
INNER JOIN 2018_countries ON 1990_countries.country_id = 2018_countries.country_id
ORDER BY gdp_per_capita_groth DESC
LIMIT 10
;

Spinner
DataFrameas
df2
variable
-- top 5 official languages
SELECT language, COUNT(*) AS count_of_countries
FROM country_languages AS cl
INNER JOIN languages AS l ON cl.language_id = l.language_id
WHERE official = 1
GROUP BY language
ORDER BY count_of_countries DESC
LIMIT 5
;
Spinner
DataFrameas
df3
variable
-- list of countries, where Polish language is not official country language
SELECT countries.name AS country
FROM countries 
INNER JOIN country_languages AS cl ON countries.country_id = cl.country_id
INNER JOIN languages AS l ON cl.language_id = l.language_id
WHERE official = 0 AND language = 'Polish';
Spinner
DataFrameas
df4
variable
-- list of languages, which are not official in any country ordered by number of countries using that language
SELECT language, COUNT(country_id) AS number_of_countries
FROM country_languages AS cl
JOIN languages AS l ON cl.language_id = l.language_id
WHERE official = 0 AND cl.language_id NOT IN (
	SELECT language_id
	FROM country_languages
	WHERE official = 1)
GROUP BY language 
ORDER BY number_of_countries DESC;
Spinner
DataFrameas
df5
variable
-- 2018 population in every country
WITH 2018_CTE AS (
	SELECT c.country_id, name, population
	FROM countries AS c
	INNER JOIN country_stats AS cs ON c.country_id = cs.country_id
	WHERE year = 2018
),
-- 2000 population in every country	
2000_CTE AS (
	SELECT c.country_id, name, population
	FROM countries AS c
	INNER JOIN country_stats AS cs ON c.country_id = cs.country_id
	WHERE year = 2000
)
-- final query to find population growth or decline between year 2000 and 2018
SELECT 2018_CTE.name AS country, 
	CASE WHEN 2018_CTE.population - 2000_CTE.population > 0 THEN 'growth'
		 ELSE 'decline' END AS population_growth_decline
FROM 2018_CTE 
INNER JOIN 2000_CTE ON 2018_CTE.country_id = 2000_CTE.country_id;
Spinner
DataFrameas
events_per_city
variable
SELECT v.venuecity, COUNT(e.eventid) AS event_count
FROM event e
JOIN venue v ON e.venueid = v.venueid
GROUP BY v.venuecity
ORDER BY event_count DESC
LIMIT 20;