Skip to content

Data Source: https://ourworldindata.org/covid-deaths Duration: 2020/03/01 - 2023/03/31 Using SQL to explore the Covid-19 data sets.

Spinner
DataFrameas
df2
variable
SELECT * FROM 'CovidVaccination.csv' AS df2
ORDER BY 3, 4;
Spinner
DataFrameas
df1
variable
SELECT * FROM 'CovidDeaths.csv' AS df1
WHERE continent is not nulL
ORDER BY 3, 4;
Spinner
DataFrameas
df3
variable
SELECT location, date, total_cases, new_cases, total_deaths, population
FROM df1
ORDER BY 1, 2;
Spinner
DataFrameas
df
variable
-- Looking at Total Cases VS Total Deaths
-- Shows likelihood of dying if you contact covid in France
SELECT location, date, total_cases, total_deaths, (total_deaths/total_cases)*100 AS death_percentage
FROM df1
WHERE location = 'France'
ORDER BY 1, 2;
Spinner
DataFrameas
df5
variable
-- Looking at Total Cases VS Population
-- Shows what percentage of population got Covid
SELECT location, date, population, total_cases, (total_cases/population)*100 AS Percent_populaiton_infected
FROM df1
WHERE location = 'France'
ORDER BY 1, 2;
Spinner
DataFrameas
df6
variable
-- Looking at Countries with Hightest Infection Rate compared to Population
SELECT location, population, MAX(total_cases) AS highest_infection_count, MAX((total_cases/population))*100 AS Percent_population_infected_max
FROM df1
GROUP BY location, population
ORDER BY 4 DESC
Spinner
DataFrameas
df10
variable
-- Showing Countries with Highest Death Count per Population

SELECT location, MAX(cast(total_deaths as int)) as Total_death_count
FROM df1
GROUP BY location
ORDER BY 2 DESC
Spinner
DataFrameas
df7
variable
-- Showing contintents with the highest death count per population
SELECT continent, MAX(cast(total_deaths as int)) as Total_death_count
FROM df1
GROUP BY continent
ORDER BY 2 DESC
Spinner
DataFrameas
df8
variable
-- Gobal numbers
SELECT date, SUM(new_cases), SUM(new_deaths), SUM(new_deaths)/SUM(new_cases)*100 
FROM df1
GROUP BY date, 
ORDER BY 1, 2;
Spinner
DataFrameas
df9
variable
SELECT SUM(new_cases) AS total_cases, SUM(new_deaths) AS total_deaths, SUM(new_deaths)/SUM(new_cases)*100 AS death_percentage
FROM df1 
ORDER BY 1, 2;
Spinner
DataFrameas
dea
variable
-- Looking at Total Population VS Vaccinations
-- Using CTE with name dea which includes the following columns: continent, location, date, population
-- Using CTE with name vac which includes the following columns: continent, location date, new_vaccinations

WITH dea AS 
(SELECT continent, location, CAST(date AS date) AS date, population
FROM 'CovidDeaths.csv'
ORDER BY 2, 3),
vac AS 
(SELECT continent, location, CAST(date AS date) AS date, new_vaccinations
FROM 'CovidVaccination.csv'
ORDER BY 2, 3)
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 
FROM vac JOIN dea 
ON vac.location = dea.location AND vac.date = dea.date
ORDER BY 1, 2, 3;
Spinner
DataFrameas
df4
variable
-- lookaing at rolling people vaccinated
WITH dea AS 
(SELECT continent, location, CAST(date AS date) AS date, population
FROM 'CovidDeaths.csv'
ORDER BY 2, 3),
vac AS 
(SELECT continent, location, CAST(date AS date) AS date, new_vaccinations
FROM 'CovidVaccination.csv'
ORDER BY 2, 3)
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(vac.new_vaccinations) OVER (Partition by dea.location ORDER BY dea.location, dea.date) AS Rolling_people_vaccinated
FROM vac JOIN dea 
ON vac.location = dea.location AND vac.date = dea.date
ORDER BY 1, 2, 3;
Spinner
DataFrameas
df11
variable
--Lookaing at location in France
WITH dea AS 
(SELECT continent, location, CAST(date AS date) AS date, population
FROM 'CovidDeaths.csv'
ORDER BY 2, 3),
vac AS 
(SELECT continent, location, CAST(date AS date) AS date, new_vaccinations
FROM 'CovidVaccination.csv'
ORDER BY 2, 3),
popvsvac AS (SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(vac.new_vaccinations) OVER (Partition by dea.location ORDER BY dea.location, dea.date) AS Rolling_people_vaccinated
FROM vac JOIN dea 
ON vac.location = dea.location AND vac.date = dea.date
ORDER BY 2, 3)

SELECT *, (Rolling_people_vaccinated/population)*100
FROM popvsvac
WHERE location = 'France';
Spinner
DataFrameas
df13
variable
-- Creating a table named dea
-- Creating a table named vac
-- looking at the location in France
CREATE TABLE dea
(
continent nvarchar(255),
location nvarchar(255),
date datetime,
population numeric
);

INSERT INTO dea
SELECT continent, location, CAST(date AS date) AS date, population
FROM 'CovidDeaths.csv';

CREATE TABLE vac
(
continent nvarchar(255),
location nvarchar(255),
date datetime,
new_vaccinations numeric
);

INSERT INTO vac
SELECT continent, location, CAST(date AS date) AS date, new_vaccinations
FROM 'CovidVaccination.csv';

WITH popvsvac AS (
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(vac.new_vaccinations) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS Rolling_people_vaccinated
FROM dea JOIN vac 
ON vac.location = dea.location AND vac.date = dea.date
ORDER BY dea.location, dea.date)

SELECT *, (Rolling_people_vaccinated/population)*100
FROM popvsvac
WHERE location = 'France';

CREATE VIEW percentpopulationvaccinated AS 
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations, SUM(vac.new_vaccinations) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS Rolling_people_vaccinated
FROM dea JOIN vac 
ON vac.location = dea.location AND vac.date = dea.date