Skip to content

Tables to be used:

Spinner
DataFrameas
companies
variable
SELECT * FROM public.companies LIMIT 3;
Spinner
DataFrameas
industries
variable
SELECT * FROM public.industries LIMIT 3;
Spinner
DataFrameas
dates
variable
SELECT * FROM public.dates LIMIT 3;
Spinner
DataFrameas
product_emissions
variable
SELECT * FROM public.product_emissions LIMIT 3;
Spinner
DataFrameas
funding
variable
SELECT * FROM public.funding LIMIT 3;

Questions and Queries

1. Major investors

Which investors are funding the largest number of companies and what industries are they investing in?
Spinner
DataFrameas
df4
variable
WITH investors AS(
	SELECT
		company_id,
		SPLIT_PART(select_investors, ',', 1) AS investor_1,
		SPLIT_PART(select_investors, ',', 2) AS investor_2,
		SPLIT_PART(select_investors, ',', 3) AS investor_3
	FROM public.funding
	),
	inv_union AS(
	SELECT TRIM(investor_1) AS investor, company_id
	FROM investors
	UNION ALL
	SELECT TRIM(investor_2) AS investor, company_id
	FROM investors
	UNION ALL
	SELECT TRIM(investor_3) AS investor, company_id
	FROM investors
	),
	nr_of_fund AS(
	SELECT investor, COUNT(company_id) AS nr_of_companies_funded
	FROM inv_union
	GROUP BY investor
	ORDER BY COUNT(company_id) DESC
	),
	inv_industries AS(
	SELECT DISTINCT investor, industry
	FROM inv_union
	LEFT JOIN public.industries USING(company_id)
	ORDER BY investor
	)
SELECT
	investor,
	nr_of_companies_funded,
	industries_invested_in
FROM (SELECT
	  	investor,
	  	STRING_AGG(industry, ', ' ORDER BY industry) AS industries_invested_in
	  FROM inv_industries
	  GROUP BY investor) AS invest_ind
JOIN nr_of_fund USING(investor)
WHERE investor != ''
ORDER BY nr_of_companies_funded DESC;

2. Continents with highest increase in unicorn companies

Which continents were characterized by the highest increase of unicorn companies on the year intervals of 2007-2014, 2015-2020 and >2020?
Spinner
DataFrameas
df
variable
WITH bfr_2015 AS (
	SELECT
		ROW_NUMBER() OVER() AS row_n,
		continent AS cnt_bfr
	FROM companies
	JOIN dates USING(company_id)
	WHERE extract(YEAR FROM date_joined) < 2015
	GROUP BY continent
	ORDER BY COUNT(*)
	),
	btwn_15_20 AS (
	SELECT
		ROW_NUMBER() OVER() AS row_n,
		continent AS cnt_btwn
	FROM companies
	JOIN dates USING(company_id)
	WHERE extract(YEAR FROM date_joined) >= 2015 AND
		extract(YEAR FROM date_joined) < 2020
	GROUP BY continent
	ORDER BY COUNT(*)
	),
	aft_2020 AS (
	SELECT
		ROW_NUMBER() OVER() AS row_n,
		continent AS cnt_aft
	FROM companies
	JOIN dates USING(company_id)
	WHERE extract(YEAR FROM date_joined) >= 2020
	GROUP BY continent
	ORDER BY COUNT(*)
	)
SELECT
	ROW_NUMBER() OVER() AS rank,
	cnt_bfr AS top_continents_before_2015,
	cnt_btwn AS top_continents_2015_to_2020,
	cnt_aft AS top_continents_after_2020
FROM bfr_2015
JOIN btwn_15_20 USING(row_n)
JOIN aft_2020 USING(row_n);

3. Industries with the most rapid 'unicorn' becoming companies

In which industries do companies take the least amount of time from founding to becoming a unicorn company?
Spinner
DataFrameas
df1
variable
WITH diffs AS (
	SELECT
		industry,
		EXTRACT(YEAR FROM date_joined) - year_founded AS diff
	FROM public.companies AS c
	JOIN public.dates AS d USING(company_id)
	JOIN public.industries AS i USING(company_id)
	)
SELECT
	RANK() OVER(ORDER BY mean) AS ranking,
	industry,
	ROUND(mean::NUMERIC,2) AS average_time_in_years,
	med AS median_time_in_years
FROM (SELECT
	  	industry,
	  	PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY diff) AS med,
	  	AVG(diff) AS mean
	  FROM diffs
	  GROUP BY industry) AS temp_tbl;

4. Companies with the highest carbon footprint

Within each year, what are the three unicorn companies that generate largest carbon footprint?
Spinner
DataFrameas
df2
variable
WITH cte AS (
	SELECT year, company, SUM(carbon_footprint_pcf) AS ftprnt
	FROM public.product_emissions
	GROUP BY year, company
	)
SELECT year, company, ranking
FROM (SELECT
		year,
	  	company,
	  	RANK() OVER(PARTITION BY year ORDER BY ftprnt DESC) AS ranking
	  FROM cte) AS ranked
WHERE ranking <= 3;