Skip to content
Own SQL Queries: Unicorn Companies
Tables to be used:
DataFrameas
companies
variable
SELECT * FROM public.companies LIMIT 3;DataFrameas
industries
variable
SELECT * FROM public.industries LIMIT 3;DataFrameas
dates
variable
SELECT * FROM public.dates LIMIT 3;DataFrameas
product_emissions
variable
SELECT * FROM public.product_emissions LIMIT 3;DataFrameas
funding
variable
SELECT * FROM public.funding LIMIT 3;Questions and Queries
1. Major investors
Which investors are funding the largest number of companies and what industries are they investing in?
DataFrameas
df4
variable
WITH investors AS(
SELECT
company_id,
SPLIT_PART(select_investors, ',', 1) AS investor_1,
SPLIT_PART(select_investors, ',', 2) AS investor_2,
SPLIT_PART(select_investors, ',', 3) AS investor_3
FROM public.funding
),
inv_union AS(
SELECT TRIM(investor_1) AS investor, company_id
FROM investors
UNION ALL
SELECT TRIM(investor_2) AS investor, company_id
FROM investors
UNION ALL
SELECT TRIM(investor_3) AS investor, company_id
FROM investors
),
nr_of_fund AS(
SELECT investor, COUNT(company_id) AS nr_of_companies_funded
FROM inv_union
GROUP BY investor
ORDER BY COUNT(company_id) DESC
),
inv_industries AS(
SELECT DISTINCT investor, industry
FROM inv_union
LEFT JOIN public.industries USING(company_id)
ORDER BY investor
)
SELECT
investor,
nr_of_companies_funded,
industries_invested_in
FROM (SELECT
investor,
STRING_AGG(industry, ', ' ORDER BY industry) AS industries_invested_in
FROM inv_industries
GROUP BY investor) AS invest_ind
JOIN nr_of_fund USING(investor)
WHERE investor != ''
ORDER BY nr_of_companies_funded DESC;2. Continents with highest increase in unicorn companies
Which continents were characterized by the highest increase of unicorn companies on the year intervals of 2007-2014, 2015-2020 and >2020?
DataFrameas
df
variable
WITH bfr_2015 AS (
SELECT
ROW_NUMBER() OVER() AS row_n,
continent AS cnt_bfr
FROM companies
JOIN dates USING(company_id)
WHERE extract(YEAR FROM date_joined) < 2015
GROUP BY continent
ORDER BY COUNT(*)
),
btwn_15_20 AS (
SELECT
ROW_NUMBER() OVER() AS row_n,
continent AS cnt_btwn
FROM companies
JOIN dates USING(company_id)
WHERE extract(YEAR FROM date_joined) >= 2015 AND
extract(YEAR FROM date_joined) < 2020
GROUP BY continent
ORDER BY COUNT(*)
),
aft_2020 AS (
SELECT
ROW_NUMBER() OVER() AS row_n,
continent AS cnt_aft
FROM companies
JOIN dates USING(company_id)
WHERE extract(YEAR FROM date_joined) >= 2020
GROUP BY continent
ORDER BY COUNT(*)
)
SELECT
ROW_NUMBER() OVER() AS rank,
cnt_bfr AS top_continents_before_2015,
cnt_btwn AS top_continents_2015_to_2020,
cnt_aft AS top_continents_after_2020
FROM bfr_2015
JOIN btwn_15_20 USING(row_n)
JOIN aft_2020 USING(row_n);3. Industries with the most rapid 'unicorn' becoming companies
In which industries do companies take the least amount of time from founding to becoming a unicorn company?
DataFrameas
df1
variable
WITH diffs AS (
SELECT
industry,
EXTRACT(YEAR FROM date_joined) - year_founded AS diff
FROM public.companies AS c
JOIN public.dates AS d USING(company_id)
JOIN public.industries AS i USING(company_id)
)
SELECT
RANK() OVER(ORDER BY mean) AS ranking,
industry,
ROUND(mean::NUMERIC,2) AS average_time_in_years,
med AS median_time_in_years
FROM (SELECT
industry,
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY diff) AS med,
AVG(diff) AS mean
FROM diffs
GROUP BY industry) AS temp_tbl;4. Companies with the highest carbon footprint
Within each year, what are the three unicorn companies that generate largest carbon footprint?
DataFrameas
df2
variable
WITH cte AS (
SELECT year, company, SUM(carbon_footprint_pcf) AS ftprnt
FROM public.product_emissions
GROUP BY year, company
)
SELECT year, company, ranking
FROM (SELECT
year,
company,
RANK() OVER(PARTITION BY year ORDER BY ftprnt DESC) AS ranking
FROM cte) AS ranked
WHERE ranking <= 3;