Skip to content
Project - Analyzing Unicorn Companies
Did you know that the average return from investing in stocks is 10% per year? But who wants to be average?!
You have been asked to support an investment firm by analyzing trends in high-growth companies. The firm is interested in understanding which industries are producing the highest valuations and the rate at which new high-value companies are emerging. This information gives the firm competitive insight into industry trends and into how it should structure its portfolio going forward.
You have been given access to their unicorns database, which contains the following tables:
dates
| Column | Description |
|---|---|
| company_id | A unique ID for the company. |
| date_joined | The date that the company became a unicorn. |
| year_founded | The year that the company was founded. |
funding
| Column | Description |
|---|---|
| company_id | A unique ID for the company. |
| valuation | Company value in US dollars. |
| funding | The amount of funding raised in US dollars. |
| select_investors | A list of key investors in the company. |
industries
| Column | Description |
|---|---|
| company_id | A unique ID for the company. |
| industry | The industry that the company operates in. |
companies
| Column | Description |
|---|---|
| company_id | A unique ID for the company. |
| company | The name of the company. |
| city | The city where the company is headquartered. |
| country | The country where the company is headquartered. |
| continent | The continent where the company is headquartered. |
DataFrameas
df3
variable
-- Top three (industry, year) pairs by number of companies that became
-- unicorns in 2019, 2020 or 2021.
-- Output columns: industry, num_unicorns, average_valuation_billions, year.
SELECT
    i.industry,
    -- One joined row per company_id, so counting rows with a company name
    -- gives the number of unicorns directly; no per-company pre-aggregation
    -- is needed.
    COUNT(c.company) AS num_unicorns,
    -- funding.valuation is in US dollars: average first, then scale to
    -- billions so the value matches the column name. The cast keeps
    -- ROUND(numeric, int) applicable whatever numeric type valuation has.
    ROUND(CAST(AVG(f.valuation) AS NUMERIC) / 1000000000, 2) AS average_valuation_billions,
    EXTRACT(YEAR FROM d.date_joined) AS year
FROM industries AS i
-- The date filter below discards rows without a dates match anyway,
-- so this join is written as the INNER JOIN it effectively is.
INNER JOIN dates AS d
    USING (company_id)
LEFT JOIN companies AS c
    USING (company_id)
LEFT JOIN funding AS f
    USING (company_id)
-- Half-open range covering calendar years 2019 through 2021.
WHERE d.date_joined >= DATE '2019-01-01'
    AND d.date_joined < DATE '2022-01-01'
GROUP BY
    i.industry,
    EXTRACT(YEAR FROM d.date_joined)
ORDER BY
    num_unicorns DESC,
    -- Tie-breakers make the LIMIT deterministic when counts are equal.
    industry,
    year
LIMIT 3
DataFrameas
df5
variable
-- Three industries with the most companies that became unicorns in 2020.
-- Output columns: industry, num_unicorns, average_valuation_billions, year.
SELECT
    i.industry,
    -- One joined row per company_id, so a plain count replaces the
    -- per-company subquery plus SUM.
    COUNT(c.company) AS num_unicorns,
    -- funding.valuation is in US dollars: average first, then scale to
    -- billions so the value matches the column name. The cast keeps
    -- ROUND(numeric, int) applicable whatever numeric type valuation has.
    ROUND(CAST(AVG(f.valuation) AS NUMERIC) / 1000000000, 2) AS average_valuation_billions,
    EXTRACT(YEAR FROM d.date_joined) AS year
FROM industries AS i
-- The year filter drops rows with no dates match, so INNER JOIN is exact.
INNER JOIN dates AS d
    USING (company_id)
LEFT JOIN companies AS c
    USING (company_id)
LEFT JOIN funding AS f
    USING (company_id)
-- Half-open range covering calendar year 2020.
WHERE d.date_joined >= DATE '2020-01-01'
    AND d.date_joined < DATE '2021-01-01'
GROUP BY
    i.industry,
    EXTRACT(YEAR FROM d.date_joined)
ORDER BY
    num_unicorns DESC,
    -- Tie-breaker makes the LIMIT deterministic when counts are equal.
    industry
LIMIT 3
DataFrameas
df6
variable
-- 2019 unicorn count and average valuation for three named industries.
-- Output columns: industry, num_unicorns, average_valuation_billions, year.
SELECT
    i.industry,
    -- One joined row per company_id, so a plain count replaces the
    -- per-company subquery plus SUM.
    COUNT(c.company) AS num_unicorns,
    -- funding.valuation is in US dollars: average first, then scale to
    -- billions so the value matches the column name. The cast keeps
    -- ROUND(numeric, int) applicable whatever numeric type valuation has.
    ROUND(CAST(AVG(f.valuation) AS NUMERIC) / 1000000000, 2) AS average_valuation_billions,
    EXTRACT(YEAR FROM d.date_joined) AS year
FROM industries AS i
-- The year filter drops rows with no dates match, so INNER JOIN is exact.
INNER JOIN dates AS d
    USING (company_id)
LEFT JOIN companies AS c
    USING (company_id)
LEFT JOIN funding AS f
    USING (company_id)
WHERE i.industry IN ('Internet software & services','E-commerce & direct-to-consumer','Fintech')
    -- Half-open range covering calendar year 2019.
    AND d.date_joined >= DATE '2019-01-01'
    AND d.date_joined < DATE '2020-01-01'
GROUP BY
    i.industry,
    EXTRACT(YEAR FROM d.date_joined)
ORDER BY
    num_unicorns DESC,
    -- Tie-breaker gives a stable row order when counts are equal.
    industry
-- At most three industries can match the IN list; LIMIT is kept as a guard.
LIMIT 3
DataFrameas
df
variable
-- Three industries with the most companies that became unicorns in 2021.
-- Output columns: industry, num_unicorns, average_valuation_billions, year.
SELECT
    i.industry,
    -- One joined row per company_id, so a plain count replaces the
    -- per-company subquery plus SUM.
    COUNT(c.company) AS num_unicorns,
    -- funding.valuation is in US dollars: average first, then scale to
    -- billions so the value matches the column name. The cast keeps
    -- ROUND(numeric, int) applicable whatever numeric type valuation has.
    ROUND(CAST(AVG(f.valuation) AS NUMERIC) / 1000000000, 2) AS average_valuation_billions,
    EXTRACT(YEAR FROM d.date_joined) AS year
FROM industries AS i
-- The year filter drops rows with no dates match, so INNER JOIN is exact.
INNER JOIN dates AS d
    USING (company_id)
LEFT JOIN companies AS c
    USING (company_id)
LEFT JOIN funding AS f
    USING (company_id)
-- Half-open range covering calendar year 2021.
WHERE d.date_joined >= DATE '2021-01-01'
    AND d.date_joined < DATE '2022-01-01'
GROUP BY
    i.industry,
    EXTRACT(YEAR FROM d.date_joined)
ORDER BY
    num_unicorns DESC,
    -- Tie-breaker makes the LIMIT deterministic when counts are equal.
    industry
LIMIT 3
DataFrameas
df8
variable
[25]
-- Unicorn counts and average valuations per industry and year, combining:
--   * 2021 and 2020: the three industries with the most new unicorns that year;
--   * 2019: the three industries named explicitly below.
-- Output columns: industry, num_unicorns, average_valuation_billions, year.
-- NOTE(review): 2020/2021 pick each year's own top three while 2019 uses a
-- fixed list; the result only covers the same three industries in every year
-- if those lists happen to coincide - confirm this is intended.
WITH industry_year_stats AS (
    -- One row per (industry, year); computed once instead of once per branch.
    SELECT
        i.industry,
        EXTRACT(YEAR FROM d.date_joined) AS year,
        COUNT(c.company) AS num_unicorns,
        -- funding.valuation is in US dollars: average first, then scale to
        -- billions so the value matches the column name.
        ROUND(CAST(AVG(f.valuation) AS NUMERIC) / 1000000000, 2) AS average_valuation_billions
    FROM industries AS i
    -- The date filter drops rows with no dates match, so INNER JOIN is exact.
    INNER JOIN dates AS d
        USING (company_id)
    LEFT JOIN companies AS c
        USING (company_id)
    LEFT JOIN funding AS f
        USING (company_id)
    -- Half-open range covering calendar years 2019 through 2021.
    WHERE d.date_joined >= DATE '2019-01-01'
        AND d.date_joined < DATE '2022-01-01'
    GROUP BY
        i.industry,
        EXTRACT(YEAR FROM d.date_joined)
),

ranked_stats AS (
    -- Position of each industry within its year; industry breaks ties so the
    -- top-three cut is deterministic.
    SELECT
        industry,
        year,
        num_unicorns,
        average_valuation_billions,
        ROW_NUMBER() OVER (
            PARTITION BY year
            ORDER BY num_unicorns DESC, industry
        ) AS rank_in_year
    FROM industry_year_stats
)

SELECT
    industry,
    num_unicorns,
    average_valuation_billions,
    year
FROM ranked_stats
-- The year conditions are mutually exclusive, so no UNION de-duplication is needed.
WHERE (year IN (2021, 2020) AND rank_in_year <= 3)
    OR (
        year = 2019
        AND industry IN ('Internet software & services','E-commerce & direct-to-consumer','Fintech')
    )
ORDER BY
    year DESC,
    num_unicorns DESC,
    industry
DataFrameas
df9
variable
-- Final result: unicorn count and average valuation (in billions of US
-- dollars, rounded to 2 decimals) per industry and year, combining:
--   * 2021 and 2020: the three industries with the most new unicorns that year;
--   * 2019: the three industries named explicitly below.
-- Output columns: industry, year, num_unicorns, average_valuation_billions,
-- sorted by industry, then most recent year first.
-- NOTE(review): 2020/2021 pick each year's own top three while 2019 uses a
-- fixed list; the result only covers the same three industries in every year
-- if those lists happen to coincide - confirm this is intended.
WITH industry_year_stats AS (
    -- One row per (industry, year); computed once instead of once per branch.
    SELECT
        i.industry,
        EXTRACT(YEAR FROM d.date_joined) AS year,
        COUNT(c.company) AS num_unicorns,
        -- Average the dollar valuations first and divide afterwards:
        -- dividing each valuation by 1000000000 before AVG would truncate
        -- it to whole billions if valuation is an integer column.
        ROUND(CAST(AVG(f.valuation) AS NUMERIC) / 1000000000, 2) AS average_valuation_billions
    FROM industries AS i
    -- The date filter drops rows with no dates match, so INNER JOIN is exact.
    INNER JOIN dates AS d
        USING (company_id)
    LEFT JOIN companies AS c
        USING (company_id)
    LEFT JOIN funding AS f
        USING (company_id)
    -- Half-open range covering calendar years 2019 through 2021.
    WHERE d.date_joined >= DATE '2019-01-01'
        AND d.date_joined < DATE '2022-01-01'
    GROUP BY
        i.industry,
        EXTRACT(YEAR FROM d.date_joined)
),

ranked_stats AS (
    -- Position of each industry within its year; industry breaks ties so the
    -- top-three cut is deterministic.
    SELECT
        industry,
        year,
        num_unicorns,
        average_valuation_billions,
        ROW_NUMBER() OVER (
            PARTITION BY year
            ORDER BY num_unicorns DESC, industry
        ) AS rank_in_year
    FROM industry_year_stats
)

SELECT
    industry,
    year,
    num_unicorns,
    average_valuation_billions
FROM ranked_stats
-- Rows are already unique per (industry, year), so neither UNION nor an
-- outer GROUP BY is needed to remove duplicates.
WHERE (year IN (2021, 2020) AND rank_in_year <= 3)
    OR (
        year = 2019
        AND industry IN ('Internet software & services','E-commerce & direct-to-consumer','Fintech')
    )
ORDER BY
    industry,
    year DESC