Skip to content
(SQL) Project: Analyzing Unicorn Companies
  • AI Chat
  • Code
  • Report
  • Did you know that the average return from investing in stocks is 10% per year (not accounting for inflation)? But who wants to be average?!

    You have been asked to support an investment firm by analyzing trends in high-growth companies. They are interested in understanding which industries are producing the highest valuations and the rate at which new high-value companies are emerging. Providing them with this information gives them a competitive insight as to industry trends and how they should structure their portfolio looking forward.

    You have been given access to their unicorns database, which contains the following tables:

    dates

    ColumnDescription
    company_idA unique ID for the company.
    date_joinedThe date that the company became a unicorn.
    year_foundedThe year that the company was founded.

    funding

    ColumnDescription
    company_idA unique ID for the company.
    valuationCompany value in US dollars.
    fundingThe amount of funding raised in US dollars.
    select_investorsA list of key investors in the company.

    industries

    ColumnDescription
    company_idA unique ID for the company.
    industryThe industry that the company operates in.

    companies

    ColumnDescription
    company_idA unique ID for the company.
    companyThe name of the company.
    cityThe city where the company is headquartered.
    countryThe country where the company is headquartered.
    continentThe continent where the company is headquartered.

    The output

    Your query should return a table in the following format:

    industryyearnum_unicornsaverage_valuation_billions
    industry12021------
    industry22020------
    industry32019------
    industry12021------
    industry22020------
    industry32019------
    industry12021------
    industry22020------
    industry32019------

    Where industry1, industry2, and industry3 are the three top-performing industries.

    Spinner
    DataFrameavailable as
    df6
    variable
    -- CTE combination of all datasets
    WITH all_data AS
    (SELECT i.company_id, EXTRACT(year FROM date_joined) AS year_unicorn, year_founded, valuation/1000000000 AS valuation_in_billions, funding, select_investors, industry, company, city, country, continent
    FROM industries AS i
    LEFT JOIN funding AS f
    ON i.company_id = f.company_id
    LEFT JOIN dates AS d
    ON d.company_id = i.company_id
    LEFT JOIN companies AS c
    ON c.company_id = i.company_id)
    
    -- find the list of companies with the highest amount of valuation
    SELECT company, country, MAX(valuation_in_billions) AS max_val_in_billions_USD
    FROM all_data
    GROUP BY company, country
    ORDER BY max_val_in_billions_USD DESC;
    Spinner
    DataFrameavailable as
    df5
    variable
    -- Company on the highest valuation of all records alongside its detail 
    WITH all_data AS
    	(SELECT i.company_id, EXTRACT(year FROM date_joined) AS year_unicorn, year_founded, valuation, funding, select_investors, industry, company, city, country, continent
    FROM industries AS i
    LEFT JOIN funding AS f
    ON i.company_id = f.company_id
    LEFT JOIN dates AS d
    ON d.company_id = i.company_id
    LEFT JOIN companies AS c
    ON c.company_id = i.company_id)
    
    SELECT * FROM all_data
    WHERE valuation = (
    			SELECT MAX(valuation) FROM all_data)
    Spinner
    DataFrameavailable as
    df_1
    variable
    -- Exploring the "dates" table dataset
    SELECT * FROM dates
    Spinner
    DataFrameavailable as
    df1
    variable
    -- Exploring the "funding" table dataset
    SELECT * FROM funding
    Spinner
    DataFrameavailable as
    df2
    variable
    -- Exploring the "industries" table dataset
    SELECT * FROM industries
    Spinner
    DataFrameavailable as
    df14
    variable
    -- List of most industries from the dataset
    SELECT industry, COUNT(*) AS num_ind FROM industries
    GROUP BY industry
    ORDER BY num_ind DESC
    Spinner
    DataFrameavailable as
    df9
    variable
    -- Most common industries in the dataset
    WITH all_data AS
    (SELECT i.company_id, EXTRACT(year FROM date_joined) AS year_unicorn, year_founded, valuation, funding, select_investors, industry, company, city, country, continent
    FROM industries AS i
    LEFT JOIN funding AS f
    ON i.company_id = f.company_id
    LEFT JOIN dates AS d
    ON d.company_id = i.company_id
    LEFT JOIN companies AS c
    ON c.company_id = i.company_id)
    
    SELECT industry, COUNT(industry) AS num_ind
    FROM all_data
    GROUP BY industry
    ORDER BY num_ind DESC;
    Spinner
    DataFrameavailable as
    df3
    variable
    -- Exploring the "companies" table dataset
    SELECT * FROM companies
    Spinner
    DataFrameavailable as
    df15
    variable
    -- Companies spread all over the world
    SELECT industry, COUNT(industry) AS num_ind, continent, country, city
    FROM industries AS i
    LEFT JOIN companies AS c
    ON i.company_id = c.company_id
    GROUP BY industry, continent, country, city
    ORDER BY num_ind DESC
    Spinner
    DataFrameavailable as
    df16
    variable
    -- list of most industries that belong to unicorn categori among countries and cities
    SELECT industry, COUNT(industry) AS num_ind, country, city
    FROM industries AS i
    LEFT JOIN companies AS c
    ON i.company_id = c.company_id
    WHERE continent = 'Asia'
    GROUP BY industry, country, city
    ORDER BY num_ind DESC
    Spinner
    DataFrameavailable as
    df17
    variable
    -- Unicorn Analysis in Indonesia
    SELECT company, industry, city, EXTRACT(year FROM date_joined) AS date_joined_unicorn, 
    	valuation/1000000000 AS valuation_in_billions_USD
    FROM industries AS i
    LEFT JOIN companies AS c
    ON i.company_id = c.company_id
    LEFT JOIN dates AS d
    ON d.company_id = i.company_id
    LEFT JOIN funding AS f
    ON i.company_id = f.company_id
    WHERE continent = 'Asia' AND country = 'Indonesia'
    GROUP BY date_joined_unicorn, company, industry, city, valuation
    ORDER BY date_joined_unicorn DESC
    Spinner
    DataFrameavailable as
    df20
    variable
    -- -- Unicorn Analysis in Indonesia
    SELECT industry, COUNT(industry) AS num_ind, city, EXTRACT(year FROM date_joined) AS date_joined_unicorn, 
    	SUM(valuation/1000000000) AS total_valuation_in_billions_USD
    FROM industries AS i
    LEFT JOIN companies AS c
    ON i.company_id = c.company_id
    LEFT JOIN dates AS d
    ON d.company_id = i.company_id
    LEFT JOIN funding AS f
    ON i.company_id = f.company_id
    WHERE continent = 'Asia' AND country = 'Indonesia'
    GROUP BY industry, date_joined_unicorn, city
    ORDER BY num_ind DESC
    Spinner
    DataFrameavailable as
    df18
    variable
    -- Unicorn Analysis in Sweden
    SELECT company, industry, city, EXTRACT(year FROM date_joined) AS date_joined_unicorn, 
    	valuation/1000000000 AS valuation_in_billions_USD
    FROM industries AS i
    LEFT JOIN companies AS c
    ON i.company_id = c.company_id
    LEFT JOIN dates AS d
    ON d.company_id = i.company_id
    LEFT JOIN funding AS f
    ON i.company_id = f.company_id
    WHERE continent = 'Europe' AND country = 'Sweden'
    GROUP BY date_joined_unicorn, company, industry, city, valuation
    ORDER BY valuation_in_billions_USD DESC