Skip to content

Unicorn Companies

A unicorn company is a privately held company with a current valuation of over $1 billion USD. This dataset consists of unicorn companies and startups across the globe as of November 2021, including country of origin, sector, select investors, and valuation of each unicorn.

Note: former unicorn companies that have since exited through an IPO or an acquisition are not included in this list.

Not sure where to begin? Scroll to the bottom to find challenges!

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the unicorn dataset and shorten the investors column name.
unicorn = pd.read_csv("unicorn_companies.csv")
unicorn.rename(columns={'Select Investors': 'Investors'}, inplace=True)
unicorn.head()

# 'Investors' is treated as a comma-separated list of names: split it once
# and reuse the result for both derived columns.
investor_lists = unicorn['Investors'].str.split(',')
# First listed investor of each company.
unicorn['Investor 1'] = investor_lists.str[0]
# Number of listed investors. The length is taken on the split list, because
# .str.len() on the raw string would return its character count instead.
# Rows with a missing 'Investors' value stay NaN.
unicorn['Total Investors'] = investor_lists.str.len()
unicorn.head()
unicorn.columns
# Exploratory plots: investors vs. valuation, then the distribution of each.
#
# At this point in the script 'Valuation ($B) ' has not been converted yet and
# still holds '$'-prefixed text, which matplotlib would draw on a categorical
# axis. Plot from a numeric copy instead; the DataFrame column itself is left
# untouched so the later in-place conversion keeps working.
# (The column name ends with a space, as everywhere else in this script.)
valuation_billions = pd.to_numeric(
    unicorn['Valuation ($B) '].astype(str).str.replace('$', '', regex=False)
)

plt.figure(figsize=(10,6))
plt.scatter(unicorn['Total Investors'], valuation_billions, alpha=0.7)
plt.show()

plt.figure(figsize=(10,6))
plt.hist(valuation_billions, bins=50, edgecolor='black', color='blue', alpha=0.7)
plt.xticks(rotation=90)
plt.show()

plt.figure(figsize=(10,6))
plt.hist(unicorn['Total Investors'], bins=50, edgecolor='black', color='blue', alpha=0.7)
plt.xticks(rotation=90)
plt.show()
# Convert the text columns to proper dtypes so they can be plotted and
# summarised numerically.
unicorn['Date Added'] = pd.to_datetime(unicorn['Date Added'])
unicorn.dtypes
# Strip the currency sign and parse the valuation as a float.
# regex=False makes the '$' a literal character explicitly: '$' is a regex
# metacharacter and the default of `regex` differs between pandas versions.
unicorn['Valuation ($B) '] = (
    unicorn['Valuation ($B) '].str.replace("$", "", regex=False).astype(float)
)
unicorn.dtypes
# Box plot of the now-numeric valuations.
plt.boxplot(unicorn['Valuation ($B) '])
plt.show()
# Scatter of valuation against the date added, labelling the points above the
# threshold with the company name.
x = unicorn['Date Added']
y = unicorn['Valuation ($B) ']

# Only points whose valuation exceeds this value get a text label.
threshold = 20

plt.figure(figsize=(20,12))
plt.scatter(x, y, alpha=0.7)
# Walk the three columns in lockstep instead of indexing x[i] / y[i] with the
# enumerate() counter: that counter is a position, while Series[...] looks up
# by index label, so the two only agree on a default RangeIndex.
for date_added, valuation, label in zip(x, y, unicorn['Company']):
    if valuation > threshold:
        plt.annotate(label, (date_added, valuation), textcoords="offset points", xytext=(0,5), ha='center')
plt.show()
# Summary statistics of the dataset.
unicorn.describe()
# Pearson correlation between the numeric columns only. The frame also holds
# text and datetime columns, which DataFrame.corr no longer drops silently on
# recent pandas versions (it raises instead), so select them explicitly.
unicorn.select_dtypes(include='number').corr(method='pearson')
# Scatter of valuation against the date added, labelling the points above the
# threshold with the country.
x = unicorn['Date Added']
y = unicorn['Valuation ($B) ']

# Only points whose valuation exceeds this value get a text label.
threshold = 20

plt.figure(figsize=(20,12))
plt.scatter(x, y, alpha=0.7)
# Walk the three columns in lockstep instead of indexing x[i] / y[i] with the
# enumerate() counter: that counter is a position, while Series[...] looks up
# by index label, so the two only agree on a default RangeIndex.
for date_added, valuation, label in zip(x, y, unicorn['Country']):
    if valuation > threshold:
        plt.annotate(label, (date_added, valuation), textcoords="offset points", xytext=(0,5), ha='center')
plt.show()
# Scatter of valuation against the date added, labelling the points above the
# threshold with the value of the 'Total Investors' column.
x = unicorn['Date Added']
y = unicorn['Valuation ($B) ']

# Only points whose valuation exceeds this value get a text label
# (lower than in the other annotated scatters, so more points are labelled).
threshold = 5

plt.figure(figsize=(20,12))
plt.scatter(x, y, alpha=0.7)
# Walk the three columns in lockstep instead of indexing x[i] / y[i] with the
# enumerate() counter: that counter is a position, while Series[...] looks up
# by index label, so the two only agree on a default RangeIndex.
for date_added, valuation, label in zip(x, y, unicorn['Total Investors']):
    if valuation > threshold:
        plt.annotate(label, (date_added, valuation), textcoords="offset points", xytext=(0,5), ha='center')
plt.show()