Skip to content
Unicorn Companies
Unicorn Companies
A unicorn company is a privately held company with a current valuation of over $1 billion USD. This dataset consists of unicorn companies and startups across the globe as of November 2021, including country of origin, sector, select investors, and valuation of each unicorn.
Note: former unicorn companies that have since exited through an IPO or an acquisition are not included in this list.
Not sure where to begin? Scroll to the bottom to find challenges!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Exploratory analysis of the unicorn-companies dataset.
#
# The script loads the CSV, cleans the columns it plots, derives two
# investor features, and then draws a handful of exploratory charts.
# All cleaning happens *before* any plotting so that every chart sees
# numeric valuations and real datetimes.
# ---------------------------------------------------------------------------

# Column key exactly as it is used throughout this analysis. The trailing
# space is intentional here -- presumably it mirrors the CSV header; verify
# against the file before "fixing" it.
VALUATION_COL = 'Valuation ($B) '


def _scatter_with_labels(frame, label_column, threshold):
    """Plot valuation against date added and label the points above *threshold*.

    Args:
        frame: DataFrame holding 'Date Added', the valuation column and
            ``label_column``.
        label_column: Name of the column whose values are drawn as text
            next to each highlighted point.
        threshold: Only points whose valuation is strictly greater than
            this value are annotated.
    """
    dates = frame['Date Added']
    valuations = frame[VALUATION_COL]

    plt.figure(figsize=(20, 12))
    plt.scatter(dates, valuations, alpha=0.7)

    # A boolean mask plus zip() avoids label-based `series[i]` lookups,
    # which only work when the index happens to be 0..n-1.
    highlighted = valuations > threshold
    for label, date, valuation in zip(
        frame.loc[highlighted, label_column],
        dates[highlighted],
        valuations[highlighted],
    ):
        plt.annotate(
            label,
            (date, valuation),
            textcoords="offset points",
            xytext=(0, 5),
            ha='center',
        )
    plt.show()


# --- Load and clean --------------------------------------------------------
unicorn = pd.read_csv("unicorn_companies.csv")
unicorn.rename(columns={'Select Investors': 'Investors'}, inplace=True)

# regex=False: "$" must be removed literally. As a regex it is the
# end-of-string anchor, so nothing would be stripped and astype(float)
# would fail on values such as "$140".
unicorn[VALUATION_COL] = (
    unicorn[VALUATION_COL].str.replace("$", "", regex=False).astype(float)
)
unicorn['Date Added'] = pd.to_datetime(unicorn['Date Added'])

# --- Derived investor features ---------------------------------------------
investor_lists = unicorn['Investors'].str.split(',')
unicorn['Investor 1'] = investor_lists.str[0]
# Length of the split list = number of investors. (Taking .str.len() of the
# raw string would count characters instead.)
unicorn['Total Investors'] = investor_lists.str.len()

# --- Quick inspection ------------------------------------------------------
# print() keeps these visible when the file runs as a plain script rather
# than as notebook cells.
print(unicorn.head())
print(unicorn.columns)
print(unicorn.dtypes)

# --- Investors vs. valuation -----------------------------------------------
plt.figure(figsize=(10, 6))
plt.scatter(unicorn['Total Investors'], unicorn[VALUATION_COL], alpha=0.7)
plt.show()

# --- Distributions ---------------------------------------------------------
for column in (VALUATION_COL, 'Total Investors'):
    plt.figure(figsize=(10, 6))
    plt.hist(unicorn[column], bins=50, edgecolor='black', color='blue', alpha=0.7)
    plt.xticks(rotation=90)
    plt.show()

plt.boxplot(unicorn[VALUATION_COL])
plt.show()

# --- Valuation over time, labelled by company ------------------------------
_scatter_with_labels(unicorn, 'Company', threshold=20)

# --- Summary statistics ----------------------------------------------------
print(unicorn.describe())
# numeric_only=True: recent pandas versions raise instead of silently
# dropping the text columns (Company, Country, Investors, ...).
print(unicorn.corr(method='pearson', numeric_only=True))

# --- Valuation over time, labelled by country / investor count -------------
_scatter_with_labels(unicorn, 'Country', threshold=20)
_scatter_with_labels(unicorn, 'Total Investors', threshold=5)