Unicorn Companies
A unicorn company is a privately held company with a current valuation of over $1 billion USD. This dataset consists of unicorn companies and startups across the globe as of November 2021, including country of origin, sector, select investors, and valuation of each unicorn.
Note: former unicorn companies that have since exited through an IPO or an acquisition are not included in this list.
#Loading packages
import pandas as pd
import matplotlib.pyplot as plt
# Read the unicorn dataset and keep only the columns used in this analysis.
raw_data = pd.read_csv("unicorn_companiesv2.csv")
columns_to_keep = [
    'Company',
    'Valuation ($B)',
    'Date Added',
    'Country',
    'Category',
    'Select Investors',
]
uc = pd.DataFrame(raw_data, columns=columns_to_keep)
uc  # notebook-style display of the trimmed frame

# Take the first run of four digits in each "Date Added" string as the year.
# The extracted value is left as text; no numeric conversion is done here.
year_added = uc['Date Added'].str.extract(r'(\d{4})')
uc['Year Added'] = year_added
uc.info()
uc.head()
# Categories: normalise known misspellings, then list and count them.
category_fixes = {
    "Artificial intelligence": "Artificial Intelligence",
    "Finttech": "Fintech",
}
for wrong_label, right_label in category_fixes.items():
    uc["Category"] = uc["Category"].str.replace(wrong_label, right_label)

# Split comma-separated entries into one value per row and trim whitespace.
category_tokens = uc["Category"].str.split(',').explode().str.strip()
Category = list(set(category_tokens.unique()))  # distinct labels; order is not guaranteed
Category_Count = uc.groupby('Category')['Category'].count()  # rows per category value
print(Category)
print(Category_Count)

# Bar chart of the per-category counts, each bar labelled with its height.
ax = Category_Count.plot.bar(edgecolor='black', color='red')
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.annotate(str(bar_height), (bar_centre, bar_height), ha='center', va='bottom')
plt.xlabel('Category')
plt.ylabel('Count')
plt.title('Count of Categories')
plt.show()
# Investors: list the distinct investors and count how often each one appears.
# "Select Investors" holds a comma-separated list per company, so split it into
# one row per investor and trim the whitespace left around each name.
investor_names = uc["Select Investors"].str.split(',').explode().str.strip()
# Drop missing values and empty tokens so the unique list and the counts describe
# the same set of investors (value_counts ignores NaN by default, whereas the
# unique list previously kept it and inflated len(Select_Investors)).
investor_names = investor_names[investor_names.notna() & (investor_names != '')]
Select_Investors = list(investor_names.unique())  # distinct investors, in order of first appearance
Investors_count = investor_names.value_counts()  # appearances per investor, most frequent first
print(Select_Investors)
print(Investors_count)

# Bar chart of the 10 most frequent investors, each bar labelled with its height.
top_10_investors = Investors_count.head(10)
ax = top_10_investors.plot.bar(edgecolor='black', color='purple')
for p in ax.patches:
    ax.annotate(
        str(p.get_height()),
        (p.get_x() + p.get_width() / 2, p.get_height()),
        ha='center',
        va='bottom',
    )
plt.xlabel('Investor')
plt.ylabel('Count')
plt.title('Top 10 Investors by Count')
plt.show()
# Countries: distinct country values and the number of companies in each.
country_column = uc["Country"]
Select_Country = country_column.unique()
Country_count = country_column.value_counts()
print(Select_Country)
print(Country_count)

# Bar chart of the 10 countries with the most companies; value_counts is
# already sorted in descending order, so the first 10 rows are the top 10.
top_10_country = Country_count.head(10)
ax = top_10_country.plot.bar(edgecolor='black', color='Orange')
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.annotate(str(bar_height), (bar_centre, bar_height), ha='center', va='bottom')
plt.xlabel('Country')
plt.ylabel('Count')
plt.title('Top 10 Country')
plt.show()
# Headline figures: combined valuation, number of countries, number of investors.
# Select_Country and Select_Investors are the collections built earlier in the script.
total_val = uc["Valuation ($B)"].sum()
Total_country = len(Select_Country)
Total_investors = len(Select_Investors)

print("Total valuation:", total_val, "USD billion")
print("Total number of countries:", Total_country)
print("Total investors:", Total_investors)
# Top 5 investors by the total valuation of the companies they appear in.
# "Select Investors" is a comma-separated list per company. Grouping on the raw
# string would rank investor *combinations*, so split it into one row per
# investor first; each company's valuation is credited to every listed investor.
investor_rows = uc[['Select Investors', 'Valuation ($B)']].dropna(subset=['Select Investors']).copy()
investor_rows['Select Investors'] = investor_rows['Select Investors'].str.split(',')
investor_rows = investor_rows.explode('Select Investors')
investor_rows['Select Investors'] = investor_rows['Select Investors'].str.strip()
# Discard empty names, which a trailing or doubled comma would produce.
investor_rows = investor_rows[investor_rows['Select Investors'] != '']

# Sum the valuation per individual investor.
investors_valuation = investor_rows.groupby('Select Investors')['Valuation ($B)'].sum()
# Keep the five largest totals, biggest first.
top_5_investors = investors_valuation.sort_values(ascending=False).head(5)
top_5_investors

# Bar chart of the top 5, each bar labelled with its height.
ax = top_5_investors.plot.bar(edgecolor='black', color='Green')
for p in ax.patches:
    ax.annotate(
        str(p.get_height()),
        (p.get_x() + p.get_width() / 2, p.get_height()),
        ha='center',
        va='bottom',
    )
plt.xlabel('Investors')
plt.xticks(rotation='vertical')
plt.ylabel('Valuation ($B)')
plt.title('Top 5 Investors by Valuation ($B)')
plt.show()
# Top 5 countries by total valuation.
# Sum the valuation of all companies within each country.
valuation_column = uc['Valuation ($B)']
country_valuation = valuation_column.groupby(uc['Country']).sum()
# Order the totals from largest to smallest and keep the first five.
ranked_countries = country_valuation.sort_values(ascending=False)
top_5_country = ranked_countries.iloc[:5]
top_5_country
# Restrict the data to three countries, then total the valuation per country.
countries_of_interest = ['United States', 'China', 'Germany']
in_selected_countries = uc['Country'].isin(countries_of_interest)
filtered_data = uc[in_selected_countries]
filtered_data = filtered_data.groupby('Country')['Valuation ($B)'].sum()
print(filtered_data)
Filtering by Country and Valuation
To filter the data by country and valuation, we can use the `query` method of the DataFrame. Here's an example of how to filter the data for the countries United States, China, and Germany:
```python
filtered_data = uc.query("Country in ['United States', 'China', 'Germany']")
filtered_data
```
This will return a new DataFrame, `filtered_data`, that contains only the rows where the country is United States, China, or Germany.
To further filter the data based on valuation, you can add another condition to the query:
```python
filtered_data = uc.query("Country in ['United States', 'China', 'Germany'] and `Valuation ($B)` > 100")
filtered_data
```
This will filter the data for the countries United States, China, and Germany, and also for companies with a valuation greater than 100 billion dollars.