Skip to content

Help your hedge fund manager!

You have two datasets at your disposal: Balance_Sheet.xlsx and Income_Statement.xlsx. Both these datasets have three columns in common:

  • "Company": The company's ticker name.
  • "comp_type" The type of industry the company in question belongs to. It is either "tech" for companies in the technology industry, "fmcg" for companies in the fast-moving consumer goods industry, and "real_est" for companies in the real estate industry.
  • "Year": The year the company's information is from.

The rest of the columns in the datasets contain information from the financial statement of the "Company" in question. Note that the columns in Balance_Sheet.xlsx only contain financial information from the balance sheet. Similarly, the columns in Income_Statement.xlsx only contain financial information from the income statement. The columns are named accordingly. For instance, the column "Total Liab" from Balance_Sheet.xlsx is the total liability.

import pandas as pd

# Load the datasets from the 'DTATA Source' directory
try:
    df_bs = pd.read_excel('data/Balance_Sheet.xlsx')
    df_is = pd.read_excel('data/Income_Statement.xlsx')
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Please make sure the files 'Balance_Sheet.xlsx' and 'Income_Statement.xlsx' are in the correct directory.")
    # Exit the script if files are not found
    import sys
    sys.exit(1)

# Check the columns of the dataframes
print("Balance Sheet Columns:", df_bs.columns)
print("Income Statement Columns:", df_is.columns)

# Select common columns and 'Total Liab' from df_bs
common_columns = ['company', 'comp_type', 'Year']
df_bs_selected = df_bs[common_columns + ['Total Liab']]

# Select common columns from df_is
df_is_selected = df_is[common_columns]

# Merge the selected columns from both dataframes on common columns
df = pd.merge(df_bs_selected, df_is_selected, on=common_columns, how='inner')
# Rename df to df_ratios
df_ratios = df.copy()

# Calculate the debt-to-equity ratio and save it in a column named 'leverage_ratio'
# Debt-to-equity ratio = Total Liab / Total Stockholder Equity
df_ratios = pd.merge(df_ratios, df_bs[['company', 'comp_type', 'Year', 'Total Stockholder Equity']], on=['company', 'comp_type', 'Year'], how='inner')
df_ratios['leverage_ratio'] = df_ratios['Total Liab'] / df_ratios['Total Stockholder Equity']

# Display the first few rows of the dataframe to verify the new column
df_ratios.head()
# Calculate the gross margin ratio and save it in a column named 'profitability_ratio'
# Gross margin ratio = Gross Profit / Total Revenue
df_ratios = pd.merge(df_ratios, df_is[['company', 'comp_type', 'Year', 'Gross Profit', 'Total Revenue']], on=['company', 'comp_type', 'Year'], how='inner')
df_ratios['profitability_ratio'] = df_ratios['Gross Profit'] / df_ratios['Total Revenue']

# Display the first few rows of the dataframe to verify the new column
df_ratios.head()
# Group the dataset by 'comp_type' and calculate the average 'profitability_ratio' for each group
avg_profitability = df_ratios.groupby('comp_type')['profitability_ratio'].mean()

# Identify the 'comp_type' with the lowest average 'profitability_ratio'
lowest_profitability = avg_profitability.idxmin()

# Display the result
lowest_profitability
# Group the dataset by 'comp_type' and calculate the maximum 'leverage_ratio' for each group
max_leverage = df_ratios.groupby('comp_type')['leverage_ratio'].max()

# Identify the 'comp_type' with the highest maximum 'leverage_ratio'
highest_leverage = max_leverage.idxmax()

# Display the result
highest_leverage
import matplotlib.pyplot as plt
import numpy as np

# Filter the dataset for real estate companies
real_estate_df = df_ratios[df_ratios['comp_type'] == 'real estate']

# Create a scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(df_ratios['leverage_ratio'], df_ratios['profitability_ratio'])
#plt.scatter(real_estate_df['leverage_ratio'], real_estate_df['profitability_ratio'], alpha=0.5)
plt.title('Leverage Ratio vs Profitability Ratio for Real Estate Companies')
plt.xlabel('Leverage Ratio')
plt.ylabel('Profitability Ratio')
plt.grid(True)
plt.show()

# Calculate the correlation between 'leverage_ratio' and 'profitability_ratio' for real estate companies
correlation = df_ratios['leverage_ratio'].corr(df_ratios['profitability_ratio'])

# Determine the relationship based on the correlation value
if correlation > 0:
    relationship = 'positive'
elif correlation < 0:
    relationship = 'negative'
else:
    relationship = 'no relationship'

relationship