Skip to content

Help your hedge fund manager!

You have two datasets at your disposal: Balance_Sheet.xlsx and Income_Statement.xlsx. Both these datasets have three columns in common:

  • "Company": The company's ticker name.
  • "comp_type" The type of industry the company in question belongs to. It is either "tech" for companies in the technology industry, "fmcg" for companies in the fast-moving consumer goods industry, and "real_est" for companies in the real estate industry.
  • "Year": The year the company's information is from.

The rest of the columns in the datasets contain information from the financial statement of the "Company" in question. Note that the columns in Balance_Sheet.xlsx only contain financial information from the balance sheet. Similarly, the columns in Income_Statement.xlsx only contain financial information from the income statement. The columns are named accordingly. For instance, the column "Total Liab" from Balance_Sheet.xlsx is the total liability.

import pandas as pd
import matplotlib.pyplot as plt
# Load the datasets (replace with actual file paths)
balance_sheet_df = pd.read_excel("data/Balance_Sheet.xlsx")
income_statement_df = pd.read_excel("data/Income_Statement.xlsx")

# Print a message indicating successful data loading 
print("Data loaded successfully!")
# Merge the datasets based on common columns (assuming Company, comp_type, and Year)
merged_df = balance_sheet_df.merge(income_statement_df, on=["company", "comp_type", "Year"], how="inner")

# Print a message indicating successful merge 
print("DataFrames merged successfully!")
# Debt-to-equity ratio calculation 
merged_df["leverage_ratio"] = merged_df["Total Liab"].fillna(0).astype(float) / \
                              merged_df["Total Stockholder Equity"].fillna(0).astype(float)

def calculate_profitability_ratio(row):
  # Corrected column names based on merged_df
  gross_margin = (row["Total Revenue"] - row["Cost Of Goods Sold"]) / row["Total Revenue"]
  operating_margin = (row["Operating Income"] / row["Total Revenue"]) * 100
  # Choose which ratio to return (gross_margin or operating_margin) based on your preference
  return gross_margin  # or return operating_margin based on preference

# Add the calculated profitability ratio to the DataFrame using apply
merged_df["profitability_ratio"] = merged_df.apply(calculate_profitability_ratio, axis=1)

# Create a DataFrame named df_ratios to store the calculated ratios
df_ratios = merged_df[["leverage_ratio", "profitability_ratio"]]

# Print a message indicating successful ratio calculation and storage in df_ratios
print("Ratios calculated and stored in df_ratios DataFrame!")
# Calculate average ratios grouped by company type using pivot_table
average_ratios = merged_df.pivot_table(values=["leverage_ratio", "profitability_ratio"], index=["comp_type"], aggfunc="mean")

# Find the company type with the highest leverage ratio
highest_leverage = average_ratios["leverage_ratio"].idxmax()  # Get the index (company type) with the maximum value

# Print the average ratios by company type
print("Average Ratios by Company Type:")
print(average_ratios)

# Print the company type with the highest leverage ratio
print("Company Type with Highest Leverage Ratio:", highest_leverage)
# Filter real estate companies
real_estate_data = merged_df[merged_df["comp_type"] == "real_est"]

# Calculate correlation between leverage and profitability ratios
correlation = real_estate_data["leverage_ratio"].corr(real_estate_data["profitability_ratio"])

# Prepare data for regression plot (assuming profitability_ratio is the dependent variable)
x = real_estate_data["leverage_ratio"]
y = real_estate_data["profitability_ratio"]

# Perform linear regression (optional, for line of best fit)
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x.values.reshape(-1, 1), y.values.reshape(-1, 1))

# Generate predicted profitability based on leverage ratio
predicted_profitability = model.predict(x.values.reshape(-1, 1))

# Create scatter plot with optional line of best fit
plt.scatter(x, y)
if model:
  plt.plot(x, predicted_profitability, color='red')  # Plot the regression line (optional)
plt.xlabel("Leverage Ratio")
plt.ylabel("Profitability Ratio")
plt.title("Leverage vs. Profitability (Real Estate)")
plt.grid(True)
plt.show()

# Print the correlation coefficient
print("Correlation between Leverage and Profitability (Real Estate):", correlation)

# Analyze the plot and correlation to determine the relationship
if correlation > 0:
  relationship = "positive"
elif correlation < 0:
  relationship = "negative"
else:
  relationship = "no relationship"

print("Relationship between Leverage and Profitability (Real Estate):", relationship)