Skip to content
Remsil Bakery Baked Goods Analysis
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import Image, display
# Loading data from the Excel file, skipping the first two rows and using specific columns
file = 'CAKE DEPT. PRODUCTION OCTOBER 2023.xlsx'
cake = pd.read_excel(file, skiprows=2, index_col=[0, 1, 2], usecols=[0, 1, 2, 34, 35, 36])
# Dropping rows with missing values in 'TOTAL AMOUNT', 'TOTAL', and 'PRICE' columns to ensure data integrity
cake = cake.dropna(subset=['TOTAL AMOUNT', 'TOTAL', 'PRICE'])
# Converting 'TOTAL' and 'PRICE' columns to numeric types for accurate calculations
cake['TOTAL'] = pd.to_numeric(cake['TOTAL'], errors='coerce')
cake['PRICE'] = pd.to_numeric(cake['PRICE'], errors='coerce')
# Filtering the DataFrame to include only rows where 'TOTAL AMOUNT' is greater than 0
cake_dept = cake[cake['TOTAL AMOUNT'] > 0]
# Sorting the DataFrame by 'TOTAL AMOUNT' in descending order to identify top-selling items
cake_dept_sorted = cake_dept.sort_values(by='TOTAL AMOUNT', ascending=False)
# Resetting the index of the sorted DataFrame for easier access to rows
cake_dept_sorted_reset = cake_dept_sorted.reset_index()
# Accessing the first and last row to identify the highest and lowest selling items
first_row = cake_dept_sorted_reset.iloc[0]
last_row = cake_dept_sorted_reset.iloc[-1]
# Displaying the highest and lowest selling items with clear and concise messages
print(f"Highest Selling Item: {first_row['ITEM']} (Size: {first_row['SIZE (INCHES)']} inches) - Total Amount Sold: {first_row['TOTAL AMOUNT']}")
print(f"Lowest Selling Item: {last_row['ITEM']} (Size: {last_row['SIZE (INCHES)']} inches) - Total Amount Sold: {last_row['TOTAL AMOUNT']}")
print(f"Total Number of Products Made in October 2023: {cake_dept_sorted.shape[0]}")
# Defining the variables for the scatter plot
items = cake_dept_sorted_reset['ITEM']
amount = cake_dept_sorted_reset['TOTAL']
# Creating the scatter plot to visualize the total amount sold by item size
sns.set(style="whitegrid")
plt.figure(figsize=(14, 10))
scatter_plot = sns.scatterplot(x=items, y=amount, size='SIZE (INCHES)', hue='SIZE (INCHES)', palette='viridis', data=cake_dept_sorted_reset, legend=False, sizes=(20, 200))
# Adding title and labels to the scatter plot for better understanding
plt.title('Total Amount Sold by Item Size', fontsize=18)
plt.xlabel('Item', fontsize=16)
plt.ylabel('Total Amount Sold', fontsize=16)
plt.xticks(rotation=90, fontsize=12)
plt.yticks(fontsize=12)
plt.show()