Skip to content
Competition - motorcycle parts
# Importing the pandas module
import pandas as pd

# Reading in the sales data; the 'date' column is parsed into datetime values
df = pd.read_csv('data/sales_data.csv', parse_dates=['date'])

# Take a look at the first datapoints
# (the notebook export had "Hidden output" UI text fused onto this call and
# repeated the call twice; a single preview is enough)
df.head()
Sales analysis
Breakdown of sales
import matplotlib.pyplot as plt
import seaborn as sns

# sns.barplot aggregates y with the *mean* by default, which would plot the
# average order value per category while the charts are titled "Total Sales".
# Sum explicitly before plotting so the bars really are totals.
# sort=False keeps categories in order of first appearance in df, which is the
# order seaborn used when it was handed the raw rows.
sales_by_product_line = df.groupby('product_line', sort=False, as_index=False)['total'].sum()
sales_by_warehouse = df.groupby(['warehouse', 'client_type'], sort=False, as_index=False)['total'].sum()

# Set up the figure and axes
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Bar chart for total sales by product line
# NOTE: ci=None is kept from the original; seaborn >= 0.12 spells it errorbar=None.
sns.barplot(data=sales_by_product_line, x='product_line', y='total', ax=axes[0], ci=None)
axes[0].set_title('Total Sales by Product Line')
axes[0].set_xlabel('Product Line')
axes[0].set_ylabel('Total Sales')
axes[0].tick_params(axis='x', rotation=45)

# Bar chart for total sales by warehouse with hue differentiation by client type
sns.barplot(data=sales_by_warehouse, x='warehouse', y='total', hue='client_type', ax=axes[1], ci=None)
axes[1].set_title('Total Sales by Warehouse')
axes[1].set_xlabel('Warehouse')
axes[1].set_ylabel('Total Sales')
axes[1].tick_params(axis='x', rotation=45)

# Adjust layout and render, as the other plotting cells do
plt.tight_layout()
plt.show()

# Suspension and frame parts yield the top two sales results and are
# responsible for roughly 50% of all sales.
Our customers are retail customers and warehouses. Overall, warehouses are responsible for the majority of all sales.
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

# Total sales per (warehouse, payment type), one column per payment type;
# combinations that never occur become 0 instead of NaN.
payment_totals = df.groupby(['warehouse', 'payment'])['total'].sum().unstack().fillna(0)

# Normalise each warehouse row so its payment types sum to 1 (i.e. 100%)
sales_by_payment_percentage = payment_totals.div(payment_totals.sum(axis=1), axis=0)

# Create a stacked bar chart showing the percentage of sales by payment type
ax = sales_by_payment_percentage.plot(
    kind='bar',
    stacked=True,
    figsize=(10, 6),
    rot=0,
    color=['blue', 'green', 'red'],
)
ax.set_xlabel('Warehouse')
ax.set_ylabel('Percentage of Sales')
ax.set_title('Percentage of Sales by Payment Type and Warehouse', fontweight='bold', fontsize=12)
ax.legend(title='Payment Type')

# Values are fractions in [0, 1]; show the axis from 0 to 100% and format the
# tick labels as percentages so they agree with the axis label.
ax.set_ylim(0, 1)
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))

# Add percentage values to each of the bars
for bar in ax.patches:
    height = bar.get_height()
    if height <= 0:
        # Nothing visible to label for an empty segment
        continue
    x, y = bar.get_xy()
    ax.text(
        x + bar.get_width() / 2,
        y + height / 2,
        f'{height:.1%}',
        ha='center',
        va='center',
        fontsize=8,
        fontweight='bold',
        color='white',
    )

plt.show()

# Our customers prefer money transfers; these make up 50% of all sales.
# The second most preferred payment method is the credit card, at roughly 40%.
# Cash no longer plays a bigger role, with a share of roughly 10%.
# What is the average unit price for each product line?
# Sum quantity and revenue per product line, then divide revenue by quantity
# to get a quantity-weighted average price per unit.
gr_prices = df.groupby('product_line')[['quantity', 'total']].sum()
gr_prices['price_avg'] = gr_prices['total'] / gr_prices['quantity']

# display() is provided by the notebook (IPython) environment.
# The export had "Hidden output" UI text fused onto this line; it is removed.
display(gr_prices)
Engine, frame, and suspension parts offer the best average price ratio.
import seaborn as sns
import matplotlib.pyplot as plt

# Sort gr_prices by product line alphabetically so both panels share one order
gr_prices_sorted = gr_prices.sort_index()
product_line_order = sorted(df['product_line'].unique())

# Set up the figure and axes
fig, (ax_avg, ax_dist) = plt.subplots(1, 2, figsize=(16, 6))

# Dot plot for average unit price per product line
sns.stripplot(
    x=gr_prices_sorted.index,
    y='price_avg',
    data=gr_prices_sorted,
    ax=ax_avg,
    size=8,
    color='blue',
)
ax_avg.set_title('Average Unit Price per Product Line', fontweight='bold', fontsize=12)
ax_avg.set_xlabel('Product Line')
ax_avg.set_ylabel('Average Unit Price')
ax_avg.tick_params(axis='x', rotation=45)

# Box plot for distribution of unit prices per product line
sns.boxplot(
    x='product_line',
    y='unit_price',
    data=df,
    ax=ax_dist,
    palette='Set2',
    order=product_line_order,
)
ax_dist.set_title('Distribution of Unit Prices per Product Line', fontweight='bold', fontsize=12)
ax_dist.set_xlabel('Product Line')
ax_dist.set_ylabel('Unit Price')
ax_dist.tick_params(axis='x', rotation=45)

# Set the overall title for the figure; the rect leaves the top 5% of the
# figure free so the suptitle does not overlap the panel titles.
fig.suptitle('Price Analysis by Product Line', fontweight='bold', fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

# Interestingly, there is only a very minor difference in average prices
# between retail and warehouses.
# Calculate the average unit price for each product line and customer type:
# sum quantity and revenue per (product_line, client_type) pair, then divide.
gr_prices_customer = df.groupby(['product_line', 'client_type'])[['quantity', 'total']].sum()
gr_prices_customer['price_avg'] = gr_prices_customer['total'] / gr_prices_customer['quantity']
# Turn the group keys back into ordinary columns so seaborn can reference them
gr_prices_customer = gr_prices_customer.reset_index()

# Set up the figure and axes
fig, ax = plt.subplots(figsize=(14, 8))

# Bar plot for average unit price per product line and customer type
# (the data is already one row per bar, so no further aggregation happens)
sns.barplot(
    x='product_line',
    y='price_avg',
    hue='client_type',
    data=gr_prices_customer,
    palette='Set1',
    ax=ax,
)

# Set titles and labels
ax.set_title('Average Unit Price by Product Line and Customer Type', fontweight='bold', fontsize=14)
ax.set_xlabel('Product Line', fontsize=12)
ax.set_ylabel('Average Unit Price', fontsize=12)
ax.tick_params(axis='x', rotation=45)

# Display the plot
plt.tight_layout()
plt.show()

# Checking data
# Print a summary of the sales DataFrame (columns, non-null counts, dtypes)
df.info()