# Exploratory Data Analysis for Supermarket
# Numerical/Dataframe libraries
import pandas as pd
import numpy as np
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
# Load the raw dataset from a Google Drive share link.
# The file id is the second-to-last '/'-separated segment of the share URL;
# it is appended to the "uc?id=" URL that is handed to pandas.
url = 'https://drive.google.com/file/d/1xa_06Kqic0-KflBwUy1_ntSKleHS639N/view?usp=sharing'
file_id = url.rsplit('/', 2)[-2]
dwn_url = f'https://drive.google.com/uc?id={file_id}'
supermarket_df = pd.read_csv(dwn_url)
# Styled preview of the first five rows; the result is not stored.
supermarket_df.head(5).style.background_gradient(cmap='twilight')
# Keep the dataset dimensions as a (rows, columns) tuple.
shape = supermarket_df.shape
#print(f'The dataset contains {shape[0]} rows and {shape[1]} columns')
# Overall product sales by cities: row counts per product line, with one bar
# per city inside each product line.
plt.figure(dpi=140)
sns.countplot(data=supermarket_df, y='Product line', hue='City').set(
    xlabel='Count',
    title='Product sales by cities',
)
plt.show()
# Product sales density: stacked row counts per product line split by gender,
# with a KDE curve requested on top of the bars.
plt.figure(dpi=140)
ax = sns.histplot(
    data=supermarket_df,
    x='Product line',
    hue='Gender',
    hue_order=['Male', 'Female'],
    multiple='stack',
    kde=True,
    shrink=0.8,
    edgecolor='white',
)
# Tilt the product-line labels on the x axis.
plt.xticks(rotation=45)
ax.set_title("Product sales density based on gender")
plt.show()
# Parse 'Time' into datetimes and derive the hour of each row from it.
supermarket_df['Time'] = pd.to_datetime(supermarket_df['Time'])
supermarket_df['Hour'] = supermarket_df['Time'].dt.hour
# Distinct hours present in the data (result is not stored).
supermarket_df['Hour'].unique()
# Parse 'Date' into datetimes and derive month name and year from it.
supermarket_df['Date'] = pd.to_datetime(supermarket_df['Date'])
supermarket_df['Month'] = supermarket_df['Date'].dt.month_name()
supermarket_df['Year'] = supermarket_df['Date'].dt.year
# Day-of-week name of each row's date.
supermarket_df['Weekday'] = supermarket_df['Date'].dt.day_name()
#print(supermarket_df['Weekday'])
# Hourly sales: 'Quantity' against the hour of day, using seaborn's default
# lineplot aggregation for rows that share the same hour.
plt.figure(dpi=140)
sns.lineplot(data=supermarket_df, x='Hour', y='Quantity').set_title('Hourly Sales')
plt.show()
# Daily sales by day of the week: number of rows recorded on each weekday.
plt.figure(figsize=(13, 6))
plt.title('Daily Sales by Day of the Week')
# Fix the bar order to Monday..Sunday. Without an explicit order seaborn lists
# string categories in the order they first appear in the data, which makes
# the weekday axis hard to read.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                 'Saturday', 'Sunday']
sns.countplot(y='Weekday', data=supermarket_df, order=weekday_order)
# Alternative view (disabled): distribution of 'Quantity' per weekday.
#plt.figure(dpi=140)
#sns.boxplot(x=supermarket_df['Weekday'], y=supermarket_df['Quantity'])
#plt.xticks(rotation=45)
#plt.title('Daily Sales by Day of the Week')
plt.show()
# Distribution of 'gross income' for each branch, as one box per branch.
plt.figure(dpi=140)
sns.boxplot(data=supermarket_df, x='Branch', y='gross income').set_title(
    'Gross income by branches'
)
plt.show()
# Gross income per month, aggregated by seaborn's default lineplot estimator.
plt.figure(dpi=140)
# 'Month' holds month names (strings). Left to seaborn's own ordering the
# months come out alphabetically or in first-appearance order, not in calendar
# order. Feeding the rows sorted by 'Date' and disabling seaborn's sort keeps
# the months in the order they occur in time.
sns.lineplot(x='Month', y='gross income',
             data=supermarket_df.sort_values('Date'), sort=False)
# Render the year(s) as plain text ("2019") rather than the numpy array
# repr ("[2019]") in the title.
year_label = ', '.join(str(year) for year in sorted(supermarket_df['Year'].unique()))
plt.title('Gross income in ' + year_label)
plt.xticks(rotation=45)
plt.show()
# Product rating distribution by customer type: 'Rating' against 'Total',
# with one facet (and one colour) per customer type.
# relplot is a figure-level function that creates its own figure, so a
# preceding plt.figure() call would only open an extra, empty figure. The
# script's usual dpi is applied to the grid's figure instead.
f = sns.relplot(x="Total", y="Rating", data=supermarket_df, col='Customer type', hue="Customer type")
f.fig.set_dpi(140)
# Leave headroom above the facets so the overall title does not overlap them.
f.fig.subplots_adjust(top=.8)
f.fig.suptitle("Product rating distribution by customer type")
plt.show()
# Income by payment type: 'Quantity' against 'gross income', with one facet
# per payment method; payment also drives marker size and colour.
payment_grid = sns.relplot(x="gross income", y="Quantity", data=supermarket_df, kind="scatter",
                           col="Payment", size="Payment", hue="Payment", alpha=0.4)
# The grid has several axes, so plt.title() would only replace the title of
# the last facet. Use a figure-level title and leave headroom for it above
# the facets.
payment_grid.fig.subplots_adjust(top=.8)
payment_grid.fig.suptitle('Income by payment type')
plt.show()
# Correlation heatmap of the numeric columns.
# Original author's observation: positive correlation between unit price,
# quantity, total, tax5%.
plt.figure(figsize=(18, 12))
# Render the year(s) as plain text ("2019") rather than the numpy array
# repr ("[2019]") in the title.
year_label = ', '.join(str(year) for year in sorted(supermarket_df['Year'].unique()))
plt.title('Heatmap of Supermarket sale in ' + year_label)
# Restrict to numeric columns explicitly: the frame also holds string and
# datetime columns, and DataFrame.corr() no longer drops those silently in
# recent pandas versions (it raises instead).
numeric_df = supermarket_df.select_dtypes(include='number')
sns.heatmap(numeric_df.corr(), annot=True, cmap='twilight')
plt.show()