Skip to content
Unlocking Retail Insights: A Data-Driven Analysis of Superstore Sales
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib as plt  # NOTE(review): `plt` conventionally aliases matplotlib.pyplot; unused in this section — confirm intent.

# Exploratory analysis of the Superstore sales extract.
#
# This section was originally a sequence of notebook cells whose boundaries
# were lost (statements were fused onto the same physical line). The
# statements are restored one per line below; `# %%` markers show where the
# cells appear to have been split. Bare expressions such as `sm.head()` or
# `px.line(...)` only render output when run as the last expression of a
# cell in an interactive session; in a plain script they are evaluated and
# discarded.

# %% Load the raw data.
sm = pd.read_csv("Sample - Superstore.csv", encoding='latin1')

# %% Quick look at the first rows.
sm.head()

# %% Column dtypes and non-null counts.
sm.info()

# %% Parse the date columns (stored as month/day/four-digit-year strings).
sm['Order Date'] = pd.to_datetime(sm['Order Date'], format='%m/%d/%Y')
sm['Ship Date'] = pd.to_datetime(sm['Ship Date'], format='%m/%d/%Y')

# %% Break the order date into calendar components for grouping.
sm['OrderY'] = sm['Order Date'].dt.year
sm['OrderM'] = sm['Order Date'].dt.month
sm['OrderD'] = sm['Order Date'].dt.day

# %% Profit as a fraction of sales for each row.
# NOTE(review): rows with Sales == 0 would yield inf/NaN here — confirm the
# data never contains zero sales.
sm['Profitability'] = sm['Profit'] / sm['Sales']

# %% Summary statistics for the numeric columns.
sm.describe()

# %% Summary statistics for the string (object) columns.
sm.describe(include="object")

# %% Total number of missing values across the whole frame.
sm.isna().sum().sum()

# %% Number of rows that exactly duplicate an earlier row.
sm.duplicated().sum()

# %%
sm.head()

# %% Rows per calendar month, pooled across all years.
# NOTE(review): `.count()` counts non-null "Order ID" rows, not distinct
# orders; if one order can span several rows, `.nunique()` may be what is
# wanted — verify against the data.
ocm = sm.groupby("OrderM")["Order ID"].count().reset_index()
ocm.columns = ["OrderM", "Count"]
px.line(ocm, x="OrderM", y="Count", markers=True)

# %% Rows per (year, month), plotted on a real time axis.
ocym = sm.groupby(["OrderY", "OrderM"])["Order ID"].count().reset_index()
ocym.columns = ["OrderY", "OrderM", "Order ID"]
# Build a "YYYY-MM" string (month zero-padded to two digits) and parse it,
# giving one timestamp per month to use as the x-axis.
ocym["Date"] = pd.to_datetime(
    ocym["OrderY"].astype(str) + "-" + ocym["OrderM"].astype(str).str.zfill(2)
)
px.line(ocym, x="Date", y="Order ID", markers=True, title="Orders Over Time")

# %% Row counts per customer segment, coloured by region.
px.histogram(sm, x="Segment", color="Region")