Skip to content

About Dataset

Online Retail is a transnational data set that contains all the transactions occurring between 01/12/2010 and 09/12/2011 for a UK-based, registered, non-store online retailer.

Loading Datasets

import pandas as pd

# Load the workbook from the path given below, relative to the working
# directory; change the string if the file is stored somewhere else.
# Passing engine='openpyxl' to read_excel selects the openpyxl reader explicitly.
online_retail_df = pd.read_excel(
    io='datasets/Online Retail.xlsx',
)

Exploratory Data Analysis (EDA)

# Quick structural overview of the loaded frame. These are bare expressions:
# each one only shows output when it is the last statement of a notebook cell
# or is evaluated interactively.
# NOTE(review): presumably these were four separate notebook cells — confirm.
online_retail_df.head()  # first rows of the frame
online_retail_df.shape  # (number of rows, number of columns)
online_retail_df.dtypes  # dtype of every column
online_retail_df.describe()  # summary statistics
import matplotlib.pyplot as plt

# Tag every row with the monthly period of its invoice date so rows can be
# bucketed per month.
invoice_month = online_retail_df['InvoiceDate'].dt.to_period('M')
online_retail_df['YearMonth'] = invoice_month

# Distinct invoice numbers per month (each InvoiceNo is counted once per month).
rows_by_month = online_retail_df.groupby('YearMonth')
order_counts = rows_by_month['InvoiceNo'].nunique()

# Bar chart of the monthly counts, drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(12, 6))
order_counts.plot.bar(ax=ax)
ax.set_title('Monthly Order Count')
ax.set_xlabel('Year-Month')
ax.set_ylabel('Number of Orders')
plt.xticks(rotation=45)  # slant the month labels
plt.show()
# Show the first rows of merged_df after sorting by 'UniqueItemCount' in
# descending order, i.e. the rows with the largest counts.
# NOTE(review): merged_df is not defined in the visible portion of this file —
# confirm that the cell building it runs before this line.
merged_df.sort_values(by='UniqueItemCount', ascending=False).head()
# Keep only the rows whose StockCode equals the integer 20713.
is_item_20713 = online_retail_df['StockCode'].eq(20713)
filtered_20713_df = online_retail_df.loc[is_item_20713]

# Inspect the size and the first rows of the subset.
filtered_20713_df.shape
filtered_20713_df.head()
# Count the distinct invoices for every (StockCode, Description, UnitPrice)
# combination of the filtered item; the count is stored in 'InvoiceNoCount'.
group_keys = ['StockCode', 'Description', 'UnitPrice']
grouped_20713_df = (
    filtered_20713_df
    .groupby(group_keys)['InvoiceNo']
    .nunique()
    .reset_index(name='InvoiceNoCount')
)

# Preview the first rows of the aggregated frame.
grouped_20713_df.head()
# (Re)derive the monthly period column so this cell works on its own.
invoice_period = online_retail_df['InvoiceDate'].dt.to_period('M')
online_retail_df['YearMonth'] = invoice_period

# One box per month showing the distribution of UnitPrice.
# NOTE(review): `sns` is not imported in the visible portion of this file —
# presumably `import seaborn as sns` runs in an earlier cell; confirm.
fig, ax = plt.subplots(figsize=(14, 7))
sns.boxplot(data=online_retail_df, x='YearMonth', y='UnitPrice', ax=ax)
ax.set_title('Box Plot of Unit Price by YearMonth')
ax.set_xlabel('YearMonth')
ax.set_ylabel('Unit Price')
plt.xticks(rotation=45)  # slant the month labels
plt.show()