Skip to content
UCI - Online Retail
About Dataset
Online Retail is a transnational data set that contains all the transactions occurring between 01/12/2010 and 09/12/2011 for a UK-based and registered non-store online retailer.
Loading Datasets
import matplotlib.pyplot as plt
import pandas as pd

# Load the Online Retail workbook into a DataFrame.
# Replace 'path_to_file' with the actual path to the downloaded file
online_retail_df = pd.read_excel('datasets/Online Retail.xlsx')
# data = pd.read_excel('path_to_file/Online Retail.xlsx', engine='openpyxl')

# Exploratory Data Analysis (EDA)
# These were separate notebook cells whose boundaries were lost, leaving the
# statements fused on one line (a SyntaxError). Each result is printed
# explicitly so it is visible when the file is run as a plain script too.
print(online_retail_df.head())      # first rows
print(online_retail_df.shape)       # (row count, column count)
print(online_retail_df.dtypes)      # dtype of each column
print(online_retail_df.describe())  # summary statistics
# --- Monthly order count ---
# Create a new column 'YearMonth' for grouping (monthly period of each invoice date)
online_retail_df['YearMonth'] = online_retail_df['InvoiceDate'].dt.to_period('M')
# Group by 'YearMonth' and count unique 'InvoiceNo'
order_counts = online_retail_df.groupby('YearMonth')['InvoiceNo'].nunique()
# Plotting: one bar per month, height = number of distinct invoices
plt.figure(figsize=(12, 6))
order_counts.plot(kind='bar')
plt.title('Monthly Order Count')
plt.xlabel('Year-Month')
plt.ylabel('Number of Orders')
plt.xticks(rotation=45)
plt.show()

# NOTE(review): `merged_df` (expected to carry a 'UniqueItemCount' column) is
# not defined anywhere in the visible part of this file -- presumably the
# cell that built it was lost in the export; confirm before running.
print(merged_df.sort_values(by='UniqueItemCount', ascending=False).head())

# Keep only the rows whose stock code is 20713 for a closer look.
filtered_20713_df = online_retail_df[online_retail_df['StockCode'] == 20713]
# Inspect the rows selected for stock code 20713. The two expressions were
# fused into `filtered_20713_df.shapefiltered_20713_df.head()`, which fails
# with AttributeError; they are separated and printed here.
print(filtered_20713_df.shape)
print(filtered_20713_df.head())

# Group by 'StockCode', 'Description', and 'UnitPrice', and count unique 'InvoiceNo'.
# `reset_index(name=...)` names the count column directly, so no separate
# column-renaming step is needed.
grouped_20713_df = (
    filtered_20713_df
    .groupby(['StockCode', 'Description', 'UnitPrice'])['InvoiceNo']
    .nunique()
    .reset_index(name='InvoiceNoCount')
)
# Display the grouped DataFrame
print(grouped_20713_df.head())
# Ensure 'YearMonth' column is in the DataFrame
# Recompute the monthly period so this section does not rely on an earlier one.
invoice_month = online_retail_df['InvoiceDate'].dt.to_period('M')
online_retail_df['YearMonth'] = invoice_month

# Box plot of unit prices, one box per month.
# NOTE(review): `sns` (seaborn) is not imported in the visible part of this
# file -- confirm `import seaborn as sns` exists elsewhere.
fig, ax = plt.subplots(figsize=(14, 7))
sns.boxplot(data=online_retail_df, x='YearMonth', y='UnitPrice', ax=ax)
ax.set_title('Box Plot of Unit Price by YearMonth')
ax.set_xlabel('YearMonth')
ax.set_ylabel('Unit Price')
ax.tick_params(axis='x', labelrotation=45)
plt.show()