Skip to content
Quantium Dataset
Loading libraries and the data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
pd.set_option('display.max_columns', None)#importing the first dataset df1
df1 = pd.read_excel("QVI_transaction_data.xlsx")#reading the excel file takes too long. We will save the df as csv and we'll use the csv file moving forward.
#df1.to_csv('QVI_transaction_data.csv', index=False)#re-importing the df
df1 = pd.read_csv('QVI_transaction_data.csv')#Exploring df1's shape and first rows
print("Shape:", df1.shape, '\n')
df1.head()#importing the second dataset
df2 = pd.read_csv('QVI_purchase_behaviour.csv')#Exploring df2's shape and first rows
print("Shape:", df2.shape, '\n')
df2.head()#combining the two datasets into one
df = df1.merge(df2, how='left', on='LYLTY_CARD_NBR')
df.head()print("Merged shape:", df.shape)Data Cleaning
#Formula to convert excel date integer to pandas datetime object
def xldate_to_datetime(xldate):
temp = dt.datetime(1899, 12, 30)
delta = dt.timedelta(days=xldate)
return temp+delta#Reassigning the Date column with correct datetime values
df['DATE'] = df['DATE'].apply(xldate_to_datetime)#Verifying that the data type for date changed
df.info()#descriptive statistics
df.describe()