Skip to content
pip install mlxtend
Hidden output
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
# Display settings: show every column and do not wrap wide frames across lines.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# Load the "Year 2010-2011" sheet; df_ keeps the frame as read, df is the working copy.
df_ = pd.read_excel('online_retail_II.xlsx', sheet_name="Year 2010-2011")
df = df_.copy()
# Quick inspection: first rows, summary statistics, missing-value counts, shape.
df.head()
df.describe().T
df.isnull().sum()
df.shape
def outlier_thresholds(dataframe, variable, q1=0.01, q3=0.99, whisker=1.5):
    """Return the (low_limit, up_limit) capping bounds for one column.

    The bounds are built from two quantiles of ``dataframe[variable]``:
    the distance between them is scaled by ``whisker`` and added above the
    upper quantile / subtracted below the lower quantile.

    Args:
        dataframe: Frame holding the column.
        variable: Name of the column to analyse.
        q1: Lower quantile level (default 0.01, as in the original code).
        q3: Upper quantile level (default 0.99, as in the original code).
        whisker: Multiplier applied to the inter-quantile range (default 1.5).

    Returns:
        A ``(low_limit, up_limit)`` tuple.

    Raises:
        ValueError: If the quantile levels are not ordered within [0, 1].
    """
    if not 0 <= q1 <= q3 <= 1:
        raise ValueError("quantile levels must satisfy 0 <= q1 <= q3 <= 1")

    column = dataframe[variable]
    lower_quantile = column.quantile(q1)
    upper_quantile = column.quantile(q3)
    # With the defaults this is the 1%-99% spread, not the classic 25%-75% IQR,
    # so only very extreme values end up outside the limits.
    quantile_range = upper_quantile - lower_quantile
    up_limit = upper_quantile + whisker * quantile_range
    low_limit = lower_quantile - whisker * quantile_range
    return low_limit, up_limit
def replace_with_thresholds(dataframe, variable):
    """Cap the values of one column at its outlier limits, in place.

    The limits come from ``outlier_thresholds(dataframe, variable)``. Values
    below the lower limit are overwritten with it, then values above the
    upper limit are overwritten with that limit. The frame passed in is
    modified directly and nothing is returned.

    Args:
        dataframe: Frame to modify.
        variable: Name of the column to cap.
    """
    floor, ceiling = outlier_thresholds(dataframe, variable)

    too_small = dataframe[variable] < floor
    dataframe.loc[too_small, variable] = floor

    # Evaluated after the lower cap has been written, same order as before.
    too_large = dataframe[variable] > ceiling
    dataframe.loc[too_large, variable] = ceiling
def retail_data_prep(dataframe):
    """Clean the retail transactions and cap extreme Quantity/Price values.

    Steps:
      1. Drop rows that contain any missing value.
      2. Drop rows whose ``Invoice`` contains the letter "C".
      3. Keep only rows with positive ``Quantity`` and positive ``Price``.
      4. Cap ``Quantity`` and ``Price`` via ``replace_with_thresholds``.

    The input frame is left untouched; a new frame is returned.

    Args:
        dataframe: Frame with at least ``Invoice``, ``Quantity`` and
            ``Price`` columns.

    Returns:
        The cleaned frame.
    """
    # Non-inplace dropna: the caller's frame must not lose rows as a side effect.
    cleaned = dataframe.dropna()

    # Cast to str so non-string invoice numbers do not break the .str
    # accessor; they simply never match "C".
    has_c = cleaned["Invoice"].astype(str).str.contains("C", regex=False)
    keep = ~has_c & (cleaned["Quantity"] > 0) & (cleaned["Price"] > 0)

    # Explicit copy: the capping below writes in place, and writing into a
    # filtered slice is the chained-assignment hazard pandas warns about.
    cleaned = cleaned[keep].copy()

    replace_with_thresholds(cleaned, "Quantity")
    replace_with_thresholds(cleaned, "Price")
    return cleaned
# Run the cleaning step and rebind df to the frame it returns.
df = retail_data_prep(df)
# Summary statistics of the cleaned data.
df.describe().T
ARL Veri Yapısı Hazırlama
Analizi bir ülke özeline indireceğimizden, örnek olması için o şekilde devam ediyoruz.
# Keep only the rows whose Country is "France"; the analysis continues on this subset.
df_fr = df[df["Country"] == "France"]
df_fr.head()