Skip to content
Customer Second Order Predictor
  • AI Chat
  • Code
  • Report
  • Machine Learning Classification Model Predicting Customers Reorder Based on First Order

    Branton Stanley

    **This was a project I completed for my Master's degree in Data Science.** It suffers from a lack of data: I had only about 4,000 customers, but over 3,500 products.

    Importing Data and Libraries

    import pandas as pd
    import numpy as np
    import os
    
    #ML Models
    import tensorflow as tf
    from tensorflow import keras
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import VotingClassifier
    
    #Data Preprocessing
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    
    #ML Model Metrics
    from sklearn.metrics import accuracy_score, precision_score, recall_score
    
    #Dimensionality Reduction
    from sklearn.decomposition import PCA
    
    #Grid Search for parameter optimization
    from sklearn.model_selection import GridSearchCV
    
    #import keras_tuner
    from tensorflow.keras import layers
    
    
    # Ignore useless warnings (see SciPy issue #5998)
    import warnings
    warnings.filterwarnings(action="ignore", message="^internal gelsd")

    Importing the data

    # Load the raw transaction data from the CSV export.
    data = pd.read_csv('Business Sales Transaction.csv')
    data
    # Dropping all rows with non-positive quantity (negative quantity represents a return).
    # The explicit .copy() makes the filtered frame independent of the original,
    # so the column assignment below cannot raise SettingWithCopyWarning.
    data = data[data['Quantity'] > 0].copy()
    data
    # Converting TransactionNo to data type integer
    data['TransactionNo'] = data['TransactionNo'].astype(int)
    data.dtypes

    Data Preparation

    # Work on an independent (deep) copy so later changes never touch `data`.
    df = data.copy(deep=True)

    First Order Information and Target Column

    The following section adds the 'second_order' target column and removes all order information after each customer's first order.

    # Per-customer summary: the smallest TransactionNo (treated as the first order)
    # and the number of distinct transactions placed.
    # NOTE(review): assumes TransactionNo increases over time -- confirm against the data.
    df_orders = df.groupby('CustomerNo')['TransactionNo'].agg(
        first_order='min',
        orders='nunique',
    )
    df_orders
    # Target column: 1 when the customer placed more than one order, otherwise 0.
    df_orders['second_order'] = (df_orders['orders'] > 1).astype(int)
    df_orders
    # Inner join on (CustomerNo, TransactionNo == first_order): keeps only the rows
    # of each customer's first order and attaches the summary and target columns.
    df_first_order = df.merge(
        df_orders,
        how='inner',
        left_on=['CustomerNo', 'TransactionNo'],
        right_on=['CustomerNo', 'first_order'],
    )
    df_first_order