# Skip to content  (web-page navigation residue; not part of the script)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the historical delivery records from the CSV file.
data = pd.read_csv('historical_data.csv')
data.head()
# Print the column summary to see which dtype each column was read as.
data.info()
# Parse the two timestamp columns into datetime values so they can be subtracted.
for _date_column in ('created_at', 'actual_delivery_time'):
    data[_date_column] = pd.to_datetime(data[_date_column])
from datetime import datetime
# Delivery duration: time between order creation and delivery, converted
# from a timedelta to total seconds.
data['actual_total_delivery_duration'] = (
    data['actual_delivery_time'].sub(data['created_at']).dt.total_seconds()
)
# Ratio of busy dashers to dashers on shift.
# NOTE(review): a zero total_onshift_dashers would give inf/NaN here --
# confirm this is handled downstream.
data['busy_dashers_ratio'] = data['total_busy_dashers'].div(
    data['total_onshift_dashers']
)
# Sum of the estimated driving duration and the estimated order-placement duration.
data['estimated_non_prep_duration'] = data[
    'estimated_store_to_consumer_driving_duration'
].add(data['estimated_order_place_duration'])
# Count distinct values per candidate column to decide which ones to encode.
data['market_id'].nunique()
data['store_id'].nunique()
data['order_protocol'].nunique()
# One-hot encode order_protocol; columns are named 'order_protocol_<value>'.
order_protocol_dummies = pd.get_dummies(data['order_protocol']).add_prefix(
    'order_protocol_'
)
order_protocol_dummies.head()
# One-hot encode market_id; columns are named 'market_id_<value>'.
market_id_dummies = pd.get_dummies(data['market_id']).add_prefix('market_id_')
market_id_dummies.head()
# Build a lookup from each store_id to the mode(s) of its
# store_primary_category, used to fill null rows where it is possible.
store_id_unique = data['store_id'].unique().tolist()
# A single groupby pass replaces re-filtering the whole frame once per store
# (rows x stores comparisons). sort=False keeps the stores in first-appearance
# order, like store_id_unique. Each value is the Series returned by mode(); it
# is empty when every category recorded for that store is null. Rows whose
# store_id is null form no group, so such ids are simply absent from the dict.
store_id_and_category = {
    store_id: categories.mode()
    for store_id, categories in data.groupby('store_id', sort=False)[
        'store_primary_category'
    ]
}
import numpy as np
def fill(store_id, lookup=None):
    """Return the first category stored for ``store_id``.

    Args:
        store_id: Key to look up.
        lookup: Optional mapping from store id to a pandas Series of
            categories. Defaults to the module-level
            ``store_id_and_category``.

    Returns:
        The first element of the Series stored under ``store_id``, or
        ``np.nan`` when the id is not in the mapping or its Series is empty.
    """
    if lookup is None:
        lookup = store_id_and_category
    try:
        return lookup[store_id].values[0]
    except (KeyError, IndexError):
        # KeyError: the id is not in the mapping.
        # IndexError: the stored Series is empty, so there is no first value.
        # Anything else is a real bug and is allowed to propagate.
        return np.nan
# Give every row the category that fill() returns for its store_id. This
# overwrites all rows, not only the null ones, and leaves NaN where fill()
# finds no category for the store.
data['nan_free_store_primary_category'] = data['store_id'].apply(fill)
data['nan_free_store_primary_category'].nunique()
# One-hot encode the filled category; columns are named 'category_<value>'.
store_primary_category_dummies = pd.get_dummies(
    data['nan_free_store_primary_category']
).add_prefix('category_')
store_primary_category_dummies.head()