Skip to content

We want to see which ad sources provide the best ROI. We will take a look at 3 different segments to optimize marketing expenses.

We will first look at product performance: the number of unique users visiting, the total number of visits per day across all users, and purchase volume. In the first section, we will load the data, preprocess it for optimization, and check it for errors.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats as st

# Load the three raw logs (visits, orders, marketing costs) from the working
# directory, in the same order as the file names below.
visits, orders, costs = (
    pd.read_csv(csv_name)
    for csv_name in ('visits_log_us.csv', 'orders_log_us.csv', 'costs_us.csv')
)

# First look at the raw tables. info() prints the visits schema; the bare
# head() calls are only rendered by an interactive/notebook session.
visits.info()
visits.head()
orders.head()
costs.head()
####Preprocessing####
##Visits##
# Sanity checks on the raw table: missing values per column, dtypes, sample rows.
visits.isnull().sum()
visits.info()
visits.head(2)

# Normalise every header to snake_case ('Source Id' -> 'source_id',
# 'UID' -> 'uid'). A fixed {old: new} mapping is silently skipped for any key
# whose casing does not match the file header (e.g. 'Uid' vs 'UID'), which
# would leave that column un-renamed without any error.
visits.rename(
    columns=lambda header: header.strip().lower().replace(' ', '_'),
    inplace=True,
)
visits.columns

# Inspect the distinct values before choosing compact dtypes.
print(visits['device'].unique())
visits['source_id'].unique()

# device is converted to a categorical column.
visits['device'] = visits['device'].astype('category')
# Downcast to the smallest signed integer dtype that can hold every id.
# A plain astype('int8') would wrap values outside -128..127 without raising.
visits['source_id'] = pd.to_numeric(visits['source_id'], downcast='integer')

# Parse the session boundaries into datetime64 so they support date arithmetic.
visits['end_ts'] = pd.to_datetime(visits['end_ts'])
visits['start_ts'] = pd.to_datetime(visits['start_ts'])

visits.info()
##Orders##
# Sanity checks on the raw table: missing values per column and dtypes.
orders.isnull().sum()
orders.info()

# Bring the headers in line with the snake_case names used for visits.
orders.rename(columns={'Buy Ts':'buy_ts', 'Revenue':'revenue', 'Uid':'uid'}, inplace=True)
orders['revenue'].describe()

# Keep revenue in 64-bit floats. float16 has an 11-bit significand (about
# 3 significant decimal digits) and a maximum finite value of 65504, so it
# rounds individual order amounts and lets aggregated revenue overflow to inf.
orders['revenue'] = orders['revenue'].astype('float64')

# Parse the purchase timestamp into datetime64.
orders['buy_ts'] = pd.to_datetime(orders['buy_ts'])
orders.info()