Skip to content

Import, Format and Add Necessary indicator to original Datasets

import pandas as pd
import numpy as np
users = pd.read_csv('users.csv')
events = pd.read_csv('subscription_events.csv')
visits = pd.read_csv('user_visits.csv')
visits.head()
users.head()
users.groupby(['plan_code'])['id'].count()
# Convert to datetime format
users['signed_up_at'] = pd.to_datetime(users['signed_up_at'], format='%Y-%m-%d %H:%M:%S')
events['event_time'] = pd.to_datetime(events['event_time'], format='%Y/%m/%d %H:%M:%S')
visits['first_visited_at'] = pd.to_datetime(visits['first_visited_at'], format='%Y/%m/%d %H:%M:%S')
visits['last_visited_at'] = pd.to_datetime(visits['last_visited_at'], format='%Y/%m/%d %H:%M:%S')
users.head()
events.head()
visits.head()

Table subscription_events: We will need to allocate visits that lead to the event base on time - visits lead to event happened AFTER the previous event and BEFORE the current event. So this section of code is to identify previous event.

# Identify first event happened for each user
first_event=events.groupby('user_id',as_index = False)['event_time'].min()
first_event['first_event'] = 1

# Merge to event database to get first_event information
events = pd.merge(left = events,
                 right = first_event,
                 how = 'left',
                 left_on = ['user_id','event_time'],
                  right_on = ['user_id','event_time']
                 )

# Fill null value by 0 for calculating later
events['first_event'].fillna(value=0, inplace=True)

# Get information of the previous event
# Sort events dataset by users and ascending events time
events.sort_values(by = ['user_id','event_time'],ascending = True,inplace =True)
# Use shift method to get the previous event time
events['previous_event_time']= events['event_time'].shift().where(events['first_event']==0)

# Preview
events.head()

Merge datasets

We will create 2 combined table for 2 purposes:

  1. Trends table - combine users and events - this will show the MOM trend of acquisition
  2. Funnels - combine events and visits - this will allocate the visits that lead to the events

1. Trend Table

# Merge datasets Users and Events
trend = pd.merge(left = users,
                right = events,
                 how = 'inner',
                left_on = 'id',
                right_on = 'user_id',
                suffixes = ('_u', '_e'))
trend = trend[['id','signed_up_at','plan_code_u','subscription_status','plan_code_e','event_name','event_time','previous_event_time']]