Skip to content
Explore a DataFrame
Explore a DataFrame
Welcome to your workspace! In this walkthrough, you will learn the basics of Workspace as you load data and explore it with Python!
Keep an eye out for 💪 icons throughout the notebook. These will indicate opportunities for you to try out Workspace for yourself!
import pandas as pd
import seaborn as sns
import datetime as dt
import numpy as np
import matplotlib.pyplot as pltdata_2018.head()data_2018 = pd.read_csv('last_year_merchants.csv').reset_index()
data_2019 = pd.read_csv('merchant_transactions.csv').reset_index()
data_2018 = data_2018.drop(labels='index', axis=1)
print('\n')
data_2019 = data_2019.drop(labels='index', axis=1)
data_2019.head()data_2018['live_date'] = pd.to_datetime(data_2018.live_date)
data_2018['last_trx_date'] = pd.to_datetime(data_2018.last_trx_date)
data_2019['trx_date'] = pd.to_datetime(data_2019.trx_date)
data_2019['dow'] = data_2019.trx_date.dt.day_name()
data_2019['month'] = data_2019.trx_date.dt.month_name()
data_2019['date_filter'] = data_2019.trx_date.dt.day
data_2019['cycles'] = data_2019.trx_date.dt.weekdef merch_cat(live_date, last_trx_date):
if (last_trx_date - live_date).days > 30:
return 'active'
if (last_trx_date - live_date).days <= 30:
return 'churn'
if (last_trx_date - live_date).days <= 30 & completed_trx <= 1:
return 'inactive'
else:
return 'undefined'
data_2018['status'] = data_2018.apply(lambda row: merch_cat(row['live_date'], row['last_trx_date']), axis=1)
data_2018.head()data_2019.columnsd = {('trx_id', 'customer_id'):pd.Series.nunique, ('trx_items', 'trx_gmv', 'trx_gbv', 'trx_gmv_after_promo'):'sum'}
data_19_sum = data_2019.groupby('merchant_id').agg({k: v for t, v in d.items() for k in t})
data_19_sum = data_19_sum.reset_index()datelive = pd.DataFrame(data_2019.groupby('merchant_id')['trx_date'].agg(['min', 'max'])).reset_index()
data_19_sum = data_19_sum.merge(datelive, on='merchant_id', how='left')
data_19_sum.head()data_19_sum = data_19_sum.rename(columns={'min':'first_19', 'max':'last_19'})data_2019.head()data_2019['city_id'] = data_2019.merchant_id.str[1:4]
data_19_sum = data_19_sum.merge(data_2019[['merchant_id','city_id']], on='merchant_id', how='left')data_19_sum.head()def merch_cat(first_19, last_19):
if (last_19 - first_19).days > 30:
return 'active'
if (last_19 - first_19).days <= 30:
return 'churn'
if (last_19 - first_19).days <= 30 & trx_id <= 1:
return 'inactive'
else:
return 'undefined'
data_19_sum['status'] = data_19_sum.apply(lambda row: merch_cat(row['first_19'], row['last_19']), axis=1)
data_19_sum.head()churning = data_2018[data_2018.city_id == 'C11'][['merchant_id','status']].merge(data_19_sum[['merchant_id', 'status']], on='merchant_id', how='left')
churning = churning.rename(columns={'status_x':'status_18', 'status_y':'status_19'})
churning.pivot_table(index='status_18', columns='status_19', values='merchant_id', aggfunc=pd.Series.nunique, fill_value=0)