Skip to content
New Workbook
Sign up
JustDiceCase
import pandas as pd
import numpy as np

adspend = pd.read_csv("adspend.csv")
installs = pd.read_csv("installs.csv")
payouts = pd.read_csv("payouts.csv")
revenue = pd.read_csv("revenue.csv")

print(adspend.head())
print(adspend.info())
print(adspend.isnull().sum())
print(adspend["client_id"].unique())
adspend["event_date"] = pd.to_datetime(adspend["event_date"])

col_names = adspend.columns.to_list()
col_names[col_names.index("client_id")] = "app_id"
adspend.columns = col_names
adspend.info()
print(installs.head())
print(installs.info())
print(installs.isnull().sum())

print(installs["country_id"].describe())
print(installs["country_id"].unique())
print(installs["app_id"].unique())
print(installs["network_id"].unique())
a = installs["device_os_version"].unique()
a.sort()
print(a)
installs["event_date"] = pd.to_datetime(installs["event_date"])
#Add a new col for device age based on OS
#installs["device_cat"]

#Revenue/Payout/Counts by country, network
#Top 10 Apps by Revenue/Payout/Counts
#AOV by country/network/app
print(payouts.head())
print(payouts.info())
print(payouts.isnull().sum())

payouts["event_date"] = pd.to_datetime(payouts["event_date"])

#Payout by day, weekday/weekend
print(revenue.head())
print(revenue.info())
print(revenue.isnull().sum())

revenue["event_date"] = pd.to_datetime(revenue["event_date"])
#Convert dates to date data type
#None of the dataset have any null values, data in the columns is also in the expected range

#Revenue by date, by weekday/weekend

Todo #Add a new col for device age based on OS

Deliverables

  • Revenue/Payout/Counts by country, network, device age
  • Top 10 Apps by Revenue/Payout/Counts
  • AOV/CTA by country/network/app/Device age
  • Payout/Revenue/Counts by day, weekday/weekend
  • Profitability by country/network (Ads spend vs revenue)

Dimensions Country, network, device age (no adpsend) , top 10 apps (no adspend), clients (ads only). Client ID may be same thing as app_id

Metrics Revenue, Payout, Counts (no of downloads), AOV, CTA

#Make note about user quality and commericial conditions which are not present but may be useful

#Joins

\Installs with revenue and payoutsZA

installs_revenue = installs.merge(revenue[["install_id","value_usd"]], on="install_id", how="inner")
installs_payouts = installs.merge(payouts[["install_id","value_usd"]], on="install_id", how="inner")

print(installs_revenue.info(show_counts=True))
print(installs_payouts.info(show_counts=True))

#Revenue/Payout/Counts by country, network, device age
#Top 10 Apps by Revenue/Payout/Counts
#AOV by country/network/app/Device age (Rev/Count of rev for that channel)
#Payout/Revenue/Counts by day, weekday/weekend
print(installs_revenue.groupby("country_id")["value_usd"].sum())
print(installs_payouts.groupby("country_id")["value_usd"].sum())
print(installs_revenue.groupby("country_id")["value_usd"].count())
print(installs_payouts.groupby("country_id")["value_usd"].count())

#CTA (Count of installs/Adspend for that channel)
print(adspend.groupby("country_id")["value_usd"].sum())
print(installs.groupby("country_id")["install_id"].count())

#Profitability (Revenue/Adspend for that channel)
print(adspend.groupby("country_id")["value_usd"].sum())
print(installs.groupby("country_id")["install_id"].count())

print(installs_payouts.groupby("country_id")["value_usd"])

dimensions = ["country_id","network_id", "device_cat", "app_id"]

country = pd.DataFrame({
    "revenue": installs_revenue.groupby("country_id")["value_usd"].sum(),
    "adspend": adspend.groupby("country_id")["value_usd"].sum(),
    "payouts": installs_payouts.groupby("country_id")["value_usd"].sum(),
    "installs": installs.groupby("country_id")["event_date"].count()
}
).reset_index()

print(country)