Skip to content
JustDiceCase
  • AI Chat
  • Code
  • Report
  • import pandas as pd
    import numpy as np
    
    adspend = pd.read_csv("adspend.csv")
    installs = pd.read_csv("installs.csv")
    payouts = pd.read_csv("payouts.csv")
    revenue = pd.read_csv("revenue.csv")
    
    print(adspend.head())
    print(adspend.info())
    print(adspend.isnull().sum())
    print(adspend["client_id"].unique())
    adspend["event_date"] = pd.to_datetime(adspend["event_date"])
    
    col_names = adspend.columns.to_list()
    col_names[col_names.index("client_id")] = "app_id"
    adspend.columns = col_names
    adspend.info()
    print(installs.head())
    print(installs.info())
    print(installs.isnull().sum())
    
    print(installs["country_id"].describe())
    print(installs["country_id"].unique())
    print(installs["app_id"].unique())
    print(installs["network_id"].unique())
    a = installs["device_os_version"].unique()
    a.sort()
    print(a)
    installs["event_date"] = pd.to_datetime(installs["event_date"])
    #Add a new col for device age based on OS
    #installs["device_cat"]
    
    #Revenue/Payout/Counts by country, network
    #Top 10 Apps by Revenue/Payout/Counts
    #AOV by country/network/app
    print(payouts.head())
    print(payouts.info())
    print(payouts.isnull().sum())
    
    payouts["event_date"] = pd.to_datetime(payouts["event_date"])
    
    #Payout by day, weekday/weekend
    print(revenue.head())
    print(revenue.info())
    print(revenue.isnull().sum())
    
    revenue["event_date"] = pd.to_datetime(revenue["event_date"])
    #Convert dates to date data type
    #None of the dataset have any null values, data in the columns is also in the expected range
    
    #Revenue by date, by weekday/weekend

    Todo #Add a new col for device age based on OS

    Deliverables

    • Revenue/Payout/Counts by country, network, device age
    • Top 10 Apps by Revenue/Payout/Counts
    • AOV/CTA by country/network/app/Device age
    • Payout/Revenue/Counts by day, weekday/weekend
    • Profitability by country/network (Ads spend vs revenue)

    Dimensions Country, network, device age (no adpsend) , top 10 apps (no adspend), clients (ads only). Client ID may be same thing as app_id

    Metrics Revenue, Payout, Counts (no of downloads), AOV, CTA

    #Make note about user quality and commericial conditions which are not present but may be useful

    #Joins

    \Installs with revenue and payoutsZA

    installs_revenue = installs.merge(revenue[["install_id","value_usd"]], on="install_id", how="inner")
    installs_payouts = installs.merge(payouts[["install_id","value_usd"]], on="install_id", how="inner")
    
    print(installs_revenue.info(show_counts=True))
    print(installs_payouts.info(show_counts=True))
    
    #Revenue/Payout/Counts by country, network, device age
    #Top 10 Apps by Revenue/Payout/Counts
    #AOV by country/network/app/Device age (Rev/Count of rev for that channel)
    #Payout/Revenue/Counts by day, weekday/weekend
    print(installs_revenue.groupby("country_id")["value_usd"].sum())
    print(installs_payouts.groupby("country_id")["value_usd"].sum())
    print(installs_revenue.groupby("country_id")["value_usd"].count())
    print(installs_payouts.groupby("country_id")["value_usd"].count())
    
    #CTA (Count of installs/Adspend for that channel)
    print(adspend.groupby("country_id")["value_usd"].sum())
    print(installs.groupby("country_id")["install_id"].count())
    
    #Profitability (Revenue/Adspend for that channel)
    print(adspend.groupby("country_id")["value_usd"].sum())
    print(installs.groupby("country_id")["install_id"].count())
    
    print(installs_payouts.groupby("country_id")["value_usd"])
    
    dimensions = ["country_id","network_id", "device_cat", "app_id"]
    
    country = pd.DataFrame({
        "revenue": installs_revenue.groupby("country_id")["value_usd"].sum(),
        "adspend": adspend.groupby("country_id")["value_usd"].sum(),
        "payouts": installs_payouts.groupby("country_id")["value_usd"].sum(),
        "installs": installs.groupby("country_id")["event_date"].count()
    }
    ).reset_index()
    
    print(country)