Skip to content
User Retention by Cohort
  • AI Chat
  • Code
  • Report
  • Spinner

    User Retention by Cohort

    This template helps visualize user retention as the percentage of users in an acquisition cohort who are still using the product after several elapsed time periods. Retention can be visualized in two different ways:

    1. Annotated Heatmap: An annotated heatmap of retention by cohort is useful to visualize and compare rates across cohorts and time periods.

    2. Line Plot: A line plot of retention grouped by cohort is useful to visualize the dropoff in users over time periods.

    # Load packages
    import pandas as pd
    import numpy as np
    import plotly.express as px
    import plotly.figure_factory as ff

    1. Load your data

    Each row in the data aggregates the number of users by cohort and segment who were active in a time period. (user-activity.csv)

    # Upload your data as CSV and load it as a data frame.
    # cohort_date and period_date are parsed as datetimes at load time.
    df = pd.read_csv('data/user-activity.csv', parse_dates=["cohort_date", "period_date"])
    # Preview the first rows of the loaded data.
    df.head()

    2. Compute Retention

    The retention rate is computed as the percentage of users in a cohort (or cohort-segment) who stayed active over time.

    # Compute Retention
    def compute_retention(df):
        """Compute per-cohort retention for each activity period.

        Rows of ``df`` are summed per (cohort_date, period_date) pair, so any
        finer-grained breakdown (for example one row per segment) is collapsed
        into a single row.

        Args:
            df: Data frame with datetime columns ``cohort_date`` and
                ``period_date`` and a numeric ``nb_users`` column. Any other
                columns are ignored. The frame is not modified.

        Returns:
            A new data frame with the columns ``cohort_date``,
            ``period_date``, ``period_index`` (weeks elapsed between the two
            dates, as a float), ``nb_users`` and ``pct_users`` (``nb_users``
            divided by the largest ``nb_users`` observed for that cohort).
        """
        # Sum only the user counts: aggregating every column would also try
        # to "sum" non-numeric columns such as a segment label.
        df_all = (
            df
            .groupby(['cohort_date', 'period_date'], as_index=False)['nb_users']
            .sum()
        )
        # Elapsed time between cohort start and activity period, in weeks.
        df_all['period_index'] = (
            (df_all['period_date'] - df_all['cohort_date']) / np.timedelta64(1, 'W')
        )
        # The cohort size is taken as the peak user count across its periods.
        # The string alias 'max' is used instead of the builtin callable,
        # which recent pandas versions deprecate.
        cohort_size = df_all.groupby('cohort_date')['nb_users'].transform('max')
        df_all['pct_users'] = df_all['nb_users'] / cohort_size
        return df_all[['cohort_date', 'period_date', 'period_index', 'nb_users', 'pct_users']]
    
    # Compute the retention table from the loaded activity data.
    df_retention = compute_retention(df)
    # Preview the first rows of the retention table.
    df_retention.head()

    3. Visualize retention as heatmap

    Each row in the heatmap represents a cohort and visualizes the percentage of users retained over time.

    # Plot cohort retention heatmap
    def plot_cohorts_heatmap(df, nb_periods=15):
        """Plot retention as an annotated heatmap with one row per cohort.

        Args:
            df: Retention data with a datetime ``cohort_date`` column plus
                ``period_index`` and ``pct_users`` columns. The pivot below
                needs each (cohort_date, period_index) pair to be unique.
            nb_periods: Largest ``period_index`` to display. Period 0 is
                always excluded.

        Returns:
            The plotly figure holding the annotated heatmap.
        """
        df = df.query('period_index > 0 & period_index <= @nb_periods')
        # One row per cohort (most recent first), one column per period.
        # Missing cohort/period combinations become 0.
        df_wide = (
            df
            .pivot(index="cohort_date", columns='period_index', values='pct_users')
            .sort_index(ascending=False)
            .fillna(0)
        )
        # Cell labels are built with a plain comprehension instead of
        # DataFrame.applymap, which is deprecated in recent pandas versions.
        # Cells that are 0 (including filled-in gaps) are left blank.
        annotation_text = [
            ['{:.1%}'.format(pct) if pct > 0 else '' for pct in row]
            for row in df_wide.values
        ]
        fig = ff.create_annotated_heatmap(
            z=df_wide.values,
            annotation_text=annotation_text,
            y=df_wide.index.strftime('%Y - W%W').values.tolist(),
            x=df_wide.columns.tolist(),
            colorscale='viridis_r',
        )
        fig.update_layout(
            width=900,
            height=700,
            xaxis={"title": "# Periods Elapsed"},
            title="User Retention by Cohort: Heatmap",
        )
        return fig
    
    # Build the heatmap from the retention table (default number of periods).
    fig = plot_cohorts_heatmap(df_retention)
    # Render the figure with the plotly mode bar disabled.
    fig.show(config={"displayModeBar": False})

    4. Visualize retention as line plot

    Each line represents a cohort and visualizes the dropoff in the percentage of users retained over time.

    # Plot cohort retention lines
    def plot_cohort_lines(df, nb_periods=15):
        """Plot retention over elapsed periods as one line per cohort.

        Args:
            df: Retention data with ``cohort_date``, ``period_index`` and
                ``pct_users`` columns. The frame is not modified.
            nb_periods: Largest ``period_index`` to display (inclusive), so
                exactly ``nb_periods`` periods are shown when the index is
                1, 2, 3, ... Period 0 is always excluded.

        Returns:
            The plotly figure holding the line plot.
        """
        # Filter first and work on a copy, so the string conversion below
        # never changes the caller's ``cohort_date`` column.
        df_plot = df.query('period_index > 0 & period_index <= @nb_periods').copy()
        # Cohort dates are converted to strings before being used as the
        # line grouping key.
        df_plot['cohort_date'] = df_plot['cohort_date'].astype(str)
        fig = px.line(
            df_plot,
            x='period_index',
            y='pct_users',
            line_group='cohort_date',
            color_discrete_sequence=["lightslategray"],
        )
        fig.update_layout(
            xaxis={"title": "# Periods Elapsed"},
            yaxis={"title": "% Users Retained"},
            title="User Retention by Cohorts: Line Plot",
        )
        return fig
    
    
    # Build the line plot from the retention table (default number of periods).
    fig_lines = plot_cohort_lines(df_retention)
    # Render the figure with the plotly mode bar disabled.
    fig_lines.show(config = {"displayModeBar": False})