Skip to content

You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.

The Data

You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches related to fitness and related products.

workout.csv

| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'workout_worldwide' | Index representing the popularity of the keyword 'workout', on a scale of 0 to 100. |

three_keywords.csv

| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'home_workout_worldwide' | Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100. |
| 'gym_workout_worldwide' | Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100. |
| 'home_gym_worldwide' | Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100. |

workout_geo.csv

| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'workout_2018_2023' | Index representing the popularity of the keyword 'workout' during the 5-year period. |

three_keywords_geo.csv

| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'home_workout_2018_2023' | Index representing the popularity of the keyword 'home workout' during the 5-year period. |
| 'gym_workout_2018_2023' | Index representing the popularity of the keyword 'gym workout' during the 5-year period. |
| 'home_gym_2018_2023' | Index representing the popularity of the keyword 'home gym' during the 5-year period. |

Global search for 'workout' at its peak

# Import the necessary libraries
import pandas as pd
from plotnine import *

# Load the monthly worldwide Google Trends index for the keyword 'workout'.
workout_worldwide = pd.read_csv("data/workout.csv")

# Inspect column dtypes / non-null counts, then preview the first rows.
workout_worldwide.info()

print(workout_worldwide.head())

# Global search for 'workout' at its peak:
# total the monthly index per calendar year and rank the years from the
# highest total to the lowest.
peak_search = (
    workout_worldwide
    .assign(year=pd.to_datetime(workout_worldwide['month']).dt.year)
    .groupby('year')
    # numeric_only=True keeps the text 'month' column out of the aggregation.
    # Without it the column is summed as well (string concatenation per group
    # on pandas >= 2.0), which clutters the printed result.
    .sum(numeric_only=True)
    .sort_values(by='workout_worldwide', ascending=False)
)

print(peak_search.head())

# Answer, stored as a "yyyy" string. It is entered by hand rather than
# derived from peak_search.
year_str = "2020"

Most popular keyword during the COVID-19 pandemic vs. the most popular keyword now

# Load the monthly worldwide index for the three workout-related keywords.
workout_keywords_worldwide = pd.read_csv("data/three_keywords.csv")

# Column overview followed by a preview of the first rows.
workout_keywords_worldwide.info()

print(workout_keywords_worldwide.head())

# Derive the calendar year of every monthly observation and store it as a
# new 'year' column on the frame.
month_stamps = pd.to_datetime(workout_keywords_worldwide['month'])
workout_keywords_worldwide['year'] = month_stamps.dt.year

# Reshape wide -> long for plotting: one row per (observation, keyword).
# Only the '*worldwide' columns are unpivoted; 'year' is carried along.
keyword_columns = [
    name
    for name in workout_keywords_worldwide.columns
    if name.endswith('worldwide')
]
melted_workout_keywords_worldwide = workout_keywords_worldwide.melt(
    id_vars='year',
    value_vars=keyword_columns,
    var_name='workout_type',   # holds the former column name
    value_name='n',            # holds the index value
)

# Dodged bar chart of the index by year, one colour per keyword.
# NOTE(review): rows are not aggregated before plotting, so each
# year/keyword slot receives every monthly row for that year.
keyword_plot = (
    ggplot(
        melted_workout_keywords_worldwide,
        aes(x='factor(year)', y='n', fill='workout_type'),
    )
    + geom_col(position='dodge')
    + theme_minimal()
)
print(keyword_plot)

# Answers, entered by hand after reading the chart.
peak_covid = "home workout"
current = "gym workout"

Country with the highest interest in workouts among the United States, Australia, and Japan

# Load the per-country interest index for the keyword 'workout'.
workout_geo = pd.read_csv("data/workout_geo.csv")

# Column overview followed by a preview of the first rows.
workout_geo.info()

print(workout_geo.head())

# Restrict the table to the three countries being compared.
countries = ["United States", "Australia", "Japan"]

is_selected = workout_geo['country'].isin(countries)
subset = workout_geo[is_selected]

# One bar per selected country showing its 'workout' index.
country_plot = (
    ggplot(subset, aes(x='country', y='workout_2018_2023'))
    + geom_col(fill='#4C72B0')
    + theme_minimal()
)
print(country_plot)

# Answer, entered by hand after reading the chart.
top_country = "United States"

Which country has the higher interest in home workouts: the Philippines or Malaysia?

# Load the per-country interest index for the three workout-related keywords.
three_keywords_geo = pd.read_csv("data/three_keywords_geo.csv")

# Inspect column dtypes / non-null counts, then preview the first rows.
three_keywords_geo.info()

print(three_keywords_geo.head())

# Resolve the name of the country column. This block historically used
# 'Country', while the data dictionary for this file and the workout_geo
# analysis use lowercase 'country'. Accept either spelling, preferring the
# original 'Country', so a file that follows the documented schema does not
# fail with a KeyError.
country_col = next(
    (name for name in ('Country', 'country') if name in three_keywords_geo.columns),
    None,
)
if country_col is None:
    raise KeyError("Country")

# Subsetting: keep the two countries being compared and only the columns
# needed for the chart.
subset = three_keywords_geo.loc[
    three_keywords_geo[country_col].isin(['Philippines', 'Malaysia']),
    [country_col, 'home_workout_2018_2023'],
]

# One bar per country showing its 'home workout' index, coloured by country.
print(
    ggplot(subset, aes(x=country_col, y='home_workout_2018_2023', fill=country_col)) +
    geom_col() +
    theme_minimal()
)

# Answer, entered by hand after reading the chart.
home_workout_geo = "Philippines"