Skip to content

You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.

The Data

You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches related to fitness and related products.

workout.csv

| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'workout_worldwide' | Index representing the popularity of the keyword 'workout', on a scale of 0 to 100. |

three_keywords.csv

| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'home_workout_worldwide' | Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100. |
| 'gym_workout_worldwide' | Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100. |
| 'home_gym_worldwide' | Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100. |

workout_geo.csv

| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'workout_2018_2023' | Index representing the popularity of the keyword 'workout' during the 5-year period. |

three_keywords_geo.csv

| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'home_workout_2018_2023' | Index representing the popularity of the keyword 'home workout' during the 5-year period. |
| 'gym_workout_2018_2023' | Index representing the popularity of the keyword 'gym workout' during the 5-year period. |
| 'home_gym_2018_2023' | Index representing the popularity of the keyword 'home gym' during the 5-year period. |
# Import the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt

# Read the four Google Trends CSV exports into DataFrames, in one pass:
# two worldwide time series and two per-country summaries.
workout, three_keywords, workout_geo, three_keywords_geo = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/workout.csv',
        'data/three_keywords.csv',
        'data/workout_geo.csv',
        'data/three_keywords_geo.csv',
    )
)
# 1. When was the global search for 'workout' at its peak?

# Parse the month labels so the series can be resampled by calendar year
workout['date'] = pd.to_datetime(workout['month'])

# Average the popularity index per calendar year.
# Only the numeric index column is aggregated: 'month' is loaded as text, and
# averaging the whole frame raises a TypeError on pandas >= 2.0.
# A YearEnd offset object is used instead of the 'Y' string alias, which has
# been deprecated since pandas 2.2; the offset object works on old and new
# versions alike and produces the same year-end bins.
workout_by_year = (
    workout.resample(pd.offsets.YearEnd(), on='date')['workout_worldwide']
    .mean()
    .reset_index()
)

# Year with the highest mean interest, as a string
peak_row = workout_by_year['workout_worldwide'].idxmax()
year_str = str(workout_by_year.loc[peak_row, 'date'].year)

# Plotting
fig, ax = plt.subplots()
workout_by_year.plot(x='date', y='workout_worldwide', ax=ax, legend=False)
ax.set(xlabel='Year', ylabel='Popularity Index', title='Mean Popularity Index of word "Workout"')
plt.show()
# 2. Of the keywords available, what was the most popular during the covid pandemic, and what is the most popular now?

# Index the frame by date so periods can be selected with partial-string
# slices. The guard skips this step when 'month' has already been consumed
# (presumably so the notebook cell can be re-run without a KeyError).
if 'month' in three_keywords.columns:
    three_keywords = (
        three_keywords
        .set_index(pd.to_datetime(three_keywords['month']))
        .drop('month', axis=1)
    )

# Split into the pandemic window (2020-2021) and everything from 2022 onward
popularity_covid = three_keywords.loc['2020':'2021']
popularity_current = three_keywords.loc['2022':]

# Keyword (column name) whose index reached the highest value in each period
peak_covid = popularity_covid.max().idxmax()
current = popularity_current.max().idxmax()

# Plotting: one panel per period. The frames are already sliced above, so
# they are plotted directly rather than sliced a second time.
fig, (ax1, ax2) = plt.subplots(2)

popularity_covid.plot(ax=ax1)
popularity_current.plot(ax=ax2, legend=False)

fig.suptitle('Popularity of Keywords')
fig.supylabel('Popularity Index')

# Derive the legend labels from the column names instead of hard-coding them,
# so each label always matches the line it describes regardless of the order
# of the columns in the CSV ('home_workout_worldwide' -> 'home workout').
legend_labels = [
    column.replace('_worldwide', '').replace('_', ' ')
    for column in popularity_covid.columns
]
ax1.set(title='In the Covid pandemic', xlabel='')
ax1.legend(legend_labels)

ax2.set(title='Currently', xlabel='')

fig.tight_layout()
plt.show()
# 3. What country has the highest interest for workouts among the following: United States, Australia, or Japan?

# Keep only the three candidate countries
countries_of_interest = workout_geo[workout_geo['country'].isin(['United States', 'Australia', 'Japan'])]

# Get most interested country.
# The row is located through the popularity column: calling .max() on the
# whole frame takes each column's maximum independently, which for 'country'
# is merely the alphabetically last name rather than the most interested one.
top_row = countries_of_interest['workout_2018_2023'].idxmax()
top_country = countries_of_interest.loc[top_row, 'country']

# Plotting
fig, ax = plt.subplots()

countries_of_interest.plot(kind='bar', x='country', y='workout_2018_2023', legend=False, ax=ax)
ax.set(xlabel='Country', ylabel='Popularity Index', title='Popularity of the keyword "workout"')

plt.show()
# 4. Which of the two countries (Philippines or Malaysia) has the highest interest in home workouts?

# Locate the country column case-insensitively: the data description lists it
# as 'country' while this cell was written against 'Country', so accept
# whichever spelling the CSV header actually uses.
country_col = next(
    (col for col in three_keywords_geo.columns if str(col).lower() == 'country'),
    None,
)
if country_col is None:
    raise KeyError('Country')

# Filter countries and keep the 'home workout' index, keyed by country name.
# The index is named 'Country' so the x-axis label of the bar chart is the
# same whichever header spelling was found.
phil_malay = (
    three_keywords_geo[three_keywords_geo[country_col].isin(['Philippines', 'Malaysia'])]
    .set_index(country_col)['home_workout_2018_2023']
    .rename_axis('Country')
)

# Get most interested
home_workout_geo = phil_malay.idxmax()

# Plotting
fig, ax = plt.subplots()

phil_malay.plot(kind='bar', ax=ax)
ax.set(ylabel='Popularity Index', title='Popularity of the keywords "home workout"')

plt.show()