Skip to content

You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.

The Data

You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches related to fitness and related products.

workout.csv

| Column | Description |
|--------|-------------|
| 'month' | Month when the data was measured. |
| 'workout_worldwide' | Index representing the popularity of the keyword 'workout', on a scale of 0 to 100. |

three_keywords.csv

| Column | Description |
|--------|-------------|
| 'month' | Month when the data was measured. |
| 'home_workout_worldwide' | Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100. |
| 'gym_workout_worldwide' | Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100. |
| 'home_gym_worldwide' | Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100. |

workout_geo.csv

| Column | Description |
|--------|-------------|
| 'country' | Country where the data was measured. |
| 'workout_2018_2023' | Index representing the popularity of the keyword 'workout' during the 5 year period. |

three_keywords_geo.csv

| Column | Description |
|--------|-------------|
| 'country' | Country where the data was measured. |
| 'home_workout_2018_2023' | Index representing the popularity of the keyword 'home workout' during the 5 year period. |
| 'gym_workout_2018_2023' | Index representing the popularity of the keyword 'gym workout' during the 5 year period. |
| 'home_gym_2018_2023' | Index representing the popularity of the keyword 'home gym' during the 5 year period. |
# When did worldwide searches for the keyword 'workout' peak?
# No pandas import is visible earlier in this file, so import it in the first
# cell that needs it; a repeated import elsewhere would be harmless.
import pandas as pd

workout = pd.read_csv('data/workout.csv')
# idxmax() gives the label of the first row holding the highest index value,
# so ties resolve deterministically to the earliest such row.
peak_month = workout.loc[workout['workout_worldwide'].idxmax(), 'month']
# Assumes 'month' strings start with a four-digit year (e.g. 'YYYY-MM');
# the slice keeps just that year.
year_str = peak_month[:4]
print(year_str)
workout.plot(x='month', color='red').set_title('Keyword popularity worldwide')
def find_max_keyword(max_keyword, data=None):
    """Return the keyword with the highest value and the month it occurred.

    Args:
        max_keyword: Mapping of column name -> popularity value. On ties the
            entry that comes first in the mapping wins.
        data: DataFrame with a 'month' column and one column per key of
            ``max_keyword``. Defaults to the module-level ``keywords`` frame.

    Returns:
        A tuple ``(keyword, month, value)``: the winning column name, the
        'month' entry of the first row in which that column equals the
        winning value, and the winning value itself.

    Raises:
        ValueError: If ``max_keyword`` is empty.
    """
    if not max_keyword:
        raise ValueError('max_keyword must contain at least one entry')
    if data is None:
        data = keywords

    # Seed with the first entry instead of (0, 0) so that values which are all
    # zero or negative still select a real column name. A plain loop keeps this
    # independent of whatever the global name `max` happens to be bound to.
    entries = iter(max_keyword.items())
    best_key, best_value = next(entries)
    for key, value in entries:
        if value > best_value:
            best_key, best_value = key, value

    matching_months = data.loc[data[best_key] == best_value, 'month']
    return best_key, matching_months.iloc[0], best_value
# Which keyword was most popular during COVID, and which one is most popular now?


def _clean_keyword(column_name):
    """Turn a column name such as 'home_gym_worldwide' into 'home gym'."""
    # str.strip('worldwide') removes a *set of characters* from both ends, not
    # the word itself (it would turn 'workout_worldwide' into 'kout'), so the
    # suffix is cut off explicitly instead.
    suffix = '_worldwide'
    if column_name.endswith(suffix):
        column_name = column_name[:-len(suffix)]
    return column_name.replace('_', ' ').strip()


keywords = pd.read_csv('data/three_keywords.csv', header=0)
# Every column after the first one; the first column is skipped here and is
# addressed as 'month' further down.
columns = keywords.columns[1:]

# Highest value reached by each keyword over the whole period.
max_keyword = {column: keywords[column].max() for column in columns}

# `max_value` rather than `max`, so the builtin max() stays usable afterwards.
keyword, year, max_value = find_max_keyword(max_keyword)
keyword_clean = _clean_keyword(keyword)
peak_covid = keyword_clean

print(f'In {year} the most popular keyword during COVID was {keyword_clean} with a rating of {max_value}.')

# Now find the keyword that is most popular in the latest month on record.
last_month_row = keywords.tail(1).reset_index(drop=True)
max_keyword = {column: last_month_row[column][0] for column in columns}

keyword, year, max_value = find_max_keyword(max_keyword)
keyword_clean = _clean_keyword(keyword)
current = keyword_clean

# find_max_keyword reports the first month in which the value occurs anywhere
# in the column; the values compared here come from the final row, so take the
# month from that row instead.
year = keywords['month'].iloc[-1]

print(f'In {year} the most popular keyword at end of COVID was {keyword_clean} with a rating of {max_value}.')
# Swap the textual 'month' column for a parsed datetime column named 'date'.
keywords['date'] = pd.to_datetime(
    keywords['month'],
    format='%Y-%m',
)
# Remove the original text column from the frame in place.
del keywords['month']
Hidden output
# Line chart of every keyword column against the 'date' column.
keywords.plot.line(
    x='date',
    xlabel='',
    ylabel='Index',
    title='Popular sports keywords during COVID',
)
# Which of three selected countries shows the highest 'workout' search index?
workout_geo = pd.read_csv('data/workout_geo.csv', header=0)

selected_countries = ['United States', 'Australia', 'Japan']
workout_geo_select = workout_geo[workout_geo['country'].isin(selected_countries)]

workout_geo_select.plot.bar(
    x='country',
    xlabel='',
    ylabel='Index',
    title='Selected countries',
    legend=False,
    color='orange',
    rot=45,
)

# Keep the rows whose index equals the highest index within the selection and
# take the country of the first such row.
highest_index = workout_geo_select['workout_2018_2023'].max()
top_rows = workout_geo_select[workout_geo_select['workout_2018_2023'] == highest_index]
top_country = top_rows['country'].iloc[0]
# Decide where to sell home sports equipment: the Philippines or Malaysia?
home_workout = pd.read_csv('data/three_keywords_geo.csv', header=0)

# Print the frame's column overview.
home_workout.info()
# NOTE(review): the data description lists this column as 'country', but the
# code addresses it as 'Country' — confirm against the CSV header.
candidates = home_workout[home_workout['Country'].isin(['Philippines', 'Malaysia'])]
ranked = candidates.sort_values(by='home_workout_2018_2023', ascending=False)
# The first row after the descending sort is the candidate with the highest
# 'home workout' index.
home_workout_geo = ranked['Country'].iloc[0]

print(home_workout_geo)