Skip to content
Project: Data-Driven Product Management: Conducting a Market Analysis
You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.
The Data
You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches related to fitness and related products.
workout.csv
| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'workout_worldwide' | Index representing the popularity of the keyword 'workout', on a scale of 0 to 100. |
three_keywords.csv
| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'home_workout_worldwide' | Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100. |
| 'gym_workout_worldwide' | Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100. |
| 'home_gym_worldwide' | Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100. |
workout_geo.csv
| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'workout_2018_2023' | Index representing the popularity of the keyword 'workout' during the 5-year period. |
three_keywords_geo.csv
| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'home_workout_2018_2023' | Index representing the popularity of the keyword 'home workout' during the 5-year period. |
| 'gym_workout_2018_2023' | Index representing the popularity of the keyword 'gym workout' during the 5-year period. |
| 'home_gym_2018_2023' | Index representing the popularity of the keyword 'home gym' during the 5-year period. |
import pandas as pd  # every cell below uses `pd`; no import is visible elsewhere in this file

# When was the global peak of the search keyword 'workout'?
workout = pd.read_csv('data/workout.csv')
# Locate the row with the highest index value and keep the first four
# characters of its 'month' string, i.e. the year part.
peak_month = workout.loc[workout['workout_worldwide'].idxmax(), 'month']
year_str = peak_month[:4]
print(year_str)

# Line chart of the worldwide 'workout' index over time
workout.plot(x='month', color='red').set_title('Keyword popoularity worldwide')


def find_max_keyword(max_keyword, data=None):
    """Return the keyword with the largest value and the month it occurred.

    Args:
        max_keyword: Mapping of column name -> value to compare.
        data: DataFrame holding one column per key of ``max_keyword`` plus a
            'month' column. Defaults to the module-level ``keywords`` frame,
            which must exist by the time the function is called.

    Returns:
        A tuple ``(keyword, month, value)``: the key with the largest value
        (the first one wins on ties), the 'month' entry of the first row in
        ``data`` where that column equals the value, and the value itself.

    Raises:
        ValueError: If ``max_keyword`` is empty.
    """
    if not max_keyword:
        raise ValueError('max_keyword must contain at least one entry')
    if data is None:
        data = keywords

    # Manual scan rather than the builtin max(): notebook-level code can
    # rebind that name. Seeding with None (not 0) keeps the scan correct
    # even when no value is positive.
    best_key, best_value = None, None
    for key, value in max_keyword.items():
        if best_value is None or value > best_value:
            best_key, best_value = key, value

    matching_months = data.loc[data[best_key] == best_value, 'month']
    return best_key, matching_months.iloc[0], best_value
def _clean_keyword(column):
    """Turn a column name such as 'home_gym_worldwide' into 'home gym'."""
    # str.strip('worldwide') would strip any of those *characters* from both
    # ends rather than the suffix, so remove the suffix explicitly instead.
    return column.replace('_worldwide', '').replace('_', ' ').strip()


# What was the most popular keyword during COVID, and what is it now?
keywords = pd.read_csv('data/three_keywords.csv', header=0)
columns = keywords.columns[1:]  # every column after the first ('month')

# Identify the maximum value of each keyword and store it in a dictionary
max_keyword = {column: keywords[column].max() for column in columns}
keyword, year, max_value = find_max_keyword(max_keyword)
keyword_clean = _clean_keyword(keyword)
peak_covid = keyword_clean
print(f'In {year} the most popular keyword during COVID was {keyword_clean} with a rating of {max_value}.')

# Find the keyword that is most popular in the latest month of the data.
# Only the last row is compared, so the winner and its value are read
# directly from that row.
latest_values = keywords[columns].iloc[-1]
max_keyword = dict(latest_values.items())
keyword = latest_values.idxmax()
max_value = latest_values[keyword]
keyword_clean = _clean_keyword(keyword)
current = keyword_clean
year = keywords['month'].iloc[-1]
print(f'In {year} the most popular keyword at end of COVID was {keyword_clean} with a rating of {max_value}.')

# Convert the 'month' strings to datetimes in a new 'date' column, then drop
# the original text column.
keywords['date'] = pd.to_datetime(keywords['month'], format='%Y-%m')
keywords = keywords.drop(columns='month')
# Plot popular keywords
keywords.plot(kind='line', x='date', ylabel='Index', xlabel='', title='Popular sports keywords during COVID')

# Identify the country with the highest 'workout' index among three selected countries
workout_geo = pd.read_csv('data/workout_geo.csv', header=0)
selected_countries = ['United States', 'Australia', 'Japan']
workout_geo_select = workout_geo[workout_geo['country'].isin(selected_countries)]
workout_geo_select.plot(kind='bar', x='country', xlabel='', ylabel='Index', title='Selected countries', legend=False, color='orange', rot=45)

# idxmax() gives the row label of the largest index value (first one on ties);
# that label is then used to read the matching country name.
top_row = workout_geo_select['workout_2018_2023'].idxmax()
top_country = workout_geo_select.loc[top_row, 'country']

# Check where to sell home sports equipment: Philippines or Malaysia
home_workout = pd.read_csv('data/three_keywords_geo.csv', header=0)
home_workout.info()

# NOTE(review): the README table lists this column as 'country', while the
# code reads 'Country' -- confirm the actual header against the info() output.
candidates = home_workout[home_workout['Country'].isin(['Philippines', 'Malaysia'])]

# Pick the candidate country with the highest 'home workout' index.
best_row = candidates['home_workout_2018_2023'].idxmax()
home_workout_geo = candidates.loc[best_row, 'Country']
print(home_workout_geo)