Skip to content
Project: Data-Driven Product Management: Conducting a Market Analysis
You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.
The Data
You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches related to fitness and related products.
workout.csv
| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'workout_worldwide' | Index representing the popularity of the keyword 'workout', on a scale of 0 to 100. |
three_keywords.csv
| Column | Description |
|---|---|
| 'month' | Month when the data was measured. |
| 'home_workout_worldwide' | Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100. |
| 'gym_workout_worldwide' | Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100. |
| 'home_gym_worldwide' | Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100. |
workout_geo.csv
| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'workout_2018_2023' | Index representing the popularity of the keyword 'workout' during the 5-year period. |
three_keywords_geo.csv
| Column | Description |
|---|---|
| 'country' | Country where the data was measured. |
| 'home_workout_2018_2023' | Index representing the popularity of the keyword 'home workout' during the 5-year period. |
| 'gym_workout_2018_2023' | Index representing the popularity of the keyword 'gym workout' during the 5-year period. |
| 'home_gym_2018_2023' | Index representing the popularity of the keyword 'home gym' during the 5-year period. |
# Import the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the worldwide 'workout' trend, the per-country 'workout' index, and the
# worldwide three-keyword trend, then print each frame for a quick look.
work_outs = pd.read_csv('data/workout.csv')
work_outs_geo = pd.read_csv('data/workout_geo.csv')
three_keyword = pd.read_csv('data/three_keywords.csv')
print('\n', work_outs, '\n')
print('\n', work_outs_geo, '\n')
print('\n', three_keyword, '\n')

# Parse the month column and derive a calendar-year column from it so the
# index can be averaged per year.
work_outs['month'] = pd.to_datetime(work_outs['month'])
work_outs['year'] = work_outs['month'].dt.year

# Mean 'workout' index per year, ordered from highest to lowest mean.
work_out_year = work_outs.groupby('year')['workout_worldwide'].mean().reset_index()
sorted_workout_year = work_out_year.sort_values('workout_worldwide', ascending=False)
print(sorted_workout_year)

# After the descending sort, the first row holds the year with the highest
# yearly mean; keep it as a string.
year_str = str(sorted_workout_year['year'].iloc[0])
print(year_str)
# Line chart of the worldwide 'workout' index plotted against the month column.
fig, ax = plt.subplots()
ax.plot(
    work_outs['month'],
    work_outs['workout_worldwide'],
    color='blue',
    linestyle='-',
    linewidth=2,
    marker='x',
)
# Axis labels and title, set in a single call.
ax.set(xlabel='Year', ylabel='Work_out_index', title='workout_index_by_year')
# Turn the grid on.
ax.grid(True)
# Render the figure.
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the three-keyword worldwide trend and derive a calendar-year column.
three_keyword = pd.read_csv('data/three_keywords.csv')
three_keyword['month'] = pd.to_datetime(three_keyword['month'])
three_keyword['year'] = three_keyword['month'].dt.year

# The three keyword index columns that are compared below.
keyword_cols = ['home_workout_worldwide', 'gym_workout_worldwide', 'home_gym_worldwide']

# Keep only 2019, 2020, and the most recent year present in the data.
keyword2019_2020_and2023 = three_keyword[three_keyword['year'].isin([2019, 2020, three_keyword['year'].max()])]
print(keyword2019_2020_and2023)

# Yearly mean of each keyword index. The columns are selected explicitly:
# GroupBy.mean() takes `numeric_only` as its first positional parameter, so a
# column list passed there is not a column selection.
grouped_keyword_workout = keyword2019_2020_and2023.groupby('year')[keyword_cols].mean().reset_index()
sorted_keyword_workout = grouped_keyword_workout.sort_values(keyword_cols, ascending=False)
print(sorted_keyword_workout)

# Grouped bar chart: one cluster per year, one bar per keyword.
fig, ax = plt.subplots(figsize=(10, 6))
bar_width = 0.25
# Left edge positions of the three bars in each cluster, one bar width apart.
r1 = np.arange(len(grouped_keyword_workout))
r2 = r1 + bar_width
r3 = r2 + bar_width
ax.bar(r1, grouped_keyword_workout['home_workout_worldwide'], color='b', width=bar_width, edgecolor='grey', label='Home Workout')
ax.bar(r2, grouped_keyword_workout['gym_workout_worldwide'], color='g', width=bar_width, edgecolor='grey', label='Gym Workout')
ax.bar(r3, grouped_keyword_workout['home_gym_worldwide'], color='r', width=bar_width, edgecolor='grey', label='Home Gym Workout')
ax.set_xlabel('Year', fontweight='bold')
# Place each year label under the middle bar of its cluster.
ax.set_xticks(r2)
ax.set_xticklabels(grouped_keyword_workout['year'].astype(str).tolist())
ax.set_ylabel('Values', fontweight='bold')
ax.set_title('Comparison of Workout Types Over Years', fontweight='bold')
# Add legend
ax.legend()
# Add grid lines
ax.grid(True)
# Adjust layout
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
# Show the plot
plt.show()
import pandas as pd
# Load the three-keyword worldwide trend and derive a calendar-year column.
three_keyword = pd.read_csv('data/three_keywords.csv')
three_keyword['month'] = pd.to_datetime(three_keyword['month'])
three_keyword['year'] = three_keyword['month'].dt.year

# The three keyword index columns that are compared below.
keyword_cols = ['home_workout_worldwide', 'gym_workout_worldwide', 'home_gym_worldwide']
latest_year = three_keyword['year'].max()

# Keep only 2019, 2020, and the most recent year present in the data.
keyword2019_2020_and2023 = three_keyword[three_keyword['year'].isin([2019, 2020, latest_year])]

# Yearly mean of each keyword index. The columns are selected explicitly:
# GroupBy.mean() takes `numeric_only` as its first positional parameter, so a
# column list passed there is not a column selection.
grouped_keyword_workout = keyword2019_2020_and2023.groupby('year')[keyword_cols].mean().reset_index()

# Index the yearly means by year so each lookup is a single row; idxmax on
# that row returns the name of the column with the highest mean.
yearly_means = grouped_keyword_workout.set_index('year')[keyword_cols]
peak_covid = yearly_means.loc[2020].idxmax()
current = yearly_means.loc[latest_year].idxmax()
print(f"Covid Peak: {peak_covid}")
print(f"Current: {current}")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the per-country 'workout' index and keep only the three countries being compared.
work_outs_geo = pd.read_csv('data/workout_geo.csv')
work_out_country = work_outs_geo[work_outs_geo['country'].isin(['United States', 'Australia', 'Japan'])]
# idxmax returns the row label of the highest index value; use it to read that row's country.
top_country = work_out_country.loc[work_out_country['workout_2018_2023'].idxmax(), 'country']
print(top_country)
# This import was fused onto the print line above; it is kept on its own line.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the per-country three-keyword index.
three_keyword_geo = pd.read_csv('data/three_keywords_geo.csv')
# The data dictionary lists this column as 'country', while this code read
# 'Country'. Accept whichever header the CSV actually has.
# NOTE(review): confirm the real header and drop the fallback.
country_col = 'Country' if 'Country' in three_keyword_geo.columns else 'country'
malaysia_philippines = three_keyword_geo[three_keyword_geo[country_col].isin(['Philippines', 'Malaysia'])]
# Row label of the higher 'home workout' index among the two countries,
# then the country name stored in that row.
home_workout_geo_idx = malaysia_philippines['home_workout_2018_2023'].idxmax()
home_workout_geo = malaysia_philippines.loc[home_workout_geo_idx, country_col]
print(malaysia_philippines)
print(home_workout_geo)