Project: Data-Driven Product Management: Conducting a Market Analysis

You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.

The Data

You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches about fitness and related products.

workout.csv

Column	Description
`'month'`	Month when the data was measured.
`'workout_worldwide'`	Index representing the popularity of the keyword 'workout', on a scale of 0 to 100.

three_keywords.csv

Column	Description
`'month'`	Month when the data was measured.
`'home_workout_worldwide'`	Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100.
`'gym_workout_worldwide'`	Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100.
`'home_gym_worldwide'`	Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100.

workout_geo.csv

Column	Description
`'country'`	Country where the data was measured.
`'workout_2018_2023'`	Index representing the popularity of the keyword 'workout' over the five-year period from 2018 to 2023.

three_keywords_geo.csv

Column	Description
`'country'`	Country where the data was measured.
`'home_workout_2018_2023'`	Index representing the popularity of the keyword 'home workout' over the five-year period from 2018 to 2023.
`'gym_workout_2018_2023'`	Index representing the popularity of the keyword 'gym workout' over the five-year period from 2018 to 2023.
`'home_gym_2018_2023'`	Index representing the popularity of the keyword 'home gym' over the five-year period from 2018 to 2023.

# Import the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read the worldwide 'workout' series, the per-country table and the
# three-keyword series from the data folder.
work_outs = pd.read_csv('data/workout.csv')
work_outs_geo = pd.read_csv('data/workout_geo.csv')
three_keyword = pd.read_csv('data/three_keywords.csv')

# Print a preview of each table, padded with a newline on either side.
for _frame in (work_outs, work_outs_geo, three_keyword):
    print('\n', _frame, '\n')

# Parse the month strings into timestamps and derive a calendar-year column.
work_outs['month'] = pd.to_datetime(work_outs['month'])
work_outs['year'] = work_outs['month'].dt.year

# Average the monthly 'workout' index per year, then rank the years from the
# highest average to the lowest.
work_out_year = (
    work_outs
    .groupby('year')['workout_worldwide']
    .mean()
    .reset_index()
)
sorted_workout_year = work_out_year.sort_values(
    by='workout_worldwide',
    ascending=False,
)
print(sorted_workout_year)

# First row, first column of the ranked table is the top year. Scalar
# positional access is used so the year keeps its integer form when it is
# converted to a string.
year_str = str(sorted_workout_year.iloc[0, 0])
print(year_str)

# Line chart of the monthly index over time.
fig, ax = plt.subplots()
ax.plot(
    work_outs['month'],
    work_outs['workout_worldwide'],
    marker='x',
    linestyle='-',
    color='blue',
    linewidth=2,
)

# Title, axis labels and grid lines.
ax.set_title('workout_index_by_year')
ax.set_xlabel('Year')
ax.set_ylabel('Work_out_index')
ax.grid(True)

# Render the figure.
plt.show()

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the monthly three-keyword data and derive a calendar-year column.
three_keyword = pd.read_csv('data/three_keywords.csv')
three_keyword['month'] = pd.to_datetime(three_keyword['month'])
three_keyword['year'] = three_keyword['month'].dt.year

# Columns holding the popularity index of each keyword.
keyword_columns = ['home_workout_worldwide', 'gym_workout_worldwide', 'home_gym_worldwide']

# Keep only the rows for 2019, 2020 and the latest year present in the data.
keyword2019_2020_and2023 = three_keyword[three_keyword['year'].isin([2019, 2020, three_keyword['year'].max()])]
print(keyword2019_2020_and2023)

# Yearly average of each keyword. The columns are selected on the groupby
# itself: mean() does not accept a column list (its first positional
# parameter is `numeric_only`), so a list passed there would only act as a
# truthy flag.
grouped_keyword_workout = (
    keyword2019_2020_and2023
    .groupby('year')[keyword_columns]
    .mean()
    .reset_index()
)
sorted_keyword_workout = grouped_keyword_workout.sort_values(keyword_columns, ascending=False)
print(sorted_keyword_workout)

# Create a single plot with all workout types
fig, ax = plt.subplots(figsize=(10, 6))

bar_width = 0.25

# Left, middle and right bar positions within each year's group.
r1 = np.arange(len(grouped_keyword_workout))
r2 = r1 + bar_width
r3 = r2 + bar_width

ax.bar(r1, grouped_keyword_workout['home_workout_worldwide'], color='b', width=bar_width, edgecolor='grey', label='Home Workout')
ax.bar(r2, grouped_keyword_workout['gym_workout_worldwide'], color='g', width=bar_width, edgecolor='grey', label='Gym Workout')
# This series is the 'home gym' keyword, so it is labelled as such.
ax.bar(r3, grouped_keyword_workout['home_gym_worldwide'], color='r', width=bar_width, edgecolor='grey', label='Home Gym')

ax.set_xlabel('Year', fontweight='bold')
# Put each year's tick under the middle bar of its group.
ax.set_xticks(r2)
ax.set_xticklabels(grouped_keyword_workout['year'].astype(str).tolist())
ax.set_ylabel('Values', fontweight='bold')
ax.set_title('Comparison of Workout Types Over Years', fontweight='bold')

# Add legend
ax.legend()

# Add grid lines
ax.grid(True)

# Adjust layout
plt.tight_layout(rect=[0, 0.03, 1, 0.95])

# Show the plot
plt.show()

import pandas as pd

# Load the monthly three-keyword data and derive a calendar-year column.
three_keyword = pd.read_csv('data/three_keywords.csv')
three_keyword['month'] = pd.to_datetime(three_keyword['month'])
three_keyword['year'] = three_keyword['month'].dt.year

# Columns holding the popularity index of each keyword.
keyword_columns = ['home_workout_worldwide', 'gym_workout_worldwide', 'home_gym_worldwide']

# Latest year present in the data, computed once and reused below.
latest_year = three_keyword['year'].max()

# Keep only the rows for 2019, 2020 and the latest year.
keyword2019_2020_and2023 = three_keyword[three_keyword['year'].isin([2019, 2020, latest_year])]

# Yearly average of each keyword. The columns are selected on the groupby
# itself: mean() does not accept a column list (its first positional
# parameter is `numeric_only`), so a list passed there would only act as a
# truthy flag.
grouped_keyword_workout = (
    keyword2019_2020_and2023
    .groupby('year')[keyword_columns]
    .mean()
    .reset_index()
)

# For every year, the name of the keyword column with the highest average.
top_keyword_by_year = grouped_keyword_workout.set_index('year')[keyword_columns].idxmax(axis=1)

# Get the highest index workout type for 2020 and for the latest year
peak_covid = top_keyword_by_year.loc[2020]
current = top_keyword_by_year.loc[latest_year]

print(f"Covid Peak: {peak_covid}")
print(f"Current: {current}")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the per-country 'workout' index and keep only the three countries
# being compared.
work_outs_geo = pd.read_csv('data/workout_geo.csv')
_is_candidate = work_outs_geo['country'].isin(['United States', 'Australia', 'Japan'])
work_out_country = work_outs_geo[_is_candidate]

# Find the row label with the highest index, then look up its country name.
_top_row = work_out_country['workout_2018_2023'].idxmax()
top_country = work_out_country.loc[_top_row, 'country']
print(top_country)