Skip to content

You are a product manager for a fitness studio and are interested in understanding the current demand for digital fitness classes. You plan to conduct a market analysis in Python to gauge demand and identify potential areas for growth of digital products and services.

The Data

You are provided with a number of CSV files in the "Files/data" folder, which offer international and national-level data on Google Trends keyword searches related to fitness and related products.

workout.csv

Column | Description
'month' | Month when the data was measured.
'workout_worldwide' | Index representing the popularity of the keyword 'workout', on a scale of 0 to 100.

three_keywords.csv

Column | Description
'month' | Month when the data was measured.
'home_workout_worldwide' | Index representing the popularity of the keyword 'home workout', on a scale of 0 to 100.
'gym_workout_worldwide' | Index representing the popularity of the keyword 'gym workout', on a scale of 0 to 100.
'home_gym_worldwide' | Index representing the popularity of the keyword 'home gym', on a scale of 0 to 100.

workout_geo.csv

Column | Description
'country' | Country where the data was measured.
'workout_2018_2023' | Index representing the popularity of the keyword 'workout' during the 5-year period.

three_keywords_geo.csv

Column | Description
'country' | Country where the data was measured.
'home_workout_2018_2023' | Index representing the popularity of the keyword 'home workout' during the 5-year period.
'gym_workout_2018_2023' | Index representing the popularity of the keyword 'gym workout' during the 5-year period.
'home_gym_2018_2023' | Index representing the popularity of the keyword 'home gym' during the 5-year period.
# Import the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import os
# Start coding here
# Candidate locations for workout.csv, listed in the order they are checked.
possible_paths = [
    'Files/data/workout.csv',
    'data/workout.csv',
    './workout.csv'
]

# Take the first candidate that exists on disk; None means nothing matched.
file_path = next(
    (candidate for candidate in possible_paths if os.path.exists(candidate)),
    None,
)
file_found = file_path is not None

if not file_found:
    raise FileNotFoundError(
        f"File not found in any of the following locations: {possible_paths}. "
        "Please check the file path and ensure the file exists."
    )

# The 'month' column is parsed into datetimes while loading.
workout_df = pd.read_csv(file_path, parse_dates=['month'])
print(workout_df.head())
# Draw the worldwide 'workout' index as a line chart so the peak is visible.
plt.figure(figsize=(12, 6))
plt.plot(workout_df['month'], workout_df['workout_worldwide'])
plt.xlabel('Date')
plt.ylabel('Interest Index (0-100)')
plt.title('Global Search Interest for "Workout" Over Time')
plt.grid(True)
plt.show()

# Row label of the highest 'workout_worldwide' value, then the full row.
peak_row_label = workout_df['workout_worldwide'].idxmax()
peak_workout = workout_df.loc[peak_row_label]

# Year of the peak month, kept as a "yyyy" string.
year_str = str(peak_workout['month'].year)

print(f"The global search for 'workout' was at its peak in: {year_str}")
# Locate and load three_keywords.csv.
# The current directory and 'data/' are tried first (as before), followed by
# 'Files/data/', which is where the project brief says the CSVs live and where
# the other loaders in this script also look.
keyword_paths = [
    'three_keywords.csv',
    'data/three_keywords.csv',
    'Files/data/three_keywords.csv',
]
for keyword_path in keyword_paths:
    try:
        keywords_df = pd.read_csv(keyword_path, parse_dates=['month'])
    except FileNotFoundError:
        # Not at this location; fall through to the next candidate.
        continue
    break
else:
    # Every candidate was missing: fail with one clear message.
    raise FileNotFoundError(
        "Could not find 'three_keywords.csv'. Please make sure the file exists "
        "in the current directory or in a 'data/' or 'Files/data/' subdirectory."
    )

print(keywords_df.head())

# The three keyword index columns compared in both periods below.
keyword_cols = ['home_workout_worldwide', 'gym_workout_worldwide', 'home_gym_worldwide']

# Filter data for COVID pandemic period (e.g., March 2020 - Dec 2021)
covid_start = pd.to_datetime('2020-03-01')
covid_end = pd.to_datetime('2021-12-31')
covid_df = keywords_df[(keywords_df['month'] >= covid_start) & (keywords_df['month'] <= covid_end)]

# Average each keyword over the COVID window; the column with the highest
# mean wins, reported without its '_worldwide' suffix.
covid_avg_interest = covid_df[keyword_cols].mean()
peak_covid = covid_avg_interest.idxmax().replace('_worldwide', '')

# "Current" means the most recent calendar year present in the data.
current_year = keywords_df['month'].dt.year.max()
current_df = keywords_df[keywords_df['month'].dt.year == current_year]

# Same comparison, restricted to the most recent year.
current_avg_interest = current_df[keyword_cols].mean()
current = current_avg_interest.idxmax().replace('_worldwide', '')

print(f"Most popular keyword during COVID pandemic: {peak_covid}")
print(f"Most popular keyword in the current year ({current_year}): {current}")
# Find workout_geo.csv: keep only the candidate paths that exist and load the
# first one, failing loudly when none of them do.
file_paths = ['workout_geo.csv', 'data/workout_geo.csv', 'Files/data/workout_geo.csv']
existing_paths = [candidate for candidate in file_paths if os.path.exists(candidate)]
if not existing_paths:
    raise FileNotFoundError("Could not find 'workout_geo.csv' in the current, 'data/', or 'Files/data/' directories.")
workout_geo_df = pd.read_csv(existing_paths[0])

print(workout_geo_df.head())

# Keep only the three countries being compared.
target_countries = ['United States', 'Australia', 'Japan']
is_target_country = workout_geo_df['country'].isin(target_countries)
filtered_geo_df = workout_geo_df[is_target_country]

# Row label of the largest 'workout_2018_2023' value among those countries,
# then the country name stored on that row.
top_row_label = filtered_geo_df['workout_2018_2023'].idxmax()
top_country = filtered_geo_df.loc[top_row_label, 'country']

print(f"The country with the highest interest in workouts among the United States, Australia, or Japan is: {top_country}")
import pandas as pd  # also imported at the top of the script; repeated here as in the original cell

# Locations searched for three_keywords_geo.csv. 'data/' stays first so setups
# that already worked resolve exactly as before; 'Files/data/' (the folder
# named in the project brief) and the current directory are added so the file
# is found in the same places the other loaders in this script look.
geo_csv_candidates = [
    'data/three_keywords_geo.csv',
    'Files/data/three_keywords_geo.csv',
    'three_keywords_geo.csv',
]
csv_path = geo_csv_candidates[0]

# Try each candidate in order and stop at the first one that loads.
keywords_geo_df = None
for candidate in geo_csv_candidates:
    try:
        keywords_geo_df = pd.read_csv(candidate)
    except FileNotFoundError:
        continue
    csv_path = candidate
    break

if keywords_geo_df is None:
    # Best-effort behaviour is kept: report the problem instead of raising.
    print(f"File not found: tried {geo_csv_candidates}. Please check the file path and try again.")
else:
    try:
        print(keywords_geo_df.head())

        # Header cells may carry stray whitespace; normalise them once.
        keywords_geo_df.columns = keywords_geo_df.columns.str.strip()
        print("Columns after stripping:", keywords_geo_df.columns.tolist())

        # Case-insensitive lookup of the two columns needed; first match wins.
        country_col = next(
            (col for col in keywords_geo_df.columns if col.lower() == 'country'),
            None,
        )
        if country_col is None:
            raise KeyError("No 'country' column found in the CSV file after stripping whitespace.")

        hw_col = next(
            (col for col in keywords_geo_df.columns if col.lower() == 'home_workout_2018_2023'),
            None,
        )
        if hw_col is None:
            raise KeyError("No 'home_workout_2018_2023' column found in the CSV file after stripping whitespace.")

        # Filter for the specified countries (Philippines and Malaysia)
        target_home_workout_countries = ['Philippines', 'Malaysia']
        filtered_home_workout_geo_df = keywords_geo_df[
            keywords_geo_df[country_col].isin(target_home_workout_countries)
        ]

        if filtered_home_workout_geo_df.empty:
            # idxmax() on an empty selection raises; report it instead.
            print(
                f"None of {target_home_workout_countries} were found in the "
                f"'{country_col}' column; cannot compare home workout interest."
            )
        else:
            # Row label of the highest 'home workout' index, then its country.
            best_row_label = filtered_home_workout_geo_df[hw_col].idxmax()
            home_workout_geo = filtered_home_workout_geo_df.loc[best_row_label, country_col]

            print(f"Between the Philippines and Malaysia, {home_workout_geo} has the highest interest in home workouts.")
    except KeyError as e:
        print(f"KeyError: {e}. Please check the column names in your CSV file.")