# Project: Hypothesis Testing with Men's and Women's Soccer Matches

# Start your code here!
import pandas as pd

# Read the women's and men's match results (in that order) from the CSV files
# in the working directory into two DataFrames.
women_results, men_results = (
    pd.read_csv(csv_path)
    for csv_path in ('women_results.csv', 'men_results.csv')
)

# Preview the first ten rows of each table; these expressions only display
# output when run interactively (e.g. as the last expression of a notebook cell).
women_results.head(10)
men_results.head(10)

import pandas as pd
from scipy.stats import mannwhitneyu

# The datasets (women_results, men_results) are already loaded above.


# Hypothesis test: are more goals scored in women's FIFA World Cup matches
# than in men's?  Uses a one-sided Mann-Whitney U test on total goals per match.

# Analysis parameters, named so they are easy to find and adjust.
START_DATE = pd.Timestamp('2002-01-01')  # only matches on/after this date
TOURNAMENT = 'FIFA World Cup'            # only official World Cup matches
SIGNIFICANCE_LEVEL = 0.10                # reject the null when p-value is below this

# Filter data for matches since 2002-01-01 and only World Cup matches.
# The date column is converted with pd.to_datetime for the comparison so the
# cutoff is chronological rather than a lexicographic string comparison.
# .copy() makes each subset an independent DataFrame, so adding a column below
# does not write into a slice of the original (no SettingWithCopyWarning).
women_wc_matches = women_results[
    (pd.to_datetime(women_results['date']) >= START_DATE)
    & (women_results['tournament'] == TOURNAMENT)
].copy()
men_wc_matches = men_results[
    (pd.to_datetime(men_results['date']) >= START_DATE)
    & (men_results['tournament'] == TOURNAMENT)
].copy()

# Calculate total goals per match (home goals + away goals).
women_wc_matches['total_goals'] = (
    women_wc_matches['home_score'] + women_wc_matches['away_score']
)
men_wc_matches['total_goals'] = (
    men_wc_matches['home_score'] + men_wc_matches['away_score']
)

# Extract total goals
women_goals = women_wc_matches['total_goals']
men_goals = men_wc_matches['total_goals']

# Perform the Mann-Whitney U test.  alternative='greater' tests whether the
# women's goal distribution is stochastically greater than the men's.
stat, p_value = mannwhitneyu(women_goals, men_goals, alternative='greater')

# Determine the result of the hypothesis test
result = "reject" if p_value < SIGNIFICANCE_LEVEL else "fail to reject"

# Store the p-value and result in a dictionary
result_dict = {"p_val": p_value, "result": result}

# Print the result
print(result_dict)