# (Stray "Skip to content" web-page navigation text; kept as a comment so the file parses as Python.)
# Start your code here!
# Tests whether women's FIFA World Cup matches have more goals per match than
# men's, using a one-sided Mann-Whitney U test on matches since 2002-01-01.
import pandas as pd
from scipy.stats import mannwhitneyu

TOURNAMENT = "FIFA World Cup"
START_DATE = "2002-01-01"
SIGNIFICANCE_LEVEL = 0.10


def world_cup_matches(results, start_date=START_DATE, tournament=TOURNAMENT):
    """Return the matches of `tournament` played on or after `start_date`.

    Args:
        results: DataFrame with ``date``, ``tournament``, ``home_score`` and
            ``away_score`` columns.
        start_date: Earliest match date to keep (inclusive).
        tournament: Exact tournament name to keep.

    Returns:
        A new DataFrame (the input is left untouched) holding the selected
        rows plus a ``total_goals`` column (home score + away score).
    """
    # Compare real timestamps rather than raw strings, so the filter does not
    # depend on the dates being zero-padded ISO text.
    match_dates = pd.to_datetime(results["date"])
    keep = (match_dates >= pd.Timestamp(start_date)) & (
        results["tournament"] == tournament
    )
    # Work on an explicit copy: adding a column to a boolean-mask slice
    # triggers pandas' SettingWithCopyWarning.
    selected = results.loc[keep].copy()
    selected["total_goals"] = selected["home_score"] + selected["away_score"]
    return selected


def compare_goals(women_goals, men_goals, alpha=SIGNIFICANCE_LEVEL):
    """Run a one-sided Mann-Whitney U test (women's goals > men's goals).

    Args:
        women_goals: Total goals per match for the women's sample.
        men_goals: Total goals per match for the men's sample.
        alpha: Significance level used for the decision.

    Returns:
        ``{"p_val": <p-value>, "result": "reject" | "fail to reject"}`` where
        ``result`` is the decision about the null hypothesis at level `alpha`.
    """
    _, p_value = mannwhitneyu(women_goals, men_goals, alternative="greater")
    decision = "reject" if p_value < alpha else "fail to reject"
    return {"p_val": p_value, "result": decision}


if __name__ == "__main__":
    # Load the datasets
    women_results = pd.read_csv("women_results.csv")
    men_results = pd.read_csv("men_results.csv")

    # Filter data for matches since 2002-01-01 and only World Cup matches,
    # adding the total goals per match
    women_wc_matches = world_cup_matches(women_results)
    men_wc_matches = world_cup_matches(men_results)

    # Extract total goals
    women_goals = women_wc_matches["total_goals"]
    men_goals = men_wc_matches["total_goals"]

    # Perform the Mann-Whitney U test and store the p-value and decision
    result_dict = compare_goals(women_goals, men_goals)
    p_value = result_dict["p_val"]
    result = result_dict["result"]

    # Print the result
    print(result_dict)