Skip to content
Project: Hypothesis Testing with Men's and Women's Soccer Matches
  • AI Chat
  • Code
  • Report
  • # Start your code here!
    # Hypothesis test: are more goals scored in women's FIFA World Cup matches
    # than in men's?  A one-sided Mann-Whitney U test compares total goals per
    # match between the two groups.
    import pandas as pd
    from scipy.stats import mannwhitneyu

    # Analysis parameters, named so they are not scattered through the script.
    START_DATE = "2002-01-01"
    TOURNAMENT = "FIFA World Cup"
    SIGNIFICANCE_LEVEL = 0.10


    def world_cup_matches(results, start_date=START_DATE, tournament=TOURNAMENT):
        """Return the matches of ``tournament`` played on or after ``start_date``.

        Args:
            results: DataFrame with ``date``, ``tournament``, ``home_score`` and
                ``away_score`` columns.
            start_date: Earliest match date to keep (inclusive).
            tournament: Exact tournament name to keep.

        Returns:
            A new DataFrame holding the selected rows plus a ``total_goals``
            column (home score + away score).  ``results`` is not modified.
        """
        # Compare real timestamps rather than raw strings, so the filter does
        # not depend on the dates being stored as ISO-formatted text.  The
        # ``date`` column itself is left exactly as it was loaded.
        played_on = pd.to_datetime(results["date"])
        keep = (played_on >= pd.Timestamp(start_date)) & (
            results["tournament"] == tournament
        )
        # Take an explicit copy: adding a column to a filtered slice of the
        # original frame is what triggers pandas' SettingWithCopyWarning.
        matches = results.loc[keep].copy()
        matches["total_goals"] = matches["home_score"] + matches["away_score"]
        return matches


    def decide(p_value, alpha=SIGNIFICANCE_LEVEL):
        """Return ``"reject"`` when ``p_value`` is below ``alpha``, else ``"fail to reject"``."""
        return "reject" if p_value < alpha else "fail to reject"


    # Scripts and notebook cells run with __name__ == "__main__", so every
    # variable assigned below is still defined at module level there; the guard
    # only keeps the file reads from running when the helpers are imported.
    if __name__ == "__main__":
        # Load the datasets
        women_results = pd.read_csv('women_results.csv')
        men_results = pd.read_csv('men_results.csv')

        # Keep only World Cup matches since the start date, with goals per match
        women_wc_matches = world_cup_matches(women_results)
        men_wc_matches = world_cup_matches(men_results)

        # Extract total goals
        women_goals = women_wc_matches['total_goals']
        men_goals = men_wc_matches['total_goals']

        # One-sided test: the alternative is that women's matches have more
        # goals than men's (first sample greater than the second).
        stat, p_value = mannwhitneyu(women_goals, men_goals, alternative='greater')

        # Determine the result of the hypothesis test
        result = decide(p_value)

        # Store the p-value and result in a dictionary
        result_dict = {"p_val": p_value, "result": result}

        # Print the result
        print(result_dict)