Skip to content
Introduction to Statistics in Python
Introduction to Statistics in Python
Run the hidden code cell below to import the data used in this course.
# Importing numpy and pandas
import numpy as np
import pandas as pd
# Load each course dataset from the local "datasets" directory into its own
# pandas DataFrame; the variable names are referenced by the exercises below.
amir_deals = pd.read_csv("datasets/amir_deals.csv")
happiness = pd.read_csv("datasets/world_happiness.csv")
food_consumption = pd.read_csv("datasets/food_consumption.csv")
restaurant_groups = pd.read_csv("datasets/restaurant_groups.csv")
Ch1 Summary Statistics
--Descriptive and inferential statistics
--Data type classification
--Mean and median
If the data is skewed (i.e., not symmetrical), the median is usually a better measure of center than the mean, because it is less affected by extreme values.
# Display the full food consumption table
print(food_consumption)

# Build one boolean mask per country, then keep only the matching rows
is_belgium = food_consumption["country"] == "Belgium"
is_usa = food_consumption["country"] == "USA"
be_consumption = food_consumption[is_belgium]
usa_consumption = food_consumption[is_usa]

# Print the mean, then the median, of the "consumption" column:
# first for Belgium, then for the USA
for country_rows in (be_consumption, usa_consumption):
    amounts = country_rows["consumption"]
    print(np.mean(amounts))
    print(np.median(amounts))
# Keep only the rows whose country is Belgium or USA. `isin` expresses the
# membership test in one call instead of OR-ing two equality comparisons.
be_and_usa = food_consumption[food_consumption["country"].isin(["Belgium", "USA"])]

# Group by country, select the consumption column, and compute mean and median.
# The aggregations are named by string rather than passed as np.mean/np.median:
# handing NumPy callables to .agg() raises a FutureWarning in pandas >= 2.1,
# while the string names yield the same "mean"/"median" columns.
print(be_and_usa.groupby("country")["consumption"].agg(["mean", "median"]))
--Mean vs. median
# Import matplotlib.pyplot with alias plt
import matplotlib.pyplot as plt
# Keep only the rows whose food_category is "rice"
is_rice = food_consumption["food_category"] == "rice"
rice_consumption = food_consumption[is_rice]

# Draw a histogram of the co2_emission values for rice and display the figure
rice_co2 = rice_consumption["co2_emission"]
rice_co2.hist()
plt.show()