Skip to content

Data Manipulation with pandas

Run the hidden code cell below to import the data used in this course.

# Import the course packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the four course datasets into pandas DataFrames.
# The paths are relative, so they resolve against the current working
# directory: a `datasets/` folder must exist next to wherever this is run.
avocado = pd.read_csv("datasets/avocado.csv")
homelessness = pd.read_csv("datasets/homelessness.csv")
temperatures = pd.read_csv("datasets/temperatures.csv")
walmart = pd.read_csv("datasets/walmart.csv")
import datetime as dt

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

# Preview the first rows of the raw temperatures data. A bare
# `temperatures.head()` is only displayed when it is the last expression of
# a notebook cell, so print it explicitly.
print(temperatures.head())

# Convert the date column to datetime so the `.dt` accessor is available.
temperatures['date'] = pd.to_datetime(temperatures['date'])
# Add a year column to the DataFrame, derived from the date column.
temperatures['year'] = temperatures['date'].dt.year

# Pivot avg_temp_c into one row per (country, city) and one column per year.
# pivot_table aggregates with the mean by default.
temp_by_country_city_vs_year = temperatures.pivot_table(
    values='avg_temp_c',
    index=['country', 'city'],
    columns='year',
)
print(temp_by_country_city_vs_year)

# Get the worldwide mean temp by year: average down the rows, which leaves
# one value per year column.
mean_temp_by_year = temp_by_country_city_vs_year.mean(axis='index')
print(mean_temp_by_year)

# Filter for the year that had the highest mean temp. Printing the filtered
# Series shows both the year label and its value; printing only `.max()`
# would show the temperature without saying which year it belongs to.
print(mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()])

# Get the mean temp by city: average across the year columns, which leaves
# one value per (country, city) row.
mean_temp_by_city = temp_by_country_city_vs_year.mean(axis='columns')
print(mean_temp_by_city)

# Filter for the city that had the lowest mean temp. As above, print the
# filtered Series so the (country, city) label appears next to the value.
print(mean_temp_by_city[mean_temp_by_city == mean_temp_by_city.min()])