Skip to content

Introduction to Data Visualization with Matplotlib

👋 Welcome to your workspace! Here, you can write and run Python code and add text in Markdown. Below, we've imported the datasets from the course Introduction to Data Visualization with Matplotlib as DataFrames as well as the packages used in the course. This is your sandbox environment: analyze the course datasets further, take notes, or experiment with code!

# Importing course packages; you can add more too!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load each course dataset into its own DataFrame
climate_change = pd.read_csv(
    "datasets/climate_change.csv",
    index_col="date",
    parse_dates=["date"],
)
medals = pd.read_csv("datasets/medals_by_country_2016.csv", index_col=0)
summer_2016 = pd.read_csv("datasets/summer2016.csv")
austin_weather = pd.read_csv("datasets/austin_weather.csv", index_col="DATE")
weather = pd.read_csv("datasets/seattle_weather.csv", index_col="DATE")

# Weather pre-processing: keep the rows of one Seattle station (as an
# independent copy of that slice of `weather`), then add a MONTH column of
# month abbreviations to both weather tables.
seattle_weather = weather.loc[weather["STATION"].eq("USW00094290")].copy()
month = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
# A 12-item list can only be assigned to a table that has exactly 12 rows
seattle_weather.loc[:, "MONTH"] = month
austin_weather.loc[:, "MONTH"] = month

austin_weather.head()  # First five rows (rendered as output in a notebook cell)
# Begin writing your own code here!
# Task 1: Plotting Seattle and Austin weather
# Two stacked panels share the x-axis (the months). The y-axes are left
# independent: temperature and precipitation are different quantities, and a
# shared y-range would flatten the much smaller precipitation values.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10, 8))
temp_ax, prcp_ax = axes

# One colour per city, reused in both panels so the cities stay recognisable
city_styles = (
    ("Seattle", seattle_weather, "blue"),
    ("Austin", austin_weather, "red"),
)

for city, city_data, city_color in city_styles:
    # Solid lines for temperature, dashed lines for precipitation
    temp_ax.plot(city_data["MONTH"], city_data["MLY-TAVG-NORMAL"],
                 color=city_color, linestyle="-", label=city)
    prcp_ax.plot(city_data["MONTH"], city_data["MLY-PRCP-NORMAL"],
                 color=city_color, linestyle="--", label=city)

# NOTE(review): the MLY-*-NORMAL columns look like NOAA climate normals, which
# are published in degrees Fahrenheit and inches - confirm against the dataset
# documentation before relying on these unit labels.
temp_ax.set_ylabel("Average Temperature (°F)")
temp_ax.set_title("Monthly Average Temperature")
temp_ax.legend()

prcp_ax.set_xlabel("Months")
prcp_ax.set_ylabel("Average Precipitation (inches)")
prcp_ax.set_title("Monthly Average Precipitation")
prcp_ax.legend()

plt.tight_layout()
plt.show()

# Task 2: Plotting climate change data
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()  # Second y-axis drawn over the same x-axis (time)

# Filtering data for the 2000s.
# "date" became the index when the CSV was read (index_col="date"), so it is
# not available as a column; filter and plot through the DatetimeIndex.
climate_change_2000s = climate_change[climate_change.index.year >= 2000]
dates_2000s = climate_change_2000s.index

# Plotting CO2 levels
ax1.plot(dates_2000s, climate_change_2000s["co2"], color="blue", label="CO2")
ax1.set_ylabel("CO2 (ppm)")

# Plotting relative temperature
ax2.plot(dates_2000s, climate_change_2000s["relative_temp"], color="red", label="Relative Temp")
ax2.set_ylabel("Relative Temperature (°C)")

# Annotating the first date when CO2 exceeded 400
above_400 = climate_change_2000s.loc[climate_change_2000s["co2"] > 400, "co2"].sort_index()
if not above_400.empty:  # Skip the annotation when the threshold is never crossed
    first_date_exceed_400 = above_400.index[0]
    ax1.annotate(
        "CO2 > 400",
        # Point the arrow at the actual reading on that date
        xy=(first_date_exceed_400, above_400.iloc[0]),
        # Place the text up and to the left of the point, measured in points
        xytext=(-90, 10),
        textcoords="offset points",
        arrowprops=dict(arrowstyle="->"),
        fontsize=12,
    )

ax1.set_title("Climate Change")
ax1.set_xlabel("Year")
ax1.legend(loc="upper left")
ax2.legend(loc="upper right")

plt.show()

# Task 3: Scatter plot of Gold vs Silver medals
fig, ax = plt.subplots(figsize=(8, 6))
# The index holds country names, which are not valid colour values for `c`.
# Colour each point by its row position instead, mapped through the colormap.
ax.scatter(medals["Gold"], medals["Silver"], c=np.arange(len(medals)), cmap="tab20")
ax.set_xlabel("Gold Medals")
ax.set_ylabel("Silver Medals")
ax.set_title("Gold vs Silver Medals")
# Label every point with its country. Iterating the values directly avoids
# positional lookups (`series[i]`) on a label-indexed Series.
for country, gold, silver in zip(medals.index, medals["Gold"], medals["Silver"]):
    ax.annotate(country, (gold, silver), fontsize=8)

plt.show()

# Task 4: Histograms of Age by sport
# Rows without a sport or an age cannot be binned, so drop them up front
athletes = summer_2016.dropna(subset=["Sport", "Age"])
sports = athletes["Sport"].unique()

# Shared bin edges computed over all ages, so every panel uses the same bins
# and the distributions can be compared directly.
age_bins = np.histogram_bin_edges(athletes["Age"], bins=15)

# squeeze=False always returns a 2-D array of Axes, even for a single sport.
# The figure height grows with the number of panels so they stay readable.
fig, axes = plt.subplots(
    nrows=len(sports),
    ncols=1,
    figsize=(8, max(4, 1.5 * len(sports))),
    sharex=True,
    squeeze=False,
)
axes = axes[:, 0]  # Single column: flatten to one Axes per sport

for sport_ax, sport in zip(axes, sports):
    sport_ages = athletes.loc[athletes["Sport"] == sport, "Age"]
    sport_ax.hist(sport_ages, bins=age_bins, alpha=0.7)
    sport_ax.set_title(sport)
    sport_ax.set_ylabel("Frequency")

# The x-axis is shared, so label it once on the bottom panel
axes[-1].set_xlabel("Age")
plt.tight_layout()
plt.show()

Don't know where to start?

Try completing these tasks:

  • Using austin_weather and seattle_weather, create a Figure with an array of two Axes objects that share an x-axis (the months in this case). Plot Seattle's and Austin's MLY-TAVG-NORMAL (for average temperature) in the top Axes and their MLY-PRCP-NORMAL (for average precipitation) in the bottom Axes. The cities should have different colors, and the line style should differ between precipitation and temperature. Make sure to label your visualization!
  • Using climate_change, create a twin Axes object with the shared x-axis as time. There should be two lines of different colors not sharing a y-axis: co2 and relative_temp. Only include dates from the 2000s and annotate the first date at which co2 exceeded 400.
  • Create a scatter plot from medals comparing the number of Gold medals vs the number of Silver medals with each point labeled with the country name.
  • Explore if the distribution of Age varies in different sports by creating histograms from summer_2016.
  • Try out the different Matplotlib styles available and save your visualizations as a PNG file.