Skip to content

Intermediate Python

Run the hidden code cell below to import the data used in this course.

# Import the course packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the two course datasets into DataFrames (paths are relative)
gapminder = pd.read_csv(filepath_or_buffer="datasets/gapminder.csv")
brics = pd.read_csv(filepath_or_buffer="datasets/brics.csv")

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

# Two parallel series: the years 2010 through 2019 and one pop value per year
year = list(range(2010, 2020))
pop = [
    157, 159, 162, 165, 168,
    171, 174, 177, 180, 183,
]

# Show the final entry of each list, one value per line
print(year[-1], pop[-1], sep="\n")

# pyplot supplies the plotting functions used below
import matplotlib.pyplot as plt

# Line plot with year along the x-axis and pop along the y-axis
plt.plot(year, pop)

# Render the figure
plt.show()
import matplotlib.pyplot as plt

# Sample data: five gdp_cap values (1000..5000) and five life_exp values (50..90)
gdp_cap = list(range(1000, 6000, 1000))
life_exp = list(range(50, 100, 10))

# Scatter plot with gdp_cap on the x-axis and life_exp on the y-axis
plt.scatter(x=gdp_cap, y=life_exp)

# Switch the x-axis to a logarithmic scale
plt.xscale("log")

# Render the figure
plt.show()


import matplotlib.pyplot as plt
# Histogram of life_exp divided into 5 bins
plt.hist(life_exp, bins=5)

# Render the figure, then clear it before drawing the next histogram
plt.show()
plt.clf()

# Histogram of the same data divided into 20 bins
plt.hist(life_exp, bins=20)

# Render and clear once more
plt.show()
plt.clf()
import matplotlib.pyplot as plt

# Basic scatter plot of life_exp against gdp_cap, with a logarithmic x-axis
plt.scatter(gdp_cap, life_exp)
plt.xscale('log')

# Label text: defined once here and passed to the calls below, so the
# strings are not duplicated
xlab = 'GDP per Capita [in USD]'
ylab = 'Life Expectancy [in years]'
title = 'World Development in 2007'

# Add axis labels
plt.xlabel(xlab)
plt.ylabel(ylab)

# Add title
plt.title(title)

# After customizing, display the plot
plt.show()
# Scatter plot of life_exp against gdp_cap
plt.scatter(gdp_cap, life_exp)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')

# Tick positions on the x-axis and the label shown at each position
tick_val = [1000, 10000, 100000]
tick_lab = ['1k', '10k', '100k']

# Adapt the ticks on the x-axis using the lists defined above
plt.xticks(tick_val, tick_lab)

# After customizing, display the plot
plt.show()
# Import numpy as np; it is used to size the bubbles according to population
import numpy as np

# Define pop as a list of population values
pop = [10000000, 20000000, 30000000, 40000000, 50000000]

# Store pop as a numpy array expressed in millions: np_pop.
# plt.scatter() interprets s as a marker area in points**2, so passing values
# in the tens of millions would draw markers far larger than the figure.
np_pop = np.array(pop) / 1e6

# Double np_pop
np_pop = np_pop * 2

# Scatter plot with the s argument set to np_pop
plt.scatter(gdp_cap, life_exp, s=np_pop)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Display the plot
plt.show()

Colors

The code you've written up to now is available in the script.

The next step is making the plot more colorful! To do this, a list col has been created for you. It's a list with a color for each corresponding country, depending on the continent the country is part of.

How did we make the list col you ask? The Gapminder data contains a list continent with the continent each country belongs to. A dictionary is constructed that maps continents onto colors:

dict = { 'Asia':'red', 'Europe':'green', 'Africa':'blue', 'Americas':'yellow', 'Oceania':'black' }

Nothing to worry about now; you will learn about dictionaries in the next chapter.

- Add c = col to the arguments of the plt.scatter() function.
- Change the opacity of the bubbles by setting the alpha argument to 0.8 inside plt.scatter(). Alpha can be set from zero to one, where zero is totally transparent and one is fully opaque.

import matplotlib.pyplot as plt 
import numpy as np

# Map each continent name to the color used for its bubble
c = {
    'Asia': 'red',
    'Europe': 'green',
    'Africa': 'blue',
    'Americas': 'yellow',
    'Oceania': 'black',
}

# One entry per data point; despite the variable name, these are continent names
countries = ['Asia', 'Europe', 'Africa', 'Americas', 'Oceania']

# Look up the color for each point
col = [c[continent] for continent in countries]

# Specify c and alpha inside plt.scatter().
# s is a marker area in points**2, so pop is converted to millions before
# doubling; raw values in the tens of millions would draw markers far larger
# than the figure.
plt.scatter(x=gdp_cap, y=life_exp, s=np.array(pop) / 1e6 * 2, c=col, alpha=0.8)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Show the plot
plt.show()

# Scatter plot colored by col, with bubble area driven by pop.
# s is a marker area in points**2, so pop is converted to millions before
# doubling; raw values in the tens of millions would draw markers far larger
# than the figure.
plt.scatter(x=gdp_cap, y=life_exp, s=np.array(pop) / 1e6 * 2, c=col, alpha=0.8)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Additional customizations: text labels placed at fixed data coordinates
plt.text(1550, 71, 'India')
plt.text(5700, 80, 'China')

# Draw grid lines
plt.grid(True)

# Show the plot
plt.show()