Skip to content

Intermediate Python

Run the hidden code cell below to import the data used in this course.

# Import the course packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the two course datasets into pandas DataFrames.
# The paths are relative, so the "datasets" folder must be reachable from the
# current working directory.
gapminder = pd.read_csv("datasets/gapminder.csv")
brics = pd.read_csv("datasets/brics.csv")

Some reminders about Matplotlib

# NOTE(review): `year` and `pop` are not defined in the visible part of this
# file; they are presumably provided by the course environment - confirm.

# Print the last item from year and pop
print(year[-1], pop[-1])

# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Make a line plot: year on the x-axis, pop on the y-axis
plt.plot(year, pop)

# Display the plot with plt.show()
plt.show()

###########################################################################################

# NOTE(review): `gdp_cap` and `life_exp` are not defined in the visible part
# of this file; they are presumably provided by the course environment.

# Print the last item of gdp_cap and life_exp
print(gdp_cap[-1], life_exp[-1])

# Make a line plot, gdp_cap on the x-axis, life_exp on the y-axis
plt.plot(gdp_cap, life_exp)

# Display the plot
plt.show()

#########################################################################################

# Change the line plot below to a scatter plot
plt.scatter(gdp_cap, life_exp)

# Put the x-axis on a logarithmic scale
plt.xscale('log')

# Show plot
plt.show()

#########################################################################################

# Build histogram with 5 bins
plt.hist(life_exp, bins=5)

# Show and clean up plot
plt.show()
# Clean the current figure before showing the next one. If not, we'll get
# both of the plots on the same graph.
plt.clf()

# Build histogram with 20 bins
plt.hist(life_exp, bins=20)

# Show and clean up again
plt.show()
plt.clf()

################################################################################

# Scatter plot
plt.scatter(gdp_cap, life_exp)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')

# Definition of tick_val and tick_lab
tick_val = [1000, 10000, 100000]
tick_lab = ['1k', '10k', '100k']

# Adapt the ticks on the x-axis: show tick_lab labels at the tick_val positions
plt.xticks(tick_val, tick_lab)

# After customizing, display the plot
plt.show()

#################################################################################

# Import numpy as np
import numpy as np

# Store pop as a numpy array: np_pop
np_pop = np.array(pop)

# Double np_pop (element-wise, which is why a numpy array is needed)
np_pop = np_pop * 2

# Update: set s argument to np_pop so each marker size follows the population
plt.scatter(gdp_cap, life_exp, s=np_pop)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Display the plot
plt.show()

##############################################################################

# NOTE(review): `col` (the marker colours) is not defined in the visible part
# of this file; it is presumably provided by the course environment.

# Scatter plot
plt.scatter(x=gdp_cap, y=life_exp, s=np.array(pop) * 2, c=col, alpha=0.8)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Additional customizations: label two points, positioned in data coordinates
plt.text(1550, 71, 'India')
plt.text(5700, 80, 'China')

# Add grid() call
plt.grid(True)

# Show the plot
plt.show()

#############################################################################################

About dictionaries

# Definition of dictionary
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
}

# Add italy to europe
europe["italy"] = "rome"

# Print out italy in europe (membership test on the keys)
print("italy" in europe)

# Add poland to europe
europe["poland"] = "warsaw"

# Print europe
print(europe)

# Update capital of germany
# (the stored value is already 'berlin', so this leaves the dict unchanged)
europe["germany"] = "berlin"

# Remove australia
# 'australia' is never added to europe above, so `del europe["australia"]`
# raised KeyError. pop() with a default removes the key only if it exists.
europe.pop("australia", None)

# Print europe
print(europe)

########################################################################################

# Dictionary of dictionaries: country -> {'capital', 'population'}
europe = {
    'spain': {'capital': 'madrid', 'population': 46.77},
    'france': {'capital': 'paris', 'population': 66.03},
    'germany': {'capital': 'berlin', 'population': 80.62},
    'norway': {'capital': 'oslo', 'population': 5.084},
}

# Print out the capital of France (chained lookup: outer key, then inner key)
print(europe["france"]["capital"])

# Create sub-dictionary data
data = {"capital": "rome", "population": 59.83}

# Add data to europe under key 'italy'
# (europe["italy"] refers to the same dict object as `data`, not a copy)
europe["italy"] = data

# Print europe
print(europe)

Let's talk a bit about Pandas

import pandas as pd

# Build cars DataFrame from three parallel lists (one entry per country)
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Each dictionary key becomes a column name, each list the column's values
cars_dict = {
    'country': names,
    'drives_right': dr,
    'cars_per_cap': cpc,
}

cars = pd.DataFrame(cars_dict)

print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars (replaces the default 0..6 integer index)
cars.index = row_labels

# Print cars again
print(cars)

####################################################################################

# Import pandas as pd
import pandas as pd

# Fix import by including index_col
# (index_col=0 uses the first CSV column as the row labels)
cars = pd.read_csv('cars.csv', index_col=0)

# Print out cars
print(cars)

##################################################################################

# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# NOTE(review): the positional (iloc) selections below assume cars.csv lists
# the rows in the order US, AUS, JPN, IN, RU, MOR, EG - confirm against the file.

# Print out country column as Pandas Series
print(cars.country)
print(cars["country"])

# Print out country column as Pandas DataFrame (double brackets)
print(cars[['country']])

# Print out DataFrame with country and drives_right columns
print(cars[['country', 'drives_right']])

# Print out first 3 observations
print(cars.iloc[:3])

# Print out fourth, fifth and sixth observation
print(cars.iloc[3:6])

# Print out observation for Japan (by label, then by position)
print(cars.loc["JPN"])

print(cars.iloc[2])

# Print out observations for Australia and Egypt (by label, then by position)
print(cars.loc[["AUS", 'EG']])

print(cars.iloc[[1, 6]])

# Print out drives_right value of Morocco
print(cars.loc["MOR", 'drives_right'])

# Print sub-DataFrame
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

################################################################################""

FILTERING WITH PANDAS

# Import cars data
import pandas as pd
import numpy as np
cars = pd.read_csv('cars.csv', index_col=0)

# Create car_maniac: observations that have a cars_per_cap over 500
car_maniac = cars[cars.cars_per_cap > 500]

# Print car_maniac
print(car_maniac)

# Create medium: observations with cars_per_cap between 100 and 500
# (both bounds are exclusive; np.logical_and combines the two boolean Series
# element-wise)
medium = cars[np.logical_and(cars.cars_per_cap > 100, cars.cars_per_cap < 500)]

# Print medium
print(medium)

LOOPS

# areas list
areas = [11.25, 18.0, 20.0, 10.75, 9.50]

# Change for loop to use enumerate() and update print()
# enumerate() yields (index, value) pairs, with the index starting at 0
for index, area in enumerate(areas):
    print('room' + str(index) + ':' + str(area))

########################################################################################

# house list of lists: each entry is [room name, area]
house = [
    ["hallway", 11.25],
    ["kitchen", 18.0],
    ["living room", 20.0],
    ["bedroom", 10.75],
    ["bathroom", 9.50],
]

# Build a for loop from scratch
for element in house:
    print('the ' + element[0] + " is " + str(element[1]) + " sqm")

############################################################################################

# Definition of dictionary
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
    'italy': 'rome',
    'poland': 'warsaw',
    'austria': 'vienna',
}

# Iterate over europe: items() yields (key, value) pairs
for k, v in europe.items():
    print("the capital of " + k + " is " + v)

##########################################################################################

# Import numpy as np
import numpy as np

# NOTE(review): `np_height` and `np_baseball` are not defined in the visible
# part of this file; they are presumably provided by the course environment.

# For loop over np_height
for height in np_height:
    print(str(height) + ' inches')

# For loop over np_baseball
# np.nditer visits every individual element of the array
for element in np.nditer(np_baseball):
    print(element)

###########################################################################################

# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Iterate over rows of cars: iterrows() yields (row label, row as Series)
for lab, row in cars.iterrows():
    print(lab)
    print(row)

# Adapt for loop
for lab, row in cars.iterrows():
    print(lab + ': ' + str(row["cars_per_cap"]))

# Code for loop that adds COUNTRY column
# The first assignment creates the column; later ones fill it row by row.
for lab, row in cars.iterrows():
    cars.loc[lab, "COUNTRY"] = row["country"].upper()

# Print cars
print(cars)

We can also do it like this:

# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Use .apply(str.upper): calls str.upper on every value of the country column
cars["COUNTRY"] = cars.country.apply(str.upper)

print(cars)

THE PROJECT

# Import numpy and set seed
import numpy as np

# A fixed seed makes the pseudo-random sequence below reproducible
np.random.seed(123)

# Generate and print a random float.
# The value was previously computed and silently discarded; in a script it has
# to be printed to be seen.
print(np.random.rand())

# Use randint() to simulate a dice (upper bound 7 is exclusive: values 1..6)
print(np.random.randint(1, 7))

# Use randint() again to see if it's the same result
print(np.random.randint(1, 7))

######################################################################################

Now, let's start the project properly

# NumPy is imported, seed is set
# (this section relies on the earlier `import numpy as np` / np.random.seed call)

# Starting step
step = 50

# Roll the dice
dice = np.random.randint(1, 7)

# Finish the control construct
if dice <= 2:
    # 1 or 2: go one step down
    step -= 1
elif dice <= 5:
    # 3, 4 or 5: go one step up
    step += 1
else:
    # 6: roll again and go up by that many steps
    step = step + np.random.randint(1, 7)

# Print out dice and step
print(dice)
print(step)

# Fix the seed so the walk below is reproducible
np.random.seed(123)

# The walk starts on step 0
random_walk = [0]

# Simulate 100 dice throws, recording the position after each one
for _ in range(100):
    # Roll the dice (1..6)
    dice = np.random.randint(1, 7)

    # Translate the roll into a move: 6 -> roll again and climb that many
    # steps, 3-5 -> one step up, 1-2 -> one step down
    if dice > 5:
        move = np.random.randint(1, 7)
    elif dice > 2:
        move = 1
    else:
        move = -1

    # New position = last recorded position + move
    step = random_walk[-1] + move
    random_walk.append(step)

# Print random_walk
print(random_walk)

We can improve on the previous code: its result contains negative steps, which is impossible.

## NumPy is imported; this section does not reseed the generator itself

## The walk starts on step 0
random_walk = [0]

for _ in range(100):
    # Roll the dice (1..6) and look at the current position
    dice = np.random.randint(1, 7)
    current = random_walk[-1]

    if dice > 5:
        # 6: roll again and climb that many steps
        step = current + np.random.randint(1, 7)
    elif dice > 2:
        # 3, 4 or 5: one step up
        step = current + 1
    else:
        # 1 or 2: one step down, but never below step 0
        step = current - 1 if current > 0 else 0

    random_walk.append(step)

print(random_walk)

Let's visualize this random walk!

# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Plot random_walk as a line. Only one sequence is passed, so its values go
# on the y-axis and the list index (the throw number) is used for the x-axis.
plt.plot(random_walk)

# Show the plot
plt.show()

Let's now finish the exercise by simulating the random walk many times to see the distribution, in order to compute the probability of reaching step 60.