Intermediate Python
Run the hidden code cell below to import the data used in this course.
# Import the course packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the two course datasets (paths are relative to the working directory)
gapminder, brics = (
    pd.read_csv("datasets/gapminder.csv"),
    pd.read_csv("datasets/brics.csv"),
)
Some reminders about Matplotlib
# Line plot of population against year.
# NOTE(review): `year` and `pop` are not defined anywhere in this file;
# presumably they are provided by the course environment -- confirm.

# Print the last item from year and pop
print(year[-1], pop[-1])

# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Make a line plot: year on the x-axis, pop on the y-axis
plt.plot(year, pop)

# Display the plot with plt.show()
plt.show()
###########################################################################################
# Line plot of life expectancy against GDP per capita.
# NOTE(review): `gdp_cap` and `life_exp` are not defined in this file;
# presumably they are provided by the course environment -- confirm.

# Print the last item of gdp_cap and life_exp
print(gdp_cap[-1], life_exp[-1])

# Make a line plot, gdp_cap on the x-axis, life_exp on the y-axis
plt.plot(gdp_cap, life_exp)

# Display the plot
plt.show()
#########################################################################################
# Same data as a scatter plot, with a logarithmic x-axis.
# NOTE(review): `gdp_cap` and `life_exp` are not defined in this file;
# presumably they are provided by the course environment -- confirm.

# Change the line plot below to a scatter plot
plt.scatter(gdp_cap, life_exp)

# Put the x-axis on a logarithmic scale
plt.xscale('log')

# Show plot
plt.show()
#########################################################################################
# Histograms of life_exp with two different bin counts.
# NOTE(review): `life_exp` is not defined in this file; presumably it is
# provided by the course environment -- confirm.

# Build histogram with 5 bins
plt.hist(life_exp, bins=5)

# Show and clean up plot
plt.show()
# Clear the current figure before drawing the next one; otherwise both
# histograms would be drawn on the same graph.
plt.clf()

# Build histogram with 20 bins
plt.hist(life_exp, bins=20)

# Show and clean up again
plt.show()
plt.clf()
################################################################################
# Scatter plot with axis labels, a title and custom x-axis ticks.
# NOTE(review): `gdp_cap` and `life_exp` are not defined in this file;
# presumably they are provided by the course environment -- confirm.

# Scatter plot
plt.scatter(gdp_cap, life_exp)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')

# Definition of tick_val and tick_lab
tick_val = [1000, 10000, 100000]
tick_lab = ['1k', '10k', '100k']

# Adapt the ticks on the x-axis
plt.xticks(tick_val, tick_lab)

# After customizing, display the plot
plt.show()
#################################################################################
# Scatter plot whose marker sizes come from the (doubled) population values.
# NOTE(review): `pop`, `gdp_cap` and `life_exp` are not defined in this
# file; presumably they are provided by the course environment -- confirm.

# Import numpy as np
import numpy as np

# Store pop as a numpy array: np_pop
np_pop = np.array(pop)

# Double np_pop
np_pop = np_pop * 2

# Update: set s argument to np_pop
plt.scatter(gdp_cap, life_exp, s=np_pop)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Display the plot
plt.show()
##############################################################################
# Scatter plot with per-point colours, transparency, text labels and a grid.
# NOTE(review): `gdp_cap`, `life_exp`, `pop` and `col` are not defined in
# this file; presumably they are provided by the course environment -- confirm.

# Scatter plot
plt.scatter(x=gdp_cap, y=life_exp, s=np.array(pop) * 2, c=col, alpha=0.8)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Additional customizations
plt.text(1550, 71, 'India')
plt.text(5700, 80, 'China')

# Add grid() call
plt.grid(True)

# Show the plot
plt.show()
#############################################################################################
About dictionaries
# Basic dictionary operations: add, test membership, update and remove keys.

# Definition of dictionary
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
}

# Add italy to europe
europe["italy"] = "rome"

# Print out italy in europe
print("italy" in europe)

# Add poland to europe
europe["poland"] = "warsaw"

# Print europe
print(europe)

# Update capital of germany
europe["germany"] = "berlin"

# Remove australia
# 'australia' is never added to this dictionary, so `del europe["australia"]`
# would raise KeyError; pop() with a default removes the key only if present.
europe.pop("australia", None)

# Print europe
print(europe)
########################################################################################
# Nested dictionaries: each country maps to a sub-dictionary of facts.

# Dictionary of dictionaries
europe = {
    'spain': {'capital': 'madrid', 'population': 46.77},
    'france': {'capital': 'paris', 'population': 66.03},
    'germany': {'capital': 'berlin', 'population': 80.62},
    'norway': {'capital': 'oslo', 'population': 5.084},
}

# Print out the capital of France
print(europe["france"]["capital"])

# Create sub-dictionary data
data = {"capital": "rome", "population": 59.83}

# Add data to europe under key 'italy'
europe["italy"] = data

# Print europe
print(europe)
Let's talk a bit about Pandas
# Build a DataFrame from a dictionary of lists, then give it row labels.
import pandas as pd

# Build cars DataFrame
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

cars_dict = {'country': names, 'drives_right': dr, 'cars_per_cap': cpc}
cars = pd.DataFrame(cars_dict)
print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars
cars.index = row_labels

# Print cars again
print(cars)
####################################################################################
# Read the cars data from CSV, using the first column as the row index.

# Import pandas as pd
import pandas as pd

# Fix import by including index_col
cars = pd.read_csv('cars.csv', index_col=0)

# Print out cars
print(cars)
##################################################################################
# Column and row selection on the cars DataFrame with [], loc and iloc.
# NOTE(review): the positional iloc examples assume cars.csv has the same
# row order as the DataFrame built earlier (US, AUS, JPN, IN, RU, MOR, EG)
# -- confirm against the file.

# Import cars data
import pandas as pd

cars = pd.read_csv('cars.csv', index_col=0)

# Print out country column as Pandas Series
print(cars.country)
print(cars["country"])

# Print out country column as Pandas DataFrame
print(cars[['country']])

# Print out DataFrame with country and drives_right columns
print(cars[['country', 'drives_right']])

# Print out first 3 observations
print(cars.iloc[:3])

# Print out fourth, fifth and sixth observation
print(cars.iloc[3:6])

# Print out observation for Japan
print(cars.loc["JPN"])
print(cars.iloc[2])

# Print out observations for Australia and Egypt
print(cars.loc[["AUS", 'EG']])
print(cars.iloc[[1, 6]])

# Print out drives_right value of Morocco
print(cars.loc["MOR", 'drives_right'])

# Print sub-DataFrame
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])
################################################################################""
FILTERING WITH PANDAS
# Filter rows of the cars DataFrame with boolean masks.

# Import cars data
import pandas as pd
import numpy as np

cars = pd.read_csv('cars.csv', index_col=0)

# Create car_maniac: observations that have a cars_per_cap over 500
car_maniac = cars[cars["cars_per_cap"] > 500]

# Print car_maniac
print(car_maniac)

# Create medium: observations with cars_per_cap between 100 and 500
# (both bounds are exclusive)
cpc = cars["cars_per_cap"]
medium = cars[np.logical_and(cpc > 100, cpc < 500)]

# Print medium
print(medium)
LOOPS
# Loop over a list with enumerate() to get both the index and the value.

# areas list
areas = [11.25, 18.0, 20.0, 10.75, 9.50]

# Change for loop to use enumerate() and update print()
for index, area in enumerate(areas):
    print('room' + str(index) + ':' + str(area))
########################################################################################
# Loop over a list of [room name, area] pairs.

# house list of lists
house = [
    ["hallway", 11.25],
    ["kitchen", 18.0],
    ["living room", 20.0],
    ["bedroom", 10.75],
    ["bathroom", 9.50],
]

# Build a for loop from scratch
for element in house:
    print('the ' + element[0] + " is " + str(element[1]) + " sqm")
############################################################################################
# Loop over the key/value pairs of a dictionary with .items().

# Definition of dictionary
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
    'italy': 'rome',
    'poland': 'warsaw',
    'austria': 'vienna',
}

# Iterate over europe
for k, v in europe.items():
    print("the capital of " + k + " is " + v)
##########################################################################################
# Loop over NumPy arrays, directly and element by element with np.nditer.
# NOTE(review): `np_height` and `np_baseball` are not defined in this file;
# presumably they are provided by the course environment -- confirm.

# Import numpy as np
import numpy as np

# For loop over np_height
for height in np_height:
    print(str(height) + ' inches')

# For loop over np_baseball
for element in np.nditer(np_baseball):
    print(element)
###########################################################################################
# Loop over DataFrame rows with iterrows() and add an upper-cased column.

# Import cars data
import pandas as pd

cars = pd.read_csv('cars.csv', index_col=0)

# Iterate over rows of cars
for lab, row in cars.iterrows():
    print(lab)
    print(row)

# Adapt for loop
for lab, row in cars.iterrows():
    print(lab + ': ' + str(row["cars_per_cap"]))

# Code for loop that adds COUNTRY column
for lab, row in cars.iterrows():
    cars.loc[lab, "COUNTRY"] = row["country"].upper()

# Print cars
print(cars)
We can also do it like this
# Build the COUNTRY column without an explicit loop.

# Import cars data
import pandas as pd

cars = pd.read_csv('cars.csv', index_col=0)

# Use .apply(str.upper)
cars["COUNTRY"] = cars["country"].apply(str.upper)
print(cars)
THE PROJECT
# Seed NumPy's random generator and simulate dice rolls.

# Import numpy and set seed
import numpy as np

np.random.seed(123)
np.random.rand()  # draws one random float; the value is discarded

# Use randint() to simulate a dice (the upper bound 7 is exclusive)
print(np.random.randint(1, 7))

# Use randint() again to see if it's the same result
print(np.random.randint(1, 7))
######################################################################################
Now, let's start the project properly
# One move of the game: roll a dice and update the current step accordingly.
# NumPy is imported, seed is set

# Starting step
step = 50

# Roll the dice
dice = np.random.randint(1, 7)

# Finish the control construct
if dice <= 2:
    # 1 or 2: go one step down
    step -= 1
elif dice <= 5:
    # 3, 4 or 5: go one step up
    step += 1
else:
    # 6: roll again and go up by that many steps
    step = step + np.random.randint(1, 7)

# Print out dice and step
print(dice)
print(step)
# Simulate a 100-move random walk and record the position after every move.
np.random.seed(123)
# NumPy is imported, seed is set

# Initialize random_walk
random_walk = [0]

# Play 100 moves
for x in range(100):
    # Set step: last element in random_walk
    step = random_walk[-1]

    # Roll the dice
    dice = np.random.randint(1, 7)

    # Determine next step
    if dice <= 2:
        step = step - 1
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)

    # Append the new step to random_walk
    random_walk.append(step)

# Print random_walk
print(random_walk)
We can make the previous code more realistic: its result contains negative steps, which is impossible.
# Same 100-move random walk, but the position can never drop below 0.
## NumPy is imported, seed is set

## Initialize random_walk
random_walk = [0]

for x in range(100):
    step = random_walk[-1]
    dice = np.random.randint(1, 7)

    if dice <= 2:
        # Use max to make sure step can't go below 0
        step = max(0, step - 1)
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)

    random_walk.append(step)

print(random_walk)
Let's visualize this random walk!
# Bring in the pyplot interface under its usual alias
from matplotlib import pyplot as plt

# Draw the recorded positions of the walk as a line
plt.plot(random_walk)

# Render the figure
plt.show()
Let's now finish the exercise by simulating the random walk many times to see the distribution, in order to compute the probability of reaching step 60.