Intermediate Python
Run the hidden code cell below to import the data used in this course.
# Import the course packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Import the two datasets
gapminder = pd.read_csv("datasets/gapminder.csv")
brics = pd.read_csv("datasets/brics.csv")

# Some reminders about Matplotlib
# NOTE(review): `year` and `pop` are not defined anywhere in the visible file;
# presumably the course environment provides them -- confirm before running.

# Print the last item from year and pop
print(year[-1], pop[-1])

# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Make a line plot: year on the x-axis, pop on the y-axis
plt.plot(year, pop)

# Display the plot with plt.show()
plt.show()
###########################################################################################
# NOTE(review): `gdp_cap` and `life_exp` are not defined in the visible file;
# presumably supplied by the course environment -- confirm.

# Print the last item of gdp_cap and life_exp
print(gdp_cap[-1], life_exp[-1])

# Make a line plot, gdp_cap on the x-axis, life_exp on the y-axis
plt.plot(gdp_cap, life_exp)

# Display the plot
plt.show()
#########################################################################################
# Change the line plot below to a scatter plot
plt.scatter(gdp_cap, life_exp)

# Put the x-axis on a logarithmic scale
plt.xscale('log')

# Show plot
plt.show()
#########################################################################################
# Build histogram with 5 bins
plt.hist(life_exp, bins=5)

# Show and clean up plot
plt.show()
# Clear the current figure before showing the next one. If not, both plots
# end up on the same graph.
plt.clf()

# Build histogram with 20 bins
plt.hist(life_exp, bins=20)

# Show and clean up again
plt.show()
plt.clf()
################################################################################
# Scatter plot
plt.scatter(gdp_cap, life_exp)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')

# Definition of tick_val and tick_lab
tick_val = [1000, 10000, 100000]
tick_lab = ['1k', '10k', '100k']

# Adapt the ticks on the x-axis
plt.xticks(tick_val, tick_lab)

# After customizing, display the plot
plt.show()
#################################################################################
# Import numpy as np
import numpy as np

# Store pop as a numpy array: np_pop
np_pop = np.array(pop)

# Double np_pop
np_pop = np_pop * 2

# Update: set s argument to np_pop
plt.scatter(gdp_cap, life_exp, s=np_pop)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Display the plot
plt.show()
##############################################################################
# NOTE(review): `col` is not defined in the visible file; presumably a
# per-point colour sequence supplied by the course environment -- confirm.

# Scatter plot
plt.scatter(x=gdp_cap, y=life_exp, s=np.array(pop) * 2, c=col, alpha=0.8)

# Previous customizations
plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Additional customizations
plt.text(1550, 71, 'India')
plt.text(5700, 80, 'China')

# Add grid() call
plt.grid(True)

# Show the plot
plt.show()
#############################################################################################
About dictionaries
# Definition of dictionary
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
}

# Add italy to europe
europe["italy"] = "rome"

# Print out italy in europe
print("italy" in europe)

# Add poland to europe
europe["poland"] = "warsaw"

# Print europe
print(europe)

# Update capital of germany
europe["germany"] = "berlin"

# Remove australia
# The dictionary defined above never contains an 'australia' key, so a plain
# `del` would raise KeyError; pop() with a default removes it only if present.
europe.pop("australia", None)

# Print europe
print(europe)
########################################################################################
# Dictionary of dictionaries
europe = {
    'spain': {'capital': 'madrid', 'population': 46.77},
    'france': {'capital': 'paris', 'population': 66.03},
    'germany': {'capital': 'berlin', 'population': 80.62},
    'norway': {'capital': 'oslo', 'population': 5.084},
}

# Print out the capital of France
print(europe["france"]["capital"])

# Create sub-dictionary data
data = {"capital": "rome", "population": 59.83}

# Add data to europe under key 'italy'
europe["italy"] = data

# Print europe
print(europe)
Let's talk a bit about Pandas
import pandas as pd

# Build cars DataFrame
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Each dictionary key becomes a column label; each list is that column's data.
cars_dict = {'country': names, 'drives_right': dr, 'cars_per_cap': cpc}
cars = pd.DataFrame(cars_dict)
print(cars)

# Definition of row_labels
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']

# Specify row labels of cars
cars.index = row_labels

# Print cars again
print(cars)
####################################################################################
# Import pandas as pd
import pandas as pd

# Fix import by including index_col
# index_col=0 uses the first CSV column as the row labels.
cars = pd.read_csv('cars.csv', index_col=0)

# Print out cars
print(cars)
##################################################################################
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Print out country column as Pandas Series
print(cars.country)
print(cars["country"])

# Print out country column as Pandas DataFrame
print(cars[['country']])

# Print out DataFrame with country and drives_right columns
print(cars[['country', 'drives_right']])

# Print out first 3 observations
print(cars.iloc[:3])

# Print out fourth, fifth and sixth observation
print(cars.iloc[3:6])

# Print out observation for Japan
print(cars.loc["JPN"])
print(cars.iloc[2])

# Print out observations for Australia and Egypt
print(cars.loc[["AUS", 'EG']])
print(cars.iloc[[1, 6]])

# Print out drives_right value of Morocco
print(cars.loc["MOR", 'drives_right'])

# Print sub-DataFrame
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])
################################################################################""
FILTERING WITH PANDAS
# Import cars data
import pandas as pd
import numpy as np
cars = pd.read_csv('cars.csv', index_col=0)

# Create car_maniac: observations that have a cars_per_cap over 500
car_maniac = cars[cars.cars_per_cap > 500]

# Print car_maniac
print(car_maniac)

# Create medium: observations with cars_per_cap between 100 and 500
# Both bounds are exclusive (strict comparisons).
medium = cars[np.logical_and(cars.cars_per_cap > 100, cars.cars_per_cap < 500)]

# Print medium
print(medium)
LOOPS
# areas list
areas = [11.25, 18.0, 20.0, 10.75, 9.50]

# Change for loop to use enumerate() and update print()
for index, area in enumerate(areas):
    print('room' + str(index) + ':' + str(area))
########################################################################################
# house list of lists
house = [
    ["hallway", 11.25],
    ["kitchen", 18.0],
    ["living room", 20.0],
    ["bedroom", 10.75],
    ["bathroom", 9.50],
]

# Build a for loop from scratch
for element in house:
    print('the ' + element[0] + " is " + str(element[1]) + " sqm")
############################################################################################
# Definition of dictionary
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
    'italy': 'rome',
    'poland': 'warsaw',
    'austria': 'vienna',
}

# Iterate over europe
for k, v in europe.items():
    print("the capital of " + k + " is " + v)
##########################################################################################
# NOTE(review): `np_height` and `np_baseball` are not defined in the visible
# file; presumably NumPy arrays supplied by the course environment -- confirm.

# Import numpy as np
import numpy as np

# For loop over np_height
for height in np_height:
    print(str(height) + ' inches')

# For loop over np_baseball
# np.nditer visits the array one element at a time.
for element in np.nditer(np_baseball):
    print(element)
###########################################################################################
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Iterate over rows of cars
for lab, row in cars.iterrows():
    print(lab)
    print(row)

# Adapt for loop
for lab, row in cars.iterrows():
    print(lab + ': ' + str(row["cars_per_cap"]))

# Code for loop that adds COUNTRY column
# Write through cars.loc so the DataFrame itself is updated.
for lab, row in cars.iterrows():
    cars.loc[lab, "COUNTRY"] = row["country"].upper()

# Print cars
print(cars)
We can also do it like this
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Use .apply(str.upper)
# Applies str.upper to every value of the country column in one call.
cars["COUNTRY"] = cars.country.apply(str.upper)
print(cars)
THE PROJECT
# Import numpy and set seed
import numpy as np
np.random.seed(123)

# The result is discarded here, but the call still advances the random
# generator, so it affects the values drawn below.
np.random.rand()

# Use randint() to simulate a dice
# randint(1, 7) draws from 1..6: the upper bound is exclusive.
print(np.random.randint(1, 7))

# Use randint() again to see if it's the same result
print(np.random.randint(1, 7))
######################################################################################
Now, let's start the project properly
# NumPy is imported, seed is set

# Starting step
step = 50

# Roll the dice
dice = np.random.randint(1, 7)

# Finish the control construct
if dice <= 2:
    # 1 or 2: go one step down
    step -= 1
elif dice <= 5:
    # 3, 4 or 5: go one step up
    step += 1
else:
    # 6: roll again and go up by that many steps
    step = step + np.random.randint(1, 7)

# Print out dice and step
print(dice)
print(step)
# NumPy is imported, seed is set
np.random.seed(123)

# Initialize random_walk
random_walk = [0]

# Simulate 100 dice rolls, appending the resulting position each time
for x in range(100):
    # Set step: last element in random_walk
    step = random_walk[-1]

    # Roll the dice
    dice = np.random.randint(1, 7)

    # Determine next step
    if dice <= 2:
        step = step - 1
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)

    # append next_step to random_walk
    random_walk.append(step)

# Print random_walk
print(random_walk)

# We can do better than the code above: its result can contain negative
# steps, which is impossible.
## NumPy is imported, seed is set
# NOTE(review): this block does not call np.random.seed itself; it continues
# from whatever random state earlier code left behind.

## Initialize random_walk
random_walk = [0]

for x in range(100):
    step = random_walk[-1]
    dice = np.random.randint(1, 7)

    if dice <= 2:
        # Replace below: use max to make sure step can't go below 0
        step = max(0, step - 1)
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)

    random_walk.append(step)

print(random_walk)

# Let's visualize this random walk!
# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Plot random_walk
plt.plot(random_walk)

# Show the plot
plt.show()

# Let's now finish the exercise by simulating the random walk many times to
# see the distribution, in order to compute the probability of reaching the
# 60th step.