# Intermediate Python
# Dictionary of dictionaries: every country maps to its own small record.
europe = {
    'spain': {'capital': 'madrid', 'population': 46.77},
    'france': {'capital': 'paris', 'population': 66.03},
    'germany': {'capital': 'berlin', 'population': 80.62},
    'norway': {'capital': 'oslo', 'population': 5.084},
}
# Chain two key lookups to reach a nested value: the capital of France.
print(europe['france']['capital'])
# Build the record for Italy ...
data = dict(capital='rome', population=59.83)
# ... and register it in europe under the key 'italy'.
europe.update(italy=data)
# Show the extended dictionary.
print(europe)
#---------------------------------------------------------------------------------------------------------
# Pandas pt 1
# Dictionary to DataFrame (1)
import pandas as pd

# Pre-defined lists: one list per future column, all of the same length.
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]
# Keys become the column labels, the lists become the column values.
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)
# Build the DataFrame; without an explicit index the rows are numbered 0..6.
cars = pd.DataFrame(data=my_dict)
print(cars)
# Dictionary to DataFrame (2)
# Replace the default integer index with short country codes.
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']
cars.index = row_labels
print(cars)
'''from this:
country drives_right cars_per_cap
0 United States True 809
1 Australia False 731
2 Japan False 588
3 India False 18
4 Russia True 200
5 Morocco True 70
6 Egypt True 45
to this:
country drives_right cars_per_cap
US United States True 809
AUS Australia False 731
JPN Japan False 588
IN India False 18
RU Russia True 200
MOR Morocco True 70
EG Egypt True 45'''
# CSV to DataFrame (2)
import pandas as pd

# index_col=0 tells read_csv to use the first CSV column as the row labels
# instead of generating a fresh 0..n-1 index next to it.
cars = pd.read_csv('cars.csv', index_col=0)
# Show the imported table.
print(cars)
'''
now (main table):
cars_per_cap country drives_right
US 809 United States True
AUS 731 Australia False
JPN 588 Japan False
IN 18 India False
RU 200 Russia True
MOR 70 Morocco True
EG 45 Egypt True
'''
# Square Brackets (1, 2)
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Single brackets with one column label return a pandas Series
print(cars['country'])
'''
US United States
AUS Australia
JPN Japan
IN India
RU Russia
MOR Morocco
EG Egypt
Name: country, dtype: object
'''
# Double brackets (a list of labels) return a pandas DataFrame
print(cars[['country']])
'''
country
US United States
AUS Australia
JPN Japan
IN India
RU Russia
MOR Morocco
EG Egypt
'''
# Print out DataFrame with country and drives_right columns
print(cars[['country','drives_right']])
'''
country drives_right
US United States True
AUS Australia False
JPN Japan False
IN India False
RU Russia True
MOR Morocco True
EG Egypt True
'''
# A slice inside square brackets selects rows by position (stop is exclusive).
# Print out first 3 observations
print(cars[0:3])
# Print out fourth, fifth and sixth observation: zero-based positions 3, 4, 5.
# (The previous slice 4:7 returned the fifth to seventh rows instead.)
print(cars[3:6])
'''
cars_per_cap country drives_right
US 809 United States True
AUS 731 Australia False
JPN 588 Japan False
cars_per_cap country drives_right
IN 18 India False
RU 200 Russia True
MOR 70 Morocco True
'''
# loc selects by label, iloc selects by zero-based position. Each pair of
# prints below is meant to show the same selection done both ways.
# Row positions: US=0, AUS=1, JPN=2, IN=3, RU=4, MOR=5, EG=6.
# Column positions: cars_per_cap=0, country=1, drives_right=2.

# Print out observation for Japan (series, not dataframe)
print(cars.loc['JPN'])
print()
print(cars.iloc[2])
print()
# Print out observations for Australia and Egypt (dataframe, not series)
print(cars.loc[['AUS','EG']])
print()
# Egypt is the last row (position 6); position 5 would be Morocco.
print(cars.iloc[[1, 6]])
# Print out drives_right value of Morocco (a single scalar value)
print(cars)
print()
print(cars.loc['MOR', 'drives_right'])
print()
# Morocco is at position 5; position 6 would read Egypt's value.
print(cars.iloc[5, 2])
# Print out a sub-DataFrame, containing the observations for Russia and Morocco and the columns country and drives_right. (dataframe)
print()
# Russia and Morocco are positions 4 and 5.
print(cars.iloc[[4, 5], [1, 2]])
print()
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])
# Print out drives_right column as Series
print(cars.loc[:, 'drives_right'])
print()
print(cars.iloc[:, 2])
# Print out drives_right column as DataFrame (list selector keeps 2 dimensions)
print()
print(cars.loc[:, ['drives_right']])
print()
print(cars.iloc[:, [2]])
# Print out cars_per_cap and drives_right as DataFrame
print()
print(cars.loc[:, ['cars_per_cap', 'drives_right']])
print()
print(cars.iloc[:, [0, 2]])
#-----------------------------------------------------------------------------
# Comparison Operators
'''
- Select the cars_per_cap column from cars as a Pandas Series and store it as cpc.
- Use cpc in combination with a comparison operator and 500. You want to end up with a boolean Series that's True if the corresponding country has a cars_per_cap of more than 500 and False otherwise. Store this boolean Series as many_cars.
- Use many_cars to subset cars, similar to what you did before. Store the result as car_maniac.
- Print out car_maniac to see if you got it right.
'''
# np.logical_and is used below; numpy is first imported further down in this
# script, so import it here to keep this section runnable on its own.
import numpy as np

# Follow the steps above literally: column -> boolean mask -> filtered rows.
cpc = cars['cars_per_cap']
many_cars = cpc > 500
# Create car_maniac: observations that have a cars_per_cap over 500
car_maniac = cars[many_cars]
# Print car_maniac
print(car_maniac)
# or, as a one-liner
print(cars[cars['cars_per_cap'] > 500])
# Create medium: observations with cars_per_cap between 100 and 500 (both
# bounds exclusive). Python's `and` does not work element-wise on a Series,
# hence np.logical_and.
medium = cars[np.logical_and(cpc > 100, cpc < 500)]
#-----------------------------------------------------------------------------
# Loops
areas = [11.25, 18.0, 20.0, 10.75, 9.50]
# enumerate() yields (index, value) pairs; the index is zero-based, so the
# first room is printed as "room 0". Use enumerate(areas, start=1) if the
# labels should start at 1 instead.
for room, area in enumerate(areas):
    print("room " + str(room) + ": " + str(area))
'''
room 0: 11.25
room 1: 18.0
room 2: 20.0
room 3: 10.75
room 4: 9.5
'''
# Write a for loop that goes through each sublist of house and prints out the x is y sqm, where x is the name of the room and y is the area of the room.
house = [["hallway", 11.25], ["kitchen", 18.0], ["living room", 20.0], ["bedroom", 10.75], ["bathroom", 9.50]]
# Each element x is a [name, area] pair.
for x in house:
    print("the " + x[0] + " is " + str(x[1]) + " sqm")
# Definition of dictionary: country -> capital
europe = {'spain':'madrid', 'france':'paris', 'germany':'berlin',
          'norway':'oslo', 'italy':'rome', 'poland':'warsaw', 'austria':'vienna' }
# Iterate over europe: .items() yields one (key, value) pair per entry.
for key, value in europe.items():
    print("the capital of " + key + " is " + str(value))
# np.nditer is used below; numpy is first imported further down in this
# script, so import it here to keep this section runnable on its own.
import numpy as np

# NOTE(review): np_height and np_baseball are not defined anywhere in this
# file - presumably NumPy arrays supplied by the exercise environment.
# Write a for loop that iterates over all elements in np_height and prints out "x inches" for each element, where x is the value in the array.
for x in np_height:
    print(str(x) + " inches")
# Write a for loop that visits every element of the np_baseball array and prints it out.
# np.nditer walks the array element by element regardless of its dimensions.
for x in np.nditer(np_baseball):
    print(str(x))
# Write a for loop that iterates over the rows of cars and on each iteration perform two print() calls: one to print out the row label and one to print out all of the rows contents.
# Iterate over rows of cars
print(cars)
# iterrows() yields (row label, row as a Series) pairs.
for lab, row in cars.iterrows():
    print()
    print(lab)
    print(row)
# Using the iterators lab and row, adapt the code in the for loop such that the first iteration prints out "US: 809", the second iteration "AUS: 731", and so on.
for lab, row in cars.iterrows():
    print(lab + ": " + str(row['cars_per_cap']))
# Add a COUNTRY column holding the upper-cased country name, one row at a
# time. Writing through cars.loc creates the column on first assignment.
for lab, row in cars.iterrows():
    cars.loc[lab, "COUNTRY"] = row["country"].upper()
# Same result without a loop: .apply(str.upper) handles the whole column at
# once and avoids iterrows(), which is not efficient.
cars["COUNTRY"] = cars["country"].apply(str.upper)
#--------------------------------------------------------------------------------------------
# Random numbers
# Heads or tails
import numpy as np
# Fix the seed so the "random" draw is reproducible between runs.
np.random.seed(123)
# randint's upper bound is exclusive: this draws 0 or 1.
coin = np.random.randint(0, 2)
print(coin)
if coin == 0:
    print("heads")
else:
    print("tails")
#-------------------------
# Random walk
# Throw the dice
import numpy as np
np.random.seed(123)
# Starting step
step = 50
# Roll the dice from 1 to 6 (its sides); the upper bound 7 is exclusive.
dice = np.random.randint(1, 7)
# 1-2: one step down, 3-5: one step up, 6: roll again and climb that many steps.
if dice <= 2:
    step = step - 1
elif dice <= 5:
    step = step + 1
else:
    step = step + np.random.randint(1, 7)
# Print out dice and step. Both are integers, so convert them explicitly:
# concatenating str and int raises TypeError.
print("dice: " + str(dice) + " and step: " + str(step))
#-------------------------
# Random walk - throwing the dice many times
# Initialize random_walk with the starting position.
random_walk = [0]
for x in range(100):
    # Continue from the most recent position.
    step = random_walk[-1]
    dice = np.random.randint(1, 7)
    if dice <= 2:
        # max() makes sure step can't go below 0
        step = max(0, step - 1)
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)
    random_walk.append(step)
# 101 positions: the start plus one per throw.
print(random_walk)
#-------------------------
# Distribution
# To get an idea about how big your chances are of reaching 60 steps, you can repeatedly simulate the random walk and collect the results. That's exactly what you'll do in this exercise.
# Initialize all_walks (don't change this line)
all_walks = []
# Simulate random walk 10 times
for i in range(10):
    # Code from before: one walk of 100 throws starting at step 0.
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice <= 2:
            step = max(0, step - 1)
        elif dice <= 5:
            step = step + 1
        else:
            step = step + np.random.randint(1, 7)
        random_walk.append(step)
    # Append the finished walk to all_walks (once per simulation).
    all_walks.append(random_walk)
# Print all_walks
print(all_walks)
#-------------------------
# Visualizing all walks
# all_walks is a list of lists: every sub-list represents a single random walk. If you convert this list of lists to a NumPy array, you can start making interesting plots.
# NOTE(review): the exercise assumes numpy and matplotlib are imported and a seed is set; no seed is set in this section.
# initialize and populate all_walks
all_walks = []
for i in range(10):
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice <= 2:
            step = max(0, step - 1)
        elif dice <= 5:
            step = step + 1
        else:
            step = step + np.random.randint(1, 7)
        random_walk.append(step)
    all_walks.append(random_walk)
# Convert all_walks to NumPy array: np_aw (one row per walk, one column per position)
np_aw = np.array(all_walks)
# NOTE(review): plt is not imported anywhere in this file - presumably
# matplotlib.pyplot provided by the exercise environment.
# First attempt: plot np_aw as it is (one row per walk) and show it.
plt.plot(np_aw)
plt.show()
# Wipe the figure before drawing the second version.
plt.clf()
# Transpose so that every row holds the positions of all 10 walks after the
# same number of throws, i.e. every column is one complete walk.
np_aw_t = np_aw.T
# Plot the transposed array and show it.
plt.plot(np_aw_t)
plt.show()
# There's still something we forgot! You're a bit clumsy and you have a 0.1% chance of falling down. That calls for another random number generation. Basically, you can generate a random float between 0 and 1. If this value is less than or equal to 0.001, you should reset step to 0.
# Simulate random walk 250 times
all_walks = []
for i in range(250):
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice <= 2:
            step = max(0, step - 1)
        elif dice <= 5:
            step = step + 1
        else:
            step = step + np.random.randint(1, 7)
        # Implement clumsiness: checked on every throw, after the move.
        if np.random.rand() <= 0.001:
            step = 0
        random_walk.append(step)
    all_walks.append(random_walk)
# Turn the list of walks into an array and transpose it so that each column
# is one walk, then plot and show the result.
# NOTE(review): plt is not imported anywhere in this file - presumably
# matplotlib.pyplot provided by the exercise environment.
np_aw_t = np.array(all_walks).T
plt.plot(np_aw_t)
plt.show()
#-------------------------
# Plot the distribution
# What are the odds that you'll reach 60 steps high on the Empire State Building? Basically, you want to know about the end points of all the random walks you've simulated. These end points have a certain distribution that you can visualize with a histogram
# NOTE(review): the exercise assumes numpy and matplotlib are imported and a seed is set; no seed is set in this section.
# Simulate random walk 500 times
all_walks = []
for i in range(500):
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice <= 2:
            step = max(0, step - 1)
        elif dice <= 5:
            step = step + 1
        else:
            step = step + np.random.randint(1, 7)
        # Clumsiness: a draw of 0.001 or less resets the walk to step 0.
        if np.random.rand() <= 0.001:
            step = 0
        random_walk.append(step)
    all_walks.append(random_walk)
# Array of all walks, transposed so that each row is one point in time.
np_aw_t = np.array(all_walks).T
# The last row holds the final position of every walk; keep it as its own
# array: ends
ends = np_aw_t[-1].copy()
# Histogram of the end points, then display it.
# NOTE(review): plt is not imported anywhere in this file - presumably
# matplotlib.pyplot provided by the exercise environment.
plt.hist(ends)
plt.show()