Intermediate Python
Run the hidden code cell below to import the data used in this course.

# Import the course packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import the two datasets

# NOTE(review): year, pop, gdp_cap and life_exp are not defined anywhere in the
# visible source; presumably they come from the dataset import that is missing
# after the "Import the two datasets" comment — confirm before running.

### Print the last item from year and pop

print(year[-1], pop[-1])

### Import matplotlib.pyplot as plt

import matplotlib.pyplot as plt

### Make a line plot: year on the x-axis, pop on the y-axis

plt.plot(year, pop)

### Display the plot with plt.show()

plt.show()

### Print the last item of gdp_cap and life_exp

print(gdp_cap[-1], life_exp[-1])

### Make a line plot, gdp_cap on the x-axis, life_exp on the y-axis

plt.plot(gdp_cap, life_exp)

### Display the plot

plt.show()

### Change the line plot below to a scatter plot

plt.scatter(gdp_cap, life_exp)

### Put the x-axis on a logarithmic scale

plt.xscale('log')

### Show plot

plt.show()

### Build histogram with 5 bins

plt.hist(life_exp, bins=5)

### Show and clean up plot

plt.show()
# Clear the current figure before drawing the next histogram; otherwise both
# plots would end up on the same graph.
plt.clf()

### Build histogram with 20 bins

plt.hist(life_exp, bins=20)

### Show and clean up again

plt.show()
plt.clf()

### Scatter plot

plt.scatter(gdp_cap, life_exp)

### Previous customizations

plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')

### Definition of tick_val and tick_lab

# tick_val holds the x positions, tick_lab the label shown at each position.
tick_val = [1000, 10000, 100000]
tick_lab = ['1k', '10k', '100k']

### Adapt the ticks on the x-axis

plt.xticks(tick_val, tick_lab)

### After customizing, display the plot

plt.show()

### Import numpy as np

import numpy as np

### Store pop as a numpy array: np_pop

np_pop = np.array(pop)

### Double np_pop

# Element-wise doubling; the result is used as the marker size below.
np_pop = np_pop * 2

### Update: set s argument to np_pop

plt.scatter(gdp_cap, life_exp, s=np_pop)

### Previous customizations

plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

### Display the plot

plt.show()

### Scatter plot

# NOTE(review): `col` is not defined in the visible source; presumably a list
# of per-point colors supplied by the course environment — confirm.
plt.scatter(x=gdp_cap, y=life_exp, s=np.array(pop) * 2, c=col, alpha=0.8)

### Previous customizations

plt.xscale('log')
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000], ['1k', '10k', '100k'])

# Text labels placed at the given (x, y) data coordinates.
plt.text(1550, 71, 'India')
plt.text(5700, 80, 'China')

plt.grid(True)

### Show the plot

plt.show()
#############################################################################################

### Definition of dictionary

europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
}

europe["italy"] = "rome"

### Print out italy in europe

print("italy" in europe)

europe["poland"] = "warsaw"

### Print europe

print(europe)

### Update capital of germany

europe["germany"] = "berlin"

### Remove australia

# 'australia' is not a key of the dictionary defined above, so a plain `del`
# would raise KeyError. pop() with a default removes the key when present and
# is a no-op otherwise.
europe.pop("australia", None)

### Print europe

print(europe)

### Dictionary of dictionaries

# Each country maps to a sub-dictionary with 'capital' and 'population' keys.
europe = {
    'spain': {'capital': 'madrid', 'population': 46.77},
    'france': {'capital': 'paris', 'population': 66.03},
    'germany': {'capital': 'berlin', 'population': 80.62},
    'norway': {'capital': 'oslo', 'population': 5.084},
}

### Print out the capital of France

print(europe["france"]["capital"])

### Create sub-dictionary data

data = dict(capital="rome", population=59.83)

### Add data to europe under key 'italy'

# The same `data` object (not a copy) is stored under the new key.
europe.update(italy=data)

### Print europe

print(europe)

## Let's talk a bit about Pandas

import pandas as pd

### Build cars DataFrame

# Three parallel lists: entry i of each list describes the same country.
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Dictionary keys become the DataFrame's column labels.
cars_dict = {'country': names, 'drives_right': dr, 'cars_per_cap': cpc}

cars = pd.DataFrame(cars_dict)

print(cars)

### Definition of row_labels

row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']

### Specify row labels of cars

cars.index = row_labels

### Print cars again

print(cars)

### Import pandas as pd

import pandas as pd

### Fix import by including index_col

# index_col=0 uses the first CSV column as the row labels.
cars = pd.read_csv('cars.csv', index_col=0)

### Print out cars

print(cars)

### Import cars data

import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

### Print out country column as Pandas Series

# Attribute access and single-bracket access select the same column.
print(cars.country)
print(cars["country"])

### Print out country column as Pandas DataFrame

print(cars[['country']])

### Print out DataFrame with country and drives_right columns

print(cars[['country', 'drives_right']])

### Print out first 3 observations

print(cars.iloc[:3])

### Print out fourth, fifth and sixth observation

print(cars.iloc[3:6])

### Print out observation for Japan

# Label-based (loc) and position-based (iloc) lookups.
print(cars.loc["JPN"])

print(cars.iloc[2])

### Print out observations for Australia and Egypt

print(cars.loc[["AUS", 'EG']])

print(cars.iloc[[1, 6]])

### Print out drives_right value of Morocco

print(cars.loc["MOR", 'drives_right'])

### Print sub-DataFrame

print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

## FILTERING WITH PANDAS

### Import cars data

import pandas as pd
import numpy as np
cars = pd.read_csv('cars.csv', index_col=0)

### Create car_maniac: observations that have a cars_per_cap over 500

# The comparison yields a boolean Series that is used as a row mask.
car_maniac = cars[cars.cars_per_cap > 500]

### Print car_maniac

print(car_maniac)

### Create medium: observations with cars_per_cap between 100 and 500

# Both bounds are exclusive: rows equal to 100 or 500 are not selected.
medium = cars[np.logical_and(cars.cars_per_cap > 100, cars.cars_per_cap < 500)]

### Print medium

print(medium)

## LOOPS

### areas list

areas = [11.25, 18.0, 20.0, 10.75, 9.50]

### Change for loop to use enumerate() and update print()

# enumerate() yields (position, value) pairs, starting at position 0.
for index, area in enumerate(areas):
    print('room', index, ':', area, sep='')

### house list of lists

house = [
    ["hallway", 11.25],
    ["kitchen", 18.0],
    ["living room", 20.0],
    ["bedroom", 10.75],
    ["bathroom", 9.50],
]

### Build a for loop from scratch

# Each element is a [room name, area] pair.
for element in house:
    print('the ', element[0], " is ", element[1], " sqm", sep='')

### Definition of dictionary

europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
    'italy': 'rome',
    'poland': 'warsaw',
    'austria': 'vienna',
}

### Iterate over europe

# items() yields (country, capital) pairs.
for k, v in europe.items():
    print("the capital of ", k, " is ", v, sep='')

### Import numpy as np

import numpy as np

### For loop over np_height

# NOTE(review): np_height and np_baseball are not defined in the visible
# source; presumably NumPy arrays provided by the course environment — confirm.
for height in np_height:
    print(height, ' inches', sep='')

### For loop over np_baseball

# np.nditer visits the array one element at a time.
for element in np.nditer(np_baseball):
    print(element)

### Import cars data

import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

### Iterate over rows of cars

# iterrows() yields (row label, row as a Series) pairs.
for lab, row in cars.iterrows():
    print(lab)
    print(row)

for lab, row in cars.iterrows():
    print(lab + ': ' + str(row["cars_per_cap"]))

### Code for loop that adds COUNTRY column

# Writes one cell per iteration; the first assignment creates the COUNTRY column.
for lab, row in cars.iterrows():
    cars.loc[lab, "COUNTRY"] = row["country"].upper()

print(cars)

### Import cars data

import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

### Use .apply(str.upper)

# Same result as the loop above, computed for the whole column in one call.
cars["COUNTRY"] = cars.country.apply(str.upper)

print(cars)

## THE PROJECT

### Import numpy and set seed

import numpy as np

# Fixed seed so the random draws below are reproducible.
np.random.seed(123)

# The result is discarded, but the call still advances the generator state.
np.random.rand()

### Use randint() to simulate a dice

# randint(1, 7) draws an integer from 1 to 6 (the upper bound is exclusive).
print(np.random.randint(1, 7))

### Use randint() again to see if it's the same result

print(np.random.randint(1, 7))

step = 50

### Roll the dice

dice = np.random.randint(1, 7)

### Finish the control construct

if dice <= 2:
    # 1 or 2: go one step down.
    step -= 1
elif dice <= 5:
    # 3, 4 or 5: go one step up.
    step += 1
else:
    # 6: roll again and go up by that many steps.
    step = step + np.random.randint(1, 7)

### Print out dice and step

print(dice)
print(step)

# Seed the generator so the simulated walk is reproducible.
np.random.seed(123)

# NumPy is imported, seed is set
# Initialize random_walk
random_walk = [0]

# Simulate 100 dice rolls, recording the position after each one
for x in range(100):
    # Set step: last element in random_walk
    step = random_walk[-1]

    # Roll the dice
    dice = np.random.randint(1, 7)

    # Determine next step
    if dice <= 2:
        step = step - 1
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)

    # Append the new position to random_walk
    random_walk.append(step)

# Print random_walk
print(random_walk)

#### We can do something more interesting than the previous code: its result contains negative steps, which is impossible

## NumPy is imported, seed is set

## Initialize random_walk
random_walk = [0]

# Simulate 100 dice rolls, recording the position after each one
for x in range(100):
    step = random_walk[-1]
    dice = np.random.randint(1, 7)

    if dice <= 2:
        # Use max to make sure step can't go below 0
        step = max(0, step - 1)
    elif dice <= 5:
        step = step + 1
    else:
        step = step + np.random.randint(1, 7)

    random_walk.append(step)

print(random_walk)

### Let's visualize this random walk!

# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Plot random_walk (list index on the x-axis, position on the y-axis)
plt.plot(random_walk)

# Show the plot
plt.show()