# Skip to content
# Nested dictionaries: every country name maps to its own small record
# holding the keys 'capital' and 'population'.
europe = {
    'spain': {'capital': 'madrid', 'population': 46.77},
    'france': {'capital': 'paris', 'population': 66.03},
    'germany': {'capital': 'berlin', 'population': 80.62},
    'norway': {'capital': 'oslo', 'population': 5.084},
}

# Chained lookup: the first key picks the country, the second the field
print(europe['france']['capital'])

# Record for a country that is not in europe yet
data = dict(capital='rome', population=59.83)

# Register the record under the key 'italy' (the very same dict object
# is stored, not a copy)
europe.update(italy=data)

# Show the extended dictionary
print(europe)

#---------------------------------------------------------------------------------------------------------
# Pandas pt 1
# Dictionary to DataFrame (1)
import pandas as pd

# Raw column data: one list per future column, seven entries each
names = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Column label -> column values; the keys become the column headers
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# Turn the dictionary into a DataFrame; rows get the default 0..6 index
cars = pd.DataFrame(data=my_dict)
print(cars)

# Dictionary to DataFrame (2)
# Country codes to use as row labels, in the same order as the rows
row_labels = 'US AUS JPN IN RU MOR EG'.split()

# Swap the default integer index for the country codes
cars.index = row_labels

# Same table, now labelled by country code
print(cars)

'''from this:
         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45

to this:
           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JPN          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45'''

# CSV to DataFrame (2)
import pandas as pd

# index_col=0 tells read_csv to use the first CSV column as the row
# labels instead of loading it as an ordinary data column.
cars = pd.read_csv(filepath_or_buffer='cars.csv', index_col=0)

# Show the loaded table
print(cars)

'''
now (main table): 
     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JPN           588          Japan         False
IN             18          India         False
RU            200         Russia          True
MOR            70        Morocco          True
EG             45          Egypt          True 
'''

# Square Brackets (1, 2)
# Reload the cars table, first CSV column as row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Single brackets with one column label: the column comes back as a Series
print(cars['country'])
'''
US     United States
AUS        Australia
JPN            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object
'''

# Double brackets (a list holding one label): a one-column DataFrame
print(cars[['country']])
'''
 country
US   United States
AUS      Australia
JPN          Japan
IN           India
RU          Russia
MOR        Morocco
EG           Egypt
'''

# A list of several labels: a DataFrame restricted to those columns
columns = ['country', 'drives_right']
print(cars[columns])
'''
 country  drives_right
US   United States          True
AUS      Australia         False
JPN          Japan         False
IN           India         False
RU          Russia          True
MOR        Morocco          True
EG           Egypt          True
'''

# Row slicing with square brackets works on zero-based positions and
# excludes the end position.

# Print out first 3 observations (positions 0, 1 and 2)
print(cars[0:3])

# Print out fourth, fifth and sixth observation.
# Counting from zero these are positions 3, 4 and 5, hence the slice 3:6
# (4:7 would give the fifth to seventh rows instead).
print(cars[3:6])
'''
     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JPN           588          Japan         False
     cars_per_cap  country  drives_right
IN             18    India         False
RU            200   Russia          True
MOR            70  Morocco          True
'''

# loc selects by label, iloc by zero-based position; every loc call below
# is paired with the iloc call that should select the same data.
# Row positions in cars.csv order: US=0, AUS=1, JPN=2, IN=3, RU=4, MOR=5, EG=6.
# Column positions in cars.csv order: cars_per_cap=0, country=1, drives_right=2.

# Print out observation for Japan (a single row comes back as a Series)
print(cars.loc['JPN'])
print()
print(cars.iloc[2])
print()

# Print out observations for Australia and Egypt (a list of rows gives a DataFrame)
print(cars.loc[['AUS', 'EG']])
print()
print(cars.iloc[[1, 6]])  # Egypt is the last row, position 6 (5 is Morocco)

# Print out drives_right value of Morocco (row + column selects a single value)
print(cars)
print()
print(cars.loc['MOR', 'drives_right'])
print()
print(cars.iloc[5, 2])  # Morocco is position 5 (6 is Egypt)

# Print out a sub-DataFrame, containing the observations for Russia and
# Morocco and the columns country and drives_right (DataFrame)
print()
print(cars.iloc[[4, 5], [1, 2]])  # rows RU=4 and MOR=5
print()
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

# Print out drives_right column as Series (':' keeps every row)
print(cars.loc[:, 'drives_right'])
print()
print(cars.iloc[:, 2])

# Print out drives_right column as DataFrame (the column is given as a list)
print()
print(cars.loc[:, ['drives_right']])
print()
print(cars.iloc[:, [2]])

# Print out cars_per_cap and drives_right as DataFrame
print()
print(cars.loc[:, ['cars_per_cap', 'drives_right']])
print()
print(cars.iloc[:, [0, 2]])

#-----------------------------------------------------------------------------
# Comparison Operators
'''
 - Select the cars_per_cap column from cars as a Pandas Series and store it as cpc.
 - Use cpc in combination with a comparison operator and 500. You want to end up with a boolean Series that's True if the corresponding country has a cars_per_cap of more than 500 and False otherwise. Store this boolean Series as many_cars.
 - Use many_cars to subset cars, similar to what you did before. Store the result as car_maniac.
 - Print out car_maniac to see if you got it right.
'''
# np.logical_and is used further down; import numpy here so the name is
# bound before its first use in the script.
import numpy as np

# Create car_maniac: observations that have a cars_per_cap over 500
# cpc: the cars_per_cap column as a Series
cpc = cars['cars_per_cap']

# many_cars: boolean Series, True where cars_per_cap is above 500
many_cars = cpc > 500

# Subsetting cars with the boolean Series keeps only the True rows
car_maniac = cars[many_cars]

# Print car_maniac
print(car_maniac)

# or, as a one-liner without the intermediate names
print(cars[cars['cars_per_cap'] > 500])

# Create medium: observations with cars_per_cap between 100 and 500
# (both bounds exclusive); logical_and combines the two masks element-wise
medium = cars[np.logical_and(cpc > 100, cpc < 500)]

#-----------------------------------------------------------------------------
# Loops
areas = [11.25, 18.0, 20.0, 10.75, 9.50]

# enumerate() hands back (index, value) pairs, so every area is printed
# next to its zero-based position in the list.
for room, area in enumerate(areas):
    print("room {}: {}".format(room, area))
# Once the loop is done, room still holds the last index that was visited.
'''
room 0: 11.25
room 1: 18.0
room 2: 20.0
room 3: 10.75
room 4: 9.5
'''

# house is a list of [room name, area] pairs; print "the x is y sqm" for
# each pair, where x is the room name and y its area.
house = [
    ["hallway", 11.25],
    ["kitchen", 18.0],
    ["living room", 20.0],
    ["bedroom", 10.75],
    ["bathroom", 9.50],
]

for x in house:
    print("the {} is {} sqm".format(x[0], x[1]))

# Country -> capital lookup table
europe = {
    'spain': 'madrid',
    'france': 'paris',
    'germany': 'berlin',
    'norway': 'oslo',
    'italy': 'rome',
    'poland': 'warsaw',
    'austria': 'vienna',
}

# items() yields (key, value) pairs, here (country, capital)
for key, value in europe.items():
    print("the capital of {} is {}".format(key, value))

# NOTE(review): np_height, np_baseball and the np alias are not defined
# before this point in the visible file -- presumably the exercise
# environment supplies them; confirm before running this standalone.

# Iterate directly over np_height and print "x inches" for each element x.
for x in np_height:
    print(str(x), "inches")

# np.nditer walks over every single element of np_baseball, one at a time;
# print each of them.
for x in np.nditer(np_baseball):
    print(x)
    
# iterrows() yields one (row label, row contents) pair per row of cars.
# First pass: for every row print a blank line, the label, then the row.
print(cars)
for lab, row in cars.iterrows():
    print("", lab, row, sep="\n")

# Second pass: one "label: cars_per_cap" line per row, so the first
# iteration prints "US: 809", the second "AUS: 731", and so on.
for lab, row in cars.iterrows():
    print(": ".join([lab, str(row['cars_per_cap'])]))

# Add a COUNTRY column holding the upper-cased country name, written
# one cell at a time through loc.
for lab, row in cars.iterrows():
    upper_name = row["country"].upper()
    cars.loc[lab, "COUNTRY"] = upper_name

# Same COUNTRY column without an explicit loop: apply(str.upper) maps the
# whole column in one call, which is preferred because iterrows() is not
# that efficient.
cars["COUNTRY"] = cars["country"].apply(str.upper)

#--------------------------------------------------------------------------------------------
# Random numbers
# Heads or tails
import numpy as np

# Fixed seed, so the same number is drawn on every run
np.random.seed(123)

# randint's upper bound is exclusive: the draw is either 0 or 1
coin = np.random.randint(0, 2)
print(coin)

# 0 counts as heads, anything else as tails
print("heads" if coin == 0 else "tails")
#-------------------------
# Random walk
# Throw the dice once and move accordingly
import numpy as np
np.random.seed(123)  # fixed seed so the throw is reproducible

# Starting step
step = 50

# Roll the dice from 1 to 6 (its sides); randint's upper bound is exclusive
dice = np.random.randint(1, 7)

# 1 or 2: one step down; 3, 4 or 5: one step up;
# 6: throw again and go up by the value of that second throw
if dice <= 2:
    step = step - 1
elif dice <= 5:
    step = step + 1
else:
    step = step + np.random.randint(1, 7)

# Print out dice and step. Both are integers, so they must be converted
# with str() before being joined to the text ("str + int" raises TypeError).
print("dice: " + str(dice) + " and step: " + str(step))
#-------------------------
# Random walk - one hundred dice throws in a row
# random_walk records the position after every throw; it starts at step 0.
random_walk = [0]

for x in range(100):
    # Continue from the position reached by the previous throw
    step = random_walk[-1]
    dice = np.random.randint(1, 7)

    if dice == 6:
        # A six: throw again and climb by the value of that second throw
        step += np.random.randint(1, 7)
    elif dice >= 3:
        # 3, 4 or 5: one step up
        step += 1
    else:
        # 1 or 2: one step down; max() keeps the position from dropping below 0
        step = max(0, step - 1)

    random_walk.append(step)

print(random_walk)
#-------------------------
# Distribution
# To get an idea of how big the chances are of reaching 60 steps, the random
# walk is simulated repeatedly and every run is collected in all_walks.
all_walks = []

# Ten walks of one hundred throws each
for i in range(10):

    # Every walk starts again from step 0
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)

        if dice == 6:
            # A six: go up by the value of a second throw
            step += np.random.randint(1, 7)
        elif dice >= 3:
            step += 1
        else:
            # 1 or 2: one step down, never below 0
            step = max(0, step - 1)
        random_walk.append(step)

    # Keep the finished walk
    all_walks.append(random_walk)

# Show every simulated walk
print(all_walks)
#-------------------------
# Visualizing all walks
# all_walks is a list of lists, one sub-list per random walk. Converted to
# a NumPy array it can be plotted directly.
# NOTE(review): the original note says numpy and matplotlib are imported
# and the seed is set, but plt is not imported anywhere in the visible
# file -- confirm it is provided before running.

# Build all_walks: 10 walks of 100 throws, each starting at step 0
all_walks = []
for i in range(10):
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice == 6:
            step += np.random.randint(1, 7)
        elif dice >= 3:
            step += 1
        else:
            step = max(0, step - 1)
        random_walk.append(step)
    all_walks.append(random_walk)

# One row per walk, one column per throw
np_aw = np.array(all_walks)

# First attempt: plot the array as it is
plt.plot(np_aw)
plt.show()

# Wipe the figure before drawing the next plot
plt.clf()

# After transposing, every row of np_aw_t holds the positions of all 10
# walks after the same throw.
np_aw_t = np_aw.T

# Second attempt: plot the transposed array
plt.plot(np_aw_t)
plt.show()

# One thing is still missing: clumsiness. There is a 0.1% chance of falling
# down on every throw. This is modelled by drawing a random float between
# 0 and 1; if it is less than or equal to 0.001 the position is reset to 0.
# NOTE(review): plt is not imported anywhere in the visible file -- confirm
# it is provided before running.

# Simulate the random walk 250 times
all_walks = []
for i in range(250):
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice == 6:
            step += np.random.randint(1, 7)
        elif dice >= 3:
            step += 1
        else:
            step = max(0, step - 1)

        # Clumsiness check, done after the move: a fall sends the walker
        # back to step 0
        if np.random.rand() <= 0.001:
            step = 0

        random_walk.append(step)
    all_walks.append(random_walk)

# Transpose so that every walk becomes one column, then plot
np_aw_t = np.array(all_walks).T
plt.plot(np_aw_t)
plt.show()
#-------------------------
# Plot the distribution
# What are the odds of reaching 60 steps? That depends on where the
# simulated walks end, so the end points are collected and their
# distribution is shown as a histogram.
# NOTE(review): the original note says numpy and matplotlib are imported
# and the seed is set, but plt is not imported anywhere in the visible
# file -- confirm it is provided before running.

# Simulate the random walk 500 times, clumsiness included
all_walks = []
for i in range(500):
    random_walk = [0]
    for x in range(100):
        step = random_walk[-1]
        dice = np.random.randint(1, 7)
        if dice == 6:
            step += np.random.randint(1, 7)
        elif dice >= 3:
            step += 1
        else:
            step = max(0, step - 1)
        # 0.1% chance per throw of falling back to step 0
        if np.random.rand() <= 0.001:
            step = 0
        random_walk.append(step)
    all_walks.append(random_walk)

# Transpose: one row per throw, one column per walk (no plot at this stage)
np_aw_t = np.array(all_walks).T

# The last row holds the final position of every walk; copy it into ends
ends = np.array(np_aw_t[-1])

# Histogram of the end points
plt.hist(ends)
plt.show()