Skip to content
Python code cheatsheet
# Import pandas
import pandas as pd
# Import some of the course datasets
wards = pd.read_pickle("ward.p")
census = pd.read_pickle("census.p")
census
wards
#INNER JOIN
wards.shape
census.shape
#MERGING TABLES
wards_census = wards.merge(census, on="ward")
wards_census.shape
wards_census.columns
#rajouter des suffixes pour préciser
wards_census = wards.merge(census, on="ward", suffixes=("_ward", "_cen"))
wards_census
#on peut merger single ou multiple
wards_census = wards.merge(census, on=["ward", "zip"]) #single
#version mutliple
df1.merge(df2, on="col") \
.merge(df3, on ="col") #et ainsi de suite pour chaque
#LEFT JOINS
import pandas as pd
movies = pd.read_pickle("movies.p")
taglines = pd.read_pickle("taglines.p")
movie_to_genres = pd.read_pickle("movie_to_genres.p")
sequels = pd.read_pickle("sequels.p")
sequels
movies_taglines = movies.merge(taglines, on ="id", how = "left")
movies_taglines.shape
#RIGHT JOINS & filtering data
m = movie_to_genres[movie_to_genres["genre"] == "TV Movie"]
#merge with right join
tv_movies = movies.merge(m, how="right", left_on="id", right_on="movie_id")
tv_movies.head()
#OUTER JOINS
#pareil que inner mais on écrit how="outer", et on rajoute des suffixes
#SELF MERGE
original_sequels = sequels.merge(sequels, left_on="sequel", right_on ="id", suffixes=("_org","_seq"))
original_sequels
#merging itself with left join, is the same code but we add how="left"
#MERGING ON INDEXES