Skip to content
# Import pandas
import pandas as pd

# Import some of the course datasets 
wards = pd.read_pickle("ward.p")
census = pd.read_pickle("census.p")
census
wards

#INNER JOIN 
wards.shape
census.shape

#MERGING TABLES
wards_census = wards.merge(census, on="ward")
wards_census.shape
wards_census.columns

#rajouter des suffixes pour préciser
wards_census = wards.merge(census, on="ward", suffixes=("_ward", "_cen"))
wards_census

#on peut merger single ou multiple
wards_census = wards.merge(census, on=["ward", "zip"]) #single
#version mutliple
df1.merge(df2, on="col") \
    .merge(df3, on ="col") #et ainsi de suite pour chaque
#LEFT JOINS
import pandas as pd

movies = pd.read_pickle("movies.p")
taglines = pd.read_pickle("taglines.p")
movie_to_genres = pd.read_pickle("movie_to_genres.p")
sequels = pd.read_pickle("sequels.p")
sequels

movies_taglines = movies.merge(taglines, on ="id", how = "left")
movies_taglines.shape

#RIGHT JOINS & filtering data

m = movie_to_genres[movie_to_genres["genre"] == "TV Movie"]

#merge with right join
tv_movies = movies.merge(m, how="right", left_on="id", right_on="movie_id")
tv_movies.head()

#OUTER JOINS
#pareil que inner mais on écrit how="outer", et on rajoute des suffixes

#SELF MERGE 

original_sequels = sequels.merge(sequels, left_on="sequel", right_on ="id", suffixes=("_org","_seq"))
original_sequels

#merging itself with left join, is the same code but we add how="left"

#MERGING ON INDEXES