Skip to content
Merge your data
Merge your data
Merging your datasets lets you analyze them together, so you can understand your data holistically.
# Load packages
import pandas as pd

# Upload your data as CSV and load as the first data frame
df1 = pd.read_csv("https://assets.datacamp.com/production/repositories/1646/datasets/c3a701a4729471ae0b92d8c300b470fd2ec0a73a/user_demographics_v1.csv")
df1.head()  # first rows of df1 (rendered as cell output when run in a notebook)

# Upload your data as CSV and load as the second data frame
df2 = pd.read_csv("https://assets.datacamp.com/production/repositories/1646/datasets/5decd183ef3710475958bbc903160fd6354379d5/purchase_data_v1.csv")
df2.head()  # first rows of df2 (rendered as cell output when run in a notebook)

# Choose your merging strategy
# Merge the two data frames on their shared key column.
joined = pd.merge(
    left=df1,  # pick dataframe on the left to merge
    right=df2,  # pick dataframe on the right to merge
    how="inner",  # choose 'inner', 'outer', 'left' or 'right'
    on=["uid"],  # choose column(s) to merge on
    sort=False,  # boolean only: whether to sort the result by the join keys
    indicator=True,  # adds a "_merge" column with information on the source of each row
    suffixes=["_1", "_2"],  # adds suffixes in case of overlapping column names
)

# `sort=` in pd.merge is a boolean flag for the join keys and cannot order the
# result by an arbitrary column, so sort by the desired column(s) explicitly.
# Assumes 'reg_date' comes from only one of the inputs (otherwise it would be
# suffixed to 'reg_date_1' / 'reg_date_2') -- TODO confirm against the data.
joined = joined.sort_values(by=["reg_date"], ignore_index=True)

print(joined.shape)
joined.head()