Skip to content

Merge your data

Merging your datasets into a single data frame lets you understand your data better and analyze it holistically.

# Load packages
import pandas as pd
# Locations of the two source CSV files: user demographics first, purchase
# data second (replace these with the URLs or paths of your own uploads).
USER_DEMOGRAPHICS_CSV = "https://assets.datacamp.com/production/repositories/1646/datasets/c3a701a4729471ae0b92d8c300b470fd2ec0a73a/user_demographics_v1.csv"
PURCHASE_DATA_CSV = "https://assets.datacamp.com/production/repositories/1646/datasets/5decd183ef3710475958bbc903160fd6354379d5/purchase_data_v1.csv"

# Read the first CSV into df1 and preview its top rows
df1 = pd.read_csv(filepath_or_buffer=USER_DEMOGRAPHICS_CSV)
df1.head()
# Read the second CSV into df2 and preview its top rows
df2 = pd.read_csv(filepath_or_buffer=PURCHASE_DATA_CSV)
df2.head()

Choose your merging strategy

# Combine the two data frames on their shared key column(s).
# NOTE: the `sort` argument of pd.merge is a boolean that only controls
# whether the join keys are ordered in the result; it does not accept column
# names (a list is merely treated as "truthy"). Ordering the rows by other
# columns is therefore done explicitly with sort_values() after the merge.
joined = pd.merge(
    left=df1,                # pick dataframe on the left to merge
    right=df2,               # pick dataframe on the right to merge
    how="inner",             # choose 'inner', 'outer', 'left' or 'right'
    on=["uid"],              # choose column(s) to merge on
    sort=False,              # boolean: sort the join keys in the result
    indicator=True,          # adds "_merge" to df with information on the source of each row
    suffixes=("_1", "_2"),   # adds suffixes in case of overlapping column names
)
# Sort values by the given column(s); assumes 'reg_date' is present in the
# merged frame, as the original call implied. The index is rebuilt so that
# row labels follow the new order.
joined = joined.sort_values(by=["reg_date"]).reset_index(drop=True)
print(joined.shape)
joined.head()