Joining Data with pandas
Run the hidden code cell below to import a few of the datasets used in this course.
Note: There are a large number of datasets in the datasets/ folder. Many of these are Pickle files, which you can read using pd.read_pickle(path_to_file). An example is included in the cell below.
# Import pandas
import pandas as pd
# Import some of the course datasets.
# actors_movies is read from a CSV file; business_owners and casts are read
# from pickle files in the same datasets/ folder.
# NOTE(review): unpickling can execute arbitrary code, so only load .p files
# that come from a trusted source.
actors_movies = pd.read_csv("datasets/actors_movies.csv")
business_owners = pd.read_pickle("datasets/business_owners.p")
casts = pd.read_pickle("datasets/casts.p")
# Preview one of the DataFrames
casts
--Inner Join
--Look at the ward data
wards = pd.read_csv('Ward_Officies.csv')
print(wards.head())
print(wards.shape)
--The census data
census = pd.read_csv('Ward_Cencus.csv')
print(census.head())
print(census.shape)
--Inner join
wards_census = wards.merge(census, on='ward')
print(wards_census.head(4))
print(wards_census.columns)
--Suffixes
wards_census = wards.merge(census, on='ward', suffixes=('_ward','_cen'))
print(wards_census.head())
print(wards_census.shape)
Add your notes here
# Join the licenses table to the biz_owners table on the shared 'account' column
licenses_owners = pd.merge(licenses, biz_owners, on='account')

# For each title, count how many non-null accounts appear in the merged table
counted_df = licenses_owners.groupby('title')[['account']].count()

# Order the titles by account count in descending order
sorted_df = counted_df.sort_values(by='account', ascending=False)

# Print the first few rows of sorted_df
print(sorted_df.head())
# Add your code snippets here