Skip to content
Intermediate Data Visualization with Seaborn
Intermediate Data Visualization with Seaborn
Run the hidden code cell below to import the data used in this course.
# Importing the course packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
# Importing the course datasets
# Every file is read from the relative 'datasets/' directory.
bike_share = pd.read_csv('datasets/bike_share.csv')
college_data = pd.read_csv('datasets/college_datav3.csv')
daily_show = pd.read_csv('datasets/daily_show_guests_cleaned.csv')
insurance = pd.read_csv('datasets/insurance_premiums.csv')
# index_col=0 uses the first CSV column as the row index of the grants data.
grants = pd.read_csv('datasets/schoolimprovement2010grants.csv', index_col=0)

# Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
Add your notes here
# Display the first five rows of the dataset. bike_share is the DataFrame
# already read from 'datasets/bike_share.csv' when the course datasets
# were imported, so it is not reloaded here.
bike_share.head()
# First look at the column data types
bike_share.dtypes

# Convert the 'dteday' column from object to datetime
bike_share['dteday'] = pd.to_datetime(bike_share['dteday'])
bike_share

# Calculate relationships between temperature and total rentals
# Apply the dark-grid theme and the RdBu palette before drawing.
sns.set_style('darkgrid')
sns.set_palette('RdBu')
# lmplot is a figure-level function that builds its own FacetGrid figure, so
# a preceding plt.figure(figsize=(12, 12)) only leaves an empty extra figure
# and never resizes the plot. The 12x12-inch size is requested through
# height/aspect instead. fit_reg=False draws the scatter points only.
sns.lmplot(
    x="temp",
    y="total_rentals",
    data=bike_share,
    fit_reg=False,
    height=12,
    aspect=1,
)
plt.xlabel('Temperature ')
plt.ylabel('Total of rental')
plt.title('relation between temperature and Total rental', color='b')

# Filter working and non-working days using the query function
working_non_working = bike_share.query(
    'workingday == 1 | workingday == 0'  # inside query(), `|` is a logical OR
)
working_non_working

# Draw one regression line per workingday value (working vs. non-working days)
sns.lmplot(
    data=working_non_working,
    x="temp",
    y="total_rentals",
    hue="workingday",
    fit_reg=True,
)
plt.title('Working day and non working day', color='orange')
plt.show()

# Create two new columns named daily_show and year
# 'daily_show' receives the day-of-month component of dteday and 'year' its
# year component; the .dt accessor relies on dteday being a datetime column.
bike_share['daily_show'] = bike_share['dteday'].dt.day
bike_share['year'] = bike_share['dteday'].dt.year
bike_share

# Group the rows by year and calculate the mean of daily_show for each year
daily_show_change = (
    bike_share
    .groupby('year')['daily_show']
    .agg('mean')
)
daily_show_change
# Count the rows for every (daily_show, year) combination: daily_show values
# become the table's rows and years its columns.
pd_cross_tab = pd.crosstab(bike_share['daily_show'], bike_share['year'])
print(pd_cross_tab.head())

# Draw the counts as a heatmap.
# NOTE(review): fmt='d' only formats cell annotations, so it has no effect
# unless annot=True is also passed -- confirm whether annotations are wanted.
sns.heatmap(pd_cross_tab, fmt='d', linewidths=5, cmap='YlGnBu')
plt.yticks(rotation=45)
plt.show()