Skip to content

Intermediate Data Visualization with Seaborn

Run the hidden code cell below to import the data used in this course.

# Importing the course packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Importing the course datasets
# Each CSV is read from the relative 'datasets/' directory into a module-level
# DataFrame. Only bike_share is referenced again in the visible part of this file.
bike_share = pd.read_csv('datasets/bike_share.csv')
college_data = pd.read_csv('datasets/college_datav3.csv')
daily_show = pd.read_csv('datasets/daily_show_guests_cleaned.csv')
insurance = pd.read_csv('datasets/insurance_premiums.csv')
# index_col=0 makes the file's first column the row index instead of a data column.
grants = pd.read_csv('datasets/schoolimprovement2010grants.csv', index_col=0)

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Load the bike-share data and preview its first five rows.
bike_share = pd.read_csv('datasets/bike_share.csv')
bike_share.head(n=5)
# Inspect the dtype pandas assigned to each column before any conversion.
bike_share.dtypes
# Convert the 'dteday' column to datetime values (the original notes describe
# it as an object column at this point).
parsed_dates = pd.to_datetime(bike_share['dteday'])
bike_share['dteday'] = parsed_dates
bike_share
# Relationship between temperature and total rentals (scatter only, no fit line).
sns.set_style('darkgrid')
sns.set_palette('RdBu')
# lmplot is a figure-level function: it always builds its own figure, so a
# preceding plt.figure(figsize=(12, 12)) only leaves an empty extra figure and
# never resizes this plot. The 12 x 12 inch size is requested through
# height/aspect instead.
sns.lmplot(x="temp", y="total_rentals", data=bike_share, fit_reg=False,
           height=12, aspect=1)
plt.xlabel('Temperature ')
plt.ylabel('Total of rental')
plt.title('relation between temperature and Total rental', color='b')
# Render this figure on its own, as the other plot sections in this file do.
plt.show()
# Keep the rows whose 'workingday' value is either 1 or 0, using DataFrame.query.
day_type_filter = 'workingday == 1 | workingday == 0'
working_non_working = bike_share.query(day_type_filter)
working_non_working
# Draw one regression fit per 'workingday' value (hue) on shared axes.
sns.lmplot(
    data=working_non_working,
    x="temp",
    y="total_rentals",
    hue="workingday",
    fit_reg=True,
)
# Title the axes lmplot just created (the current axes).
plt.gca().set_title('Working day and non working day', color='orange')
plt.show()
# Create two new columns from 'dteday': the day of the month (stored under the
# name 'daily_show') and the year.
# NOTE(review): the 'daily_show' column shares its name with the daily_show
# DataFrame loaded earlier but holds day-of-month values -- confirm the name
# is intentional.
date_parts = bike_share['dteday'].dt
bike_share['daily_show'] = date_parts.day
bike_share['year'] = date_parts.year
bike_share
# Group by year and compute the mean of the 'daily_show' column per year.
daily_show_change = bike_share.groupby('year')['daily_show'].mean()
daily_show_change

# Cross-tabulate 'daily_show' (rows) against 'year' (columns); each cell is the
# number of rows in bike_share with that combination.
pd_cross_tab = pd.crosstab(bike_share['daily_show'], bike_share['year'])
print(pd_cross_tab.head())
# fmt only formats cell annotations, so it has no effect unless annot=True is
# also passed; 'd' suits the integer counts produced by crosstab.
sns.heatmap(pd_cross_tab, annot=True, fmt='d', linewidths=5, cmap='YlGnBu')
# Tilt the row labels for readability.
plt.yticks(rotation=45)
plt.show()