Skip to content
1 hidden cell
Introduction to Data Visualization with Seaborn
Introduction to Data Visualization with Seaborn
Run the hidden code cell below to import the data used in this course.
1 hidden cell
Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
Add your notes here
# Add your code snippets here
# Plotting libraries used by the snippets below.
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot with `gdp` on the x-axis and `percent_literate` on the y-axis.
# NOTE(review): both names are presumably defined by the hidden data-import
# cell mentioned at the top of the notebook - confirm.
sns.scatterplot(
    x=gdp,
    y=percent_literate,
)
plt.show()

# Imports repeated so this cell can be run on its own.
import matplotlib.pyplot as plt
import seaborn as sns

# Count plot with the `region` values laid out along the y-axis.
sns.countplot(
    y=region,
)
plt.show()
import pandas as pd

# Read the CSV located at `csv_filepath` and print its first rows.
df = pd.read_csv(csv_filepath)
print(df.head())

# Imports repeated so this cell can be run on its own.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Reload the same CSV, then count the rows per value of the "Spiders" column.
df = pd.read_csv(csv_filepath)
sns.countplot(
    data=df,
    x="Spiders",
)
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns

# Absences vs. final grade (G3), with points colored by `location`.
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
)
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Same scatter plot, with the hue levels ordered Rural first, then Urban.
sns.scatterplot(
    data=student_data,
    x="absences",
    y="G3",
    hue="location",
    hue_order=["Rural", "Urban"],
)
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Explicit color for each `location` subgroup.
palette_colors = {"Rural": "green", "Urban": "blue"}

# Number of students per school, split into the location subgroups.
sns.countplot(
    data=student_data,
    x="school",
    hue="location",
    palette=palette_colors,
)
plt.show()

# Change to make subplots based on study time
sns.relplot(
    data=student_data,
    x="absences",
    y="G3",
    kind="scatter",
    col="study_time",
)
plt.show()

# First-period grade (G1) vs. final grade (G3), one column of subplots per
# `schoolsup` value, with "yes" placed before "no".
sns.relplot(
    data=student_data,
    x="G1",
    y="G3",
    kind="scatter",
    col="schoolsup",
    col_order=["yes", "no"],
)
plt.show()

# Same plot, additionally split into rows by `famsup` ("yes" row first).
sns.relplot(
    data=student_data,
    x="G1",
    y="G3",
    kind="scatter",
    col="schoolsup",
    col_order=["yes", "no"],
    row="famsup",
    row_order=["yes", "no"],
)
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns

# Horsepower vs. mpg; `cylinders` drives both the point size and the color.
sns.relplot(
    data=mpg,
    x="horsepower",
    y="mpg",
    kind="scatter",
    hue="cylinders",
    size="cylinders",
)
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Acceleration vs. mpg; `origin` drives both the color and the marker style.
sns.relplot(
    data=mpg,
    x="acceleration",
    y="mpg",
    kind="scatter",
    hue="origin",
    style="origin",
)
plt.show()
# Line plot of mpg by model year; ci="sd" asks for a standard-deviation band.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favor of `errorbar`;
# kept as-is because the seaborn version this notebook targets is unknown.
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
    ci="sd",
)
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Horsepower by model year, one line per `origin`: markers on, dashes off so
# every line shares the same solid style, and no shaded interval (ci=None).
sns.relplot(
    data=mpg,
    x="model_year",
    y="horsepower",
    kind="line",
    hue="origin",
    style="origin",
    markers=True,
    dashes=False,
    ci=None,
)
plt.show()

# Specify the category ordering
study_time_order = [
    "<2 hours",
    "2 to 5 hours",
    "5 to 10 hours",
    ">10 hours",
]

# Box plot of final grade (G3) per study-time category, in the order above.
sns.catplot(
    x="study_time",
    y="G3",
    data=student_data,
    kind="box",
    order=study_time_order,
)
plt.show()

# Box plot of G3 by `romantic`; whis=[0, 100] puts the whiskers at the
# 0th and 100th percentiles, i.e. the min and max values.
sns.catplot(
    data=student_data,
    x="romantic",
    y="G3",
    kind="box",
    whis=[0, 100],
)
plt.show()

# Remove the lines joining the points
# NOTE(review): `join` (seaborn >= 0.13) and `ci` (seaborn >= 0.12) are
# deprecated upstream; kept as-is because the target seaborn version is unknown.
sns.catplot(
    data=student_data,
    x="famrel",
    y="absences",
    kind="point",
    join=False,
    capsize=0.2,
)
plt.show()

# numpy's median is used as the estimator in place of the default mean.
from numpy import median

# Median absences by `romantic`, one series per school, no interval bars.
sns.catplot(
    data=student_data,
    x="romantic",
    y="absences",
    kind="point",
    hue="school",
    estimator=median,
    ci=None,
)
plt.show()

# Set the style to "whitegrid"
sns.set_style("whitegrid")

# Order in which the survey answers are laid out on the x-axis.
category_order = [
    "Never",
    "Rarely",
    "Sometimes",
    "Often",
    "Always",
]

# Count of responses per "Parents Advice" answer.
sns.catplot(
    data=survey_data,
    x="Parents Advice",
    kind="count",
    order=category_order,
)
plt.show()

# Style and palette from an earlier exercise step; both are replaced again
# below before the next plot is drawn.
sns.set_style("whitegrid")
sns.set_palette("Purples")

# Figure-wide settings for the next plot: "paper" context, "darkgrid" style.
sns.set_context("paper")
sns.set_style("darkgrid")

# Two-color custom palette.
custom_palette = ["#39A7D0", "#36ADA4"]
sns.set_palette(custom_palette)

# Age distribution by gender as a box plot.
sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
)
plt.show()

# Create scatter plot
# Weight vs. horsepower scatter plot. relplot returns a FacetGrid, so the
# title is set on the grid's underlying figure.
g = sns.relplot(
    data=mpg,
    x="weight",
    y="horsepower",
    kind="scatter",
)
# Add a title "Car Weight vs. Horsepower"
g.fig.suptitle("Car Weight vs. Horsepower")
# Show plot
plt.show()

# Average MPG per model year. The original code called g.set_title() on the
# FacetGrid above, which has no such method (AttributeError), and would have
# put MPG labels on the weight/horsepower plot. The line plot is drawn on its
# own Axes instead; lineplot's default estimator averages the mpg values that
# share a model year.
fig, ax = plt.subplots()
sns.lineplot(x="model_year", y="mpg", data=mpg, ax=ax)
# Add a title "Average MPG Over Time"
ax.set_title("Average MPG Over Time")
# Add x-axis and y-axis labels
ax.set(xlabel="Car Model Year", ylabel="Average MPG")
# Rotate x-tick labels
plt.xticks(rotation=90)
Explore Datasets
Use the DataFrames imported in the first cell to explore the data and practice your skills!
- From `country_data`, create a scatter plot to look at the relationship between GDP and Literacy. Use color to segment the data points by region.
- Use `mpg` to create a line plot with `model_year` on the x-axis and `weight` on the y-axis. Create differentiating lines for each country of origin (`origin`).
- Create a box plot from `student_data` to explore the relationship between the number of failures (`failures`) and the average final grade (`G3`).
- Create a bar plot from `survey` to compare how `Loneliness` differs across values for `Internet usage`. Format it to have two subplots for gender.
- Make sure to add titles and labels to your plots and adjust their format for readability!