Skip to content
Common Graphs used for Data Visualization
Frequency Table
"""
source: https://heartbeat.comet.ml/exploratory-data-analysis-eda-for-categorical-data-870b37a79b65
"""
import pandas as pd
# Column labels applied to the data file, in file order.
cols = [
    "buying",
    "maint",
    "doors",
    "persons",
    "lug_boot",
    "safety",
    "class",
]
# Supplying `names` means the first line of the file is read as data, not as
# a header; `header=None` only spells that default out.
car_data = pd.read_csv("car.data", header=None, names=cols)
def frequency_table(data: pd.DataFrame, col: str, column: str):
    """Build two-way absolute and relative frequency tables for two columns.

    Args:
        data: Frame holding both columns to cross-tabulate.
        col: Name of the column whose values become the table rows.
        column: Name of the column whose values become the table columns.

    Returns:
        A ``(freq_table, rel_table)`` pair. ``freq_table`` holds the
        cross-tabulated counts including the "All" margin row and column.
        ``rel_table`` divides every count by the "All" total of its column
        and rounds the result to two decimals.

    Raises:
        KeyError: If ``col`` or ``column`` is not a column of ``data``.
    """
    freq_table = pd.crosstab(
        index=data[col],
        columns=data[column],
        margins=True,
    )
    # The "All" row is a Series indexed by column label, so the division is
    # aligned per column: each cell is divided by its own column total.
    column_totals = freq_table.loc["All"]
    rel_table = freq_table.div(column_totals, axis="columns").round(2)
    return freq_table, rel_table
# Cross-tabulate "class" (rows) against "buying" (columns).
buying_freq, buying_rel = frequency_table(car_data, "class", "buying")

# Emit both tables in one call; `sep="\n"` puts each item on its own line,
# exactly as separate print() calls would.
print(
    "Two-way frequency table",
    buying_freq,
    "---" * 15,
    "Two-way relative frequency table",
    buying_rel,
    sep="\n",
)
Bar Graph
"""
Starter code from tutorials point
see: https://bit.ly/3x9Z6HU
"""
import matplotlib.pyplot as plt
# Dataset creation: one frequency per category, matched by position.
programming_languages = ['C', 'C++', 'Java', 'Python', 'PHP', "Other", "None"]
employees_frequency = [23, 17, 35, 29, 12, 5, 38]

# Bar graph creation. Draw through the Axes returned by subplots() so the
# figure being configured is explicit rather than pyplot's implicit
# "current axes".
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(programming_languages, employees_frequency)
# Title typo fixed: "langauge" -> "language".
ax.set_title("The number of employees competent in a programming language")
ax.set_xlabel("Programming languages")
ax.set_ylabel("Frequency")
plt.show()
Pie Charts
"""
Example to demonstrate how a pie chart can be used to represent the market
share for smartphones.
Note: These are not real figures. They were created for demonstration purposes.
"""
import numpy as np
from matplotlib import pyplot as plt
# Dataset creation: one share per vendor, matched by position.
smartphones = ["Apple", "Samsung", "Huawei", "Google", "Other"]
market_share = [50, 30, 5, 12, 2]
# NOTE(review): these values sum to 99, not 100. pie() sizes each wedge as
# x / sum(x), so the printed percentages will differ slightly from the raw
# numbers above - confirm whether that is intended.

# Pie chart creation, drawn on the explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.pie(market_share, labels=smartphones, autopct='%1.2f%%')
ax.set_title(
    "Smartphone Marketshare for April 2021 - April 2022",
    fontsize=14,
)
plt.show()
Line Graphs and Area Charts
import matplotlib.pyplot as plt
# Data creation: units sold on each sampled date, matched by position.
sneakers_sold = [10, 12, 8, 7, 7, 10]
dates = ["Jul '1", "Jul '7", "Jul '14", "Jul '21", "Jul '28", "Jul '31"]

# Line graph creation, drawn on the explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(dates, sneakers_sold)
ax.set_title("Sneakers sold in Jul")
ax.set_ylim(0, 15)  # Fix the y-axis range so the line is not auto-zoomed.
ax.set_xlabel("Dates")
ax.set_ylabel("Number of sales")
plt.show()
# Area chart creation: same data as the line graph, with the region between
# the curve and y = 0 filled in.
fig, ax = plt.subplots(figsize=(10, 6))
ax.fill_between(dates, sneakers_sold)
ax.set_title("Sneakers sold in Jul")
ax.set_ylim(0, 15)  # Fix the y-axis range.
ax.set_xlabel("Dates")
ax.set_ylabel("Number of sales")
plt.show()
# Data creation: one row of sales per brand, one column per date.
sneakers_sold = [
    [3, 4, 2, 4, 3, 1],
    [3, 2, 6, 1, 3, 5],
    [4, 6, 0, 2, 1, 4],
]
dates = ["Jul '1", "Jul '7", "Jul '14", "Jul '21", "Jul '28", "Jul '31"]

# Multiple (stacked) area chart creation: each row is stacked on top of the
# previous one, and the labels feed the legend.
fig, ax = plt.subplots(figsize=(10, 6))
ax.stackplot(dates, sneakers_sold, labels=["Nike", "Adidas", "Puma"])
ax.set_title("Sneakers sold in Jul")
ax.set_ylim(0, 15)  # Fix the y-axis range.
ax.set_xlabel("Dates")
ax.set_ylabel("Number of sales")
ax.legend()
plt.show()
Histogram
import numpy as np
import matplotlib.pyplot as plt
# 100 random floats; no seed is set, so the graph will change with each run.
data = np.random.sample(size=100)

# Histogram creation: bucket the samples into 6 equal-width bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(data, bins=6)
ax.set_title("The distribution of data")
ax.set_xlabel("Data")
ax.set_ylabel("Frequency")
plt.show()
Scatter plots
import numpy as np
import matplotlib.pyplot as plt
# Data creation.
temperature = np.array([30, 21, 19, 25, 28, 28])  # Degrees Celsius
ice_cream_sales = np.array([482, 393, 370, 402, 412, 450])

# Calculate the line of best fit (ordinary least squares).
# Design matrix: a column of temperatures plus a column of ones, so that
# theta[0] is the slope and theta[1] is the intercept.
X_reshape = temperature.reshape(temperature.shape[0], 1)
X_reshape = np.append(X_reshape, np.ones((temperature.shape[0], 1)), axis=1)
y_reshaped = ice_cream_sales.reshape(ice_cream_sales.shape[0], 1)
# Solve the least-squares problem directly instead of inverting X^T X:
# explicit inversion is numerically fragile and raises on a singular matrix
# (for example when every temperature is identical).
theta, *_ = np.linalg.lstsq(X_reshape, y_reshaped, rcond=None)
# Fitted sales for each observed temperature, shape (n, 1).
best_fit = X_reshape.dot(theta)
# Create the scatter chart and overlay the fitted line, both on the same
# explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(temperature, ice_cream_sales)
ax.plot(temperature, best_fit, color="red")
ax.set_title("The impact of weather on ice cream sales")
ax.set_xlabel("Temperature (Celsius)")
ax.set_ylabel("Ice cream sales")
plt.show()
Heatmaps