Course Notes: Exploratory Data Analysis in Python

Course Notes

# Import any packages you want to use here
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Load the books dataset from CSV into a DataFrame.
books = pd.read_csv(filepath_or_buffer="datasets/clean_books.csv")

# Preview the first five rows of the DataFrame (n=5 is also the default).
books.head(n=5)

# Print a summary of the DataFrame, especially the column data types and
# how many non-null (i.e. not missing) values each column holds.
books.info()

# Summary statistics (count, mean, std, quartiles, ...) for the numerical
# columns.
books.describe()

# Count how often each distinct value occurs in one specific column.
genre_column = books["genre"]
genre_column.value_counts()

# Basic Seaborn histogram of the "rating" column; binwidth sets the width
# of each bin on the x-axis.
sns.histplot(
    data=books,
    x="rating",
    binwidth=0.1,
)
# Render the figure.
plt.show()

# Inspect the data type of every column.
books.dtypes

# Change a column's data type: cast "year" to int, then write the converted
# column back into the DataFrame.
year_as_int = books["year"].astype(int)
books["year"] = year_as_int

# Validate categorical values: the genres we accept in the "genre" column.
valid_genres = ["Fiction", "Non Fiction"]

# isin() returns a Boolean Series of the same length as the column:
# True where the genre is one of the accepted values, False otherwise.
genre_is_valid = books["genre"].isin(valid_genres)
genre_is_valid

# The tilde negates the mask, flagging the rows with an unexpected genre.
~genre_is_valid

# Put the Boolean mask in brackets to filter: this keeps only the rows
# whose genre is valid.
books[genre_is_valid]

‌
‌
‌