Skip to content
0
# Importing the pandas module
import pandas as pd

# Load the exam-score dataset from a path relative to the working directory
df = pd.read_csv('data/exams.csv')

# Preview the first rows (DataFrame.head() returns 5 rows by default).
# NOTE(review): as a bare expression this is only displayed when it is the
# last statement of a notebook cell; in a plain script the result is discarded.
df.head()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Preview the first rows of the DataFrame again (same call as right after loading)
df.head()
# Print a concise summary of the DataFrame: columns, non-null counts and dtypes
df.info()

Observation: The data contains 1000 entries and 8 columns

# Count the missing (NaN) values in each column
df.isna().sum()

There are no null values

# List the column labels available in the dataset
df.columns

# Mean reading score per test-prep group, shown as a one-column table
prep_groups = df.groupby("test_prep_course")
prep_groups[["reading"]].mean()

# The same per-group means drawn as a bar chart; labels are set on the
# Axes returned by .plot(), which is also pyplot's current axes
reading_ax = prep_groups["reading"].mean().plot(kind="bar")
reading_ax.set_title("Average reading score with/without test prep course")
reading_ax.set_xlabel("test_prep_course")
reading_ax.set_ylabel("reading")
plt.show()

Observation: The average reading score for students who completed the test prep course is 73.89, while the average for students who did not take it is 66.53. This shows that students who took the test prep course scored higher on the reading exam.

# Number of students of each gender
df["gender"].value_counts()

# Students per test-prep group, with one bar per gender
sns.countplot(x="test_prep_course", hue="gender", data=df)
plt.show()
# Mean math score for each parent education level, computed once and reused
math_means = df.groupby("parent_education_level")["math"].mean()
math_means

# Bar chart of those means; labels are set on the Axes returned by .plot()
math_ax = math_means.plot(kind="bar")
math_ax.set_title("Average math scores for parent education level")
math_ax.set_xlabel("Parent education level")
math_ax.set_ylabel("math")
plt.show()