Skip to content
EDA Practice on Automotive Data
EDA Practice in Pandas - Chris Gochis
This notebook contains some practice EDA on an automotive dataset. The goal was to extract insights using descriptive statistics and charts.
# Import Packages and Modules
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#From Scikit Learn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report
# Read in AUTO.csv file into a data frame and call it Auto.
# Load AUTO.csv into a pandas DataFrame named Auto.
Auto = pd.read_csv(
    "AUTO.csv",
    sep=",",
    encoding="ISO-8859-1",
)

# Preview the first rows of the frame.
Auto.head()

# Preview the last rows of the frame.
Auto.tail()

# The CSV appears to have been read in with rows 106-189 full of NaN.
# Drop the rows at positions 106 through 189 (bottom rows with no data).
Auto = Auto.drop(index=Auto.index[106:190])

# Preview the tail again to confirm those rows are gone.
Auto.tail()
# The NaN rows are gone. Next, check the data type of each column.
Auto.dtypes

# Weight is shown as object: remove the commas from the Weight values
# and convert the column to float.
Auto['Weight (lbs)'] = (
    Auto['Weight (lbs)']
    .str.replace(',', '')
    .astype(float)
)
Auto['Weight (lbs)']

# Confirm the type conversion worked.
Auto.dtypes
# Success! Next task: replace any missing values with the median value of
# the variable (feature).

# Check whether any column contains a NaN.
Auto.isnull().any()

# Luggage has NaN values; look at rows 85 through 89 to find them.
Auto.iloc[85:90]
# Rows 87 and 88 are NaN.