Skip to content
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Load the boat listings into a DataFrame and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='boat_data.csv')
df.head(n=5)

Data Cleaning

# Give the long view-count column a short name (renamed in place).
column_aliases = {'Number of views last 7 days': 'Views'}
df.rename(columns=column_aliases, inplace=True)

# Report the table dimensions, then the per-column summary from info().
row_count, column_count = df.shape
print((row_count, column_count))
df.info()

# Count missing values in every column.
df.isna().sum()

Split Price into Amount (Price) and Currency

# The raw Price text is split on whitespace: the first token becomes the
# currency code and the second token replaces Price.
price_tokens = df['Price'].str.split(expand=True)
df[['Currency', 'Price']] = price_tokens

# Inspect the distinct currency codes; per the original note, the pound
# symbol does not come through cleanly in the raw data.
print(df['Currency'].value_counts())

# Every code that is not EUR, CHF or DKK is relabelled as GBP.
recognised_codes = ['EUR', 'CHF', 'DKK']
needs_gbp_label = ~df['Currency'].isin(recognised_codes)
df.loc[needs_gbp_label, 'Currency'] = 'GBP'

# Price is still text after the split; cast it to integers.
df['Price'] = df['Price'].astype(int)
df.head()
df['Currency'].value_counts()

Since EUR, CHF and GBP don't differ too much in value, we will only scale DKK prices to match the other currencies

# Rescale DKK prices by a fixed factor of 0.13, truncating each result to a
# whole number, so they are comparable with the other currencies.
is_dkk = df['Currency'] == 'DKK'
dkk_prices = df.loc[is_dkk, 'Price']
df.loc[is_dkk, 'Price'] = dkk_prices.apply(lambda price: int(price * 0.13))
df.head()

Update Location based on currency

# Collapse Location to one coarse region label per currency.
region_by_currency = {
    'EUR': 'Euro',
    'CHF': 'Switzerland',
    'GBP': 'United Kingdom',
    'DKK': 'Denmark',
}
for currency_code, region_label in region_by_currency.items():
    df.loc[df['Currency'] == currency_code, 'Location'] = region_label
df.head()

Fill null values of Manufacturer

# List the distinct values found in the Manufacturer column.
df['Manufacturer'].unique()

As there are too many different manufacturers, we will put Unknown for null values