Skip to content
Space Missions
Data validation & Cleaning
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the raw data and lower-case the column headers in a single pass so the
# rest of the script can refer to columns by one consistent spelling.
lowercase_headers = {
    'Company': 'company',
    'Location': 'location',
    'Mission': 'mission',
    'Date': 'date',
    'Time': 'time',
    'Rocket': 'rocket',
    'RocketStatus': 'rocketstatus',
    'Price': 'price',
    'MissionStatus': 'missionstatus',
}
sp = pd.read_csv('space.csv').rename(columns=lowercase_headers)
Hidden code
# Changing datatypes: cast the text columns to str.
# A plain astype(str) turns missing values into the literal string 'nan', which
# hides them from the null-count check performed later in the script, so the
# entries that were missing before the cast are masked back to NaN afterwards.
text_columns = [
    'company',
    'location',
    'mission',
    'rocket',
    'rocketstatus',
    'facility',
    'missionstatus',
]
for text_column in text_columns:
    was_present = sp[text_column].notna()
    sp[text_column] = sp[text_column].astype(str).where(was_present)
Hidden output
# There are missing values in the time column; impute them in two passes.
# Step 1: parse the 12-hour 'HH:MM:SS AM/PM' strings. Only a clock time is
# parsed, so every value lands on the same placeholder date.
parsed_time = pd.to_datetime(sp['time'], format='%I:%M:%S %p')
# Step 2: express each launch time as a timedelta since midnight (missing
# values stay NaT) so that a mean can be computed on the column.
sp['time'] = parsed_time - parsed_time.dt.normalize()
# Step 3: mean time per (facility, country); the Series and its dict form,
# keyed by (facility, country) tuples, are kept for later inspection.
mean_time = sp.groupby(['facility', 'country'])['time'].mean()
mean_time_dict = mean_time.to_dict()
# Step 4: fill each missing time with the mean of its own (facility, country)
# group. transform('mean') returns the group mean aligned to every row; mapping
# the time values through mean_time_dict can never match its tuple keys, so
# that approach would leave every gap unfilled.
group_mean_time = sp.groupby(['facility', 'country'])['time'].transform('mean')
sp['time'] = sp['time'].fillna(group_mean_time)
# Step 5: groups with no known time at all are still missing, so fill the
# balance with the mean of the whole dataset.
m_time = sp['time'].mean()
sp['time'] = sp['time'].fillna(m_time)
# Report how many values are still missing in each column of interest.
null = [
    'location',
    'company',
    'facility',
    'mission',
    'date',
    'time',
    'rocket',
    'rocketstatus',
    'price',
    'missionstatus',
    'country',
]
missing_mask = sp[null].isna()
null_count = missing_mask.sum()
print(null_count)
# There are missing values in the price column.
# NOTE(review): assumes 'price' is already numeric at this point - confirm.
# First step: mean price per (facility, country).
mean_price = sp.groupby(['facility', 'country'])['price'].mean()
# Dict form of the group means, keyed by (facility, country) tuples; kept for
# later inspection.
price_dict = mean_price.to_dict()
# Fill each missing price with the mean of its own (facility, country) group.
# The fill must start from the price column itself (not 'time'), and the group
# mean has to be aligned row by row with transform('mean'), because price
# values can never match the tuple keys of price_dict.
group_mean_price = sp.groupby(['facility', 'country'])['price'].transform('mean')
sp['price'] = sp['price'].fillna(group_mean_price)
Objective one: How have rocket launches trended across time? Has mission success rate increased?
# Making a copy of the df to work with, so the 'year' column added below does
# not touch the cleaned frame `sp`. DataFrame.copy() is the pandas-native way
# to get an independent copy.
import copy  # retained in case other cells still reference the module
sp_c = sp.copy()
# Extracting the launch year from the date column
sp_c['year'] = pd.to_datetime(sp_c['date']).dt.year
# Total mission success: rows whose status is exactly 'Success'
mission_success = sp_c[sp_c['missionstatus'] == 'Success']
# Mission success per year.
# NOTE: despite its name, 'success_rate' holds the COUNT of successful missions
# in each year, not a proportion; the column name is kept unchanged for the
# code that consumes this frame.
mission_success_per_year = (
    mission_success.groupby('year').size().rename('success_rate').reset_index()
)
print(mission_success_per_year)
Hidden output
# Rows whose status is exactly 'Failure' (any other status label is excluded).
is_failure = sp_c['missionstatus'] == 'Failure'
mission_failure = sp_c[is_failure]
# Number of failed missions in each year, as a two-column frame
# ('year', 'failure_rate'); the second column is a count per year.
failures_by_year = mission_failure.groupby('year').size()
mission_failure_per_year = failures_by_year.rename('failure_rate').reset_index()
print(mission_failure_per_year)
Hidden output
# Wrap the yearly success and failure tables in DataFrame objects under the
# shorter names `success` and `failure`.
success, failure = (
    pd.DataFrame(yearly_counts)
    for yearly_counts in (mission_success_per_year, mission_failure_per_year)
)