Skip to content
0

3 hidden cells
Hidden code
Hidden code
Hidden code
Hidden code

1 hidden cell
# Preview the first few rows of the dataframe.
df.head()
# Print a summary of the dataframe (columns, dtypes, non-null counts).
df.info()
# Count the missing (NaN/None) values in each column.
df.isna().sum()
## Calculate the percentage of missing values in each column relative to the total number of rows
import pandas as pd

# Build a per-column summary of missing data for the dataframe `df`:
#   "Missing Values" - number of null entries in the column
#   "Total_Rows"     - number of rows in `df` (same value on every line)
#   "Missing_Ratio"  - null entries as a percentage of the row count
null_counts = df.isna().sum()
row_count = df.shape[0]

missing_summary = null_counts.to_frame(name="Missing Values")
missing_summary["Total_Rows"] = row_count
missing_summary["Missing_Ratio"] = (
    missing_summary["Missing Values"] / missing_summary["Total_Rows"]
) * 100

print(missing_summary)

Because the main goal is to analyze delays, I will remove the rows that contain null values.

# Keep only the rows that have a value in every one of the columns listed
# below; a row is dropped as soon as any of them is null.
df_clean = df.dropna(
    subset=[
        "dep_time",
        "arr_time",
        "tailnum",
        "air_time",
        "arr_delay",
    ],
    how="any",
)

# Compare the (rows, columns) shape before and after dropping.
print(df.shape)      # before
print(df_clean.shape) # after

# Remaining null count per column in the cleaned dataframe.
df_clean.isna().sum()