Skip to content
Project
# Package imports
import pandas as pd
from scipy.stats import iqr
def IQR(column):
q25, q75 = column.quantile([0.25, 0.75])
return q75-q25
# Load the data from London Bridge
lb = pd.read_csv('data/10-11_London_Bridge.txt') # Comma-separated .txt file
# Take only the first three columns
df = lb.iloc[:, :3]
# Rename columns
df.columns = ['datetime', 'water_level', 'is_high_tide']
# Convert to datetime
df['datetime'] = pd.to_datetime(df['datetime'])
# Convert to float
df['water_level'] = df.water_level.astype(float)
# Create extra month and year columns for easy access
df['month'] = df['datetime'].dt.month
df['year'] = df['datetime'].dt.year
# Filter df for high and low tide
tide_high = df.query('is_high_tide==1')['water_level']
tide_low = df.query('is_high_tide==0')['water_level']
# Create summary statistics
high_statistics = tide_high.agg(['mean', 'median', IQR])
low_statistics = tide_low.agg(['mean', 'median', IQR])
# Calculate ratio of high tide days
all_high_days = df.query('is_high_tide==1').groupby('year').count()['water_level']
high_days = df.query(f'(water_level>{tide_high.quantile(.75)}) & (is_high_tide==1)').groupby('year').count()['water_level']
high_ratio = (high_days/all_high_days).reset_index()
# Calculate ratio of low tide days
all_low_days = df.query('is_high_tide==0').groupby('year').count()['water_level']
low_days = df.query(f'(water_level<{tide_low.quantile(.25)}) & (is_high_tide==0)').groupby('year').count()['water_level']
low_ratio = (low_days/all_low_days).reset_index()
solution = {'high_statistics': high_statistics, 'low_statistics': low_statistics, 'high_ratio': high_ratio, 'low_ratio':low_ratio}
print(solution)