Earthquake Prediction Model with Machine Learning

!pip install pandas numpy matplotlib seaborn scikit-learn scikeras geopandas --quiet

# ========================================
# 1. Import Libraries
# ========================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasRegressor  # ← FIXED

import warnings
warnings.filterwarnings("ignore")

# ========================================
# 2. Load the Dataset
# ========================================
# Download from: https://github.com/thecleverprogrammer/Earthquake-Prediction-Model
df = pd.read_csv('database.csv')  # Make sure file is in your directory

print(df.columns)

# ========================================
# 3. Data Preprocessing
# ========================================
# Convert Date and Time to Timestamp (Unix time)
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce')
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S', errors='coerce').dt.time

# Combine Date and Time into a full datetime
df['Timestamp'] = pd.to_datetime(df['Date'].astype(str) + ' ' + df['Time'].astype(str), errors='coerce')
df['Timestamp'] = (df['Timestamp'] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')  # Unix timestamp

# Select relevant features
data = df[['Timestamp', 'Latitude', 'Longitude', 'Depth', 'Magnitude']].dropna()

# Features (X) and Targets (y)
X = data[['Timestamp', 'Latitude', 'Longitude']]
y = data[['Magnitude', 'Depth']]

print(X.shape, y.shape)

# ========================================
# 4. Scale the Data
# ========================================
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, random_state=42
)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# ========================================
# 5. Define Neural Network Model
# ========================================
from keras.models import Sequential
from keras.layers import Dense

def create_model(neurons=32, activation='relu', optimizer='adam'):
    model = Sequential()
    model.add(Dense(neurons, input_dim=3, activation=activation))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(2))  # Magnitude + Depth
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

# ========================================
# 6. Hyperparameter Tuning (Grid Search) — ONLY ONE!
# ========================================
!pip install scikeras --quiet

from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV

model = KerasRegressor(model=create_model, verbose=0)

param_grid = {
    'model__neurons': [32, 64],
    'model__activation': ['relu'],
    'model__optimizer': ['adam'],
    'batch_size': [32],
    'epochs': [50]
}

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=1
)

grid_result = grid.fit(X_train, y_train)

print(f"Best: {grid_result.best_score_:.6f} using {grid_result.best_params_}")

# ========================================
# 7. Train Final Model with Best Params
# ========================================
best_params = grid_result.best_params_
print("Best Params:", best_params)

final_model = create_model(
    neurons=best_params['model__neurons'],
    activation=best_params['model__activation'],
    optimizer=best_params['model__optimizer']
)

history = final_model.fit(
    X_train, y_train,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    validation_split=0.2,
    verbose=1
)

# ========================================
# 8. Evaluate on Test Data
# ========================================
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Predict
y_pred_scaled = final_model.predict(X_test)

# Safe reshape
if y_pred_scaled.ndim == 1:
    y_pred_scaled = y_pred_scaled.reshape(-1, 1)
y_pred_scaled = y_pred_scaled.reshape(-1, 2)

# Inverse transform
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_orig = scaler_y.inverse_transform(y_test)

# Metrics
rmse_mag = np.sqrt(mean_squared_error(y_test_orig[:, 0], y_pred[:, 0]))
rmse_depth = np.sqrt(mean_squared_error(y_test_orig[:, 1], y_pred[:, 1]))
r2_mag = r2_score(y_test_orig[:, 0], y_pred[:, 0])
r2_depth = r2_score(y_test_orig[:, 1], y_pred[:, 1])

print(f"Magnitude - RMSE: {rmse_mag:.3f}, R²: {r2_mag:.3f}")
print(f"Depth - RMSE: {rmse_depth:.3f}, R²: {r2_depth:.3f}")

# === PREDICT A NEW EARTHQUAKE ===
import numpy as np

# Example: Future quake in California
new_data = np.array([[ 
    1767225600,   # Timestamp: Jan 1, 2026
    34.05,        # Lat: Los Angeles
    -118.25       # Lon
]])

new_scaled = scaler_X.transform(new_data)
pred_scaled = final_model.predict(new_scaled)
pred = scaler_y.inverse_transform(pred_scaled)

print(f"Predicted Magnitude: {pred[0][0]:.2f}")
print(f"Predicted Depth: {pred[0][1]:.1f} km")

!pip install plotly --quiet

# ========================================
# 9. Interactive Map (Bonus)
# ========================================
!pip install plotly --quiet
import plotly.express as px

fig = px.scatter_mapbox(
    data,
    lat='Latitude', lon='Longitude',
    color='Magnitude', size='Magnitude',
    color_continuous_scale='Reds',
    size_max=10, zoom=1, height=600,
    mapbox_style="open-street-map",
    title="Earthquake Locations (1965–2016)"
)
fig.update_layout(margin=dict(l=0, r=0, t=40, b=0))
fig.show()
fig.write_html("earthquake_map.html")

# === PREDICTION + MAP OVERLAY ===
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

future_quakes = pd.DataFrame({
    'Timestamp': [1767225600] * 5,
    'Latitude': [35.6, 36.7, -33.9, 38.8, 34.0],
    'Longitude': [-118.2, 138.2, -71.5, 35.2, -118.4],
    'Location': ['California', 'Japan', 'Chile', 'Turkey', 'Los Angeles']
})

X_future = future_quakes[['Timestamp', 'Latitude', 'Longitude']]
X_future_scaled = scaler_X.transform(X_future)
pred_scaled = final_model.predict(X_future_scaled)
pred = scaler_y.inverse_transform(pred_scaled)
future_quakes['Pred_Magnitude'] = pred[:, 0]

# === PLOT ===
# === REAL EARTHQUAKES (red) ===
fig = px.scatter_mapbox(
    data,
    lat='Latitude', lon='Longitude',
    color='Magnitude',
    size='Magnitude',
    color_continuous_scale='Reds',
    size_max=8,
    zoom=1,
    height=700,
    title="Earthquake Prediction: Real (Red) vs Predicted (Blue)",
    mapbox_style="open-street-map"
)

# === PREDICTED QUAKES (blue + labels) ===
fig.add_trace(go.Scattermapbox(
    lat=future_quakes['Latitude'],
    lon=future_quakes['Longitude'],
    mode='markers+text',
    marker=dict(size=future_quakes['Pred_Magnitude'] * 2, color='blue', opacity=0.8),
    text=future_quakes['Location'] + "<br>M" + future_quakes['Pred_Magnitude'].round(1).astype(str),
    textposition="top center",
    name="Predicted"
))

# === STYLE ===
fig.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    coloraxis_colorbar=dict(title="Real Magnitude")
)

fig.show()
fig.write_html("earthquake_prediction_overlay.html")

# ========================================
# 8. Time-Series Animated Earthquake Map
# ========================================

df = pd.read_csv('database.csv')

# Create Datetime column for animation
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%m/%d/%Y %H:%M:%S', errors='coerce')

# Drop any rows with invalid dates (small fraction)
df = df.dropna(subset=['Datetime'])

# Extract Year for animation frame
df['Year'] = df['Datetime'].dt.year

# Sort by Year for proper animation sequencing
df = df.sort_values('Year')

# Create animated scatter map
fig_animated = px.scatter_mapbox(
    df,
    lat='Latitude',
    lon='Longitude',
    color='Magnitude',
    size='Magnitude',
    animation_frame='Year',  # Animate over years
    animation_group='Magnitude',  # Groups points for smooth transitions
    color_continuous_scale='Reds',
    size_max=20,  # Max bubble size
    zoom=1,  # Initial zoom level (world view)
    height=700,
    mapbox_style="open-street-map",
    title="Earthquake Animation Over Time (1965–2016)"
)

# Update layout for better visuals
fig_animated.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),
    coloraxis_colorbar=dict(title="Magnitude")
)

# Show the animated figure
fig_animated.show()