# Project: PH Earthquake Prediction Model Scripts
import pandas as pd
# Load the raw PHIVOLCS earthquake export and preview its first rows.
phivolcs_earthquake_data_raw = pd.read_csv('phivolcs_earthquake_data_raw.csv')
# Bare expression: only displayed by an interactive/notebook front end.
phivolcs_earthquake_data_raw.head()

import pandas as pd
# Reload the raw export and compute summary statistics for every column
# (include='all' covers non-numeric columns as well).
phivolcs_earthquake_data_raw = pd.read_csv('phivolcs_earthquake_data_raw.csv')
phivolcs_earthquake_data_raw.describe(include='all')

import pandas as pd
# Reload the raw export and inspect the dtype pandas inferred for each column.
phivolcs_earthquake_data_raw = pd.read_csv('phivolcs_earthquake_data_raw.csv')
phivolcs_earthquake_data_raw.dtypes

import pandas as pd
# Clean the raw export: drop incomplete rows, coerce column types, round the
# coordinates/magnitude, de-duplicate, and write the result to
# 'phivolcs_earthquake_data_cleaned.csv'.
phivolcs_earthquake_data_raw = pd.read_csv('phivolcs_earthquake_data_raw.csv')

# Take an explicit copy so the column assignments below operate on an
# independent frame rather than on something derived from the raw one.
phivolcs_earthquake_data_cleaned = phivolcs_earthquake_data_raw.dropna().copy()

# No errors='coerce' here on purpose: a malformed timestamp should fail loudly
# instead of silently turning into a dropped row.
phivolcs_earthquake_data_cleaned['Date_Time_PH'] = pd.to_datetime(
    phivolcs_earthquake_data_cleaned['Date_Time_PH']
)

# Cells that cannot be parsed as numbers become NaN and are removed by the
# second dropna() further down.
for _column in ('Latitude', 'Longitude', 'Magnitude', 'Depth_In_Km'):
    phivolcs_earthquake_data_cleaned[_column] = pd.to_numeric(
        phivolcs_earthquake_data_cleaned[_column], errors='coerce'
    )

# Coordinates and magnitude: force float and keep two decimals.
for _column in ('Latitude', 'Longitude', 'Magnitude'):
    phivolcs_earthquake_data_cleaned[_column] = (
        phivolcs_earthquake_data_cleaned[_column].astype(float).round(2)
    )

# Depth is stored as a nullable integer. Round first: casting a float column
# that holds a fractional value straight to 'Int64' raises a TypeError.
phivolcs_earthquake_data_cleaned['Depth_In_Km'] = (
    phivolcs_earthquake_data_cleaned['Depth_In_Km'].round().astype('Int64')
)

phivolcs_earthquake_data_cleaned = (
    phivolcs_earthquake_data_cleaned
    .dropna()
    .drop_duplicates()
    .reset_index(drop=True)
)

phivolcs_earthquake_data_cleaned.to_csv('phivolcs_earthquake_data_cleaned.csv', index=False)
phivolcs_earthquake_data_cleaned.head()

import pandas as pd
# Reload the cleaned file and summarise it. The frame is bound to the same
# name that is inspected on the next line, so the summary reflects what is on
# disk rather than whatever an earlier step left in memory.
phivolcs_earthquake_data_cleaned = pd.read_csv('phivolcs_earthquake_data_cleaned.csv')
phivolcs_earthquake_data_cleaned.describe(include='all')

import pandas as pd
# Reload the cleaned file and list its column dtypes. The frame is bound to
# the same name that is inspected on the next line, so the dtypes shown are
# those of the file that was just read.
phivolcs_earthquake_data_cleaned = pd.read_csv('phivolcs_earthquake_data_cleaned.csv')
phivolcs_earthquake_data_cleaned.dtypes

import pandas as pd
# Feature engineering on the cleaned catalogue: calendar parts of the event
# time plus the distance / direction / place pieces parsed out of the
# free-text 'Location' column. Result is written to
# 'earthquake_new_features.csv'.
phivolcs_earthquake_data_cleaned = pd.read_csv('phivolcs_earthquake_data_cleaned.csv')

# Derive the calendar columns from the event timestamp rather than copying
# them out of 'earthquake_new_features.csv': that is the very file this step
# writes at the end, and no earlier step in this file creates it.
# NOTE(review): assumes Year/Month/Day/Hour are meant to be the components of
# 'Date_Time_PH' - confirm against how the earlier CSV was produced.
_event_time = pd.to_datetime(phivolcs_earthquake_data_cleaned['Date_Time_PH'])
phivolcs_earthquake_data_cleaned['Year'] = _event_time.dt.year
phivolcs_earthquake_data_cleaned['Month'] = _event_time.dt.month
phivolcs_earthquake_data_cleaned['Day'] = _event_time.dt.day
phivolcs_earthquake_data_cleaned['Hour'] = _event_time.dt.hour

# "<distance> km <direction> of <place>".
# The Direction group is lazy so it stops at the FIRST " of "; a greedy group
# would run on to the last one and swallow part of names like "City of X".
location_split = phivolcs_earthquake_data_cleaned['Location'].str.extract(
    r'(?P<Distance>\d+\.?\d*)\s*km\s*(?P<Direction>[A-Za-z\s\d°]+?)\s*of\s*(?P<Place>.+)'
)

phivolcs_earthquake_data_cleaned['Distance'] = location_split['Distance']
# Literal (non-regex) removal of any stray 'km' captured with the direction.
phivolcs_earthquake_data_cleaned['Direction'] = (
    location_split['Direction'].str.replace('km', '', regex=False).str.strip()
)
phivolcs_earthquake_data_cleaned['Place'] = location_split['Place']

phivolcs_earthquake_data_cleaned.to_csv('earthquake_new_features.csv', index=False)
phivolcs_earthquake_data_cleaned[['Year', 'Month', 'Day', 'Hour', 'Distance', 'Direction', 'Place']].head()

import pandas as pd
# Add human-readable month and weekday names and save the result to
# 'earthquake_added_features.csv'.
earthquake_new_features = pd.read_csv('earthquake_new_features.csv')

# Assemble the real calendar date of each event from its Year/Month/Day
# columns. Parsing the day-of-month on its own (format '%d') yields a date in
# January 1900, whose weekday says nothing about the actual event.
# Rows with missing or impossible date parts become NaT instead of raising.
_date_parts = earthquake_new_features[['Year', 'Month', 'Day']].rename(columns=str.lower)
_event_date = pd.to_datetime(_date_parts, errors='coerce')

earthquake_new_features['Month_Str'] = _event_date.dt.month_name()
earthquake_new_features['Day_Str'] = _event_date.dt.day_name()

earthquake_new_features.to_csv('earthquake_added_features.csv', index=False)
earthquake_new_features[['Year', 'Month', 'Day', 'Hour', 'Distance', 'Direction', 'Place', 'Month_Str', 'Day_Str']].head()

import pandas as pd
import re
# Reload the engineered feature table for place-name normalisation.
earthquake_new_features = pd.read_csv(filepath_or_buffer='earthquake_new_features.csv')
def normalize_place(place):
    """Return a lower-cased, whitespace-normalised version of a place string.

    When the text contains more than one parenthesised group, the first group
    is removed. Runs of whitespace are collapsed to a single space and the
    result is stripped, so values that differ only in spacing or case compare
    equal.

    Args:
        place: The raw place text. Non-string values (e.g. NaN/None for
            missing data) are passed through unchanged.

    Returns:
        The normalised string, or ``place`` itself when it is not a string.
    """
    if not isinstance(place, str):
        return place

    if len(re.findall(r'\(.*?\)', place)) > 1:
        # Drop only the first "(...)" group; later groups are kept.
        place = re.sub(r'\(.*?\)', '', place, count=1)

    # Strip after collapsing so leading/trailing blanks never survive,
    # whichever branch was taken above.
    return re.sub(r'\s+', ' ', place).strip().lower()
# Apply normalize_place to every 'Place' value and store the result in a new
# 'Place_Normalized' column.
earthquake_new_features['Place_Normalized'] = earthquake_new_features['Place'].apply(normalize_place)
def extract_town_province(place_normalized):
    """Split a "town (province)" string into title-cased town and province.

    Args:
        place_normalized: Place text, expected in the form
            ``"<town> (<province>)"``. Non-string values are passed through.

    Returns:
        A ``(town, province)`` tuple. ``province`` is ``None`` when the text
        does not end with a parenthesised group; for non-string input the
        tuple is ``(place_normalized, None)``.
    """
    if not isinstance(place_normalized, str):
        return place_normalized, None

    # Strip first: trailing whitespace would otherwise defeat the "$" anchor
    # and the province would be lost.
    text = place_normalized.strip()
    match = re.match(r'^(.*?)\s*\((.*?)\)$', text)
    if match is None:
        return text.title(), None

    town, province = match.groups()
    return town.strip().title(), province.strip().title()
# Split every normalised place into 'Town' and 'Province' columns.
# The pairs are collected once and turned into a single DataFrame (aligned on
# the existing index) instead of building one pd.Series per row.
_town_province_pairs = [
    extract_town_province(place)
    for place in earthquake_new_features['Place_Normalized']
]
earthquake_new_features[['Town', 'Province']] = pd.DataFrame(
    _town_province_pairs,
    index=earthquake_new_features.index,
    columns=['Town', 'Province'],
)

# Every row whose normalised place occurs more than once (keep=False marks
# all members of each duplicate group).
place_anomalies = earthquake_new_features[
    earthquake_new_features.duplicated('Place_Normalized', keep=False)
]

earthquake_new_features.to_csv('earthquake_updated_features.csv', index=False)
place_anomalies[['Place', 'Place_Normalized', 'Town', 'Province']].drop_duplicates()

import pandas as pd
# Reload the table with the Town/Province columns and summarise every column.
earthquake_updated_features = pd.read_csv('earthquake_updated_features.csv')
earthquake_updated_features.describe(include='all')

import pandas as pd
# Reload the table with the Town/Province columns and list its column dtypes.
earthquake_updated_features = pd.read_csv('earthquake_updated_features.csv')
earthquake_updated_features.dtypes

import pandas as pd
import re
# Final type coercion and text tidy-up of the feature table; the result is
# written to 'earthquake_cleaned_features.csv'.
earthquake_updated_features = pd.read_csv('earthquake_updated_features.csv')

# Rows with a missing value in ANY column are discarded (this includes rows
# for which no province could be extracted).
earthquake_updated_features = earthquake_updated_features.dropna().copy()

earthquake_updated_features = earthquake_updated_features.astype({
    'Year': int,
    'Month': int,
    'Day': int,
    'Hour': int,
    'Distance': float,
    'Direction': str,
    'Place': str,
})

# Remove stray parentheses, then strip and title-case. Stripping comes after
# the parenthesis removal so whitespace exposed by it does not survive at the
# edges of the value.
for _column in ('Province', 'Town'):
    earthquake_updated_features[_column] = (
        earthquake_updated_features[_column]
        .str.replace(r'[()]', '', regex=True)
        .str.strip()
        .str.title()
    )

earthquake_updated_features.to_csv('earthquake_cleaned_features.csv', index=False)
earthquake_updated_features.head()

import pandas as pd
# Load the cleaned feature table whose 'Province' column is corrected below.
earthquake_cleaned_features = pd.read_csv('earthquake_cleaned_features.csv')
# Reference list of accepted province spellings (title-cased). It is used
# further down in an isin() check to flag rows whose province is not listed.
# NOTE(review): 'Southern Samar' does not look like an official province name
# and plain 'Samar' is absent - confirm against the source data before
# relying on the anomaly report.
valid_provinces = [
'Abra', 'Agusan Del Norte', 'Agusan Del Sur', 'Aklan', 'Albay', 'Antique', 'Apayao', 'Aurora',
'Basilan', 'Bataan', 'Batanes', 'Batangas', 'Benguet', 'Biliran', 'Bohol', 'Bukidnon', 'Bulacan',
'Cagayan', 'Camarines Norte', 'Camarines Sur', 'Camiguin', 'Capiz', 'Catanduanes', 'Cavite',
'Cebu', 'Compostela Valley', 'Cotabato', 'Davao Del Norte', 'Davao Del Sur', 'Davao Occidental',
'Davao Oriental', 'Dinagat', 'Eastern Samar', 'Guimaras', 'Ifugao', 'Ilocos Norte',
'Ilocos Sur', 'Iloilo', 'Isabela', 'Kalinga', 'La Union', 'Laguna', 'Lanao Del Norte',
'Lanao Del Sur', 'Leyte', 'Maguindanao', 'Marinduque', 'Masbate', 'Metro Manila', 'Misamis Occidental',
'Misamis Oriental', 'Mountain Province', 'Negros Occidental', 'Negros Oriental', 'Northern Samar', 'North Cotabato',
'Nueva Ecija', 'Nueva Vizcaya', 'Occidental Mindoro', 'Oriental Mindoro', 'Palawan', 'Pampanga',
'Pangasinan', 'Quezon', 'Quirino', 'Rizal', 'Romblon', 'Southern Samar', 'Southern Leyte', 'Sarangani', 'Siquijor',
'Sorsogon', 'South Cotabato', 'Sultan Kudarat', 'Sulu', 'Surigao Del Norte',
'Surigao Del Sur', 'Tarlac', 'Tawi-Tawi', 'Zambales', 'Zamboanga Del Norte', 'Zamboanga Del Sur',
'Zamboanga Sibugay'
]
# Canonicalise misspelled or variant province names.
# Each entry pairs the regex pattern(s) that identify a variant with the
# spelling it is replaced by. Every pattern is wrapped in ".*" on both sides,
# so a match replaces the entire cell value. Entries are applied one after
# another, in the order listed.
_PROVINCE_CORRECTIONS = (
    ([r'.*Dinagat.*'], 'Dinagat'),
    ([r'.*Compostela.*', r'.*Compostella.*'], 'Compostela Valley'),
    ([r'.*Samal.*'], 'Davao Del Norte'),
    ([r'.*Camarinessur.*', r'.*Camarines Surl.*'], 'Camarines Sur'),
    ([r'.*Cataduanes.*', r'.*Catadauanes.*'], 'Catanduanes'),
    ([r'.*South Cotobato.*'], 'South Cotabato'),
    ([r'.*Sultan Kudaratl.*', r'.*Sutan Kudarat.*'], 'Sultan Kudarat'),
    ([r'.*Northen Samar.*'], 'Northern Samar'),
    ([r'.*Negors Oriental.*'], 'Negros Oriental'),
    ([r'.*Negros Occedental.*'], 'Negros Occidental'),
    ([r'.*Saragani.*', r'.*Saranggani.*'], 'Sarangani'),
    ([r'.*Oriemtal Mindoro.*'], 'Oriental Mindoro'),
    ([r'.*Davao Occidenta.*', r'.*Davao Occidentall.*'], 'Davao Occidental'),
    ([r'.*Davao Orientall.*', r'.*Davao Orinetal.*'], 'Davao Oriental'),
    (
        [r'.*Easterm Samar.*', r'.*Easter Samar.*', r'.*Eastren Samar.*', r'.*Eatern Samar.*'],
        'Eastern Samar',
    ),
    ([r'.*Lanao Del Nortel.*'], 'Lanao Del Norte'),
    ([r'.*Agusan De Sur.*'], 'Agusan Del Sur'),
)

for _variant_patterns, _canonical_name in _PROVINCE_CORRECTIONS:
    _province_column = earthquake_cleaned_features['Province']
    earthquake_cleaned_features['Province'] = _province_column.replace(
        to_replace=_variant_patterns, value=_canonical_name, regex=True
    )
# Rows whose province is not in valid_provinces. The frame is only bound to
# a name here (for inspection); it is not written to disk.
anomalies = earthquake_cleaned_features[
    ~earthquake_cleaned_features['Province'].isin(valid_provinces)
]

# Persist the full table (anomalous rows included) for the next step.
earthquake_cleaned_features.to_csv('earthquake_preprocessed_features.csv', index=False)
earthquake_cleaned_features.head()

import pandas as pd
# Flag rows whose 'Town' is not one of the accepted town names and write them
# to 'town_anomalies.csv'.
earthquake_preprocessed_features = pd.read_csv('earthquake_preprocessed_features.csv')

# Accepted town names; any other value is reported as an anomaly below.
valid_towns = [
    'Hinatuan', 'Governor Generoso', 'Calatagan', 'Jose Abad Santos', 'General Luna',
    'Lubang', 'Tulunan', 'Dalupiri Island', 'Kiblawan', 'New Bataan',
]

town_anomalies = earthquake_preprocessed_features[
    ~earthquake_preprocessed_features['Town'].isin(valid_towns)
]
town_anomalies.to_csv('town_anomalies.csv', index=False)
town_anomalies.head()

import pandas as pd
# Reload the preprocessed feature table and compute summary statistics for
# all columns, numeric and non-numeric alike.
earthquake_preprocessed_features = pd.read_csv(
    filepath_or_buffer='earthquake_preprocessed_features.csv'
)
earthquake_preprocessed_features.describe(include='all')