# Location or Size: What Influences House Prices in Mexico?
#
# Importing and Preparing Mexico Data
#Import Libraries
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px# Read the CSV with dollar sign and commas
# Load the three raw Mexico real-estate CSV extracts (latin1-encoded).
df1 = pd.read_csv("mexico-real-estate-1.csv", encoding="latin1")
df2 = pd.read_csv("mexico-real-estate-2.csv", encoding="latin1")
df3 = pd.read_csv("mexico-real-estate-3.csv", encoding="latin1")

# Drop the leading column of each frame. In df1 it is named "s/n"; for df2
# and df3 the first column is dropped by position (presumably the same kind
# of serial-number column -- confirm against the CSV headers).
df1.drop(columns="s/n", inplace=True)
df2.drop(columns=df2.columns[0], inplace=True)
df3.drop(columns=df3.columns[0], inplace=True)

# Print object type and shape for DataFrames
print("df1 type:", type(df1))
print("df1 shape:", df1.shape)
print()
print("df2 type:", type(df2))
print("df2 shape:", df2.shape)
print()
print("df3 type:", type(df3))
print("df3 shape:", df3.shape)
df3.head()

# Clean df1
# Inspect df1 before cleaning: shape, column dtypes / null counts, first rows.
print(df1.shape)
df1.info()
df1.head()

# Drop every row of df1 that contains a null value.
df1.dropna(inplace=True)

# "price_usd" is read as text containing "$" and "," characters; strip both
# and convert the remainder to float.
df1["price_usd"] = (
    df1["price_usd"]
    .str.replace(r"[\$,]", "", regex=True)
    .astype(float)
)

# Print object type, shape, and head
print("df1 type:", type(df1))
print("df1 shape:", df1.shape)
df1.head()

# Clean df2
# Exchange rate used to convert df2 prices: 19 pesos to the dollar in 2014.
MXN_PER_USD = 19

# Drop every row of df2 that contains a null value.
df2.dropna(inplace=True)

# Create "price_usd" from "price_mxn", rounded to two decimals, then drop
# "price_mxn" so it is no longer carried in df2.
df2["price_usd"] = (df2["price_mxn"] / MXN_PER_USD).round(2)
df2.drop(columns="price_mxn", inplace=True)

# Print object type, shape, and head
print("df2 type:", type(df2))
print("df2 shape:", df2.shape)
df2.head()

# Clean df3
# Drop every row of df3 that contains a null value.
df3.dropna(inplace=True)

# Split the comma-separated "lat-lon" text into "lat" and "lon" columns.
# str.split yields strings, so cast to float to make the coordinates numeric
# before df3 is combined with the other frames.
df3[["lat", "lon"]] = (
    df3["lat-lon"].str.split(",", expand=True).astype(float)
)

# Print object type, shape, and head
print("df3 type:", type(df3))
print("df3 shape:", df3.shape)
df3.head()

# Create "state" from the third "|"-separated field (index 2) of
# "place_with_parent_names".
df3["state"] = df3["place_with_parent_names"].str.split("|", expand=True)[2]

# Drop the two source columns now that their contents have been extracted.
df3.drop(columns=["place_with_parent_names", "lat-lon"], inplace=True)

# Print object type, shape, and head
print("df3 type:", type(df3))
print("df3 shape:", df3.shape)
df3.head()

# Concatenate DataFrames
# Stack the three cleaned frames row-wise into a single DataFrame. Each
# frame's existing row labels are kept as-is (no re-indexing).
df = pd.concat([df1, df2, df3], axis=0)

# Print object type, shape, and head
print("df type:", type(df))
print("df shape:", df.shape)
df.head()

# Exploratory Data Analysis