Skip to content

Location or Size: What Influences House Prices in Mexico?

Importing and Preparing Mexico Data

#Import Libraries
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
# Load the three raw Mexico real-estate CSV files, decoding them as Latin-1.
raw_files = (
    "mexico-real-estate-1.csv",
    "mexico-real-estate-2.csv",
    "mexico-real-estate-3.csv",
)
df1, df2, df3 = (pd.read_csv(path, encoding="latin1") for path in raw_files)

# df1's extra column is removed by its name ("s/n" — presumably a serial
# number; confirm against the data). For df2 and df3 the leading column is
# removed by position instead.
df1.drop(columns="s/n", inplace=True)
for frame in (df2, df3):
    frame.drop(columns=frame.columns[0], inplace=True)

# Report the object type and shape of each DataFrame, separated by blank lines
print("df1 type:", type(df1))
print("df1 shape:", df1.shape, end="\n\n")
print("df2 type:", type(df2))
print("df2 shape:", df2.shape, end="\n\n")
print("df3 type:", type(df3))
print("df3 shape:", df3.shape)

# Preview the first rows of df3
df3.head()

Clean df1

# Report the dimensions of df1 before cleaning
print(df1.shape)

# Print a summary of df1
df1.info()

# Preview the first rows of df1
df1.head()

# Remove every row of df1 that has a missing value
df1.dropna(inplace=True)

# Strip "$" and "," out of the "price_usd" strings, then convert them to floats
price_text = df1["price_usd"].str.replace(r"[\$,]", "", regex=True)
df1["price_usd"] = price_text.astype(float)

# Report the object type and shape after cleaning, then preview the result
print("df1 type:", type(df1))
print("df1 shape:", df1.shape)
df1.head()

Clean df2

# Remove every row of df2 that has a missing value
df2.dropna(inplace=True)

# Convert peso prices to dollars, rounded to two decimals
# (exchange rate used here: 19 pesos to the dollar in 2014)
MXN_PER_USD = 19
df2["price_usd"] = (df2["price_mxn"] / MXN_PER_USD).round(2)

# The peso column is dropped once "price_usd" has been derived from it
df2.drop(columns="price_mxn", inplace=True)

# Report the object type and shape after cleaning, then preview the result
print("df2 type:", type(df2))
print("df2 shape:", df2.shape)
df2.head()

Clean df3

# Remove every row of df3 that has a missing value
df3.dropna(inplace=True)

# Split "lat-lon" on the comma into separate "lat" and "lon" columns.
# str.split produces strings, so cast the pieces to float; without the cast
# the coordinates would be stored as text rather than numbers.
# NOTE(review): presumably df1/df2 already hold numeric lat/lon — confirm.
df3[["lat", "lon"]] = df3["lat-lon"].str.split(",", expand=True).astype(float)

# Print object type, shape, and head ("lat-lon" is still present at this point)
print("df3 type:", type(df3))
print("df3 shape:", df3.shape)
df3.head()

# Create the "state" column: split "place_with_parent_names" on "|" and keep
# the piece at position 2.
# NOTE(review): assumes position 2 is always the state level — verify
# against the data.
df3["state"] = df3["place_with_parent_names"].str.split("|", expand=True)[2]

# Drop the two source columns now that "lat", "lon" and "state" exist
df3.drop(columns=["place_with_parent_names", "lat-lon"], inplace=True)

# Print object type, shape, and head after cleaning
print("df3 type:", type(df3))
print("df3 shape:", df3.shape)
df3.head()

Concatenate DataFrames

# Stack df1, df2 and df3 row-wise into a single DataFrame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# frame keeps its own row labels, so the same label can appear on several
# rows of the combined frame and label-based selection becomes ambiguous.
df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

# Print object type, shape, and head of the combined frame
print("df type:", type(df))
print("df shape:", df.shape)
df.head()

Exploratory Data Analysis