Skip to content
Buenos-Aires_Real_Estate: Predicting Price with Location
Buenos_Aires_Real_Estate: Predicting Price with Location
# Imported libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline, make_pipeline
In this lesson, we're going to create a more complex wrangle function, use it to clean more data, and build a model that considers more features when predicting apartment price.
Import data
# Load the first CSV as-is (no filtering) so the raw columns can be inspected.
df = pd.read_csv("buenos-aires-real-estate-1.csv")
df.head()
Task 2.1.1: Write a function named wrangle that takes a file path as an argument and returns a DataFrame.
def wrangle(filepath):
    """Read a real-estate CSV and return cleaned Capital Federal apartments.

    The cleaning steps, in order:

    1. Keep rows whose "place_with_parent_names" contains "Capital Federal",
       whose "property_type" equals "apartment", and whose
       "price_aprox_usd" is below 400,000.
    2. Keep rows whose "surface_covered_in_m2" lies between the 10th and
       90th percentiles (inclusive) of the rows that survived step 1.
    3. Split the "lat-lon" text column on "," into float "lat" and "lon"
       columns and drop "lat-lon".

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows (original index labels preserved) with "lat" and
        "lon" appended and "lat-lon" removed.
    """
    # Read CSV file
    df = pd.read_csv(filepath)

    # Subset data: apartments in "Capital Federal", less than 400,000.
    # na=False makes rows with a missing place name count as "no match"
    # instead of putting NaN into the boolean mask.
    mask_ba = df["place_with_parent_names"].str.contains(
        "Capital Federal", na=False
    )
    mask_apt = df["property_type"] == "apartment"
    mask_price = df["price_aprox_usd"] < 400_000
    df = df[mask_ba & mask_apt & mask_price]

    # Subset data: remove outliers for "surface_covered_in_m2"
    low, high = df["surface_covered_in_m2"].quantile([0.1, 0.9])
    mask_area = df["surface_covered_in_m2"].between(low, high)
    # Take an explicit copy so the column assignments below act on an
    # independent frame rather than on a slice of the original.
    df = df[mask_area].copy()

    # Split "lat-lon" into separate numeric "lat" and "lon" columns
    df[["lat", "lon"]] = df["lat-lon"].str.split(",", expand=True).astype(float)
    df = df.drop(columns=["lat-lon"])

    return df
# Task 2.1.2: Use your wrangle function to create a DataFrame from the CSV file data/buenos-aires-real-estate-1.csv.
# Clean the first CSV with wrangle, then print a summary of the resulting frame.
frame1 = wrangle("buenos-aires-real-estate-1.csv")
print(frame1.info())
frame1.head()
Task 2.2.3: Use your revised wrangle function to create a DataFrame frame2 from the file data/buenos-aires-real-estate-2.csv.
# Clean the second CSV with the same wrangle function and report its (rows, columns) shape.
frame2 = wrangle("buenos-aires-real-estate-2.csv")
print("Frame2 Shape:", frame2.shape)
frame2.head()
Task 2.2.4: Use pd.concat to concatenate frame1 and frame2 into a new DataFrame df. Make sure you set the ignore_index argument to True.
# Stack the two cleaned frames into one. ignore_index=True renumbers the rows
# 0..n-1; without it the index labels carried over from the two source files
# would repeat, making label-based lookups on df ambiguous.
df = pd.concat([frame1, frame2], ignore_index=True)
print(df.info())
df.head()
Explore
Task 2.2.5: Create a Mapbox scatter plot that shows the location of the apartments in df.