Skip to content
# import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.formula.api import ols
Read the Taiwan real estate dataframe from a CSV file
# Load the Taiwan real-estate data; the path is relative to the working directory.
df = pd.read_csv('taiwan_real_estate.csv')
# Preview the first rows. `head` has to be called: printing the bare attribute
# emits the bound-method repr rather than the short preview.
print(df.head())
Visualize dataset
Create the model using ols (1 dependent and 3 explanatory variables)
# Fit an OLS model of price_twd_msq on n_convenience, dist_to_mrt_m and
# house_age_years. In the formula, `*` expands to the main effects plus all
# of their interactions.
# NOTE(review): the trailing `* 0` is not the usual patsy idiom for dropping
# the intercept (`+ 0` or `- 1`) — confirm which model was intended before
# relying on the printed coefficients.
model2 = ols('price_twd_msq ~ n_convenience * dist_to_mrt_m * house_age_years * 0', data=df).fit()
print(model2.params)
# In-sample predictions: the fitted values for the rows the model was trained on.
predicted_price2 = model2.fittedvalues
# Store the fitted prices next to the observed ones for comparison.
df['new_price2'] = predicted_price2
print(df[['new_price2', 'price_twd_msq']])
# Plot fitted vs. actual price against n_convenience. Keep the Axes returned
# by seaborn and draw both lines on it, so the title is set on a defined object.
ax = sns.lineplot(x='n_convenience', y='new_price2', data=df, label='New Price2', color='black')
sns.lineplot(x='n_convenience', y='price_twd_msq', data=df, label='actual Price', color='red', ax=ax)
ax.set_title('Multivariant regression')
Quantify the fit of the multiple regression model
# --- Goodness-of-fit summary for model2 ---
# Gather every metric first, then report them together.
resid2 = model2.resid                # per-observation residuals of the fit
resid2_error = np.std(resid2)        # spread (standard deviation) of the residuals
r_squared2 = model2.rsquared         # coefficient of determination
mse2 = model2.mse_resid              # residual mean squared error reported by statsmodels

# Report in the same order as the metrics were computed.
print(f'resid1_error2: {resid2_error}')
print(f'R-squared2: {r_squared2}')
print(f'Mean Squared Error (MSE2): {mse2}')