Course Notes: Introduction to Regression in R
``````# Import any packages you want to use here
# The snippets below rely on these packages:
#   dplyr   - %>%, group_by(), summarize(), mutate()
#   ggplot2 - ggplot(), aes(), geom_*(), facet_wrap(), coord_fixed()
#   tibble  - tibble()
library(dplyr)
library(ggplot2)
library(tibble)
``````

``````# Add your code snippets here
# Scatter plot of price_twd_msq against n_convenience with semi-transparent
# points, overlaid with a linear ("lm") trend line drawn without its
# standard-error ribbon (se = FALSE).
ggplot(
  data = taiwan_real_estate,
  mapping = aes(x = n_convenience, y = price_twd_msq)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)``````
``````# Run a linear regression of price_twd_msq vs. n_convenience.
# The fit is stored as mdl_price_vs_conv because later snippets call
# predict() and coefficients() on that name; without the assignment the
# model would be printed once and then discarded.
mdl_price_vs_conv <- lm(
  price_twd_msq ~ n_convenience,
  data = taiwan_real_estate
)

# See the result
mdl_price_vs_conv``````
``````# Distribution of price_twd_msq in taiwan_real_estate, split into panels
ggplot(data = taiwan_real_estate, mapping = aes(x = price_twd_msq)) +
  # Histogram with exactly 10 bins
  geom_histogram(bins = 10) +
  # One panel per distinct value of house_age_years
  facet_wrap(~ house_age_years)``````
``````# Mean of price_twd_msq within each house_age_years group.
# The result has one row per group and a mean_by_group column.
summary_stats <- taiwan_real_estate %>%
  group_by(house_age_years) %>%
  summarize(mean_by_group = mean(price_twd_msq))

# Print the per-group means
summary_stats``````
``````# Regress price_twd_msq on house_age_years with the intercept removed;
# the "+ 0" term in the formula is what drops it.
mdl_price_vs_age_no_intercept <- lm(
  price_twd_msq ~ house_age_years + 0,
  data = taiwan_real_estate
)

# Print the fitted model
mdl_price_vs_age_no_intercept``````
``````# Explanatory values to predict at: n_convenience = 0, 1, ..., 10
explanatory_data <- tibble(n_convenience = 0:10)

# Attach the predictions from mdl_price_vs_conv as a price_twd_msq column,
# giving one predicted value per row of explanatory_data
prediction_data <- mutate(
  explanatory_data,
  price_twd_msq = predict(mdl_price_vs_conv, explanatory_data)
)

# Print the predictions
prediction_data``````
``````# Scatter plot and linear trend line of price_twd_msq vs. n_convenience,
# with the model predictions overlaid
ggplot(
  data = taiwan_real_estate,
  mapping = aes(x = n_convenience, y = price_twd_msq)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  # Points from prediction_data in yellow; this layer reuses the x/y
  # mapping declared in ggplot(), so prediction_data needs both columns
  geom_point(data = prediction_data, color = "yellow")``````
``````# Pull the fitted coefficients out of mdl_price_vs_conv
coeffs <- coef(mdl_price_vs_conv)

# Position 1 is taken as the intercept and position 2 as the slope
# (assumes a single-predictor model with an intercept -- confirm against
# how mdl_price_vs_conv was fitted)
intercept <- coeffs[1]
slope <- coeffs[2]

# Rebuild the predictions by hand: intercept + slope * n_convenience
mutate(
  explanatory_data,
  price_twd_msq = intercept + slope * n_convenience
)

# Output of predict() on the same data, for comparison
predict(mdl_price_vs_conv, explanatory_data)``````
``````# Scatter plot of return_2019 against return_2018 from sp500_yearly_returns
ggplot(
  data = sp500_yearly_returns,
  mapping = aes(x = return_2018, y = return_2019)
) +
  geom_point() +
  # Green reference line; with no slope/intercept supplied, geom_abline()
  # falls back to its defaults, which the notes describe as the line y = x.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
  # `linewidth` -- switch once the minimum ggplot2 version is confirmed.
  geom_abline(color = "green", size = 1) +
  # Linear regression trend line without the standard-error ribbon
  geom_smooth(method = "lm", se = FALSE) +
  # Fixed aspect ratio between the x and y scales
  coord_fixed()``````
``````# Model price_twd_msq as linear in sqrt(dist_to_mrt_m)
mdl_price_vs_dist <- lm(
  price_twd_msq ~ sqrt(dist_to_mrt_m),
  data = taiwan_real_estate
)

# Distances chosen so that their square roots are 0, 10, ..., 80
explanatory_data <- tibble(dist_to_mrt_m = seq(0, 80, 10)^2)

# Predicted price_twd_msq at each of those distances
prediction_data <- mutate(
  explanatory_data,
  price_twd_msq = predict(mdl_price_vs_dist, explanatory_data)
)

# Plot on the square-root scale. The sqrt() in aes() is inherited by the
# prediction layer too, so prediction_data keeps the untransformed distances.
ggplot(
  data = taiwan_real_estate,
  mapping = aes(x = sqrt(dist_to_mrt_m), y = price_twd_msq)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  # Predictions as large green points
  geom_point(data = prediction_data, color = "green", size = 5)``````
``````# Fit n_clicks against n_impressions, both raised to the power 0.25.
# I() makes `^` mean arithmetic exponentiation inside the formula.
# `data = ad_conversion` is required: without it lm() looks for n_clicks and
# n_impressions in the calling environment and fails with "object not found".
mdl_click_vs_impression <- lm(
  I(n_clicks^0.25) ~ I(n_impressions^0.25),
  data = ad_conversion
)

# Untransformed impression counts to predict at: 0 to 3e6 in steps of 5e5
explanatory_data <- tibble(
  n_impressions = seq(0, 3e6, 5e5)
)

prediction_data <- explanatory_data %>%
  mutate(
    # The response was modelled as n_clicks^0.25, so predict() returns
    # values on that transformed scale
    n_clicks_025 = predict(mdl_click_vs_impression, explanatory_data),
    # Back-transform to the original scale by raising to the 4th power
    n_clicks = n_clicks_025^4
  )

# Plot on the transformed scale; the prediction layer inherits the same
# ^ 0.25 mappings, so it needs the untransformed n_impressions and n_clicks
ggplot(ad_conversion, aes(n_impressions^0.25, n_clicks^0.25)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  # Points from prediction_data, colored green
  geom_point(data = prediction_data, color = "green")
``````