Skip to content
Course Notes: Introduction to Regression in R
  • AI Chat
  • Code
  • Report
  • Course Notes

    Use this workspace to take notes, store code snippets, or build your own interactive cheatsheet! The datasets used in this course are available in the datasets folder.

    # Import any packages you want to use here
    

    Take Notes

    Add notes here about the concepts you've learned and code cells with code you want to keep.

    Add your notes here

    # Add your code snippets here
    # Scatter plot of price_twd_msq against n_convenience with a linear ("lm")
    # trend line; se = FALSE suppresses the confidence ribbon.
    ggplot(
      data = taiwan_real_estate,
      mapping = aes(x = n_convenience, y = price_twd_msq)
    ) +
      # Semi-transparent points so overlapping observations stay visible
      geom_point(alpha = 0.5) +
      geom_smooth(method = "lm", se = FALSE)
    # Run a linear regression of price_twd_msq vs. n_convenience.
    # The fit is stored as mdl_price_vs_conv because the prediction and
    # coefficient snippets in these notes call predict() and coefficients() on
    # that name; without the assignment the model is printed once and lost.
    mdl_price_vs_conv <- lm(
      price_twd_msq ~ n_convenience,
      data = taiwan_real_estate
    )

    # See the result (same printout the bare lm() call produced)
    mdl_price_vs_conv
    # Histogram of price_twd_msq from taiwan_real_estate, split into one panel
    # per house_age_years value.
    ggplot(data = taiwan_real_estate, mapping = aes(x = price_twd_msq)) +
      # 10 bins in every panel
      geom_histogram(bins = 10) +
      # One facet per house age group
      facet_wrap(facets = ~ house_age_years)
    # Mean of price_twd_msq within each house_age_years group, stored in a
    # column named mean_by_group.
    summary_stats <- taiwan_real_estate %>%
      group_by(house_age_years) %>%
      summarise(mean_by_group = mean(price_twd_msq))

    # Display the per-group means
    summary_stats
    # Fit price_twd_msq against house_age_years with the intercept removed;
    # the "+ 0" term in the formula is what drops it.
    mdl_price_vs_age_no_intercept <- lm(
      formula = price_twd_msq ~ house_age_years + 0,
      data = taiwan_real_estate
    )

    # Display the fitted model
    mdl_price_vs_age_no_intercept
    # Explanatory values to predict at: n_convenience = 0, 1, ..., 10
    explanatory_data <- tibble(n_convenience = 0:10)

    # Attach the predictions from mdl_price_vs_conv as a price_twd_msq column
    prediction_data <- mutate(
      explanatory_data,
      price_twd_msq = predict(mdl_price_vs_conv, newdata = explanatory_data)
    )

    # Display the predictions
    prediction_data
    # Scatter plot of price_twd_msq vs. n_convenience with a linear trend line,
    # plus the rows of prediction_data overlaid in yellow.
    ggplot(
      data = taiwan_real_estate,
      mapping = aes(x = n_convenience, y = price_twd_msq)
    ) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE) +
      # This layer swaps in prediction_data but inherits the x/y mapping above
      geom_point(data = prediction_data, color = "yellow")
    # Pull the fitted coefficients out of mdl_price_vs_conv
    coeffs <- coefficients(mdl_price_vs_conv)

    # Element 1 is taken as the intercept, element 2 as the slope
    intercept <- coeffs[1]
    slope <- coeffs[2]

    # Rebuild the predictions by hand as intercept + slope * n_convenience
    mutate(
      explanatory_data,
      price_twd_msq = intercept + slope * n_convenience
    )

    # Output of predict() on the same explanatory data, for comparison
    predict(mdl_price_vs_conv, newdata = explanatory_data)
    # return_2019 plotted against return_2018 from sp500_yearly_returns, with a
    # y = x reference line and a linear trend line.
    ggplot(
      data = sp500_yearly_returns,
      mapping = aes(x = return_2018, y = return_2019)
    ) +
      geom_point() +
      # Green y = x reference line: slope 1, intercept 0 (the values
      # geom_abline() falls back to when neither is supplied).
      # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in
      # favour of `linewidth` - confirm the installed version before switching.
      geom_abline(slope = 1, intercept = 0, color = "green", size = 1) +
      # Linear regression trend line without the std. error ribbon
      geom_smooth(method = "lm", se = FALSE) +
      # Fix the coordinate ratio so both axes share the same scale
      coord_fixed()
    # Model price_twd_msq as linear in sqrt(dist_to_mrt_m)
    mdl_price_vs_dist <- lm(
      formula = price_twd_msq ~ sqrt(dist_to_mrt_m),
      data = taiwan_real_estate
    )

    # Distances whose square roots are 0, 10, ..., 80
    explanatory_data <- tibble(
      dist_to_mrt_m = seq(from = 0, to = 80, by = 10)^2
    )

    # Attach the model's predictions as a price_twd_msq column
    prediction_data <- mutate(
      explanatory_data,
      price_twd_msq = predict(mdl_price_vs_dist, newdata = explanatory_data)
    )

    # Observed data on the sqrt(distance) scale with a linear trend line
    ggplot(
      data = taiwan_real_estate,
      mapping = aes(x = sqrt(dist_to_mrt_m), y = price_twd_msq)
    ) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE) +
      # Predicted points drawn in green at size 5; the inherited mapping
      # applies sqrt() to prediction_data's dist_to_mrt_m as well
      geom_point(data = prediction_data, color = "green", size = 5)
    # Regress the fourth root of n_clicks on the fourth root of n_impressions;
    # I() makes ^ act as arithmetic inside the formula.
    mdl_click_vs_impression <- lm(
      formula = I(n_clicks^0.25) ~ I(n_impressions^0.25),
      data = ad_conversion
    )

    # Impression counts to predict at: 0 to 3e6 in steps of 5e5
    explanatory_data <- tibble(
      n_impressions = seq(from = 0, to = 3e6, by = 5e5)
    )

    # predict() returns values on the transformed (fourth-root) scale;
    # raising them to the 4th power back-transforms to n_clicks.
    prediction_data <- mutate(
      explanatory_data,
      n_clicks_025 = predict(mdl_click_vs_impression, newdata = explanatory_data),
      n_clicks = n_clicks_025^4
    )

    # Observed data on the fourth-root scale with a linear trend line
    ggplot(
      data = ad_conversion,
      mapping = aes(x = n_impressions^0.25, y = n_clicks^0.25)
    ) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE) +
      # Predicted points in green, using the same transformed mapping
      geom_point(data = prediction_data, color = "green")