Skip to content
Introduction to Data Visualization with ggplot2
  • AI Chat
  • Code
  • Report
  • Introduction to Data Visualization with ggplot2

    Run the hidden code cell below to import the data used in this course.

    Take Notes

    Add notes about the concepts you've learned and code cells with code you want to keep.

    Add your notes here

    # Add your code snippets here
    library(ggplot2)

    # Scatter plot of mpg against cyl, with cyl treated as a factor (discrete x).
    ggplot(mtcars, aes(factor(cyl), mpg)) +
      # Semi-transparent points. Only one point layer is drawn: a second, fully
      # opaque geom_point() would paint over these and hide the transparency.
      geom_point(alpha = 0.4) +
      # Linear regression line without a confidence band. group = 1 fits a
      # single model across all cylinder levels; with a discrete x the default
      # grouping is one group per level, each with a single x value, so no line
      # would be drawn.
      geom_smooth(aes(group = 1), method = "lm", se = FALSE)
    
    # Line plot of unemploy / pop over date, stored as
    # plt_prop_unemployed_over_time so it can be extended below.
    plt_prop_unemployed_over_time <- ggplot(
      data = economics,
      mapping = aes(x = date, y = unemploy / pop)
    ) +
      geom_line()

    # Display the stored plot with the legend position set to the bottom.
    plt_prop_unemployed_over_time + theme(legend.position = "bottom")
    
    # Proportional stacked bar plot of late_shipments: one bar per
    # vendor_inco_term, filled by freight_cost_group. position = "fill" scales
    # every bar to the same total height, so the y-axis shows proportions.
    ggplot(
      data = late_shipments,
      mapping = aes(x = vendor_inco_term, fill = freight_cost_group)
    ) +
      ylab("proportion") +
      geom_bar(position = "fill")
    
    # coord_flip()
    # In ggplot2, the coord_flip() function is used to flip the x and y axes, effectively transposing the plot's orientation from horizontal to vertical or vice versa.
    
    ###########################################################################
    #### Plot the predictions of two models (two lines) over the observed data
    # NOTE(review): assumes dplyr and tidyr are attached and houseprice is
    # loaded (e.g. by the hidden setup cell) -- confirm before running.
    summary(houseprice)

    # Create the formula for price as a function of squared size.
    # The outer parentheses print the formula as well as assigning it.
    (fmla_sqr <- price ~ I(size^2))

    # Fit a model of price as a function of squared size (use fmla_sqr)
    model_sqr <- lm(fmla_sqr, data = houseprice)
    # Fit a model of price as a linear function of size
    model_lin <- lm(price ~ size, data = houseprice)

    # Add the predictions of both models as columns, then reshape to long form:
    # one row per observation and model, with the model name in `modeltype` and
    # its prediction in `pred`. The result is stored so the plot can use it.
    houseprice_long <- houseprice %>%
      mutate(
        pred_lin = predict(model_lin), # predictions from linear model
        pred_sqr = predict(model_sqr)  # predictions from quadratic model
      ) %>%
      pivot_longer(
        cols = c(pred_lin, pred_sqr),
        names_to = "modeltype",
        values_to = "pred"
      )

    # Observed prices as points, one prediction line per model type.
    ggplot(houseprice_long, aes(x = size, y = price)) +
      geom_point() +
      geom_line(aes(y = pred, color = modeltype)) + # the predictions
      scale_color_brewer(palette = "Dark2")
    ###########################################################################
    
    # coord_fixed() fixes the aspect ratio of the plot: with the default ratio = 1, one unit on the x-axis is drawn the same length as one unit on the y-axis.
    
    
    ### Scatter plot showing three variables at once: n_convenience on x,
    ### sqrt(dist_to_mrt_m) on y, and price_twd_msq as point color
    ggplot(
      data = taiwan_real_estate,
      mapping = aes(
        x = n_convenience,
        y = sqrt(dist_to_mrt_m),
        color = price_twd_msq
      )
    ) +
      # One point per row
      geom_point() +
      # Continuous viridis color scale using the "inferno" palette;
      # "plasma" is another valid option
      scale_color_viridis_c(option = "inferno")
    
    
    ############### vline ##############
    # Release timestamp as a POSIXct object in UTC
    release_time <- as.POSIXct("2015-04-16 07:13:33", tz = "UTC")

    # Rows of logs for version 3.2.0 with a datetime after the release time
    # NOTE(review): filter() here is presumably dplyr::filter -- confirm that
    # dplyr is attached by the setup cell.
    filter(logs, datetime > release_time, r_version == "3.2.0")

    # Histogram of datetime, one facet row per r_version, with a vertical
    # line marking the release time in every facet
    ggplot(data = logs, mapping = aes(x = datetime)) +
      geom_histogram() +
      # The intercept is passed as the numeric value of the POSIXct timestamp
      geom_vline(aes(xintercept = as.numeric(release_time))) +
      facet_wrap(~ r_version, ncol = 1)
    
    
    ### Scatter plot of Female against Male with a dashed reference diagonal
    ### through (0, 0) with slope 1; both axes are limited to 35-85
    ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
      xlim(35, 85) +
      ylim(35, 85) +
      geom_point() +
      geom_abline(slope = 1, intercept = 0, linetype = "dashed")
    
    # Scatter plot of diff_Female against diff_Male, split into four quadrants
    # by dashed lines at x = 0 and y = 0, plus a dashed slope-1 diagonal.
    ggplot(
      data = subdata2,
      mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
    ) +
      theme_bw() +
      labs(
        title = "Life Expectancy at Birth by Country",
        subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
        caption = "Source: United Nations Statistics Division",
        x = "Males",
        y = "Females"
      ) +
      # Same limits on both axes
      scale_x_continuous(limits = c(-25, 25)) +
      scale_y_continuous(limits = c(-25, 25)) +
      # Filled circles (shape 21): white outline, semi-transparent green fill
      geom_point(
        shape = 21,
        size = 5,
        alpha = 0.55,
        colour = "white",
        fill = "chartreuse3"
      ) +
      # Diagonal through the origin with slope 1 (linetype 2)
      geom_abline(slope = 1, intercept = 0, linetype = 2) +
      # Horizontal and vertical dashed lines through 0 form the quadrants
      geom_hline(yintercept = 0, linetype = "dashed") +
      geom_vline(xintercept = 0, linetype = "dashed")
    Hidden output