Skip to content
Introduction to Data Visualization with ggplot2
Run the hidden code cell below to import the data used in this course.
Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
Add your notes here
# Add your code snippets here
library(ggplot2)
# Scatter plot of mpg against number of cylinders, with cyl treated as a factor
ggplot(mtcars, aes(factor(cyl), mpg)) +
  # Draw the points once, semi-transparent, so overlapping cars stay visible
  # (an extra opaque geom_point() layer would hide the effect of alpha)
  geom_point(alpha = 0.4) +
  # Add a linear regression line without its confidence ribbon.
  # group = 1 fits a single line across all cylinder levels: with a discrete
  # x each level forms its own group holding one x value, so no slope can be
  # estimated and no line would be drawn.
  geom_smooth(aes(group = 1), method = "lm", se = FALSE)
# Line chart of the unemployed share of the population over time,
# stored in plt_prop_unemployed_over_time so it can be extended later
plt_prop_unemployed_over_time <- ggplot(
  data = economics,
  mapping = aes(x = date, y = unemploy / pop)
) +
  geom_line()

# Show the stored plot with the legend position set to the bottom
plt_prop_unemployed_over_time +
  theme(legend.position = "bottom")
# Proportional stacked bar plot of late_shipments: one bar per
# vendor_inco_term, filled by freight_cost_group and scaled to a height of 1
ggplot(
  data = late_shipments,
  mapping = aes(x = vendor_inco_term, fill = freight_cost_group)
) +
  geom_bar(position = "fill") +
  # Bars show shares rather than counts, so label the y axis accordingly
  labs(y = "proportion")
# coord_flip()
# In ggplot2, the coord_flip() function is used to flip the x and y axes, effectively transposing the plot's orientation from horizontal to vertical or vice versa.
#################################################################################################
#### Plot price against size with one prediction line per model
# The steps run in dependency order: inspect the data, fit both models,
# then add the predictions, reshape to long format and plot in one pipeline.

# houseprice is available
summary(houseprice)

# Create the formula for price as a function of squared size
(fmla_sqr <- price ~ I(size^2))

# Fit a model of price as a function of squared size (use fmla_sqr)
model_sqr <- lm(fmla_sqr, data = houseprice)

# Fit a model of price as a linear function of size
model_lin <- lm(price ~ size, data = houseprice)

# Make predictions and compare
houseprice %>%
  mutate(
    pred_lin = predict(model_lin), # predictions from linear model
    pred_sqr = predict(model_sqr)  # predictions from quadratic model
  ) %>%
  # Stack the two prediction columns: modeltype holds the column name,
  # pred holds the predicted price
  pivot_longer(
    cols = c(pred_lin, pred_sqr),
    names_to = "modeltype",
    values_to = "pred"
  ) %>%
  ggplot(aes(x = size, y = price)) +
  geom_point() +
  geom_line(aes(y = pred, color = modeltype)) + # the predictions
  scale_color_brewer(palette = "Dark2")
##################################################################################################
# coord_fixed() fixes the aspect ratio of a ggplot: with the default ratio = 1, one unit on the x axis is drawn the same length as one unit on the y axis
### Scatter plot of three numeric variables: two on the axes, one as color
ggplot(
  data = taiwan_real_estate,
  mapping = aes(
    x = n_convenience,
    y = sqrt(dist_to_mrt_m),
    colour = price_twd_msq
  )
) +
  # One point per row of taiwan_real_estate
  geom_point() +
  # Continuous viridis color scale using the "inferno" palette
  # ("plasma" is another available option)
  scale_colour_viridis_c(option = "inferno")
############### vline ##############
# Release timestamp of the version of interest, held as POSIXct in UTC
release_time <- as.POSIXct(x = "2015-04-16 07:13:33", tz = "UTC")

# List the downloads of R 3.2.0 logged after the release time
# (presumably logs is ordered by datetime, so the first row returned is the
# first download -- verify against the data)
logs %>%
  filter(
    r_version == "3.2.0",
    datetime > release_time
  )
# Histograms of download times, one panel per R version stacked in a single
# column, with a vertical line marking release_time in every panel
ggplot(data = logs, mapping = aes(x = datetime)) +
  geom_histogram() +
  # as.numeric() converts the POSIXct release time to the numeric value
  # used for the line's x position
  geom_vline(mapping = aes(xintercept = as.numeric(release_time))) +
  facet_wrap(facets = ~r_version, ncol = 1)
### Male vs Female scatter with a dashed reference line through (0, 0), slope 1
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point() +
  # Points on this diagonal have equal Male and Female values
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  # Same 35-85 range on both axes
  lims(x = c(35, 85), y = c(35, 85))
# Scatter of diff_Male vs diff_Female split into four quadrants by dashed
# lines through y = 0 and x = 0, plus the dashed slope-1 diagonal
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  # Filled circles (shape 21): white outline, semi-transparent green fill
  geom_point(
    shape = 21,
    size = 5,
    alpha = 0.55,
    fill = "chartreuse3",
    color = "white"
  ) +
  # Diagonal through the origin with slope 1
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  # Same -25 to 25 range on both axes
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  # Horizontal and vertical dashed lines through zero
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    x = "Males",
    y = "Females",
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division"
  ) +
  theme_bw()
Hidden output