## Introduction to Data Visualization with ggplot2

```r
# Add your code snippets here
library(ggplot2)
# Scatter plot of mpg against cyl, with cyl treated as a factor (discrete x).
ggplot(mtcars, aes(factor(cyl), mpg)) +
  # Add a linear regression line without its confidence ribbon. On a discrete
  # x axis every factor level is its own group, so a per-group fit has only one
  # x value and no line is drawn; group = 1 fits a single line across all
  # levels (using the integer positions of the levels, not the cyl values).
  geom_smooth(aes(group = 1), method = "lm", se = FALSE) +
  # Draw the points once, semi-transparent, so overlapping observations remain
  # distinguishable. A second, opaque geom_point() layer would hide this effect.
  geom_point(alpha = 0.4)
# Line chart of the unemployed share of the population over time, stored in
# plt_prop_unemployed_over_time so further layers/themes can be added later.
plt_prop_unemployed_over_time <- ggplot(
  data = economics,
  mapping = aes(x = date, y = unemploy / pop)
) +
  geom_line()

# Show the stored plot with any legend placed underneath the panel.
plt_prop_unemployed_over_time +
  theme(legend.position = "bottom")
# Proportional stacked bar chart for late_shipments: one bar per
# vendor_inco_term, filled by freight_cost_group. position = "fill" rescales
# every bar to height 1 so the segments read as proportions.
ggplot(
  data = late_shipments,
  mapping = aes(x = vendor_inco_term, fill = freight_cost_group)
) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Optional extra layer: coord_flip()
# coord_flip() swaps the x and y axes, turning vertical bars into horizontal
# ones (or vice versa).
#################################################################################################
#### Compare a linear and a quadratic model of price ~ size on one plot.
#### The steps run top to bottom: fit the models, add their predictions,
#### reshape to long format, then plot one prediction line per model.

# houseprice is available
summary(houseprice)

# Create the formula for price as a function of squared size
(fmla_sqr <- price ~ I(size^2))

# Fit a model of price as a function of squared size (use fmla_sqr)
model_sqr <- lm(fmla_sqr, data = houseprice)

# Fit a model of price as a linear function of size
model_lin <- lm(price ~ size, data = houseprice)

# Add one prediction column per model. Functions are namespace-qualified
# because this script only attaches ggplot2.
houseprice_pred <- dplyr::mutate(
  houseprice,
  pred_lin = predict(model_lin), # predictions from linear model
  pred_sqr = predict(model_sqr)  # predictions from quadratic model
)

# Reshape to long format: `modeltype` holds the name of the prediction column
# and `pred` its value, which is the layout the plot below maps to color.
houseprice_long <- tidyr::pivot_longer(
  houseprice_pred,
  cols = c(pred_lin, pred_sqr),
  names_to = "modeltype",
  values_to = "pred"
)

# Print the reshaped data for inspection
houseprice_long

# Observed prices as points, model predictions as one colored line per model
ggplot(houseprice_long, aes(x = size, y = price)) +
  geom_point() +
  geom_line(aes(y = pred, color = modeltype)) + # the predictions
  scale_color_brewer(palette = "Dark2")
##################################################################################################
# coord_fixed() fixes the aspect ratio so that one unit on the x axis has the same length as one unit on the y axis
### Scatter plot involving three numeric variables: two on the axes and the
### third (price_twd_msq) encoded as point color.
ggplot(
  data = taiwan_real_estate,
  mapping = aes(
    x = n_convenience,
    y = sqrt(dist_to_mrt_m),
    color = price_twd_msq
  )
) +
  # One point per row
  geom_point() +
  # Continuous viridis color scale using the "inferno" palette
  # ("plasma" is another valid option)
  scale_color_viridis_c(option = "inferno")
############### vline ##############
# Store the release time as a POSIXct object
release_time <- as.POSIXct("2015-04-16 07:13:33", tz = "UTC")

# Downloads of 3.2.0 that happened after the release time; the earliest
# datetime in this result is the first download of 3.2.0.
# dplyr::filter is namespace-qualified because this script only attaches
# ggplot2, so a bare filter() would resolve to stats::filter().
dplyr::filter(
  logs,
  datetime > release_time,
  r_version == "3.2.0"
)

# Examine histograms of downloads by version, one panel per r_version,
# with a vertical line marking the release time in every panel.
ggplot(logs, aes(x = datetime)) +
  geom_histogram() +
  # xintercept is passed as a fixed parameter rather than inside aes(): a
  # constant inside aes() is recycled to one line per row of `logs`. The
  # POSIXct value is passed unchanged so it matches the type of the datetime
  # x scale (NOTE(review): very old ggplot2 releases needed as.numeric() here).
  geom_vline(xintercept = release_time) +
  facet_wrap(~ r_version, ncol = 1)
### Male vs. Female scatter plot with a dashed identity line
### (intercept 0, slope 1) as a visual reference for equal values.
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  # Same limits on both axes
  lims(x = c(35, 85), y = c(35, 85))
# Scatter plot of diff_Male against diff_Female. Dashed lines at y = 0 and
# x = 0 split the panel into four quadrants; the identity line is kept as a
# further reference.
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  # Filled circles (shape 21): white outline, semi-transparent green fill
  geom_point(
    shape = 21,
    size = 5,
    alpha = 0.55,
    color = "white",
    fill = "chartreuse3"
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  geom_hline(yintercept = 0, linetype = "dashed") + # horizontal dashed line
  geom_vline(xintercept = 0, linetype = "dashed") + # vertical dashed line
  # Symmetric limits on both axes
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  labs(
    x = "Males",
    y = "Females",
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division"
  ) +
  theme_bw()
```

