# Baseline: intercept-only logistic regression (no predictors)
null_model <- glm(
  donated ~ 1,
  data = donors,
  family = "binomial"
)
# Ceiling: logistic regression on every other column of donors
full_model <- glm(
  donated ~ .,
  data = donors,
  family = "binomial"
)
# Forward stepwise search, starting at the baseline and adding terms
# up to (at most) the full model
step_model <- step(
  null_model,
  scope = list(lower = null_model, upper = full_model),
  direction = "forward"
)
# Fitted donation probabilities from the selected model
step_prob <- predict(step_model, type = "response")
# ROC curve and AUC of the stepwise model
library(pROC)
ROC <- roc(donors[["donated"]], step_prob)
plot(ROC, col = "red")
auc(ROC)
# Mean imputation: replace missing ages with the mean observed age,
# rounded to two decimal places
donors$imputed_age <- with(
  donors,
  ifelse(is.na(age), round(mean(age, na.rm = TRUE), 2), age)
)
# Indicator column: 1 where age was missing, 0 otherwise
donors$missing_age <- with(donors, ifelse(is.na(age), 1, 0))
# Fit a generalized additive model on corn: yield as a smooth of year
# plus a census-region term
gam(
  yield_kg_per_ha ~ s(year) + census_region,
  data = corn
)
# Wrap the model code into a function
#' Fit a GAM of yield against smoothed year and census region
#'
#' @param data Dataset passed to `gam()`; the formula refers to the columns
#'   `yield_kg_per_ha`, `year` and `census_region`.
#' @return The value returned by `gam()` for the supplied dataset.
run_gam_yield_vs_year_by_region <- function(data) {
  # Fit on the caller's dataset. The body previously hard-coded `corn`,
  # so every call returned the corn model regardless of the argument.
  gam(yield_kg_per_ha ~ s(year) + census_region, data = data)
}
# Apply the wrapper to the wheat dataset
run_gam_yield_vs_year_by_region(data = wheat)
# Prediction grid: one row per census region, all for the year 2050
predict_this <- data.frame(year = 2050, census_region = census_regions)
# Predicted yields from the corn model, on the response scale
pred_yield_kg_per_ha <- predict(
  corn_model,
  predict_this,
  type = "response"
)
# Display the grid with the predictions attached as a new column
predict_this %>%
  mutate(pred_yield_kg_per_ha = pred_yield_kg_per_ha)
# Wrap this prediction code into a function
#' Predict yields for every census region in a given year
#'
#' Reads `census_regions` from the enclosing environment; it is not an
#' argument of this function.
#'
#' @param model Fitted model object handed to `predict()`.
#' @param year Value placed in the `year` column of the prediction grid.
#' @return The prediction grid (`year`, `census_region`) with an added
#'   `pred_yield_kg_per_ha` column holding the response-scale predictions.
predict_yields <- function(model, year) {
  grid <- data.frame(year = year, census_region = census_regions)
  predicted <- predict(model, grid, type = "response")
  mutate(grid, pred_yield_kg_per_ha = predicted)
}
# Predict 2050 yields with the wheat model
predict_yields(model = wheat_model, year = 2050)
Built-in themes: In addition to making your own themes, there are several out-of-the-box solutions that may save you lots of time.
theme_gray() is the default. theme_bw() is useful when you use transparency. theme_classic() is more traditional. theme_void() removes everything but the data.
sub & gsub (2): Regular expressions are a typical concept that you'll learn by doing and by seeing other examples. Before you rack your brains over the regular expression in this exercise, have a look at the new things that will be used:
- `.*`: A usual suspect! It can be read as "any character that is matched zero or more times".
- `\\s`: Match a space. The "s" is normally a character; escaping it (`\\` inside an R string) makes it a metacharacter.
- `[0-9]+`: Match the numbers 0 to 9, at least once (`+`).
- `([0-9]+)`: The parentheses are used to make parts of the matching string available to define the replacement. The `\\1` in the replacement argument of `sub()` gets set to the string that is captured by the regular expression `[0-9]+`.

awards <- c("Won 1 Oscar.", "Won 1 Oscar. Another 9 wins & 24 nominations.", "1 win and 2 nominations.", "2 wins & 3 nominations.", "Nominated for 2 Golden Globes. 1 more win & 2 nominations.", "4 wins & 1 nomination.")
sub(".*\\s([0-9]+)\\snomination.*$", "\\1", awards)

What does this code chunk return? `awards` is already defined in the workspace, so you can start playing in the console straight away.
# split_low has been created for you (it is defined outside this snippet).
# Evaluating the bare name shows its contents when run interactively.
split_low
# Named helper: keep only the first element of x (single-bracket subset,
# so the container type of x is preserved)
select_first <- function(x) x[1]
names <- lapply(X = split_low, FUN = select_first)
# Named helper: keep only the second element of x (single-bracket subset,
# so the container type of x is preserved)
select_second <- function(x) x[2]
years <- lapply(X = split_low, FUN = select_second)
# split_low has been created for you
split_low
# First element of each entry, extracted with an inline anonymous function
names <- lapply(split_low, function(entry) entry[1])
# Second element of each entry, extracted with an inline anonymous function
years <- lapply(split_low, function(entry) entry[2])
Course Notes
Use this workspace to take notes, store code snippets, or build your own interactive cheatsheet! For courses that use data, the datasets will be available in the datasets
folder.
# Chunk 1
# Loads data.table with library() and rjson with require(), both given as
# unquoted package names.
library(data.table)
require(rjson)
# Chunk 2
# Same as Chunk 1, except data.table is given as a quoted string.
library("data.table")
require(rjson)
# Chunk 3
# NOTE(review): with character.only = TRUE the first argument is documented to
# be treated as a character string, so the unquoted rjson would be looked up as
# a variable. Presumably this fails unless such a variable exists; confirm
# whether that is the point of this chunk.
library(data.table)
require(rjson, character.only = TRUE)
# Chunk 4
# NOTE(review): library() is documented to take a single package name; passing
# a two-element character vector presumably errors. Confirm whether this chunk
# is intentionally invalid.
library(c("data.table", "rjson"))
# Import any packages you want to use here
Take Notes
Add notes here about the concepts you've learned and code cells with code you want to keep.
# Inputs: the sentence to scan and its individual characters
rquote <- "r's internals are irrefutably intriguing"
chars <- strsplit(rquote, split = "")[[1]]
# Running tally of "r" characters seen so far
rcount <- 0
# Walk the characters in order and stop at the first "u";
# every "r" met before that point increments the tally
for (char in chars) {
  if (char == "u") {
    break
  }
  if (char == "r") {
    rcount <- rcount + 1
  }
}
# Show the final tally
print(rcount)
Add your notes here
# Add your code snippets here