Skip to content
Data Manipulation with dplyr
Data Manipulation with dplyr
Run the hidden code cell below to import the data used in this course.
# Load the Tidyverse
library(tidyverse)
# Load the course datasets
babynames <- read_rds("datasets/babynames.rds")
counties <- read_rds("datasets/counties.rds")

Chapter 1: Transforming data with dplyr
dplyr
Exploring data with dplyr
dplyr
- Specializes in data manipulation
- Chapter 1 verbs:
  - select()
  - filter()
  - arrange()
  - mutate()
glimpse(counties)

# select() verb
counties %>%
select(state, county, population, unemployment)

# Creating a new table
counties_selected <- counties %>%
select(state, county, population, unemployment)
counties_selected

# arrange() verb
counties_selected %>%
arrange(population)

# desc()
counties_selected %>%
arrange(desc(population))

filter()
filter() - extract observations based on conditions
counties_selected %>%
arrange(desc(population)) %>%
filter(state == "New York")

counties_selected %>%
arrange(desc(population)) %>%
filter(unemployment < 6)

# combining conditions
counties_selected %>%
arrange(desc(population)) %>%
filter(state == "New York",
unemployment < 6)

# mutate()
counties_selected %>%
mutate(unemployed_population = population * unemployment / 100) %>%
arrange(desc(unemployed_population))

# selecting and transforming with `mutate()`
# Select and transform in a single mutate() call: name the columns to carry
# through, define the derived column, and drop everything else.
# The option is spelled `.keep` (leading dot). A bare `keep = "none"` is not a
# mutate() option; it would add a character column called `keep` and leave all
# of the original columns in place.
counties %>%
  mutate(
    state, county, population, unemployment,
    unemployed_population = population * unemployment / 100,
    .keep = "none"
  ) %>%
  arrange(desc(unemployed_population))

# Chapter 2: Aggregating Data