Skip to content
Data Manipulation with dplyr
Data Manipulation with dplyr
Run the hidden code cell below to import the data used in this course.
# Load the Tidyverse
library(tidyverse)
# Read the course data from the serialized .rds files under datasets/
counties <- read_rds("datasets/counties.rds")
babynames <- read_rds("datasets/babynames.rds")
Chapter 1: Transforming data with dplyr
dplyr
Exploring data with dplyr
dplyr
- Specializes in data manipulation
- Chapter 1 verbs:
  - select()
  - filter()
  - arrange()
  - mutate()
# Preview the columns of the counties data
glimpse(counties)

# select(): keep only the four columns of interest
counties %>%
  select(state, county, population, unemployment)

# Store the same selection as a new table, then print it
counties_selected <- counties %>%
  select(state, county, population, unemployment)
counties_selected
# arrange(): sort rows by population, smallest first (the default order)
counties_selected %>%
  arrange(population)

# Wrap the column in desc() to sort from largest to smallest instead
counties_selected %>%
  arrange(desc(population))
filter()
filter()
- Extracts the observations (rows) that satisfy the given conditions
# Keep only the New York rows, ordered from highest to lowest population
counties_selected %>%
  arrange(desc(population)) %>%
  filter(state == "New York")

# Keep only the rows whose unemployment value is below 6
counties_selected %>%
  arrange(desc(population)) %>%
  filter(unemployment < 6)

# Combining conditions: comma-separated conditions must all hold for a row
counties_selected %>%
  arrange(desc(population)) %>%
  filter(
    state == "New York",
    unemployment < 6
  )
# mutate(): add unemployed_population, computed from population and the
# unemployment value (divided by 100), then sort by it from largest to smallest
counties_selected %>%
  mutate(unemployed_population = population * unemployment / 100) %>%
  arrange(desc(unemployed_population))
# Selecting and transforming in one step with `mutate()`.
# `.keep = "none"` (note the leading dot) drops every column that is not named
# in this call, so the result holds only state, county, population,
# unemployment, and the new unemployed_population column. Without the dot,
# `keep = "none"` would instead add a character column called `keep` and leave
# all of the original columns in place.
counties %>%
  mutate(
    state, county, population, unemployment,
    # unemployment is divided by 100 here, i.e. treated as a percentage
    unemployed_population = population * unemployment / 100,
    .keep = "none"
  ) %>%
  arrange(desc(unemployed_population))
Chapter 2: Aggregating Data