Skip to content

Data Manipulation with dplyr

Run the hidden code cell below to import the data used in this course.


1 hidden cell

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Add your code snippets here

library(dplyr)

glimpse(df) # allows to view the first few values from each variable, along with the data type


names_filtered <- names_normalized %>%
  # Filter for the names Steven, Thomas, and Matthew
  filter(name == "Steven"| name == "Thomas" | name == "Matthew")

# Visualize these names over time
ggplot(names_filtered, aes(x = year, y = fraction_max, color = name)) + 
  geom_line()


# Change the name of the unemployment column
counties %>%
  rename(unemployment_rate = unemployment)

# Keep the state and county columns, and the columns containing poverty
counties %>%
  select(state, county, contains("poverty"))

# Calculate the fraction_women column without dropping the other columns
counties %>%
  mutate(fraction_women = women / population)

# Keep only the state, county, and employment_rate columns
counties %>%
  transmute(state, county, employment_rate = employed / population)

counties_selected %>%
  # Find the total population for each combination of state and metro
  group_by(state, metro) %>%
  summarize(total_pop = sum(population))


babynames %>%
  # Add columns name_total and name_max for each name
  group_by(name) %>%
  mutate(name_total = sum(number),
         name_max = max(number)) %>%
  # Ungroup the table 
  ungroup() %>%
  # Add the fraction_max column containing the number by the name maximum 
  mutate(fraction_max = number / name_max)