Skip to content

Joining Data with dplyr

Run the hidden code cell below to import the data used in this course.

# Load the Tidyverse
library(tidyverse)

# Load the course datasets
sets <- read_rds("datasets/sets.rds")
themes <- read_rds("datasets/themes.rds")
parts <- read_rds("datasets/parts.rds")
part_categories <- read_rds("datasets/part_categories.rds")
inventories <- read_rds("datasets/inventories.rds")
inventory_parts <- read_rds("datasets/inventory_parts.rds")
colors <- read_rds("datasets/colors.rds")
questions <- read_rds("datasets/questions.rds")
tags <- read_rds("datasets/tags.rds")
question_tags <- read_rds("datasets/question_tags.rds")
answers <- read_rds("datasets/answers.rds")

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

Add your notes here

batman_colors %>% full_join(star_wars_colors, by = "color_id", suffix = c("_batman", "_star_wars")) %>% replace_na(list(total_batman = 0, total_star_wars = 0)) %>% inner_join(colors, by = c("color_id" = "id")) %>%

Create the difference and total columns

mutate(difference = fraction_batman - fraction_star_wars, total = total_batman + total_star_wars) %>%

Filter for totals greater than 200

filter(total > 200)

# Add your code snibatman_colors <- inventory_parts_themes %>%
  # Filter the inventory_parts_themes table for the Batman theme
  filter(name_theme == "Batman") %>%
  group_by(color_id) %>%
  summarize(total = sum(quantity)) %>%
  # Add a fraction column of the total divided by the sum of the total 
  mutate(fraction = total / sum(total))

# Filter and aggregate the Star Wars set data; add a fraction column
star_wars_colors <- inventory_parts_themes %>%
  # Filter the inventory_parts_themes table for the Batman theme
  filter(name_theme == "star_wars") %>%
  group_by(color_id) %>%
  summarize(total = sum(quantity)) %>%
  # Add a fraction column of the total divided by the sum of the total 
  mutate(fraction = total / sum(total))ppetsinventory here