Skip to content

Project: Wrangling and Visualizing Music Data from the Billboard Chart Dataset

# Loading individual Tidyverse packages
# .... YOUR CODE FOR TASK 1 ....

library(dplyr)
library(readr)
library(ggplot2)
# Import the McGill Billboard chord data from CSV
bb <- read_csv(file = "datasets/bb_chords.csv")

# Auto-print bb to inspect its first rows
bb

# Write the parsed data to "bb.rds" (path is relative to the working directory)
saveRDS(object = bb, file = "bb.rds")
Hidden output
Run cancelled
# Tally the occurrences of each chord, most frequent first
bb_count <- count(bb, chord, sort = TRUE)

# Show the 20 most frequent chords
head(bb_count, n = 20)
Run cancelled
# Creating a horizontal bar plot of the 20 most common chords in bb_count.
# NOTE(review): `share` is computed after slice(), so each bar is the chord's
# percentage among the 20 displayed chords, not among every chord in the data.
# Confirm this matches the intent of the "Share of total chords" axis label.
bb_count %>%
  slice(1:20) %>%
  mutate(
    share = (n / sum(n)) * 100,
    # Order the chord factor by share so the bars appear sorted
    chord = reorder(chord, share)
  ) %>%
  ggplot(aes(x = chord, y = share, fill = chord)) +
  # geom_col() draws bar heights straight from `share`; FALSE is spelled out
  # because the F alias can be reassigned
  geom_col(show.legend = FALSE) +
  coord_flip() +
  xlab("Chord") +
  ylab("Share of total chords")
Run cancelled
# Wrangling and counting bigrams (each chord paired with the chord on the
# following row)
bb_bigram_count <- bb %>%
  mutate(
    next_chord = lead(chord),
    next_title = lead(title),
    bigram = paste(chord, next_chord, sep = " ")
  ) %>%
  # Keep only pairs whose two rows share the same title; this also drops the
  # final row, whose lead() values are NA
  filter(title == next_title) %>%
  count(bigram, sort = TRUE)

# Displaying the first 20 rows of bb_bigram_count
head(bb_bigram_count, 20)
Hidden output
Run cancelled
# Creating a horizontal column plot of the 20 most common bigrams.
# NOTE(review): `share` is computed after slice(), so each bar is the bigram's
# percentage among the 20 displayed bigrams, not among every bigram counted.
# Confirm this matches the intent of the "Share of total bigrams" axis label.
bb_bigram_count %>%
  slice(1:20) %>%
  mutate(
    share = (n / sum(n)) * 100,
    # Order the bigram factor by share so the bars appear sorted
    bigram = reorder(bigram, share)
  ) %>%
  ggplot(aes(x = bigram, y = share, fill = bigram)) +
  # geom_col() draws bar heights straight from `share`; FALSE is spelled out
  # because the F alias can be reassigned
  geom_col(show.legend = FALSE) +
  coord_flip() +
  xlab("Bigram") +
  ylab("Share of total bigrams")
Run cancelled
# Counting songs per artist, ordered from most to fewest songs.
# The result holds every artist; only the printout below is limited to 30.
bb_30_artists <- bb %>%
  # One row per distinct artist/title pair, so a song is counted once no
  # matter how many rows it spans in bb. (The previous unique(by = "title")
  # had the same effect: the data-frame method of unique() has no `by`
  # argument and silently ignored it.)
  distinct(artist, title) %>%
  count(artist, sort = TRUE)

# Displaying 30 artists with the most songs in the corpus
print(bb_30_artists, n = 30)
Run cancelled
# Lookup table pairing selected artists with an instrument label:
# the first four artists are tagged "piano", the remaining three "guitar"
tags <- tibble(
  artist = c(
    "Abba", "Billy Joel", "Elton John", "Stevie Wonder",
    "The Rolling Stones", "The Beatles", "Eric Clapton"
  ),
  instrument = rep(c("piano", "guitar"), times = c(4, 3))
)

# Build bb_tagged: rows of bb whose artist appears in tags, with the matching
# instrument column attached (rows for untagged artists are dropped)
bb_tagged <- inner_join(bb, tags, by = "artist")

# Show the joined data frame
bb_tagged