Skip to content
0

💪 Challenge

Create a report to answer your colleague's questions. Include:

  1. What are the total sales for each payment method?
  2. What is the average unit price for each product line?
  3. Create plots to visualize findings for questions 1 and 2.
  4. [Optional] Investigate further (e.g., average purchase value by client type, total purchase value by product line, etc.)
  5. Summarize your findings.
library(tidyverse)
library(broom)
# Use the black-and-white ggplot2 theme for every plot drawn below.
theme_set(theme_bw())

# Read the sales data (path is relative to the working directory) and
# preview the first rows.
df <- read_csv("./data/sales_data.csv")
df %>% head()

Reporting on sales data

What are the total sales for each payment method?

# Total sales value per payment method, largest first.
# Sums `total` instead of `quantity`: `quantity` only counts units sold,
# whereas the question asks for sales. NOTE(review): `total` is presumed to
# hold the monetary value of each row (it is used that way for the
# "Total Income" summary further down) -- confirm against the data dictionary.
(df1 <- df %>%
  group_by(payment) %>%
  summarize(total_sales = sum(total)) %>%
  arrange(desc(total_sales)))

# Horizontal bar chart of total sales per payment method, bars ordered by
# total_sales.
df1 %>%
  ggplot(aes(total_sales, fct_reorder(payment, total_sales), fill = payment)) +
  geom_col(color = "black") +
  labs(y = "", x = "Total Sales") +
  # The fill legend duplicates the axis labels, so hide it. "none" is the
  # documented value for this; the empty string used before is not.
  theme(legend.position = "none")

What is the average unit price for each product line?

# Mean unit price within each product line, highest average first.
df2 <- df %>%
  group_by(product_line) %>%
  summarise(avg = mean(unit_price)) %>%
  arrange(desc(avg))
df2

# Horizontal bar chart of the average unit price per product line, bars
# ordered by the average.
df2 %>%
  ggplot(aes(avg, fct_reorder(product_line, avg), fill = product_line)) +
  geom_col(color = "black") +
  labs(y = "", x = "Average Unit Price") +
  # The fill legend duplicates the axis labels, so hide it. "none" is the
  # documented value for this; the empty string used before is not.
  theme(legend.position = "none")
  

Further Analysis

# Sum of `total` for each client type, largest first.
df3 <- df %>%
  group_by(client_type) %>%
  summarise(value = sum(total)) %>%
  arrange(desc(value))
df3

# Horizontal bar chart of summed `total` per client type, bars ordered by
# value.
df3 %>%
  ggplot(aes(value, fct_reorder(client_type, value), fill = client_type)) +
  geom_col(color = "black") +
  labs(y = "", x = "Total Income") +
  # The fill legend duplicates the axis labels, so hide it. "none" is the
  # documented value for this; the empty string used before is not.
  theme(legend.position = "none")

Quick ggplot helper: scatter plot of quantity vs. unit price, colored by a categorical column

# Scatter plot of quantity against unit_price from the global `df`, with the
# points colored by the column passed as `x`.
#
# x: unquoted name of a column in `df` (e.g. `warehouse`).
# Returns the ggplot object.
gplot <- function(x) {
  df %>%
    # `{{ x }}` forwards the caller's bare column name into aes(). The
    # previous version left `color = ` empty and never used `x`, so every
    # call produced the same plot.
    ggplot(aes(quantity, unit_price, color = {{ x }})) +
    geom_point()
}

# One plot per categorical column.
gplot(warehouse)
gplot(client_type)
gplot(product_line)
gplot(payment)

Linear model predicting total from the non-numeric (character) columns only

# Regress `total` on every character column of `df`, then plot each
# coefficient estimate together with its confidence interval.
df %>%
  select(where(is.character), total) %>%
  lm(total ~ ., data = .) %>%
  tidy(conf.int = TRUE) %>%
  # Discard coefficient rows that contain a missing value in any column.
  drop_na() %>%
  arrange(desc(estimate)) %>%
  # Remove the predictor name from each term so the axis label shows only
  # the level.
  mutate(
    term = str_replace_all(
      term,
      "client_type|product_line|warehouse|payment",
      ""
    )
  ) %>%
  ggplot(aes(estimate, fct_reorder(term, estimate), color = term)) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high)) +
  geom_point() +
  labs(y = "", x = "Estimate") +
  # The color legend duplicates the axis labels, so hide it. "none" is the
  # documented value for this; the empty string used before is not.
  theme(legend.position = "none")

The Logistic Model

library(tidymodels)
# Logistic regression specification: "glm" engine, classification mode.
model <- set_mode(
  set_engine(logistic_reg(), engine = "glm"),
  mode = "classification"
)

# initial_split() partitions the rows at random. Fix the RNG seed first so
# the split, and every metric computed from it, is the same on each run.
set.seed(123)

# Convert the outcome to a factor (required for classification), then create
# the train/test split with initial_split()'s default proportion.
df_split <- df %>%
  mutate(client_type = factor(client_type)) %>%
  initial_split()

# Bundle the formula (client_type predicted from quantity) with the model
# spec, then run last_fit() on the split. Note that `wkfl` therefore holds
# the last_fit() result rather than a bare workflow.
wkfl <- workflow() %>%
  add_formula(client_type ~ quantity) %>%
  add_model(spec = model) %>%
  last_fit(split = df_split)

# ROC curve built from the collected predictions, using the predicted
# probability column `.pred_Retail` against the true `client_type`.
autoplot(
  roc_curve(collect_predictions(wkfl), truth = client_type, .pred_Retail)
)

# Performance metrics recorded by last_fit().
collect_metrics(wkfl)