Skip to content
From a Ggplot Function to a Logistic Model
0
  • AI Chat
  • Code
  • Report
  • 💪 Challenge

    Create a report to answer your colleague's questions. Include:

    1. What are the total sales for each payment method?
    2. What is the average unit price for each product line?
    3. Create plots to visualize findings for questions 1 and 2.
    4. [Optional] Investigate further (e.g., average purchase value by client type, total purchase value by product line, etc.)
    5. Summarize your findings.
    library(tidyverse)
    library(broom)
    # Use the black-and-white ggplot2 theme for every plot in this report.
    theme_set(theme_bw())

    # Read the sales records from disk and preview the first rows.
    df <- "./data/sales_data.csv" %>%
      read_csv()
    head(df)

    Reporting on sales data

    What are the total sales for each payment method?

    # Total sales value per payment method, largest first. The outer
    # parentheses print the summary table as well as assigning it to `df1`.
    (df1 <- df %>%
      group_by(payment) %>%
      # Sum the `total` column (value of each sale); summing `quantity` would
      # report the number of units sold rather than sales.
      summarize(total_sales = sum(total)) %>%
      arrange(desc(total_sales)))

    # Horizontal bar chart of the totals, bars ordered by size.
    df1 %>%
      ggplot(aes(total_sales, fct_reorder(payment, total_sales), fill = payment)) +
      geom_col(color = "black") +
      labs(y = "", x = "Total Sales") +
      # "none" is the documented value for hiding the legend; the fill colours
      # only repeat the axis labels.
      theme(legend.position = "none")

    What is the average unit price for each product line?

    # Mean unit price per product line, highest first. The outer parentheses
    # print the summary table as well as assigning it to `df2`.
    (df2 <- df %>%
      group_by(product_line) %>%
      summarize(avg = mean(unit_price)) %>%
      arrange(desc(avg)))

    # Horizontal bar chart of the averages, bars ordered by size.
    df2 %>%
      ggplot(aes(avg, fct_reorder(product_line, avg), fill = product_line)) +
      geom_col(color = "black") +
      labs(y = "", x = "Average Unit Price") +
      # "none" is the documented value for hiding the legend; the fill colours
      # only repeat the axis labels.
      theme(legend.position = "none")
      

    Further Analysis

    # Summed `total` per client type, largest first. The outer parentheses
    # print the summary table as well as assigning it to `df3`.
    (df3 <- df %>%
      group_by(client_type) %>%
      summarize(value = sum(total)) %>%
      arrange(desc(value)))

    # Horizontal bar chart of the totals, bars ordered by size.
    df3 %>%
      ggplot(aes(value, fct_reorder(client_type, value), fill = client_type)) +
      geom_col(color = "black") +
      labs(y = "", x = "Total Income") +
      # "none" is the documented value for hiding the legend; the fill colours
      # only repeat the axis labels.
      theme(legend.position = "none")

    Quick ggplot function for comparing each categorical (character) column against the numeric columns

    # Scatter plot of `quantity` against `unit_price` from the global `df`,
    # with points coloured by the column passed as `x`.
    #
    # x: unquoted name of a column in `df` (e.g. `warehouse`).
    # Returns the ggplot object, which prints when called at top level.
    gplot <- function(x) {
      df %>%
        # `{{ x }}` forwards the caller's bare column name into aes() so the
        # points are coloured by that column.
        ggplot(aes(quantity, unit_price, color = {{ x }})) +
        geom_point()
    }
    gplot(warehouse)
    gplot(client_type)
    gplot(product_line)
    gplot(payment)

    Linear model predicting total from only the non-numeric (character) columns

    # Regress `total` on every character column of `df` and plot each
    # coefficient estimate with its confidence interval.
    df %>%
      select(where(is.character), total) %>%
      lm(total ~ ., data = .) %>%
      tidy(conf.int = TRUE) %>%
      # Drop coefficient rows containing NA (presumably terms lm() could not
      # estimate; verify against the tidy() output).
      drop_na() %>%
      arrange(desc(estimate)) %>%
      # Strip the variable-name prefix so each term shows only the level name.
      mutate(term = str_replace_all(term, "client_type|product_line|warehouse|payment", "")) %>%
      ggplot(aes(estimate, fct_reorder(term, estimate), color = term)) +
      geom_errorbar(aes(xmin = conf.low, xmax = conf.high)) +
      geom_point() +
      labs(y = "", x = "Estimate") +
      # "none" is the documented value for hiding the legend; the colours only
      # repeat the axis labels.
      theme(legend.position = "none")

    The Logistic Model

    library(tidymodels)
    # Logistic regression specification fitted with the "glm" engine.
    model <- logistic_reg() %>%
      set_engine("glm") %>%
      set_mode("classification")

    # Fix the RNG seed so the random train/test split, and therefore the ROC
    # curve and metrics reported below, are the same on every run.
    set.seed(123)

    # The classification outcome must be a factor before splitting.
    df_split <- df %>%
      mutate(client_type = factor(client_type)) %>%
      initial_split()

    # Predict client type from quantity alone; last_fit() fits on the training
    # portion of the split and evaluates on the held-out portion.
    wkfl <- workflow() %>%
      add_model(model) %>%
      add_formula(client_type ~ quantity) %>%
      last_fit(df_split)

    # ROC curve on the held-out predictions, using the predicted probability of
    # the "Retail" class (assumes "Retail" is the first factor level, i.e. the
    # event level yardstick uses by default; confirm against levels(client_type)).
    wkfl %>%
      collect_predictions() %>%
      roc_curve(client_type, .pred_Retail) %>%
      autoplot()

    # Metrics computed on the held-out portion of the split.
    wkfl %>% collect_metrics()