Skip to content
From a Ggplot Function to a Logistic Model
💪 Challenge
Create a report to answer your colleague's questions. Include:
- What are the total sales for each payment method?
- What is the average unit price for each product line?
- Create plots to visualize findings for questions 1 and 2.
- [Optional] Investigate further (e.g., average purchase value by client type, total purchase value by product line, etc.)
- Summarize your findings.
library(tidyverse)
library(broom)
# Use the black-and-white theme for every plot in this report.
theme_set(theme_bw())

# Load the sales records; the path is relative to the working directory.
df <- readr::read_csv("./data/sales_data.csv")

# Preview the first rows of the data.
head(df)

# Reporting on sales data ----
What are the total sales for each payment method?
# Total sales per payment method, largest first.
# NOTE(review): "total sales" is summed from `quantity` here, while the
# client-type analysis further down sums `total` -- confirm which measure
# the question is asking for.
df1 <- df %>%
  group_by(payment) %>%
  summarise(total_sales = sum(quantity)) %>%
  arrange(desc(total_sales))

# Print the summary table.
df1
# Horizontal bar chart of total sales per payment method; fct_reorder() orders
# the bars by total_sales.
df1 %>%
  ggplot(aes(total_sales, fct_reorder(payment, total_sales), fill = payment)) +
  geom_col(color = "black") +
  labs(y = "", x = "Total Sales") +
  # The fill legend only repeats the axis labels, so hide it. "none" is the
  # documented way to do that; an empty string is not a valid position.
  theme(legend.position = "none")

# What is the average unit price for each product line? ----
# Mean unit price per product line, highest first.
df2 <- df %>%
  group_by(product_line) %>%
  summarise(avg = mean(unit_price)) %>%
  arrange(desc(avg))

# Print the summary table.
df2
# Horizontal bar chart of the average unit price per product line; fct_reorder()
# orders the bars by avg.
df2 %>%
  ggplot(aes(avg, fct_reorder(product_line, avg), fill = product_line)) +
  geom_col(color = "black") +
  labs(y = "", x = "Average Unit Price") +
  # The fill legend only repeats the axis labels, so hide it. "none" is the
  # documented way to do that; an empty string is not a valid position.
  theme(legend.position = "none")
Further Analysis
# Sum of `total` per client type, largest first.
df3 <- df %>%
  group_by(client_type) %>%
  summarise(value = sum(total)) %>%
  arrange(desc(value))

# Print the summary table.
df3
# Horizontal bar chart of summed `total` per client type; fct_reorder() orders
# the bars by value.
df3 %>%
  ggplot(aes(value, fct_reorder(client_type, value), fill = client_type)) +
  geom_col(color = "black") +
  labs(y = "", x = "Total Income") +
  # The fill legend only repeats the axis labels, so hide it. "none" is the
  # documented way to do that; an empty string is not a valid position.
  theme(legend.position = "none")

# Quick ggplot function comparing character columns to numerics ----
# Scatter plot of quantity against unit_price, coloured by one column.
#
# x:    unquoted name of the column to map to the colour aesthetic,
#       e.g. gplot(warehouse).
# data: data frame to plot; defaults to the report-wide `df`, so existing
#       one-argument calls keep working.
#
# Returns the ggplot object (it is drawn when printed at top level).
gplot <- function(x, data = df) {
  data %>%
    # `{{ x }}` forwards the caller's bare column name into aes(). The colour
    # mapping was previously left empty, so `x` was never used.
    ggplot(aes(quantity, unit_price, color = {{ x }})) +
    geom_point()
}
# Call gplot() once for each of the four grouping columns.
gplot(warehouse)
gplot(client_type)
gplot(product_line)
gplot(payment)

# Linear model of only non-numerics, predicting total ----
# Fit a linear model of `total` on every character column, then plot each
# coefficient estimate with its confidence interval.
df %>%
  select(where(is.character), total) %>%
  lm(total ~ ., .) %>%
  tidy(conf.int = TRUE) %>%
  # Drop terms with missing values (presumably coefficients lm() could not
  # estimate -- confirm against the data).
  drop_na() %>%
  arrange(-estimate) %>%
  # Strip the variable-name prefix so each term shows only the level name.
  mutate(
    term = str_replace_all(
      term, "client_type|product_line|warehouse|payment", ""
    )
  ) %>%
  ggplot(aes(estimate, fct_reorder(term, estimate), color = term)) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high)) +
  geom_point() +
  labs(y = "", x = "Estimate") +
  # The colour legend only repeats the axis labels, so hide it. "none" is the
  # documented way to do that; an empty string is not a valid position.
  theme(legend.position = "none")

# The Logistic Model ----
library(tidymodels)
# Model specification: logistic regression, "glm" engine, classification mode.
model <- logistic_reg() %>%
  set_engine(engine = "glm") %>%
  set_mode(mode = "classification")
# initial_split() assigns rows at random, so fix the seed to make the split
# (and every metric computed from it) reproducible between runs.
set.seed(123)

# client_type is converted to a factor because it is the classification
# outcome used in the model formula below.
df_split <- df %>%
  mutate(client_type = factor(client_type)) %>%
  initial_split()
# Bundle the model with the formula client_type ~ quantity and run last_fit()
# on the split. Note that `wkfl` therefore holds the last_fit() result, not
# the bare workflow.
wkfl <- workflow() %>%
  add_model(spec = model) %>%
  add_formula(formula = client_type ~ quantity) %>%
  last_fit(split = df_split)
# ROC curve built from the collected predictions, using the predicted
# probability column .pred_Retail against the true client_type.
wkfl %>%
  collect_predictions() %>%
  roc_curve(truth = client_type, .pred_Retail) %>%
  autoplot()

# Performance metrics collected from the same fit.
collect_metrics(wkfl)