Did you know that there are more than 190,000 fast-food restaurants in the US? In this project, you will explore sales of burgers and salads at two fast-food restaurants. Sales of burgers and salads have been picking up, and the restaurants have had to place urgent orders to keep enough stock because their previous predictions were wrong. This has cost them a lot of money!
These restaurants are interested in improving the daily predictions of their sales. They believe that calendar events, holidays and discounts have a high impact on the demand and are keen to know if they need to make changes to their inventory policies. Better predictions will help them fulfill more demand and reduce costs.
You will be working with historical food sales data. The data has been split into training and test sets (historicsales_fastfooditems_train.csv and historicsales_fastfooditems_test.csv) that contain the following columns:
- restaurant - the name of the restaurant (chr)
- item_name - the food item (chr)
- date - the date of the sale (chr)
- baseprice_USD - the price of the item before discount in USD (int)
- discount_percent - the percentage discount (int)
- sales_quantity - the quantity of item sold on a given date (int)
- is_weekend - 1 if the sale date is Saturday or Sunday, otherwise 0 (int)
- is_friday - 1 if the sale date is Friday, otherwise 0 (int)
- is_holiday - 1 if the sale date is a holiday (that is not a weekend), otherwise 0 (int)
# Run this cell first: it attaches the packages and reads the data.
# Packages used by this analysis: dplyr and ggplot2.
library(dplyr)
library(ggplot2)

# Read the training and test splits from their CSV files.
train <- read.csv(file = "historicsales_fastfooditems_train.csv")
test <- read.csv(file = "historicsales_fastfooditems_test.csv")

# Preview the first six rows of the training data.
head(train, n = 6L)
# Start coding here...
# Add as many coding cells as you like

# ---- Missing values ----
# Count the missing values in each column of the training data.
train %>% is.na() %>% colSums()

# Replace missing discounts with 0 (i.e. treat them as "no discount") in both
# splits, so that cor() and lm() below are not affected by NA values.
train$discount_percent[is.na(train$discount_percent)] <- 0
train
test$discount_percent[is.na(test$discount_percent)] <- 0
test

# ---- Overall correlation between discount and sales ----
correlation <- cor(train$discount_percent, train$sales_quantity)
correlation

# ---- Correlation per restaurant / item pair ----
# One training subset for each restaurant-item combination.
R1_Burger <- train %>% filter(restaurant == "R1" & item_name == "Burger")
head(R1_Burger)
R1_Salad <- train %>% filter(restaurant == "R1" & item_name == "Salad")
head(R1_Salad)
R2_Burger <- train %>% filter(restaurant == "R2" & item_name == "Burger")
head(R2_Burger)
R2_Salad <- train %>% filter(restaurant == "R2" & item_name == "Salad")
head(R2_Salad)

# Correlation between discount and quantity sold within each subset.
R1_Burger_cor <- cor(R1_Burger$discount_percent, R1_Burger$sales_quantity)
R1_Burger_cor
R1_Salad_cor <- cor(R1_Salad$discount_percent, R1_Salad$sales_quantity)
R1_Salad_cor # this pair has the highest correlation (0.87)
R2_Burger_cor <- cor(R2_Burger$discount_percent, R2_Burger$sales_quantity)
R2_Burger_cor
R2_Salad_cor <- cor(R2_Salad$discount_percent, R2_Salad$sales_quantity)
R2_Salad_cor

# Restaurant and item with the highest discount/sales correlation (see above).
highest_cor <- c("R1", "Salad")
highest_cor

# ---- Date parsing ----
# Convert the character dates to Date objects.
# The format string expects day-abbreviated month-year, e.g. "01-Jan-2020".
train$date <- as.Date(train$date, format = "%d-%b-%Y")
test$date <- as.Date(test$date, format = "%d-%b-%Y")

# ---- Linear models ----
# Model 1: sales explained by the discount only (all training rows).
model <- lm(sales_quantity ~ discount_percent, data = train)
summary(model)

# Model 2: adds the weekend indicator (all training rows).
model2 <- lm(sales_quantity ~ discount_percent + is_weekend, data = train)
summary(model2)

# Model 3: discount plus all calendar indicators, fitted on R1 burgers only.
model3 <- lm(
  sales_quantity ~ discount_percent + is_weekend + is_friday + is_holiday,
  data = R1_Burger
)
s <- summary(model3)
adj_rsquared <- s$adj.r.squared
adj_rsquared

# ---- Evaluation on the test set ----
# NOTE(review): model3 is fitted on the R1 Burger subset but is scored here on
# every row of `test` (all restaurants and items). Confirm whether `test`
# should be filtered to R1 burgers before computing the RMSE.
test$predicted_sales <- predict(model3, newdata = test)
test$residuals <- test$sales_quantity - test$predicted_sales
test

# Root mean squared error of the model 3 predictions.
rmse <- sqrt(mean(test$residuals^2))
rmse