Skip to content

infer package - Null vs. observed distribution

When we compare the null and observed distributions, we can easily adapt the code for the null distribution to obtain the observed statistic.

# Libraries and data ----
# Install pacman only when it is missing. requireNamespace() checks that the
# package is available without attaching it; p_load() is called namespaced.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# fst reads the data file. infer supplies specify(), hypothesize(), generate(),
# calculate(), visualize() and get_p_value(), and re-exports %>%. ggplot2
# supplies geom_vline() and aes() for the plot. Without infer and ggplot2
# attached, the rest of the script fails with "could not find function".
pacman::p_load(fst, infer, ggplot2)
late_shipments <- read.fst("late_shipments.fst")

Code for computation

# Null distribution: 2000 permutation replicates of the difference in
# proportions of late == "Yes" (expensive minus reasonable), generated under
# the null hypothesis of independence.
null_distn <- late_shipments %>%
  specify(
    response = late,
    explanatory = freight_cost_group,
    success = "Yes"
  ) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 2000, type = "permute") %>%
  calculate(stat = "diff in props", order = c("expensive", "reasonable"))

# Observed statistic: the same pipeline as the null distribution with the
# hypothesize() and generate() steps left out, so calculate() is applied to
# the data as recorded rather than to permuted replicates.
obs_stat <- late_shipments %>%
  specify(
    response = late,
    explanatory = freight_cost_group,
    success = "Yes"
  ) %>%
  calculate(stat = "diff in props", order = c("expensive", "reasonable"))

# Plot the null distribution and mark the observed statistic with a red
# vertical line.
visualize(null_distn) +
  geom_vline(
    mapping = aes(xintercept = stat),
    data = obs_stat,
    color = "red"
  )

# Two-sided p-value of the observed statistic against the null distribution.
# The argument is named `direction`, not `alternative = "two.sided"`.
p_value <- get_p_value(
  x = null_distn,
  obs_stat = obs_stat,
  direction = "two sided"
)