# Skip to content
# Run cancelled
suppressPackageStartupMessages(library(tidyverse, warn.conflicts = FALSE))
suppressPackageStartupMessages(library(tidymodels, warn.conflicts = FALSE))

# Load necessary libraries
library(tidyr)
library(caret)


# Read the cleaned data set from the working directory
thesis <- read.csv("cleaned_data.csv")

# Recode the categorical predictors: each column is coerced to numeric and
# then stored as a factor, so every distinct code becomes a factor level.
# NOTE(review): `preschool` is derived from `primary`, while the model formula
# further down references `primary` directly -- confirm which one is intended.
encoded <- thesis %>%
  mutate(
    across(
      c(age, sex, region, ill, work, voucher, bank),
      function(col) as.factor(as.numeric(col))
    ),
    preschool = as.factor(as.numeric(primary))
  )

# make.names() rewrites the outcome labels as syntactically valid R names
# (e.g. a label of 0 becomes "X0"); the result is a character vector
encoded$y <- make.names(encoded$y)

# Hold out 30% of the rows for testing; createDataPartition() draws the
# 70% training sample on the basis of the outcome column y
set.seed(123) # fixed seed so the partition is reproducible
index <- createDataPartition(encoded$y, p = 0.70, list = FALSE)
train <- encoded[index, ]
test <- encoded[-index, ]

# Resampling set-up used when building the model: 10-fold cross-validation
control <- trainControl(
  method = "cv",
  number = 10,
  # keep the hold-out predictions for every tuning-parameter candidate
  savePredictions = "all",
  # `index` must contain the rows used for TRAINING in each resample.
  # createFolds() returns the held-out rows by default, which would make
  # every resample fit on ~10% of the data and validate on ~90%, so the
  # training positions are requested explicitly with returnTrain = TRUE.
  index = createFolds(train$y, k = 10, returnTrain = TRUE),
  # class probabilities are required by twoClassSummary (ROC, Sens, Spec)
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  # random oversampling of the minority class to address class imbalance
  sampling = "up"
)

# Model formula: outcome y against eight predictors, each treated as a factor
form1 <- as.factor(y) ~
  as.factor(region) +
  as.factor(sex) +
  as.factor(age) +
  as.factor(primary) +
  as.factor(work) +
  as.factor(voucher) +
  as.factor(bank) +
  as.factor(ill)

# Fit k-nearest-neighbours with caret; the value of k is selected by the
# resampled sensitivity ("Sens") under the `control` settings
knnModel <- train(
  form1,
  data = train,
  method = "knn",
  trControl = control,
  metric = "Sens"
)

# Selected number of neighbours
knnModel$bestTune$k
#saveRDS(knnModel,"knn.model.RData")
#loaded_model<-readRDS("knn.model.RData")

# Inspect the resampling results and the final fitted model
knnModel
summary(knnModel)
knnModel$finalModel

# Training-set check: predicted classes from the tuned model compared with
# the observed outcome
knn.predictiontrain <- predict(knnModel, newdata = train, type = "raw")
knn.predictiontrain
confusionMatrix(
  data = knn.predictiontrain,
  reference = as.factor(train$y),
  mode = "everything"
)

# Test-set evaluation: predicted classes and the resulting confusion matrix.
# Note that pred_prob.knn holds class labels (type = "raw"), not probabilities.
pred_prob.knn <- predict(knnModel, newdata = test, type = "raw")
pred_prob.knn
class(pred_prob.knn)
class(test$y)
confusionMatrix(
  data = pred_prob.knn,
  reference = as.factor(test$y),
  mode = "everything"
)

# Class probabilities on the test set, passed to MLeval together with the
# observed labels; "X0" is treated as the positive class
pred_prob.knnprob <- predict(knnModel, newdata = test, type = "prob")
knn.eval <- MLeval::evalm(
  data.frame(pred_prob.knnprob, test$y),
  positive = "X0"
)