# Skip to content
# KNN - A Comparative Study to Predict Student Dropout in the Philippines using APIS 2019 with Machine Learning Algorithms
# Run cancelled
suppressPackageStartupMessages(library(tidyverse, warn.conflicts = FALSE))
suppressPackageStartupMessages(library(tidymodels, warn.conflicts = FALSE))
# Load necessary libraries
library(tidyr)
library(caret)
# Load the cleaned dataset; the path is resolved against the current
# working directory.
thesis <- read.csv("cleaned_data.csv")

# Categorical encoding (not one-hot encoding): every predictor is passed
# through as.numeric() and then converted to a factor. Dummy columns are only
# created later, when the model formula is expanded.
#
# `primary` is encoded in place. Previously its encoded values were written
# to a column named `preschool`, which no later step reads, while `primary`
# itself was left unencoded.
categorical_cols <- c(
  "age", "sex", "region", "ill", "work", "voucher", "bank", "primary"
)
encoded <- thesis %>%
  mutate(
    across(
      all_of(categorical_cols),
      function(col) as.factor(as.numeric(col))
    )
  )

# Turn the outcome labels into syntactically valid R names (a numeric 0
# becomes "X0"); the trainControl() call below sets classProbs = TRUE and the
# final evaluation refers to the positive class as "X0".
encoded$y <- make.names(encoded$y)
# Hold-out split on the outcome y: 70% of the rows go to `train`, the
# remaining rows to `test`. The seed is fixed so the partition is repeatable.
set.seed(123)
index <- createDataPartition(y = encoded$y, p = 0.70, list = FALSE)
train <- encoded[index, ]
test <- encoded[-index, ]
# Resampling scheme used while tuning the model: 10-fold cross-validation.
#
# trainControl(index = ...) expects, for every resample, the row positions
# used for FITTING the model. createFolds() returns the held-out positions
# unless returnTrain = TRUE, so without that flag each candidate model would
# be fitted on roughly one tenth of the rows and assessed on the other nine
# tenths, the opposite of 10-fold cross-validation.
cv_folds <- createFolds(train$y, k = 10, returnTrain = TRUE)

control <- trainControl(
  method = "cv",
  number = 10,
  savePredictions = "all",
  index = cv_folds,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  sampling = "up" # random oversampling to address class imbalance
)
# Model formula: outcome y against the eight predictors, each coerced to a
# factor inside the formula itself.
form1 <- as.factor(y) ~
  as.factor(region) + as.factor(sex) + as.factor(age) + as.factor(primary) +
  as.factor(work) + as.factor(voucher) + as.factor(bank) + as.factor(ill)
# Fit the k-nearest-neighbours classifier on the training rows using the
# resampling setup in `control`; candidate models are ranked by "Sens".
knnModel <- train(
  form1,
  data = train,
  method = "knn",
  trControl = control,
  metric = "Sens"
)

# Value of k chosen during tuning.
knnModel$bestTune$k

# Optional persistence of the fitted model (disabled):
# saveRDS(knnModel, "knn.model.RData")
# loaded_model <- readRDS("knn.model.RData")

# Inspect the fit: resampling results, summary, and the final model object.
knnModel
summary(knnModel)
knnModel$finalModel
# Sanity check on the training rows: predict class labels with the tuned
# model and compare them with the observed labels.
knn.predictiontrain <- predict(knnModel, train, type = "raw")
knn.predictiontrain
confusionMatrix(
  data = knn.predictiontrain,
  reference = as.factor(train$y),
  mode = "everything"
)
# Evaluate on the held-out test rows. Despite its name, `pred_prob.knn`
# holds predicted class labels (type = "raw"), not probabilities.
pred_prob.knn <- predict(knnModel, test, type = "raw")
pred_prob.knn
class(pred_prob.knn)
class(test$y)
confusionMatrix(
  data = pred_prob.knn,
  reference = as.factor(test$y),
  mode = "everything"
)

# Class probabilities for the test rows, passed to MLeval together with the
# observed labels; "X0" is treated as the positive class.
pred_prob.knnprob <- predict(knnModel, newdata = test, type = "prob")
knn.eval <- MLeval::evalm(
  data.frame(pred_prob.knnprob, test$y),
  positive = "X0"
)