# Thesis - Data Cleaning
suppressPackageStartupMessages(library(tidyverse, warn.conflicts = FALSE))
suppressPackageStartupMessages(library(tidymodels, warn.conflicts = FALSE))
# Load necessary libraries
library(tidyr)
# Read the raw person-level file; the path is relative to the working directory.
APIS_PUF_2019_Person <- read.csv("APIS_PUF_2019_Person.CSV")
# Keep every column except the ones at these positions, which are not needed.
data <- APIS_PUF_2019_Person[-c(2:4, 10, 14, 16:19, 23:24)]
library(dplyr)
# Give the survey-coded columns readable names (new name = old name).
# NOTE(review): "C04_SEY" and "C05_AGF" look like they could be typos for
# "C04_SEX" / "C05_AGE" -- confirm against the column names in the CSV.
data <- rename(
  data,
  sex = "C04_SEY",
  age = "C05_AGF",
  marital = "C06_STATUS",
  primary = "C08_PRE_PRIM",
  Attending_school = "C09_CUR_ATTEND",
  reason = "C11_YNOT_ATTND",
  Highest_educational_attainment = "C13_HGC",
  work = "C14_DID_WORK",
  ill = "C16_ILL",
  `SUC/LUC` = "C23_A",
  TES = "C23_B",
  Student_Loan = "C23_C"
)
# A missing reason for not attending becomes code 0 (unknown status).
data$reason[is.na(data$reason)] <- 0
# Drop respondents whose reason for not attending is code 06 (already
# graduated). The code is matched as text in both its bare and zero-padded
# spellings: read.csv() converts an all-numeric column to numbers, so "06" is
# stored as 6, and a plain `reason != "06"` test would then never match it.
data <- filter(data, !(as.character(reason) %in% c("6", "06")))
# Anyone coded 1, 2 or 3 in Attending_school is still in school, so their
# reason is set to code 12. Rows with a missing Attending_school are untouched.
data$reason[data$Attending_school %in% 1:3] <- "12"
# Collapse Attending_school to a 0/1 flag: codes 2 and 3 join code 1
# (still attending) and code 4 becomes 0 (not attending).
data$Attending_school[data$Attending_school %in% 2:3] <- 1
data$Attending_school[data$Attending_school %in% 4] <- 0
# Remove NA in specific columns
library(tidyr)
# Keep only the rows where both Attending_school and work are recorded.
data <- drop_na(data, Attending_school, work)
# Recode the three assistance columns so that a missing value means
# "did not avail". Student_Loan is written with the strings "0" and "1";
# SUC/LUC and TES are written with the numbers 0 and 1.

# Student loan: NA -> "0" (did not avail), "TRUE" -> "1" (did avail).
data$Student_Loan <- replace(
  data$Student_Loan, is.na(data$Student_Loan), "0"
)
data$Student_Loan <- replace(
  data$Student_Loan, data$Student_Loan == "TRUE", "1"
)

# SUC/LUC: NA -> 0 (did not avail).
data[["SUC/LUC"]] <- replace(
  data[["SUC/LUC"]], is.na(data[["SUC/LUC"]]), 0
)

# TES: NA -> 0 (did not avail), code 2 -> 1 (did avail).
data$TES <- replace(data$TES, is.na(data$TES), 0)
data$TES <- replace(data$TES, data$TES == 2, 1)
# Open the cleaned data in the spreadsheet-style viewer, but only in an
# interactive session: View() needs a GUI and may fail when the script is run
# non-interactively (e.g. with Rscript).
if (interactive()) {
  View(data)
}
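# To export the cleaned data (readxl can only read files, so a writer such as
# writexl is needed):
# writexl::write_xlsx(data, "path/cleaned_data.xlsx")
# Replace "path/" with the folder where the cleaned dataset should be saved.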