# Cleaning script for the APIS_PUF_2019_Person.CSV dataset
suppressPackageStartupMessages(library(tidyverse, warn.conflicts = FALSE))
suppressPackageStartupMessages(library(tidymodels, warn.conflicts = FALSE))

# Load necessary libraries
library(tidyr)

# Read the raw person-level records (path is relative to the working directory)
APIS_PUF_2019_Person <- read.csv("APIS_PUF_2019_Person.CSV")

# Drop the columns considered unnecessary. They are removed by position
# (2-4, 10, 14, 16-19 and 23-24), so this relies on the CSV keeping its
# current column order.
data <- APIS_PUF_2019_Person[-c(2:4, 10, 14, 16:19, 23:24)]


library(dplyr)
# Replace the raw coded column names with readable ones (new name = old name)
data <- rename(
  data,
  sex = "C04_SEY",
  age = "C05_AGF",
  marital = "C06_STATUS",
  primary = "C08_PRE_PRIM",
  Attending_school = "C09_CUR_ATTEND",
  reason = "C11_YNOT_ATTND",
  Highest_educational_attainment = "C13_HGC",
  work = "C14_DID_WORK",
  ill = "C16_ILL",
  `SUC/LUC` = "C23_A",
  TES = "C23_B",
  Student_Loan = "C23_C"
)
# Missing reasons for not attending are coded 0 (unknown status)
data$reason[is.na(data$reason)] <- 0

# Remove observations whose reason for not attending is code 6 (already
# graduated). read.csv() parses zero-padded numeric codes such as "06" as the
# number 6, so comparing against the string "06" alone never matches and no
# row would be dropped. %in% compares the values as text here, and both
# spellings are listed so the filter works for numeric and character columns.
data <- filter(data, !(reason %in% c("6", "06")))

# Rows with an attendance code of 1, 2 or 3 are still in school, so their
# reason is set to "12". Assigning a string makes the whole column character.
data$reason[data$Attending_school %in% 1:3] <- "12"

# Collapse the attendance codes into a binary flag: codes 2 and 3 join code 1
# (still attending) and code 4 becomes 0 (not attending)
data$Attending_school[data$Attending_school %in% c(2, 3)] <- 1
data$Attending_school[data$Attending_school == 4] <- 0

# Keep only the rows that have both an attendance flag and a work value
library(tidyr)
data <- drop_na(data, Attending_school, work)


# Student loan: a missing value means the loan was not availed ("0").
# Storing "0" turns the column into text, so a logical TRUE then reads "TRUE"
# (presumably read.csv() parsed the raw code as logical - confirm against the
# CSV); those rows are marked "1" (availed).
data$Student_Loan <- replace(data$Student_Loan, is.na(data$Student_Loan), "0")
data$Student_Loan <- replace(
  data$Student_Loan,
  data$Student_Loan == "TRUE",
  "1"
)

# SUC/LUC: a missing value means not availed (0)
data[["SUC/LUC"]] <- replace(data[["SUC/LUC"]], is.na(data[["SUC/LUC"]]), 0)

# TES: a missing value means not availed (0); code 2 means availed (1)
data$TES <- replace(data$TES, is.na(data$TES), 0)
data$TES <- replace(data$TES, data$TES == 2, 1)

# Open the spreadsheet-style viewer only in interactive sessions: View() needs
# a GUI and can fail when the script is run non-interactively (e.g. Rscript)
if (interactive()) {
  View(data)
}

# To export the cleaned dataset, uncomment one of the lines below and replace
# the path with the destination file. readxl::read_xlsx() only reads
# workbooks, so it cannot be used for saving.
# write.csv(data, "path/cleaned_data.csv", row.names = FALSE)
# writexl::write_xlsx(data, "path/cleaned_data.xlsx") # needs package writexl

# Print the cleaned dataset (auto-printed when evaluated at top level)
data