# Donald

# Start coding here...
# Install readxl only when it is not already available, then attach it.
# An unconditional install.packages() would re-download the package on every
# run and needs network access each time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Workbook and worksheet that hold the patient data
file1 <- "Donald Radu new.xlsx"
sheet_name1 <- "Final_new"

# Read the worksheet, then convert the result to a plain data.frame
df <- read_excel(file1, sheet = sheet_name1)
df <- as.data.frame(df)

# Recode gender in place: "F" becomes "1" and "M" becomes "0".
# The codes are written as character strings, not numbers.
df$gender[df$gender == "F"] <- "1"
df$gender[df$gender == "M"] <- "0"

# stringr provides str_count()
library(stringr)

# Number of metastatic sites per patient: "locations" is treated as a
# comma-separated list, so the count is (number of commas) + 1.
df$num_metastases <- str_count(df$locations, ",") + 1

# Liver metastasis flag: TRUE when "locations" contains the code "2".
# NOTE(review): grepl("2", ...) also matches any multi-digit code that
# contains a 2 (e.g. "12") - confirm that location codes are single digits.
df$liver_meta <- grepl("2", df$locations)

# Histology flags derived from "patho" (same single-digit caveat as above):
# code "1" = signet ring histological type
df$signet <- grepl("1", df$patho)

# code "2" - presumably poorly differentiated, judging by the column name;
# TODO confirm against the data dictionary
df$poorly <- grepl("2", df$patho)

# code "3" - presumably tubular, judging by the column name; TODO confirm
df$tubular <- grepl("3", df$patho)

# Total time on treatment: time1 + time_maintenance + time2 + time3.
# Missing values are ignored, so a row with all four missing sums to 0.
df$tot_tot <- rowSums(
  df[, c("time1", "time_maintenance", "time2", "time3")],
  na.rm = TRUE
)

# Time on first-line treatment including maintenance:
# time1 + time_maintenance, missing values ignored.
df$tot_time1 <- rowSums(df[, c("time1", "time_maintenance")], na.rm = TRUE)

# Convert the TRUE/FALSE flags to 1/0.
# Each flag is converted from its own column; previously "tubular" was
# overwritten with the values of "poorly".
df$liver_meta <- ifelse(df$liver_meta, 1, 0)
df$signet <- ifelse(df$signet, 1, 0)
df$poorly <- ifelse(df$poorly, 1, 0)
df$tubular <- ifelse(df$tubular, 1, 0)

# Display the updated dataframe
df

# Total number of patients (patients that received line 1)
(sum_tot <- nrow(df))

# Rows with outcome code 2 on any line, or outcome code 0 on line 3.
# which() drops rows whose condition is NA, as subset() would.
df_og_data <- df[
  with(
    df,
    which(
      outcome_line1 == 2 | outcome_line2 == 2 |
        outcome_line3 == 2 | outcome_line3 == 0
    )
  ),
  ,
  drop = FALSE
]
df_og_data
nrow(df_og_data)

# Rows with outcome code 3 on any line
df_lost_data <- df[
  with(
    df,
    which(outcome_line1 == 3 | outcome_line2 == 3 | outcome_line3 == 3)
  ),
  ,
  drop = FALSE
]
df_lost_data
nrow(df_lost_data)

# Rows with outcome code 1 on any line
df_new <- df[
  with(
    df,
    which(outcome_line1 == 1 | outcome_line2 == 1 | outcome_line3 == 1)
  ),
  ,
  drop = FALSE
]
df_new

# Patients who received at least line 2: line2_yes is 1 and the line-2
# outcome is coded 0 or 1
line2_equal_or_more_data <- df[
  with(
    df,
    which(line2_yes == 1 & (outcome_line2 == 0 | outcome_line2 == 1))
  ),
  ,
  drop = FALSE
]
nrow(line2_equal_or_more_data)
mean(line2_equal_or_more_data[["age"]])

# Patients who received at least line 3: line3_yes is 1 and the line-3
# outcome is coded 0 or 1
line3_equal_or_more_data <- df[
  with(
    df,
    which(line3_yes == 1 & (outcome_line3 == 0 | outcome_line3 == 1))
  ),
  ,
  drop = FALSE
]
nrow(line3_equal_or_more_data)
mean(line3_equal_or_more_data[["age"]])

# Subsets

# Cohort: line1_yes is 1 and the line-1 outcome is coded 1
# (patients who received line 1 only)
line1_only_data <- df[
  with(df, which(line1_yes == 1 & outcome_line1 == 1)),
  ,
  drop = FALSE
]
nrow(line1_only_data)
mean(line1_only_data[["age"]])
mean(line1_only_data[["tot_time1"]])

# Cohort: line2_yes is 1 and the line-2 outcome is coded 1
# (patients who received line 1 and line 2 only)
line2_only_data <- df[
  with(df, which(line2_yes == 1 & outcome_line2 == 1)),
  ,
  drop = FALSE
]
nrow(line2_only_data)
mean(line2_only_data[["age"]])
mean(line2_only_data[["tot_time1"]])

# Cohort: line3_yes is 1 and the line-3 outcome is coded 1
# (patients who received line 1, line 2 and line 3 only)
line3_only_data <- df[
  with(df, which(line3_yes == 1 & outcome_line3 == 1)),
  ,
  drop = FALSE
]
nrow(line3_only_data)
mean(line3_only_data[["age"]])
mean(line3_only_data[["tot_time1"]])

line1_only_data

# Cohort sizes, and their combined total (this replaces the earlier sum_tot)
(sum_line1_only <- sum(line1_only_data[["line1_yes"]]))
(sum_line2_only <- sum(line2_only_data[["line2_yes"]]))
(sum_line3_only <- sum(line3_only_data[["line3_yes"]]))
(sum_tot <- sum_line1_only + sum_line2_only + sum_line3_only)

# Share of each cohort in the combined total (proportions between 0 and 1)
(perc_line1_only <- sum_line1_only / sum_tot)
(perc_line2_only <- sum_line2_only / sum_tot)
(perc_line3_only <- sum_line3_only / sum_tot)

# Age

# Draw one histogram of the "age" column for each of the three cohort
# data frames (line1_only_data, line2_only_data, line3_only_data).
hist(line1_only_data$age)
hist(line2_only_data$age)
hist(line3_only_data$age)

# Hidden output

# ---- Mean and median age per cohort ----
mean_age_line1 <- mean(line1_only_data[["age"]])
median_age_line1 <- median(line1_only_data[["age"]])

mean_age_line2 <- mean(line2_only_data[["age"]])
median_age_line2 <- median(line2_only_data[["age"]])

mean_age_line3 <- mean(line3_only_data[["age"]])
median_age_line3 <- median(line3_only_data[["age"]])

# Report the central-tendency figures, cohort by cohort
cat("Mean age of patients who received up to line 1: ", mean_age_line1, "\n")
cat("Median age of patients who received up to line 1: ", median_age_line1, "\n")
cat("Mean age of patients who received up to line 2: ", mean_age_line2, "\n")
cat("Median age of patients who received up to line 2: ", median_age_line2, "\n")
cat("Mean age of patients who received line 3: ", mean_age_line3, "\n")
cat("Median age of patients who received up to line 3: ", median_age_line3, "\n")

# ---- Standard deviation of age per cohort ----
sd_age_line1 <- sd(line1_only_data[["age"]])
sd_age_line2 <- sd(line2_only_data[["age"]])
sd_age_line3 <- sd(line3_only_data[["age"]])

cat("Standard deviation for the mean age of patients who received up to line 1: ", sd_age_line1, "\n")
cat("Standard deviation for the mean age of patients who received up to line 2: ", sd_age_line2, "\n")
cat("Standard deviation for the mean age of patients who received up to line 3: ", sd_age_line3, "\n")

# ---- Quartiles and interquartile range of age per cohort ----
# First (25%) and third (75%) quartiles
q1_age_line1 <- quantile(line1_only_data[["age"]], probs = 0.25)
q3_age_line1 <- quantile(line1_only_data[["age"]], probs = 0.75)

q1_age_line2 <- quantile(line2_only_data[["age"]], probs = 0.25)
q3_age_line2 <- quantile(line2_only_data[["age"]], probs = 0.75)

q1_age_line3 <- quantile(line3_only_data[["age"]], probs = 0.25)
q3_age_line3 <- quantile(line3_only_data[["age"]], probs = 0.75)

# Width of the interquartile range (Q3 - Q1); stored but not printed below
iqr_age_line1 <- q3_age_line1 - q1_age_line1
iqr_age_line2 <- q3_age_line2 - q1_age_line2
iqr_age_line3 <- q3_age_line3 - q1_age_line3

# The range is reported as its two endpoints, "Q1 - Q3"
cat("Interquartile range for age in line 1: ", q1_age_line1, "-", q3_age_line1, "\n")
cat("Interquartile range for age in line 2: ", q1_age_line2, "-", q3_age_line2, "\n")
cat("Interquartile range for age in line 3: ", q1_age_line3, "-", q3_age_line3, "\n")

# Age vectors of the three cohorts, reused by the tests that follow
line1_data_age <- line1_only_data[["age"]]
line2_data_age <- line2_only_data[["age"]]
line3_data_age <- line3_only_data[["age"]]

# Mann-Whitney U (Wilcoxon rank-sum) test on age: line 2 vs. line 3
mwu_test <- wilcox.test(x = line2_data_age, y = line3_data_age)

cat("Mann-Whitney U test results line2vsline3:\n")
cat("U statistic = ", mwu_test[["statistic"]], "\n")
cat("p-value = ", mwu_test[["p.value"]], "\n")


# Mann-Whitney U (Wilcoxon rank-sum) test on age: line 1 vs. line 2.
# mwu_test is reused, so afterwards it holds this second result.
mwu_test <- wilcox.test(x = line1_data_age, y = line2_data_age)

cat("Mann-Whitney U test results line1vsline2:\n")
cat("U statistic = ", mwu_test[["statistic"]], "\n")
cat("p-value = ", mwu_test[["p.value"]], "\n")

# car supplies leveneTest(); install it only when it is missing, then attach
if (!requireNamespace("car", quietly = TRUE)) install.packages("car")
library(car)

# Shapiro-Wilk normality test on age, one per cohort
shapiro_line1 <- shapiro.test(x = line1_data_age)
shapiro_line2 <- shapiro.test(x = line2_data_age)
shapiro_line3 <- shapiro.test(x = line3_data_age)

cat("Shapiro-Wilk test for line 1: ", shapiro_line1[["p.value"]], "\n")
cat("Shapiro-Wilk test for line 2: ", shapiro_line2[["p.value"]], "\n")
cat("Shapiro-Wilk test for line 3: ", shapiro_line3[["p.value"]], "\n")

# Long-format table: one row per patient with the age and a cohort label
age_group_data <- data.frame(
  age = c(line1_data_age, line2_data_age, line3_data_age),
  group = factor(
    rep(
      c("Line1", "Line2", "Line3"),
      times = c(
        length(line1_data_age),
        length(line2_data_age),
        length(line3_data_age)
      )
    )
  )
)

# Levene's test for equal variances across the three cohorts
levene_test <- leveneTest(age ~ group, data = age_group_data)

cat("Levene's test: ", levene_test[["Pr(>F)"]][1], "\n")

# Run the pairwise t-tests only when every Shapiro-Wilk p-value and the
# Levene p-value exceed 0.05; otherwise just report that they were skipped
if (!all(c(
  shapiro_line1[["p.value"]],
  shapiro_line2[["p.value"]],
  shapiro_line3[["p.value"]],
  levene_test[["Pr(>F)"]][1]
) > 0.05)) {
  cat("Assumptions for the t-test are not met.\n")
} else {
  t_test_line1_line2 <- t.test(x = line1_data_age, y = line2_data_age)
  t_test_line1_line3 <- t.test(x = line1_data_age, y = line3_data_age)
  t_test_line2_line3 <- t.test(x = line2_data_age, y = line3_data_age)

  cat("T-test result for line 1 vs line 2: ", t_test_line1_line2[["p.value"]], "\n")
  cat("T-test result for line 1 vs line 3: ", t_test_line1_line3[["p.value"]], "\n")
  cat("T-test result for line 2 vs line 3: ", t_test_line2_line3[["p.value"]], "\n")
}

‌
‌
‌

# Age