1. Cleaning and preparing the data
# Create a vector of German terms and their English equivalents
german_terms <- c(
"keine Operation" = "no surgery",
"Nasenseptum + AE" = "nasal septum + middle ear",
"Zahnimplantat OK" = "upper jaw dental implant",
"Tonsillektomie" = "tonsillectomy",
"Resektion Speicheldrüse UK li" = "resection of left submandibular gland",
"Adenoidektomie" = "adenoidectomy",
"TE+AE" = "tympanic membrane + middle ear",
"Beck´sche Bohrung li" = "left Beck's drilling",
"Sinus max Fensterung li" = "left maxillary sinus fenestration",
"Ohrmuschelrekonstruktion bei Knorpelhypertrophie" = "auricular reconstruction due to cartilage hypertrophy",
"Verstärkung rundes Fenster re" = "reinforcement of right round window",
"Nasenseptum + TE" = "nasal septum + tympanic membrane",
"Tympanoplastik li" = "left tympanoplasty",
"Nasenseptum" = "nasal septum", "selten (1-2/y)" = "rarely", "häufig" = "frequently (>2/y)", "nie" = "never"
)
replace_german_terms <- function(text) {
# Replace German terms with English equivalents
for (term in names(german_terms)) {
text <- stringr::str_replace_all(text, term, german_terms[term])
}
return(text)
}
# Load necessary library
library(dplyr)
# Read the CSV file into a dataframe
data <- read.csv('druckausgleich.csv', sep = ",", dec =",")
# Remove empty rows
cleaned_data <- data %>%
filter(!is.na(X)) |>
janitor::clean_names() |>
dplyr::select(-x,-geschlecht_r, -laufend, -var00001) |>
dplyr::mutate_if(is.character, stringr::str_trim) |>
mutate_if(is.character, factor) |>
dplyr::mutate(operationen = replace_german_terms(operationen))
# Replace terms in all columns
cleaned_data <- cleaned_data %>%
mutate(across(everything(), ~ stringr::str_replace_all(., c(
"selten" = "rarely",
"häufig" = "frequently",
"nie" = "never",
"männlich" = "m",
"weiblich" = "f",
"kein" = "no",
"nicht beeinträchtigt" = "not impaired",
"bisher nicht aufgefallen" = "not noticed so far",
"vorhanden, aber nicht beeinträchtigt" = "present but not impaired",
"beeinträchtigt" = "impaired",
"stark beeinträchtigt" = "severely impaired", "Abflug" = "takeoff", "Abtauchen" = "diving", "Landeanflug" = "approach", "Auftauchen" = "surfacing",
"Rechtshänder" = "right-handed", "Linkshänder" = "left-handed",
"noe Allergien" = "no allergies", "allergische Rhinitis" = "allergic rhinitis", "Medikamente" = "medications", "sonstige" = "other", "Lebensmittel" = "food",
"regelrecht" = "normal", "flach" = "flat",
"vernarbt" = "scarred", "myringosklerotische Plaques vorne unten" = "myringosclerotic plaques front bottom", "etwas atroph" = "slightly atrophic", "etwas schuppig belegt" = "slightly scaly", "leicht vernarbt, Rötung, Zn Spülung bei Cerumen" = "slightly scarred, redness, Zn irrigation with cerumen", "regelrecht, etwas Cerumenauflagerung" = "normal, slight cerumen overlay", "minimal vernarbt" = "minimally scarred", "etwas gefäßinjiziert" = "slightly vascularized", "gestielte Exostose" = "pedunculated exostosis", "myringosklerotische Plaques sichelförmig hinten und vorne unten" = "myringosclerotic plaques crescent-shaped back and front bottom", "Rötung, Zn Spülung bei Cerumen" = "redness, Zn irrigation with cerumen", "intaktes, weiß mattes TF, Va Zn TyPla" = "intact, white matte TM, Va Zn TyPla",
"etwas gefäßinjiziert" = "slightly vascularized", "unverändert" = "unchanged", "minimal gefäßinjiziert" = "minimally vascularized", "deutlich gefäßinjiziert" = "clearly vascularized",
"prompt" = "prompt", "prompt mit schneller Rückstellbewegung" = "prompt with quick return movement", "leicht verzögert" = "slightly delayed", "schnelle, kleine Bewegung" = "quick, small movement", "nicht beeinträchtigt" = "not impaired", "bisher nicht aufgefallen" = "not noticed so far", "vorhanden, aber nicht beeinträchtigt" = "present, but not impaired", "beeinträchtigt" = "impaired", "stark beeinträchtigt" = "severely impaired", "schwach" = "weak", "schwach und verzögert" = "weak and delayed", "nicht erkennbar" = "not recognizable", "atemsynchron" = "breath-synchronous",
"vollständig" = "complete", "Druckkammer fehlt" = "pressure chamber missing", "Tymp+TMM fehlt" = "Tymp+TMM missing", "TMM fehlt" = "TMM missing",
"bestanden" = "passed", "Abbruch 1. Tauchgang" = "aborted 1st dive", "Abbruch 2. Tauchgang" = "aborted 2nd dive", "Daten technisch unvollständig" = "data technically incomplete", "Taucher" = "diver", "Nicht-Taucher" = "non-diver", "Nicht" = "non", "immer" = "always", "eingeschränkt" = "restricted", "aber" = "but", "vorhanden" = "present", "stark" = "strongly", "schwach" = "slightly", "und" = "and", "verzögert" = "delayed", "Daten" = "data", "technisch" = "technically"
))))
# Display the cleaned data
cleaned_data |>
glimpse()
# Load necessary libraries
library(dplyr)
library(stringr)
# Assuming df is the dataframe containing the columns
# Convert columns with numeric values stored as characters to numeric
# Function to clean and convert character columns to numeric
clean_numeric_column <- function(column) {
column <- str_replace_all(column, "'", "") # Remove single quotes
as.numeric(column)
}
# Identify columns that are characters but contain numeric values, including negative values and NAs
numeric_columns <- cleaned_data %>%
select_if(~ all(str_detect(., "^-?[0-9.']*$"))) %>%
names()
# Convert these columns to numeric
cleaned_data <- cleaned_data %>%
mutate(across(all_of(numeric_columns), clean_numeric_column))
# Select only character columns
character_columns <- cleaned_data %>%
select(where(is.character)) %>%
names()
# Combine cleaned numeric columns with original character columns
final_data <- cleaned_data %>%
select(all_of(character_columns), all_of(numeric_columns))
glimpse(final_data)
# Select and print column names containing either 're' or 'li'
selected_columns <- colnames(final_data)[str_detect(colnames(final_data), "re|li")]
selected_columns
unique(cleaned_data$allergien)
final_data <- final_data |>
dplyr::rename_with(
.cols = where(is.numeric),
.fn = ~ gsub("_(re|li)(.*)", "\\2_\\1", .)
) |>
rename(ets7_re = ets7re, ets7_li = ets7li) |>
tidyr::pivot_longer(
cols = tidyselect::matches(".*_(re|li)$"),
names_to = c(".value", "side"),
names_pattern = "(.*)_(re|li)"
) |>
dplyr::mutate(side = dplyr::recode(side, re = "right", li = "left")) |>
filter_all(any_vars(!is.na(.))) |>
mutate_if(is.character, factor)
glimpse(final_data |>
select_if(is.numeric))
# Summarize the measurements and create new columns
final_data <- final_data %>%
rowwise() %>%
mutate(
ohr_ab_sl_summe = sum(c_across(matches("^ohr_ab_sl_\\d+$")), na.rm = TRUE),
ohr_auf_sl_summe = sum(c_across(matches("^ohr_sl_auf_\\d+$")), na.rm = TRUE),
ohr_ab_fa_summe = sum(c_across(matches("^ohr_ab_fa_\\d+$")), na.rm = TRUE),
ohr_auf_fa_summe = sum(c_across(matches("^ohr_fa_auf_\\d+$")), na.rm = TRUE),
taster_ab_sl_summe = sum(c_across(matches("^taster_ab_sl_\\d+$")), na.rm = TRUE),
taster_auf_sl_summe = sum(c_across(matches("^taster_auf_sl_\\d+$")), na.rm = TRUE),
taster_ab_fa_summe = sum(c_across(matches("^taster_ab_fa_\\d+$")), na.rm = TRUE),
taster_auf_fa_summe = sum(c_across(matches("^taster_auf_fa_\\d+$")), na.rm = TRUE)
) %>%
ungroup()
# Remove the original measurement columns
druckversuch <- final_data %>%
select(-matches("^ohr_sl_auf_\\d+$|^ohr_ab_sl_\\d+$|^ohr_ab_fa_\\d+$|^ohr_fa_auf_\\d+$|^taster_ab_sl_\\d+$|^taster_auf_sl_\\d+$|^taster_ab_fa_\\d+$|^taster_auf_fa_\\d+$"))
The dataset was cleaned and transformed. Especially left and right side were defined as factors and put together in a long format. Further the words and terms were translated into English. I will keep on with defining the data columns, checking the data for normality and performing descriptive statistics on the data.
2. Defining the column meanings
-
proband
: Identifier for the participant. -
kohorte
: Cohort group (e.g., diver, non-diver). -
geschlecht
: Gender (m = male, f = female). -
operationen
: History of surgeries. -
rhinosinusitis
: Frequency of rhinosinusitis (sinus infection). -
otitiden: Frequency of otitis (ear infections).
-
rauchen: Smoking status.
-
druckexposition: Frequency of pressure exposure.
-
tbs_subj: Subjective assessment of ear function.
-
tbs_wann: Time of subjective assessment.
-
handigkeit: Handedness (right-handed, left-handed).
-
allergien: Allergies.
-
indikator: Indicator of completeness.
-
druckkammer: Result of pressure chamber test.
-
alter: Age of the participant.
-
tauchgange: Number of dives.
-
ets7re: ETS7 measurement for the right ear.
-
ets7li: ETS7 measurement for the left ear.
-
etdq7: ETDQ7 score (Eustachian Tube Dysfunction Questionnaire).
-
taster_ab_sl_1: Measurement 1 for "ab" (down) and "sl" (sound conduction).
-
taster_ab_sl_2: Measurement 2 for "ab" (down) and "sl" (sound conduction).
-
taster_ab_sl_3: Measurement 3 for "ab" (down) and "sl" (sound conduction).
-
taster_auf_sl_1: Measurement 1 for "auf" (up) and "sl" (sound conduction).
-
taster_auf_sl_2: Measurement 2 for "auf" (up) and "sl" (sound conduction).
-
taster_auf_sl_3: Measurement 3 for "auf" (up) and "sl" (sound conduction).
-
taster_ab_fa_1: Measurement 1 for "ab" (down) and "fa" (frequency analysis).
-
taster_ab_fa_2: Measurement 2 for "ab" (down) and "fa" (frequency analysis).
-
taster_ab_fa_3: Measurement 3 for "ab" (down) and "fa" (frequency analysis).
-
taster_auf_fa_1: Measurement 1 for "auf" (up) and "fa" (frequency analysis).
-
taster_auf_fa_2: Measurement 2 for "auf" (up) and "fa" (frequency analysis).
-
taster_auf_fa_3: Measurement 3 for "auf" (up) and "fa" (frequency analysis).
-
taster_ab_sl_summe: Sum of "ab" (down) and "sl" (sound conduction) measurements.
-
taster_auf_sl_summe: Sum of "auf" (up) and "sl" (sound conduction) measurements.
-
taster_ab_fa_summe: Sum of "ab" (down) and "fa" (frequency analysis) measurements.
-
taster_auf_fa_summe: Sum of "auf" (up) and "fa" (frequency analysis) measurements.
-
side: Side (right or left).
-
tymp: Tympanometry result.
-
subj_valsalva: Subjective Valsalva maneuver result.
-
subj_schlucken: Subjective swallowing result.
-
t_fpra: Tympanic membrane pre-test result.
-
t_fpost: Tympanic membrane post-test result.
-
obj_valsalva: Objective Valsalva maneuver result.
-
r_tymp: Resistance or reflectance in tympanometry.
-
v_tymp: Volume of the ear canal in tympanometry.
-
s_tymp: Stiffness or compliance in tympanometry.
-
gehorgangvol: Ear canal volume.
-
resonanzfreq: Resonance frequency.
-
spitzendruck: Peak pressure.
-
c226hz: Compliance at 226 Hz.
-
x_1000hz: Measurement at 1000 Hz.
-
tmm30: Tympanometric measurement at 30 Hz.
-
tmm30_t: Tympanometric measurement at 30 Hz, specific parameter (e.g., time).
-
tmm30_p: Tympanometric measurement at 30 Hz, peak pressure.
-
tmm40: Tympanometric measurement at 40 Hz.
-
tm_mr40_t: Tympanometric measurement at 40 Hz, specific method or response, parameter (e.g., time).
-
tmm40_p: Tympanometric measurement at 40 Hz, peak pressure.
-
tmm50: Tympanometric measurement at 50 Hz.
-
tm_mr50_t: Tympanometric measurement at 50 Hz, specific method or response, parameter (e.g., time).
-
tmm50_p: Tympanometric measurement at 50 Hz, peak pressure.
-
tmm40_t: Tympanometric measurement at 40 Hz, specific parameter (e.g., time).
-
tmm50_t: Tympanometric measurement at 50 Hz, specific parameter (e.g., time).
-
ohr_ab_sl_1: Ear measurement 1 for "ab" (down) and "sl" (sound conduction).
-
ohr_ab_sl_2: Ear measurement 2 for "ab" (down) and "sl" (sound conduction).
-
ohr_ab_sl_3: Ear measurement 3 for "ab" (down) and "sl" (sound conduction).
-
ohr_sl_auf_1: Ear measurement 1 for "auf" (up) and "sl" (sound conduction).
-
ohr_sl_auf_2: Ear measurement 2 for "auf" (up) and "sl" (sound conduction).
-
ohr_sl_auf_3: Ear measurement 3 for "auf" (up) and "sl" (sound conduction).
-
ohr_ab_fa_1: Ear measurement 1 for "ab" (down) and "fa" (frequency analysis).
-
ohr_ab_fa_2: Ear measurement 2 for "ab" (down) and "fa" (frequency analysis).
-
ohr_ab_fa_3: Ear measurement 3 for "ab" (down) and "fa" (frequency analysis).
-
ohr_fa_auf_1: Ear measurement 1 for "auf" (up) and "fa" (frequency analysis).
-
ohr_fa_auf_2: Ear measurement 2 for "auf" (up) and "fa" (frequency analysis).
-
ohr_fa_auf_3: Ear measurement 3 for "auf" (up) and "fa" (frequency analysis).
-
ohr_ab_sl_summe: Sum of ear measurements for "ab" (down) and "sl" (sound conduction).
-
ohr_auf_sl_summe: Sum of ear measurements for "auf" (up) and "sl" (sound conduction).
-
ohr_ab_fa_summe: Sum of ear measurements for "ab" (down) and "fa" (frequency analysis).
-
ohr_auf_fa_summe: Sum of ear measurements for "auf" (up) and "fa" (frequency analysis).
These interpretations are based on common medical abbreviations and might differ.
3. Descriptive statistics