# Name: ADSL
#
# Label: Subject Level Analysis Dataset
#
# Input: dm, ex, ds
library(admiral)
library(admiral.test) # Contains example datasets from the CDISC pilot project
library(dplyr)
library(lubridate)
library(stringr)
library(tidylog)
# Load source datasets ----
# Use e.g. haven::read_sas to read in .sas7bdat, or other suitable functions
# as needed and assign to the variables below.
# For illustration purposes read in admiral test data
data("admiral_dm")
data("admiral_ds")
data("admiral_ex")
data("admiral_ae")
data("admiral_lb")
dm <- admiral_dm
ds <- admiral_ds
ex <- admiral_ex
ae <- admiral_ae
lb <- admiral_lb
# When SAS datasets are imported into R using haven::read_sas(), missing
# character values from SAS appear as "" characters in R, instead of appearing
# as NA values. Further details can be obtained via the following link:
# https://pharmaverse.github.io/admiral/cran-release/articles/admiral.html#handling-of-missing-values
dm <- convert_blanks_to_na(dm)
ds <- convert_blanks_to_na(ds)
ex <- convert_blanks_to_na(ex)
ae <- convert_blanks_to_na(ae)
lb <- convert_blanks_to_na(lb)
# User defined functions ----
# Here are some examples of how you can create your own functions that
# operates on vectors, which can be used in `mutate`.
# Grouping
format_racegr1 <- function(x) {
case_when(
x == "WHITE" ~ "White",
x != "WHITE" ~ "Non-white",
TRUE ~ "Missing"
)
}
format_region1 <- function(x) {
case_when(
x %in% c("CAN", "USA") ~ "NA",
!is.na(x) ~ "RoW",
TRUE ~ "Missing"
)
}
format_lddthgr1 <- function(x) {
case_when(
x <= 30 ~ "<= 30",
x > 30 ~ "> 30",
TRUE ~ NA_character_
)
}
# EOSSTT mapping
format_eoxxstt <- function(x) {
case_when(
x %in% c("COMPLETED") ~ "COMPLETED",
!(x %in% c("COMPLETED", "SCREEN FAILURE")) & !is.na(x) ~ "DISCONTINUED",
x %in% c("SCREEN FAILURE") ~ NA_character_,
TRUE ~ "ONGOING"
)
}
# Derivations ----
# impute start and end time of exposure to first and last respectively, do not impute date
ex_ext <- ex %>%
derive_vars_dtm(
dtc = EXSTDTC,
new_vars_prefix = "EXST"
) %>%
derive_vars_dtm(
dtc = EXENDTC,
new_vars_prefix = "EXEN",
time_imputation = "last"
)
adsl <- dm %>%
## derive treatment variables (TRT01P, TRT01A) ----
mutate(TRT01P = ARM, TRT01A = ACTARM) %>%
## derive treatment start date (TRTSDTM) ----
derive_vars_merged(
dataset_add = ex_ext,
filter_add = (EXDOSE > 0 |
(EXDOSE == 0 &
str_detect(EXTRT, "PLACEBO"))) &
!is.na(EXSTDTM),
new_vars = vars(TRTSDTM = EXSTDTM, TRTSTMF = EXSTTMF),
order = vars(EXSTDTM, EXSEQ),
mode = "first",
by_vars = vars(STUDYID, USUBJID)
) %>%
## derive treatment end date (TRTEDTM) ----
derive_vars_merged(
dataset_add = ex_ext,
filter_add = (EXDOSE > 0 |
(EXDOSE == 0 &
str_detect(EXTRT, "PLACEBO"))) & !is.na(EXENDTM),
new_vars = vars(TRTEDTM = EXENDTM, TRTETMF = EXENTMF),
order = vars(EXENDTM, EXSEQ),
mode = "last",
by_vars = vars(STUDYID, USUBJID)
) %>%
## Derive treatment end/start date TRTSDT/TRTEDT ----
derive_vars_dtm_to_dt(source_vars = vars(TRTSDTM, TRTEDTM)) %>%
## derive treatment duration (TRTDURD) ----
derive_var_trtdurd()
## Disposition dates, status ----
# convert character date to numeric date without imputation
ds_ext <- derive_vars_dt(
ds,
dtc = DSSTDTC,
new_vars_prefix = "DSST"
)
# Screen fail date
adsl <- adsl %>%
derive_vars_merged(
dataset_add = ds_ext,
by_vars = vars(STUDYID, USUBJID),
new_vars = vars(SCRFDT = DSSTDT),
filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD == "SCREEN FAILURE"
) %>%
derive_vars_merged(
dataset_add = ds_ext,
by_vars = vars(STUDYID, USUBJID),
new_vars = vars(EOSDT = DSSTDT),
filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD != "SCREEN FAILURE"
) %>%
# EOS status
derive_var_disposition_status(
dataset_ds = ds_ext,
new_var = EOSSTT,
status_var = DSDECOD,
format_new_var = format_eoxxstt,
filter_ds = DSCAT == "DISPOSITION EVENT"
) %>%
# Last retrieval date
derive_vars_merged(
dataset_add = ds_ext,
by_vars = vars(STUDYID, USUBJID),
new_vars = vars(FRVDT = DSSTDT),
filter_add = DSCAT == "OTHER EVENT" & DSDECOD == "FINAL RETRIEVAL VISIT"
) %>%
# Derive Randomization Date
derive_vars_merged(
dataset_add = ds_ext,
filter_add = DSDECOD == "RANDOMIZED",
by_vars = vars(STUDYID, USUBJID),
new_vars = vars(RANDDT = DSSTDT)
) %>%
# Death date - impute partial date to first day/month
derive_vars_dt(
new_vars_prefix = "DTH",
dtc = DTHDTC,
highest_imputation = "M",
date_imputation = "first"
) %>%
# Relative Day of Death
derive_vars_duration(
new_var = DTHADY,
start_date = TRTSDT,
end_date = DTHDT
) %>%
# Elapsed Days from Last Dose to Death
derive_vars_duration(
new_var = LDDTHELD,
start_date = TRTEDT,
end_date = DTHDT,
add_one = FALSE
)
## Last known alive date ----
ae_start_date <- date_source(
dataset_name = "ae",
date = AESTDT
)
ae_end_date <- date_source(
dataset_name = "ae",
date = AEENDT
)
lb_date <- date_source(
dataset_name = "lb",
date = LBDT,
filter = !is.na(LBDT)
)
trt_end_date <- date_source(
dataset_name = "adsl",
date = TRTEDT
)
# impute AE start and end date to first
ae_ext <- ae %>%
derive_vars_dt(
dtc = AESTDTC,
new_vars_prefix = "AEST",
highest_imputation = "M"
) %>%
derive_vars_dt(
dtc = AEENDTC,
new_vars_prefix = "AEEN",
highest_imputation = "M"
)
# impute LB date to first
lb_ext <- derive_vars_dt(
lb,
dtc = LBDTC,
new_vars_prefix = "LB",
highest_imputation = "M"
)
adsl <- adsl %>%
derive_var_extreme_dt(
new_var = LSTALVDT,
ae_start_date, ae_end_date, lb_date, trt_end_date,
source_datasets = list(ae = ae_ext, lb = lb_ext, adsl = adsl),
mode = "last"
) %>%
## Age group ----
derive_var_agegr_fda(
age_var = AGE,
new_var = AGEGR1
) %>%
## Safety population ----
derive_var_merged_exist_flag(
dataset_add = ex,
by_vars = vars(STUDYID, USUBJID),
new_var = SAFFL,
condition = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO")))
) %>%
## Groupings and others variables ----
mutate(
RACEGR1 = format_racegr1(RACE),
REGION1 = format_region1(COUNTRY),
LDDTHGR1 = format_lddthgr1(LDDTHELD),
DTH30FL = if_else(LDDTHGR1 == "<= 30", "Y", NA_character_),
DTHA30FL = if_else(LDDTHGR1 == "> 30", "Y", NA_character_),
DTHB30FL = if_else(DTHDT <= TRTSDT + 30, "Y", NA_character_),
DOMAIN = NULL
)
# Save output ----
dir <- tempdir() # Change to whichever directory you want to save the dataset in
save(adsl, file = file.path(dir, "adsl.rda"), compress = "bzip2")