Skip to contents

Derive death cause (DTHCAUS) and add traceability variables if required.

Usage

derive_var_dthcaus(
  dataset,
  ...,
  source_datasets,
  subject_keys = get_admiral_option("subject_keys")
)

Arguments

dataset

Input dataset.

The variables specified by subject_keys are required.

...

Objects of class "dthcaus_source" created by dthcaus_source().

source_datasets

A named list containing datasets in which to search for the death cause

subject_keys

Variables to uniquely identify a subject

A list of quosures where the expressions are symbols as returned by vars() is expected.

Value

The input dataset with DTHCAUS variable added.

Details

This function derives DTHCAUS along with the user-defined traceability variables, if required. If a subject has death info from multiple sources, the one from the source with the earliest death date will be used. If dates are equivalent, the first source will be kept, so the user should provide the inputs in the preferred order.

Author

Shimeng Huang, Samia Kabi, Thomas Neitmann, Tamara Senior

Examples

library(tibble)
library(dplyr, warn.conflicts = FALSE)
library(lubridate)

adsl <- tribble(
  ~STUDYID,  ~USUBJID,
  "STUDY01", "PAT01",
  "STUDY01", "PAT02",
  "STUDY01", "PAT03"
)
ae <- tribble(
  ~STUDYID,  ~USUBJID, ~AESEQ, ~AEDECOD,       ~AEOUT,  ~AEDTHDTC,
  "STUDY01", "PAT01",  12,     "SUDDEN DEATH", "FATAL", "2021-04-04"
) %>%
  mutate(
    AEDTHDT = ymd(AEDTHDTC)
  )
ds <- tribble(
  ~STUDYID, ~USUBJID, ~DSSEQ, ~DSDECOD, ~DSTERM, ~DSSTDTC,
  "STUDY01", "PAT02", 1, "INFORMED CONSENT OBTAINED", "INFORMED CONSENT OBTAINED", "2021-04-03",
  "STUDY01", "PAT02", 2, "RANDOMIZATION", "RANDOMIZATION", "2021-04-11",
  "STUDY01", "PAT02", 3, "DEATH", "DEATH DUE TO PROGRESSION OF DISEASE", "2022-02-01",
  "STUDY01", "PAT03", 1, "DEATH", "POST STUDY REPORTING OF DEATH", "2022-03-03"
) %>%
  mutate(
    DSSTDT = ymd(DSSTDTC)
  )

# Derive `DTHCAUS` only - for on-study deaths only
src_ae <- dthcaus_source(
  dataset_name = "ae",
  filter = AEOUT == "FATAL",
  date = AEDTHDT,
  mode = "first",
  dthcaus = AEDECOD
)

src_ds <- dthcaus_source(
  dataset_name = "ds",
  filter = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM),
  date = DSSTDT,
  mode = "first",
  dthcaus = DSTERM
)

derive_var_dthcaus(adsl, src_ae, src_ds, source_datasets = list(ae = ae, ds = ds))
#> # A tibble: 3 x 3
#>   STUDYID USUBJID DTHCAUS                            
#>   <chr>   <chr>   <chr>                              
#> 1 STUDY01 PAT01   SUDDEN DEATH                       
#> 2 STUDY01 PAT02   DEATH DUE TO PROGRESSION OF DISEASE
#> 3 STUDY01 PAT03   NA                                 

# Derive `DTHCAUS` and add traceability variables - for on-study deaths only
src_ae <- dthcaus_source(
  dataset_name = "ae",
  filter = AEOUT == "FATAL",
  date = AEDTHDT,
  mode = "first",
  dthcaus = AEDECOD,
  traceability_vars = vars(DTHDOM = "AE", DTHSEQ = AESEQ)
)

src_ds <- dthcaus_source(
  dataset_name = "ds",
  filter = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM),
  date = DSSTDT,
  mode = "first",
  dthcaus = DSTERM,
  traceability_vars = vars(DTHDOM = "DS", DTHSEQ = DSSEQ)
)

derive_var_dthcaus(adsl, src_ae, src_ds, source_datasets = list(ae = ae, ds = ds))
#> # A tibble: 3 x 5
#>   STUDYID USUBJID DTHCAUS                             DTHDOM DTHSEQ
#>   <chr>   <chr>   <chr>                               <chr>   <dbl>
#> 1 STUDY01 PAT01   SUDDEN DEATH                        AE         12
#> 2 STUDY01 PAT02   DEATH DUE TO PROGRESSION OF DISEASE DS          3
#> 3 STUDY01 PAT03   NA                                  NA         NA

# Derive `DTHCAUS` as above - now including post-study deaths with different `DTHCAUS` value
src_ae <- dthcaus_source(
  dataset_name = "ae",
  filter = AEOUT == "FATAL",
  date = AEDTHDT,
  mode = "first",
  dthcaus = AEDECOD,
  traceability_vars = vars(DTHDOM = "AE", DTHSEQ = AESEQ)
)

src_ds <- dthcaus_source(
  dataset_name = "ds",
  filter = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM),
  date = DSSTDT,
  mode = "first",
  dthcaus = DSTERM,
  traceability_vars = vars(DTHDOM = "DS", DTHSEQ = DSSEQ)
)

src_ds_post <- dthcaus_source(
  dataset_name = "ds",
  filter = DSDECOD == "DEATH" & DSTERM == "POST STUDY REPORTING OF DEATH",
  date = DSSTDT,
  mode = "first",
  dthcaus = "POST STUDY: UNKNOWN CAUSE",
  traceability_vars = vars(DTHDOM = "DS", DTHSEQ = DSSEQ)
)

derive_var_dthcaus(adsl, src_ae, src_ds, src_ds_post, source_datasets = list(ae = ae, ds = ds))
#> # A tibble: 3 x 5
#>   STUDYID USUBJID DTHCAUS                             DTHDOM DTHSEQ
#>   <chr>   <chr>   <chr>                               <chr>   <dbl>
#> 1 STUDY01 PAT01   SUDDEN DEATH                        AE         12
#> 2 STUDY01 PAT02   DEATH DUE TO PROGRESSION OF DISEASE DS          3
#> 3 STUDY01 PAT03   POST STUDY: UNKNOWN CAUSE           DS          1