assign_no_ct()
maps a variable in a raw dataset to a target SDTM variable that has no terminology restrictions.
assign_ct()
maps a variable in a raw dataset to a target SDTM variable following controlled terminology recoding.
Usage
assign_no_ct(
tgt_dat = NULL,
tgt_var,
raw_dat,
raw_var,
id_vars = oak_id_vars()
)
assign_ct(
tgt_dat = NULL,
tgt_var,
raw_dat,
raw_var,
ct_spec,
ct_clst,
id_vars = oak_id_vars()
)
Arguments
- tgt_dat
Target dataset: a data frame to be merged against `raw_dat` by the variables indicated in `id_vars`. This parameter is optional; see section Value for how the output changes depending on this argument value.
- tgt_var
The target SDTM variable: a single string indicating the name of the variable to be derived.
- raw_dat
The raw dataset (dataframe); must include the variables passed in `id_vars` and `raw_var`.
- raw_var
The raw variable: a single string indicating the name of the raw variable in `raw_dat`.
- id_vars
Key variables to be used in the join between the raw dataset (`raw_dat`) and the target data set (`tgt_dat`).
- ct_spec
Study controlled terminology specification: a dataframe with a minimal set of columns, see `ct_spec_vars()` for details.
- ct_clst
A codelist code indicating which subset of the controlled terminology to apply in the derivation.
Value
The returned data set depends on the value of `tgt_dat`:
- If no target dataset is supplied, meaning that `tgt_dat` defaults to `NULL`, then the returned data set is `raw_dat`, selected for the variables indicated in `id_vars`, and a new extra column: the derived variable, as indicated in `tgt_var`.
- If the target dataset is provided, then it is merged with the raw data set `raw_dat` by the variables indicated in `id_vars`, with a new column: the derived variable, as indicated in `tgt_var`.
Examples
md1 <-
tibble::tibble(
oak_id = 1:14,
raw_source = "MD1",
patient_number = 101:114,
MDIND = c(
"NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA",
"VOMITINGS", "DIARHHEA", "COLD",
"FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN"
)
)
assign_no_ct(
tgt_var = "CMINDC",
raw_dat = md1,
raw_var = "MDIND"
)
#> # A tibble: 14 × 4
#> oak_id raw_source patient_number CMINDC
#> <int> <chr> <int> <chr>
#> 1 1 MD1 101 NAUSEA
#> 2 2 MD1 102 NAUSEA
#> 3 3 MD1 103 ANEMIA
#> 4 4 MD1 104 NAUSEA
#> 5 5 MD1 105 PYREXIA
#> 6 6 MD1 106 VOMITINGS
#> 7 7 MD1 107 DIARHHEA
#> 8 8 MD1 108 COLD
#> 9 9 MD1 109 FEVER
#> 10 10 MD1 110 LEG PAIN
#> 11 11 MD1 111 FEVER
#> 12 12 MD1 112 COLD
#> 13 13 MD1 113 COLD
#> 14 14 MD1 114 PAIN
cm_inter <-
tibble::tibble(
oak_id = 1:14,
raw_source = "MD1",
patient_number = 101:114,
CMTRT = c(
"BABY ASPIRIN",
"CORTISPORIN",
"ASPIRIN",
"DIPHENHYDRAMINE HCL",
"PARCETEMOL",
"VOMIKIND",
"ZENFLOX OZ",
"AMITRYPTYLINE",
"BENADRYL",
"DIPHENHYDRAMINE HYDROCHLORIDE",
"TETRACYCLINE",
"BENADRYL",
"SOMINEX",
"ZQUILL"
),
CMROUTE = c(
"ORAL",
"ORAL",
NA,
"ORAL",
"ORAL",
"ORAL",
"INTRAMUSCULAR",
"INTRA-ARTERIAL",
NA,
"NON-STANDARD",
"RANDOM_VALUE",
"INTRA-ARTICULAR",
"TRANSDERMAL",
"OPHTHALMIC"
)
)
# Controlled terminology specification
(ct_spec <- read_ct_spec_example("ct-01-cm"))
#> # A tibble: 33 × 8
#> codelist_code term_code CodedData term_value collected_value
#> <chr> <chr> <chr> <chr> <chr>
#> 1 C71113 C25473 QD QD QD (Every Day)
#> 2 C71113 C64496 BID BID BID (Twice a Day)
#> 3 C71113 C64499 PRN PRN PRN (As Needed)
#> 4 C71113 C64516 Q2H Q2H Q2H (Every 2 Hours)
#> 5 C71113 C64530 QID QID QID (4 Times a Day)
#> 6 C66726 C25158 CAPSULE CAPSULE Capsule
#> 7 C66726 C25394 PILL PILL Pill
#> 8 C66726 C29167 LOTION LOTION Lotion
#> 9 C66726 C42887 AEROSOL AEROSOL Aerosol
#> 10 C66726 C42944 INHALANT INHALANT Inhalant
#> # ℹ 23 more rows
#> # ℹ 3 more variables: term_preferred_term <chr>, term_synonyms <chr>,
#> # raw_codelist <chr>
assign_ct(
tgt_dat = cm_inter,
tgt_var = "CMINDC",
raw_dat = md1,
raw_var = "MDIND",
ct_spec = ct_spec,
ct_clst = "C66729"
)
#> ℹ These terms could not be mapped per the controlled terminology: "NAUSEA", "ANEMIA", "PYREXIA", "VOMITINGS", "DIARHHEA", "COLD", "FEVER", "LEG PAIN", and "PAIN".
#> # A tibble: 14 × 6
#> oak_id raw_source patient_number CMTRT CMROUTE CMINDC
#> <int> <chr> <int> <chr> <chr> <chr>
#> 1 1 MD1 101 BABY ASPIRIN ORAL NAUSEA
#> 2 2 MD1 102 CORTISPORIN ORAL NAUSEA
#> 3 3 MD1 103 ASPIRIN NA ANEMIA
#> 4 4 MD1 104 DIPHENHYDRAMINE HCL ORAL NAUSEA
#> 5 5 MD1 105 PARCETEMOL ORAL PYREX…
#> 6 6 MD1 106 VOMIKIND ORAL VOMIT…
#> 7 7 MD1 107 ZENFLOX OZ INTRAM… DIARH…
#> 8 8 MD1 108 AMITRYPTYLINE INTRA-… COLD
#> 9 9 MD1 109 BENADRYL NA FEVER
#> 10 10 MD1 110 DIPHENHYDRAMINE HYDROCHLORIDE NON-ST… LEG P…
#> 11 11 MD1 111 TETRACYCLINE RANDOM… FEVER
#> 12 12 MD1 112 BENADRYL INTRA-… COLD
#> 13 13 MD1 113 SOMINEX TRANSD… COLD
#> 14 14 MD1 114 ZQUILL OPHTHA… PAIN
# Variables are derived in sequence from multiple input sources.
# For each target variable, only missing (`NA`) values are filled
# during each step—previously assigned (non-missing) values are retained.
cm_raw <-
tibble::tibble(
oak_id = 1:4,
raw_source = "cm_raw",
patient_number = 370L + oak_id,
PATNUM = patient_number,
IT.CMTRT = c("BABY ASPIRIN", "CORTISPORIN", NA, NA),
IT.CMTRTOTH = c("Other Treatment - ", NA, "Other Treatment - Baby Aspirin", NA)
)
cm_raw
#> # A tibble: 4 × 6
#> oak_id raw_source patient_number PATNUM IT.CMTRT IT.CMTRTOTH
#> <int> <chr> <int> <int> <chr> <chr>
#> 1 1 cm_raw 371 371 BABY ASPIRIN "Other Treatment - "
#> 2 2 cm_raw 372 372 CORTISPORIN NA
#> 3 3 cm_raw 373 373 NA "Other Treatment - Baby …
#> 4 4 cm_raw 374 374 NA NA
# Derivation of `CMTRT` first from `IT.CMTRT` and then from `IT.CMTRTOTH`.
assign_no_ct(
raw_dat = cm_raw,
raw_var = "IT.CMTRT",
tgt_var = "CMTRT"
) |>
assign_no_ct(
raw_dat = cm_raw,
raw_var = "IT.CMTRTOTH",
tgt_var = "CMTRT"
)
#> # A tibble: 4 × 4
#> oak_id raw_source patient_number CMTRT
#> <int> <chr> <int> <chr>
#> 1 1 cm_raw 371 BABY ASPIRIN
#> 2 2 cm_raw 372 CORTISPORIN
#> 3 3 cm_raw 373 Other Treatment - Baby Aspirin
#> 4 4 cm_raw 374 NA
# Derivation of `CMTRT` first from `IT.CMTRTOTH` and then from `IT.CMTRT`.
assign_no_ct(
raw_dat = cm_raw,
raw_var = "IT.CMTRTOTH",
tgt_var = "CMTRT"
) |>
assign_no_ct(
raw_dat = cm_raw,
raw_var = "IT.CMTRT",
tgt_var = "CMTRT"
)
#> # A tibble: 4 × 4
#> oak_id raw_source patient_number CMTRT
#> <int> <chr> <int> <chr>
#> 1 1 cm_raw 371 "Other Treatment - "
#> 2 2 cm_raw 372 "CORTISPORIN"
#> 3 3 cm_raw 373 "Other Treatment - Baby Aspirin"
#> 4 4 cm_raw 374 NA
# Another example of variables derived in sequence from multiple input
# sources but now with controlled terminology remapping, in this case,
# CDISC Dose Unit (C71620) recoding.
cm_raw2 <- tibble::tibble(
oak_id = c(1:3, 6, 8:10, 12:14),
raw_source = "cm_raw",
patient_number = c(rep(375L, 2), 376:377, rep(378L, 3), rep(379L, 3)),
PATNUM = patient_number,
`IT.DOSUO` = c(NA, NA, NA, NA, NA, "Other Dose Unit", "cap", NA, NA, NA),
`IT.CMDOSU` = c("mg", "Gram", NA, "Tablet", "g", "mg", NA, "IU", "mL", "%")
)
assign_ct(
raw_dat = cm_raw2,
raw_var = "IT.DOSUO",
tgt_var = "CMDOSU",
ct_spec = ct_spec,
ct_clst = "C71620",
# Dose Unit
id_vars = oak_id_vars()
) |>
assign_ct(
raw_dat = cm_raw2,
raw_var = "IT.CMDOSU",
tgt_var = "CMDOSU",
ct_spec = ct_spec,
ct_clst = "C71620",
id_vars = oak_id_vars()
)
#> ℹ These terms could not be mapped per the controlled terminology: "Other Dose Unit".
#> # A tibble: 10 × 4
#> oak_id raw_source patient_number CMDOSU
#> <dbl> <chr> <int> <chr>
#> 1 1 cm_raw 375 mg
#> 2 2 cm_raw 375 g
#> 3 3 cm_raw 376 NA
#> 4 6 cm_raw 377 TABLET
#> 5 8 cm_raw 378 g
#> 6 9 cm_raw 378 OTHER DOSE UNIT
#> 7 10 cm_raw 378 CAPSULE
#> 8 12 cm_raw 379 IU
#> 9 13 cm_raw 379 mL
#> 10 14 cm_raw 379 %