Skip to contents
  • assign_no_ct() maps a variable in a raw dataset to a target SDTM variable that has no terminology restrictions.

  • assign_ct() maps a variable in a raw dataset to a target SDTM variable following controlled terminology recoding.

Usage

assign_no_ct(
  tgt_dat = NULL,
  tgt_var,
  raw_dat,
  raw_var,
  id_vars = oak_id_vars()
)

assign_ct(
  tgt_dat = NULL,
  tgt_var,
  raw_dat,
  raw_var,
  ct_spec,
  ct_clst,
  id_vars = oak_id_vars()
)

Arguments

tgt_dat

Target dataset: a data frame to be merged against raw_dat by the variables indicated in id_vars. This parameter is optional; see section Value for how the output changes depending on this argument value.

tgt_var

The target SDTM variable: a single string indicating the name of the variable to be derived.

raw_dat

The raw dataset (dataframe); must include the variables passed in id_vars and raw_var.

raw_var

The raw variable: a single string indicating the name of the raw variable in raw_dat.

id_vars

Key variables to be used in the join between the raw dataset (raw_dat) and the target data set (tgt_dat).

ct_spec

Study controlled terminology specification: a dataframe with a minimal set of columns, see ct_spec_vars() for details.

ct_clst

A codelist code indicating which subset of the controlled terminology to apply in the derivation.

Value

The returned data set depends on the value of tgt_dat:

  • If no target dataset is supplied, meaning that tgt_dat defaults to NULL, then the returned data set is raw_dat restricted to the variables indicated in id_vars, plus one new column: the derived variable, as indicated in tgt_var.

  • If the target dataset is provided, then it is merged with the raw data set raw_dat by the variables indicated in id_vars, with a new column: the derived variable, as indicated in tgt_var.

Examples


md1 <-
  tibble::tibble(
    oak_id = 1:14,
    raw_source = "MD1",
    patient_number = 101:114,
    MDIND = c(
      "NAUSEA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA",
      "VOMITINGS", "DIARHHEA", "COLD",
      "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN"
    )
  )

assign_no_ct(
  tgt_var = "CMINDC",
  raw_dat = md1,
  raw_var = "MDIND"
)
#> # A tibble: 14 × 4
#>    oak_id raw_source patient_number CMINDC   
#>     <int> <chr>               <int> <chr>    
#>  1      1 MD1                   101 NAUSEA   
#>  2      2 MD1                   102 NAUSEA   
#>  3      3 MD1                   103 ANEMIA   
#>  4      4 MD1                   104 NAUSEA   
#>  5      5 MD1                   105 PYREXIA  
#>  6      6 MD1                   106 VOMITINGS
#>  7      7 MD1                   107 DIARHHEA 
#>  8      8 MD1                   108 COLD     
#>  9      9 MD1                   109 FEVER    
#> 10     10 MD1                   110 LEG PAIN 
#> 11     11 MD1                   111 FEVER    
#> 12     12 MD1                   112 COLD     
#> 13     13 MD1                   113 COLD     
#> 14     14 MD1                   114 PAIN     

cm_inter <-
  tibble::tibble(
    oak_id = 1:14,
    raw_source = "MD1",
    patient_number = 101:114,
    CMTRT = c(
      "BABY ASPIRIN",
      "CORTISPORIN",
      "ASPIRIN",
      "DIPHENHYDRAMINE HCL",
      "PARCETEMOL",
      "VOMIKIND",
      "ZENFLOX OZ",
      "AMITRYPTYLINE",
      "BENADRYL",
      "DIPHENHYDRAMINE HYDROCHLORIDE",
      "TETRACYCLINE",
      "BENADRYL",
      "SOMINEX",
      "ZQUILL"
    ),
    CMROUTE = c(
      "ORAL",
      "ORAL",
      NA,
      "ORAL",
      "ORAL",
      "ORAL",
      "INTRAMUSCULAR",
      "INTRA-ARTERIAL",
      NA,
      "NON-STANDARD",
      "RANDOM_VALUE",
      "INTRA-ARTICULAR",
      "TRANSDERMAL",
      "OPHTHALMIC"
    )
  )

# Controlled terminology specification
(ct_spec <- read_ct_spec_example("ct-01-cm"))
#> # A tibble: 33 × 8
#>    codelist_code term_code CodedData term_value collected_value    
#>    <chr>         <chr>     <chr>     <chr>      <chr>              
#>  1 C71113        C25473    QD        QD         QD (Every Day)     
#>  2 C71113        C64496    BID       BID        BID (Twice a Day)  
#>  3 C71113        C64499    PRN       PRN        PRN (As Needed)    
#>  4 C71113        C64516    Q2H       Q2H        Q2H (Every 2 Hours)
#>  5 C71113        C64530    QID       QID        QID (4 Times a Day)
#>  6 C66726        C25158    CAPSULE   CAPSULE    Capsule            
#>  7 C66726        C25394    PILL      PILL       Pill               
#>  8 C66726        C29167    LOTION    LOTION     Lotion             
#>  9 C66726        C42887    AEROSOL   AEROSOL    Aerosol            
#> 10 C66726        C42944    INHALANT  INHALANT   Inhalant           
#> # ℹ 23 more rows
#> # ℹ 3 more variables: term_preferred_term <chr>, term_synonyms <chr>,
#> #   raw_codelist <chr>

assign_ct(
  tgt_dat = cm_inter,
  tgt_var = "CMINDC",
  raw_dat = md1,
  raw_var = "MDIND",
  ct_spec = ct_spec,
  ct_clst = "C66729"
)
#>  These terms could not be mapped per the controlled terminology: "NAUSEA", "ANEMIA", "PYREXIA", "VOMITINGS", "DIARHHEA", "COLD", "FEVER", "LEG PAIN", and "PAIN".
#> # A tibble: 14 × 6
#>    oak_id raw_source patient_number CMTRT                         CMROUTE CMINDC
#>     <int> <chr>               <int> <chr>                         <chr>   <chr> 
#>  1      1 MD1                   101 BABY ASPIRIN                  ORAL    NAUSEA
#>  2      2 MD1                   102 CORTISPORIN                   ORAL    NAUSEA
#>  3      3 MD1                   103 ASPIRIN                       NA      ANEMIA
#>  4      4 MD1                   104 DIPHENHYDRAMINE HCL           ORAL    NAUSEA
#>  5      5 MD1                   105 PARCETEMOL                    ORAL    PYREX…
#>  6      6 MD1                   106 VOMIKIND                      ORAL    VOMIT…
#>  7      7 MD1                   107 ZENFLOX OZ                    INTRAM… DIARH…
#>  8      8 MD1                   108 AMITRYPTYLINE                 INTRA-… COLD  
#>  9      9 MD1                   109 BENADRYL                      NA      FEVER 
#> 10     10 MD1                   110 DIPHENHYDRAMINE HYDROCHLORIDE NON-ST… LEG P…
#> 11     11 MD1                   111 TETRACYCLINE                  RANDOM… FEVER 
#> 12     12 MD1                   112 BENADRYL                      INTRA-… COLD  
#> 13     13 MD1                   113 SOMINEX                       TRANSD… COLD  
#> 14     14 MD1                   114 ZQUILL                        OPHTHA… PAIN  

# Variables are derived in sequence from multiple input sources.
# For each target variable, only missing (`NA`) values are filled
# during each step—previously assigned (non-missing) values are retained.

cm_raw <-
  tibble::tibble(
    oak_id = 1:4,
    raw_source = "cm_raw",
    patient_number = 370L + oak_id,
    PATNUM = patient_number,
    IT.CMTRT = c("BABY ASPIRIN", "CORTISPORIN", NA, NA),
    IT.CMTRTOTH = c("Other Treatment - ", NA, "Other Treatment - Baby Aspirin", NA)
  )

cm_raw
#> # A tibble: 4 × 6
#>   oak_id raw_source patient_number PATNUM IT.CMTRT     IT.CMTRTOTH              
#>    <int> <chr>               <int>  <int> <chr>        <chr>                    
#> 1      1 cm_raw                371    371 BABY ASPIRIN "Other Treatment - "     
#> 2      2 cm_raw                372    372 CORTISPORIN   NA                      
#> 3      3 cm_raw                373    373 NA           "Other Treatment - Baby …
#> 4      4 cm_raw                374    374 NA            NA                      

# Derivation of `CMTRT` first from `IT.CMTRT` and then from `IT.CMTRTOTH`.
assign_no_ct(
  raw_dat = cm_raw,
  raw_var = "IT.CMTRT",
  tgt_var = "CMTRT"
) |>
  assign_no_ct(
    raw_dat = cm_raw,
    raw_var = "IT.CMTRTOTH",
    tgt_var = "CMTRT"
  )
#> # A tibble: 4 × 4
#>   oak_id raw_source patient_number CMTRT                         
#>    <int> <chr>               <int> <chr>                         
#> 1      1 cm_raw                371 BABY ASPIRIN                  
#> 2      2 cm_raw                372 CORTISPORIN                   
#> 3      3 cm_raw                373 Other Treatment - Baby Aspirin
#> 4      4 cm_raw                374 NA                            

# Derivation of `CMTRT` first from `IT.CMTRTOTH` and then from `IT.CMTRT`.
assign_no_ct(
  raw_dat = cm_raw,
  raw_var = "IT.CMTRTOTH",
  tgt_var = "CMTRT"
) |>
  assign_no_ct(
    raw_dat = cm_raw,
    raw_var = "IT.CMTRT",
    tgt_var = "CMTRT"
  )
#> # A tibble: 4 × 4
#>   oak_id raw_source patient_number CMTRT                           
#>    <int> <chr>               <int> <chr>                           
#> 1      1 cm_raw                371 "Other Treatment - "            
#> 2      2 cm_raw                372 "CORTISPORIN"                   
#> 3      3 cm_raw                373 "Other Treatment - Baby Aspirin"
#> 4      4 cm_raw                374  NA                             

# Another example of variables derived in sequence from multiple input
# sources but now with controlled terminology remapping, in this case,
# CDISC Dose Unit (C71620) recoding.

cm_raw2 <- tibble::tibble(
  oak_id = c(1:3, 6, 8:10, 12:14),
  raw_source = "cm_raw",
  patient_number = c(rep(375L, 2), 376:377, rep(378L, 3), rep(379L, 3)),
  PATNUM = patient_number,
  `IT.DOSUO` = c(NA, NA, NA, NA, NA, "Other Dose Unit", "cap", NA, NA, NA),
  `IT.CMDOSU` = c("mg", "Gram", NA, "Tablet", "g", "mg", NA, "IU", "mL", "%")
)

assign_ct(
  raw_dat = cm_raw2,
  raw_var = "IT.DOSUO",
  tgt_var = "CMDOSU",
  ct_spec = ct_spec,
  ct_clst = "C71620",
  # Dose Unit
  id_vars = oak_id_vars()
) |>
  assign_ct(
    raw_dat = cm_raw2,
    raw_var = "IT.CMDOSU",
    tgt_var = "CMDOSU",
    ct_spec = ct_spec,
    ct_clst = "C71620",
    id_vars = oak_id_vars()
  )
#>  These terms could not be mapped per the controlled terminology: "Other Dose Unit".
#> # A tibble: 10 × 4
#>    oak_id raw_source patient_number CMDOSU         
#>     <dbl> <chr>               <int> <chr>          
#>  1      1 cm_raw                375 mg             
#>  2      2 cm_raw                375 g              
#>  3      3 cm_raw                376 NA             
#>  4      6 cm_raw                377 TABLET         
#>  5      8 cm_raw                378 g              
#>  6      9 cm_raw                378 OTHER DOSE UNIT
#>  7     10 cm_raw                378 CAPSULE        
#>  8     12 cm_raw                379 IU             
#>  9     13 cm_raw                379 mL             
#> 10     14 cm_raw                379 %