
Add a Variable Flagging the First or Last Observation Within Each By Group
Source:R/derive_var_extreme_flag.R
      derive_var_extreme_flag.RdAdd a variable flagging the first or last observation within each by group
Usage
derive_var_extreme_flag(
  dataset,
  by_vars,
  order,
  new_var,
  mode,
  true_value = "Y",
  false_value = NA_character_,
  flag_all = FALSE,
  check_type = "warning"
)Arguments
- dataset
- 
Input dataset The variables specified by the by_varsargument are expected to be in the dataset.- Default value
- none 
 
- by_vars
- 
Grouping variables - Default value
- none 
 
- order
- 
Sort order The first or last observation is determined with respect to the specified order. For handling of NAs in sorting variables see Sort Order.- Permitted values
- list of variables or functions of variables 
- Default value
- none 
 
- new_var
- 
Variable to add The specified variable is added to the output dataset. It is set to the value set in true_valuefor the first or last observation (depending on the mode) of each by group.- Permitted values
- list of name-value pairs 
- Default value
- none 
 
- mode
- 
Flag mode Determines of the first or last observation is flagged. - Permitted values
- "first",- "last"
- Default value
- none 
 
- true_value
- 
True value The value for the specified variable new_var, applicable to the first or last observation (depending on the mode) of each by group.- Permitted values
- An atomic scalar 
- Default value
- "Y"
 
- false_value
- 
False value The value for the specified variable new_var, NOT applicable to the first or last observation (depending on the mode) of each by group.- Permitted values
- An atomic scalar 
- Default value
- NA_character_
 
- flag_all
- 
Flag setting A logical value where if set to TRUE, all records are flagged and no error or warning is issued if the first or last record is not unique.- Default value
- FALSE
 
- check_type
- 
Check uniqueness? If "warning"or"error"is specified, the specified message is issued if the observations of the input dataset are not unique with respect to the by variables and the order.- Permitted values
- "none",- "warning",- "error"
- Default value
- "warning"
 
Details
For each group (with respect to the variables specified for the
by_vars parameter), new_var is set to "Y" for the first or last observation
(with respect to the order specified for the order parameter and the flag mode
specified for the mode parameter). In the case where the user wants to flag multiple records
of a grouping, for example records that all happen on the same visit and time, the argument
flag_all can be set to TRUE.
Otherwise, new_var is set to NA. Thus, the direction of "worst" is considered fixed for
all parameters in the dataset depending on the order and the mode, i.e. for every
parameter the first or last record will be flagged across the whole dataset.
See also
General Derivation Functions for all ADaMs that returns variable appended to dataset:
derive_var_joined_exist_flag(),
derive_var_merged_ef_msrc(),
derive_var_merged_exist_flag(),
derive_var_merged_summary(),
derive_var_obs_number(),
derive_var_relative_flag(),
derive_vars_cat(),
derive_vars_computed(),
derive_vars_joined(),
derive_vars_merged(),
derive_vars_merged_lookup(),
derive_vars_transposed()
Examples
library(tibble)
library(dplyr, warn.conflicts = FALSE)
example_vs <- tribble(
  ~USUBJID, ~VSTESTCD,      ~VISIT, ~VISITNUM, ~VSTPTNUM, ~VSSTRESN,
  "1001",     "DIABP", "SCREENING",         1,        10,        64,
  "1001",     "DIABP", "SCREENING",         1,        11,        66,
  "1001",     "DIABP",  "BASELINE",         2,       100,        68,
  "1001",     "DIABP",  "BASELINE",         2,       101,        68,
  "1001",     "DIABP",    "WEEK 2",         3,       200,        72,
  "1001",     "DIABP",    "WEEK 2",         3,       201,        71,
  "1001",     "DIABP",    "WEEK 4",         4,       300,        70,
  "1001",     "DIABP",    "WEEK 4",         4,       301,        70
)
# Flag last value for each patient, test, and visit, baseline observations are ignored
example_vs %>%
  restrict_derivation(
    derivation = derive_var_extreme_flag,
    args = params(
      by_vars = exprs(USUBJID, VSTESTCD, VISIT),
      order = exprs(VSTPTNUM),
      new_var = LASTFL,
      mode = "last"
    ),
    filter = VISIT != "BASELINE"
  ) %>%
  arrange(USUBJID, VSTESTCD, VISITNUM, VSTPTNUM) %>%
  select(USUBJID, VSTESTCD, VISIT, VSTPTNUM, VSSTRESN, LASTFL)
#> # A tibble: 8 × 6
#>   USUBJID VSTESTCD VISIT     VSTPTNUM VSSTRESN LASTFL
#>   <chr>   <chr>    <chr>        <dbl>    <dbl> <chr> 
#> 1 1001    DIABP    SCREENING       10       64 NA    
#> 2 1001    DIABP    SCREENING       11       66 Y     
#> 3 1001    DIABP    BASELINE       100       68 NA    
#> 4 1001    DIABP    BASELINE       101       68 NA    
#> 5 1001    DIABP    WEEK 2         200       72 NA    
#> 6 1001    DIABP    WEEK 2         201       71 Y     
#> 7 1001    DIABP    WEEK 4         300       70 NA    
#> 8 1001    DIABP    WEEK 4         301       70 Y     
# Baseline (ABLFL) examples:
input <- tribble(
  ~STUDYID, ~USUBJID,  ~PARAMCD,     ~AVISIT,                  ~ADT, ~AVAL,    ~DTYPE,
  "TEST01",  "PAT01", "PARAM01",  "BASELINE", as.Date("2021-04-27"),  15.0,        NA,
  "TEST01",  "PAT01", "PARAM01",  "BASELINE", as.Date("2021-04-25"),  14.0,        NA,
  "TEST01",  "PAT01", "PARAM01",  "BASELINE", as.Date("2021-04-23"),  15.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM01",    "WEEK 1", as.Date("2021-04-27"),  10.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM01",    "WEEK 2", as.Date("2021-04-30"),  12.0,        NA,
  "TEST01",  "PAT02", "PARAM01", "SCREENING", as.Date("2021-04-27"),  15.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",  "BASELINE", as.Date("2021-04-25"),  14.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",  "BASELINE", as.Date("2021-04-23"),  15.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",    "WEEK 1", as.Date("2021-04-27"),  10.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",    "WEEK 2", as.Date("2021-04-30"),  12.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-27"),  15.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-25"),  14.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-23"),  15.0,        NA,
  "TEST01",  "PAT01", "PARAM02",  "BASELINE", as.Date("2021-04-27"),  10.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02",    "WEEK 2", as.Date("2021-04-30"),  12.0,        NA,
  "TEST01",  "PAT02", "PARAM02", "SCREENING", as.Date("2021-04-27"),  15.0,        NA,
  "TEST01",  "PAT02", "PARAM02",  "BASELINE", as.Date("2021-04-25"),  14.0,        NA,
  "TEST01",  "PAT02", "PARAM02",    "WEEK 1", as.Date("2021-04-23"),  15.0,        NA,
  "TEST01",  "PAT02", "PARAM02",    "WEEK 1", as.Date("2021-04-27"),  10.0,        NA,
  "TEST01",  "PAT02", "PARAM02",  "BASELINE", as.Date("2021-04-30"),  12.0,        NA
)
# Last observation
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = exprs(USUBJID, PARAMCD),
    order = exprs(ADT),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 × 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  2 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      NA   
#>  3 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      Y    
#>  4 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  5 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  6 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE Y    
#>  7 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      NA   
#>  8 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      Y    
#>  9 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 10 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#> 11 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 12 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 13 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 15 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 16 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 17 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 18 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 20 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   
# Worst observation - Direction = High
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = exprs(USUBJID, PARAMCD),
    order = exprs(AVAL, ADT),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 × 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      NA   
#>  2 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  3 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      Y    
#>  4 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  5 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE NA   
#>  6 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE Y    
#>  7 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      NA   
#>  8 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      Y    
#>  9 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 10 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#> 11 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 12 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 13 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 15 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 16 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 17 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 18 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 20 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   
# Worst observation - Direction = Lo
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = exprs(USUBJID, PARAMCD),
    order = exprs(desc(AVAL), ADT),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 × 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  2 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      NA   
#>  3 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      Y    
#>  4 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  5 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  6 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE Y    
#>  7 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      NA   
#>  8 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      Y    
#>  9 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 10 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#> 11 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 12 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 13 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 15 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 16 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 17 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 18 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 20 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   
# Average observation
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = exprs(USUBJID, PARAMCD),
    order = exprs(ADT, desc(AVAL)),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE" & DTYPE == "AVERAGE"
)
#> # A tibble: 20 × 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE Y    
#>  2 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  3 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  4 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE Y    
#>  5 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      NA   
#>  6 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      NA   
#>  7 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#>  8 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#>  9 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 10 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 11 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 12 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 13 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 15 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 16 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 17 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      NA   
#> 18 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   
#> 20 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      NA   
# OCCURDS Examples
example_ae <- tribble(
  ~USUBJID,         ~AEBODSYS,    ~AEDECOD,   ~AESEV, ~AESTDY, ~AESEQ,
  "1015", "GENERAL DISORDERS",  "ERYTHEMA",   "MILD",       2,      1,
  "1015", "GENERAL DISORDERS",  "PRURITUS",   "MILD",       2,      2,
  "1015",      "GI DISORDERS", "DIARRHOEA",   "MILD",       8,      3,
  "1023", "CARDIAC DISORDERS",  "AV BLOCK",   "MILD",      22,      4,
  "1023",    "SKIN DISORDERS",  "ERYTHEMA",   "MILD",       3,      1,
  "1023",    "SKIN DISORDERS",  "ERYTHEMA", "SEVERE",       5,      2,
  "1023",    "SKIN DISORDERS",  "ERYTHEMA",   "MILD",       8,      3
)
# Most severe AE first occurrence per patient
example_ae %>%
  mutate(
    TEMP_AESEVN =
      as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
  ) %>%
  derive_var_extreme_flag(
    new_var = AOCCIFL,
    by_vars = exprs(USUBJID),
    order = exprs(TEMP_AESEVN, AESTDY, AESEQ),
    mode = "first"
  ) %>%
  arrange(USUBJID, AESTDY, AESEQ) %>%
  select(USUBJID, AEDECOD, AESEV, AESTDY, AESEQ, AOCCIFL)
#> # A tibble: 7 × 6
#>   USUBJID AEDECOD   AESEV  AESTDY AESEQ AOCCIFL
#>   <chr>   <chr>     <chr>   <dbl> <dbl> <chr>  
#> 1 1015    ERYTHEMA  MILD        2     1 Y      
#> 2 1015    PRURITUS  MILD        2     2 NA     
#> 3 1015    DIARRHOEA MILD        8     3 NA     
#> 4 1023    ERYTHEMA  MILD        3     1 NA     
#> 5 1023    ERYTHEMA  SEVERE      5     2 Y      
#> 6 1023    ERYTHEMA  MILD        8     3 NA     
#> 7 1023    AV BLOCK  MILD       22     4 NA     
# Most severe AE first occurrence per patient (flag all cases)
example_ae %>%
  mutate(
    TEMP_AESEVN =
      as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
  ) %>%
  derive_var_extreme_flag(
    new_var = AOCCIFL,
    by_vars = exprs(USUBJID),
    order = exprs(TEMP_AESEVN, AESTDY),
    mode = "first",
    flag_all = TRUE
  ) %>%
  arrange(USUBJID, AESTDY) %>%
  select(USUBJID, AEDECOD, AESEV, AESTDY, AOCCIFL)
#> # A tibble: 7 × 5
#>   USUBJID AEDECOD   AESEV  AESTDY AOCCIFL
#>   <chr>   <chr>     <chr>   <dbl> <chr>  
#> 1 1015    ERYTHEMA  MILD        2 Y      
#> 2 1015    PRURITUS  MILD        2 Y      
#> 3 1015    DIARRHOEA MILD        8 NA     
#> 4 1023    ERYTHEMA  MILD        3 NA     
#> 5 1023    ERYTHEMA  SEVERE      5 Y      
#> 6 1023    ERYTHEMA  MILD        8 NA     
#> 7 1023    AV BLOCK  MILD       22 NA     
# Most severe AE first occurrence per patient per body system
example_ae %>%
  mutate(
    TEMP_AESEVN =
      as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
  ) %>%
  derive_var_extreme_flag(
    new_var = AOCCSIFL,
    by_vars = exprs(USUBJID, AEBODSYS),
    order = exprs(TEMP_AESEVN, AESTDY, AESEQ),
    mode = "first"
  ) %>%
  arrange(USUBJID, AESTDY, AESEQ) %>%
  select(USUBJID, AEBODSYS, AESEV, AESTDY, AOCCSIFL)
#> # A tibble: 7 × 5
#>   USUBJID AEBODSYS          AESEV  AESTDY AOCCSIFL
#>   <chr>   <chr>             <chr>   <dbl> <chr>   
#> 1 1015    GENERAL DISORDERS MILD        2 Y       
#> 2 1015    GENERAL DISORDERS MILD        2 NA      
#> 3 1015    GI DISORDERS      MILD        8 Y       
#> 4 1023    SKIN DISORDERS    MILD        3 NA      
#> 5 1023    SKIN DISORDERS    SEVERE      5 Y       
#> 6 1023    SKIN DISORDERS    MILD        8 NA      
#> 7 1023    CARDIAC DISORDERS MILD       22 Y