Skip to contents

Add a variable flagging the first or last observation within each by group

Usage

derive_var_extreme_flag(
  dataset,
  by_vars,
  order,
  new_var,
  mode,
  filter = deprecated(),
  check_type = "warning"
)

Arguments

dataset

Input dataset

The variables specified by the by_vars parameter are expected.

by_vars

Grouping variables

Permitted Values: list of variables

order

Sort order

The first or last observation is determined with respect to the specified order.

Permitted Values: list of variables or functions of variables

new_var

Variable to add

The specified variable is added to the output dataset. It is set to "Y" for the first or last observation (depending on the mode) of each by group.

Permitted Values: list of name-value pairs

mode

Flag mode

Determines of the first or last observation is flagged.

Permitted Values: "first", "last"

filter

Deprecated, please use restrict_derivation() instead (see examples).

check_type

Check uniqueness?

If "warning" or "error" is specified, the specified message is issued if the observations of the input dataset are not unique with respect to the by variables and the order.

Default: "warning"

Permitted Values: "none", "warning", "error"

Value

The input dataset with the new flag variable added

Details

For each group (with respect to the variables specified for the by_vars parameter), new_var is set to "Y" for the first or last observation (with respect to the order specified for the order parameter and the flag mode specified for the mode parameter). Only observations included by the filter parameter are considered for flagging. Otherwise, new_var is set to NA. Thus, the direction of "worst" is considered fixed for all parameters in the dataset depending on the order and the mode, i.e. for every parameter the first or last record will be flagged across the whole dataset.

Author

Stefan Bundfuss

Examples

library(dplyr, warn.conflicts = FALSE)
library(admiral.test)
data("admiral_vs")

# Flag last value for each patient, test, and visit, baseline observations are ignored
admiral_vs %>%
  restrict_derivation(
    derivation = derive_var_extreme_flag,
    args = params(
      by_vars = vars(USUBJID, VSTESTCD, VISIT),
      order = vars(VSTPTNUM),
      new_var = LASTFL,
      mode = "last"
    ),
    filter = VISIT != "BASELINE"
  ) %>%
  arrange(USUBJID, VSTESTCD, VISITNUM, VSTPTNUM) %>%
  select(USUBJID, VSTESTCD, VISIT, VSTPTNUM, VSSTRESN, LASTFL)
#> # A tibble: 29,643 x 6
#>    USUBJID     VSTESTCD VISIT               VSTPTNUM VSSTRESN LASTFL
#>    <chr>       <chr>    <chr>                  <dbl>    <dbl> <chr> 
#>  1 01-701-1015 DIABP    SCREENING 1              815       64 NA    
#>  2 01-701-1015 DIABP    SCREENING 1              816       83 NA    
#>  3 01-701-1015 DIABP    SCREENING 1              817       57 Y     
#>  4 01-701-1015 DIABP    SCREENING 2              815       68 NA    
#>  5 01-701-1015 DIABP    SCREENING 2              816       59 NA    
#>  6 01-701-1015 DIABP    SCREENING 2              817       71 Y     
#>  7 01-701-1015 DIABP    BASELINE                 815       56 NA    
#>  8 01-701-1015 DIABP    BASELINE                 816       51 NA    
#>  9 01-701-1015 DIABP    BASELINE                 817       61 NA    
#> 10 01-701-1015 DIABP    AMBUL ECG PLACEMENT      815       67 NA    
#> # … with 29,633 more rows

# Baseline (ABLFL) examples:

input <- tibble::tribble(
  ~STUDYID, ~USUBJID,  ~PARAMCD,     ~AVISIT,                  ~ADT, ~AVAL,    ~DTYPE,
  "TEST01",  "PAT01", "PARAM01",  "BASELINE", as.Date("2021-04-27"),  15.0,        NA,
  "TEST01",  "PAT01", "PARAM01",  "BASELINE", as.Date("2021-04-25"),  14.0,        NA,
  "TEST01",  "PAT01", "PARAM01",  "BASELINE", as.Date("2021-04-23"),  15.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM01",    "WEEK 1", as.Date("2021-04-27"),  10.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM01",    "WEEK 2", as.Date("2021-04-30"),  12.0,        NA,
  "TEST01",  "PAT02", "PARAM01", "SCREENING", as.Date("2021-04-27"),  15.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",  "BASELINE", as.Date("2021-04-25"),  14.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",  "BASELINE", as.Date("2021-04-23"),  15.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",    "WEEK 1", as.Date("2021-04-27"),  10.0, "AVERAGE",
  "TEST01",  "PAT02", "PARAM01",    "WEEK 2", as.Date("2021-04-30"),  12.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-27"),  15.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-25"),  14.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-23"),  15.0,        NA,
  "TEST01",  "PAT01", "PARAM02",  "BASELINE", as.Date("2021-04-27"),  10.0, "AVERAGE",
  "TEST01",  "PAT01", "PARAM02",    "WEEK 2", as.Date("2021-04-30"),  12.0,        NA,
  "TEST01",  "PAT02", "PARAM02", "SCREENING", as.Date("2021-04-27"),  15.0,        NA,
  "TEST01",  "PAT02", "PARAM02",  "BASELINE", as.Date("2021-04-25"),  14.0,        NA,
  "TEST01",  "PAT02", "PARAM02",    "WEEK 1", as.Date("2021-04-23"),  15.0,        NA,
  "TEST01",  "PAT02", "PARAM02",    "WEEK 1", as.Date("2021-04-27"),  10.0,        NA,
  "TEST01",  "PAT02", "PARAM02",  "BASELINE", as.Date("2021-04-30"),  12.0,        NA
)

# Last observation
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = vars(USUBJID, PARAMCD),
    order = vars(ADT),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  2 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      NA   
#>  3 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      Y    
#>  4 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  5 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  6 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE Y    
#>  7 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      NA   
#>  8 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      Y    
#>  9 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 10 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#> 11 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 12 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 13 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 15 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 16 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 17 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 18 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 20 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   

# Worst observation - Direction = High
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = vars(USUBJID, PARAMCD),
    order = vars(AVAL, ADT),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      NA   
#>  2 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  3 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      Y    
#>  4 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  5 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE NA   
#>  6 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE Y    
#>  7 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      NA   
#>  8 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      Y    
#>  9 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 10 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#> 11 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 12 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 13 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 15 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 16 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 17 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 18 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 20 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   

# Worst observation - Direction = Lo
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = vars(USUBJID, PARAMCD),
    order = vars(desc(AVAL), ADT),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  2 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      NA   
#>  3 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      Y    
#>  4 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  5 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  6 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE Y    
#>  7 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      NA   
#>  8 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      Y    
#>  9 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 10 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#> 11 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 12 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 13 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 15 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 16 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 17 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 18 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 20 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   

# Average observation
restrict_derivation(
  input,
  derivation = derive_var_extreme_flag,
  args = params(
    by_vars = vars(USUBJID, PARAMCD),
    order = vars(ADT, desc(AVAL)),
    new_var = ABLFL,
    mode = "last"
  ),
  filter = AVISIT == "BASELINE" & DTYPE == "AVERAGE"
)
#> # A tibble: 20 x 8
#>    STUDYID USUBJID PARAMCD AVISIT    ADT         AVAL DTYPE   ABLFL
#>    <chr>   <chr>   <chr>   <chr>     <date>     <dbl> <chr>   <chr>
#>  1 TEST01  PAT01   PARAM01 BASELINE  2021-04-23    15 AVERAGE Y    
#>  2 TEST01  PAT01   PARAM02 BASELINE  2021-04-27    10 AVERAGE Y    
#>  3 TEST01  PAT02   PARAM01 BASELINE  2021-04-23    15 AVERAGE NA   
#>  4 TEST01  PAT02   PARAM01 BASELINE  2021-04-25    14 AVERAGE Y    
#>  5 TEST01  PAT01   PARAM01 BASELINE  2021-04-27    15 NA      NA   
#>  6 TEST01  PAT01   PARAM01 BASELINE  2021-04-25    14 NA      NA   
#>  7 TEST01  PAT01   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#>  8 TEST01  PAT01   PARAM01 WEEK 2    2021-04-30    12 NA      NA   
#>  9 TEST01  PAT02   PARAM01 SCREENING 2021-04-27    15 AVERAGE NA   
#> 10 TEST01  PAT02   PARAM01 WEEK 1    2021-04-27    10 AVERAGE NA   
#> 11 TEST01  PAT02   PARAM01 WEEK 2    2021-04-30    12 AVERAGE NA   
#> 12 TEST01  PAT01   PARAM02 SCREENING 2021-04-27    15 AVERAGE NA   
#> 13 TEST01  PAT01   PARAM02 SCREENING 2021-04-25    14 AVERAGE NA   
#> 14 TEST01  PAT01   PARAM02 SCREENING 2021-04-23    15 NA      NA   
#> 15 TEST01  PAT01   PARAM02 WEEK 2    2021-04-30    12 NA      NA   
#> 16 TEST01  PAT02   PARAM02 SCREENING 2021-04-27    15 NA      NA   
#> 17 TEST01  PAT02   PARAM02 BASELINE  2021-04-25    14 NA      NA   
#> 18 TEST01  PAT02   PARAM02 WEEK 1    2021-04-23    15 NA      NA   
#> 19 TEST01  PAT02   PARAM02 WEEK 1    2021-04-27    10 NA      NA   
#> 20 TEST01  PAT02   PARAM02 BASELINE  2021-04-30    12 NA      NA   

# OCCURDS Examples
data("admiral_ae")

# Most severe AE first occurrence per patient
admiral_ae %>%
  mutate(
    TEMP_AESEVN =
      as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
  ) %>%
  derive_var_extreme_flag(
    new_var = AOCCIFL,
    by_vars = vars(USUBJID),
    order = vars(TEMP_AESEVN, AESTDY, AESEQ),
    mode = "first"
  ) %>%
  arrange(USUBJID, AESTDY, AESEQ) %>%
  select(USUBJID, AEDECOD, AESEV, AESTDY, AESEQ, AOCCIFL)
#> # A tibble: 1,191 x 6
#>    USUBJID     AEDECOD                              AESEV   AESTDY AESEQ AOCCIFL
#>    <chr>       <chr>                                <chr>    <dbl> <dbl> <chr>  
#>  1 01-701-1015 APPLICATION SITE ERYTHEMA            MILD         2     1 Y      
#>  2 01-701-1015 APPLICATION SITE PRURITUS            MILD         2     2 NA     
#>  3 01-701-1015 DIARRHOEA                            MILD         8     3 NA     
#>  4 01-701-1023 ERYTHEMA                             MILD         3     1 NA     
#>  5 01-701-1023 ERYTHEMA                             MODERA…      3     2 Y      
#>  6 01-701-1023 ERYTHEMA                             MILD         3     4 NA     
#>  7 01-701-1023 ATRIOVENTRICULAR BLOCK SECOND DEGREE MILD        22     3 NA     
#>  8 01-701-1028 APPLICATION SITE ERYTHEMA            MILD         3     1 Y      
#>  9 01-701-1028 APPLICATION SITE PRURITUS            MILD        21     2 NA     
#> 10 01-701-1034 APPLICATION SITE PRURITUS            MILD        58     1 Y      
#> # … with 1,181 more rows

# Most severe AE first occurrence per patient per body system
admiral_ae %>%
  mutate(
    TEMP_AESEVN =
      as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
  ) %>%
  derive_var_extreme_flag(
    new_var = AOCCSIFL,
    by_vars = vars(USUBJID, AEBODSYS),
    order = vars(TEMP_AESEVN, AESTDY, AESEQ),
    mode = "first"
  ) %>%
  arrange(USUBJID, AESTDY, AESEQ) %>%
  select(USUBJID, AEBODSYS, AESEV, AESTDY, AOCCSIFL)
#> # A tibble: 1,191 x 5
#>    USUBJID     AEBODSYS                                  AESEV   AESTDY AOCCSIFL
#>    <chr>       <chr>                                     <chr>    <dbl> <chr>   
#>  1 01-701-1015 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD         2 Y       
#>  2 01-701-1015 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD         2 NA      
#>  3 01-701-1015 GASTROINTESTINAL DISORDERS                MILD         8 Y       
#>  4 01-701-1023 SKIN AND SUBCUTANEOUS TISSUE DISORDERS    MILD         3 NA      
#>  5 01-701-1023 SKIN AND SUBCUTANEOUS TISSUE DISORDERS    MODERA…      3 Y       
#>  6 01-701-1023 SKIN AND SUBCUTANEOUS TISSUE DISORDERS    MILD         3 NA      
#>  7 01-701-1023 CARDIAC DISORDERS                         MILD        22 Y       
#>  8 01-701-1028 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD         3 Y       
#>  9 01-701-1028 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD        21 NA      
#> 10 01-701-1034 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD        58 Y       
#> # … with 1,181 more rows