Add a Variable Flagging the First or Last Observation Within Each By Group
Source:R/derive_var_extreme_flag.R
derive_var_extreme_flag.Rd
Add a variable flagging the first or last observation within each by group
Usage
derive_var_extreme_flag(
dataset,
by_vars,
order,
new_var,
mode,
true_value = "Y",
false_value = NA_character_,
flag_all = FALSE,
check_type = "warning"
)
Arguments
- dataset
Input dataset
The variables specified by the
by_vars
argument are expected to be in the dataset.- by_vars
Grouping variables
Permitted Values: list of variables created by
exprs()
e.g.exprs(USUBJID, VISIT)
- order
Sort order
The first or last observation is determined with respect to the specified order.
For handling of
NA
s in sorting variables see Sort Order.Permitted Values: list of variables or functions of variables
- new_var
Variable to add
The specified variable is added to the output dataset. It is set to the value set in
true_value
for the first or last observation (depending on the mode) of each by group.Permitted Values: list of name-value pairs
- mode
Flag mode
Determines of the first or last observation is flagged.
Permitted Values:
"first"
,"last"
- true_value
True value
The value for the specified variable
new_var
, applicable to the first or last observation (depending on the mode) of each by group.Permitted Values: An atomic scalar
- false_value
False value
The value for the specified variable
new_var
, NOT applicable to the first or last observation (depending on the mode) of each by group.Permitted Values: An atomic scalar
- flag_all
Flag setting
A logical value where if set to
TRUE
, all records are flagged and no error or warning is issued if the first or last record is not unique.- check_type
Check uniqueness?
If
"warning"
or"error"
is specified, the specified message is issued if the observations of the input dataset are not unique with respect to the by variables and the order.Default:
"warning"
Permitted Values:
"none"
,"warning"
,"error"
Details
For each group (with respect to the variables specified for the
by_vars
parameter), new_var
is set to "Y"
for the first or last observation
(with respect to the order specified for the order
parameter and the flag mode
specified for the mode
parameter). In the case where the user wants to flag multiple records
of a grouping, for example records that all happen on the same visit and time, the argument
flag_all
can be set to TRUE
.
Otherwise, new_var
is set to NA
. Thus, the direction of "worst" is considered fixed for
all parameters in the dataset depending on the order
and the mode
, i.e. for every
parameter the first or last record will be flagged across the whole dataset.
See also
General Derivation Functions for all ADaMs that returns variable appended to dataset:
derive_var_joined_exist_flag()
,
derive_var_merged_ef_msrc()
,
derive_var_merged_exist_flag()
,
derive_var_merged_summary()
,
derive_var_obs_number()
,
derive_var_relative_flag()
,
derive_vars_computed()
,
derive_vars_joined()
,
derive_vars_merged()
,
derive_vars_merged_lookup()
,
derive_vars_transposed()
Examples
library(tibble)
library(dplyr, warn.conflicts = FALSE)
example_vs <- tribble(
~USUBJID, ~VSTESTCD, ~VISIT, ~VISITNUM, ~VSTPTNUM, ~VSSTRESN,
"1001", "DIABP", "SCREENING", 1, 10, 64,
"1001", "DIABP", "SCREENING", 1, 11, 66,
"1001", "DIABP", "BASELINE", 2, 100, 68,
"1001", "DIABP", "BASELINE", 2, 101, 68,
"1001", "DIABP", "WEEK 2", 3, 200, 72,
"1001", "DIABP", "WEEK 2", 3, 201, 71,
"1001", "DIABP", "WEEK 4", 4, 300, 70,
"1001", "DIABP", "WEEK 4", 4, 301, 70
)
# Flag last value for each patient, test, and visit, baseline observations are ignored
example_vs %>%
restrict_derivation(
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, VSTESTCD, VISIT),
order = exprs(VSTPTNUM),
new_var = LASTFL,
mode = "last"
),
filter = VISIT != "BASELINE"
) %>%
arrange(USUBJID, VSTESTCD, VISITNUM, VSTPTNUM) %>%
select(USUBJID, VSTESTCD, VISIT, VSTPTNUM, VSSTRESN, LASTFL)
#> # A tibble: 8 × 6
#> USUBJID VSTESTCD VISIT VSTPTNUM VSSTRESN LASTFL
#> <chr> <chr> <chr> <dbl> <dbl> <chr>
#> 1 1001 DIABP SCREENING 10 64 NA
#> 2 1001 DIABP SCREENING 11 66 Y
#> 3 1001 DIABP BASELINE 100 68 NA
#> 4 1001 DIABP BASELINE 101 68 NA
#> 5 1001 DIABP WEEK 2 200 72 NA
#> 6 1001 DIABP WEEK 2 201 71 Y
#> 7 1001 DIABP WEEK 4 300 70 NA
#> 8 1001 DIABP WEEK 4 301 70 Y
# Baseline (ABLFL) examples:
input <- tribble(
~STUDYID, ~USUBJID, ~PARAMCD, ~AVISIT, ~ADT, ~AVAL, ~DTYPE,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-27"), 15.0, NA,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-25"), 14.0, NA,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-23"), 15.0, "AVERAGE",
"TEST01", "PAT01", "PARAM01", "WEEK 1", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT01", "PARAM01", "WEEK 2", as.Date("2021-04-30"), 12.0, NA,
"TEST01", "PAT02", "PARAM01", "SCREENING", as.Date("2021-04-27"), 15.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "BASELINE", as.Date("2021-04-25"), 14.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "BASELINE", as.Date("2021-04-23"), 15.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "WEEK 1", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "WEEK 2", as.Date("2021-04-30"), 12.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-27"), 15.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-25"), 14.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-23"), 15.0, NA,
"TEST01", "PAT01", "PARAM02", "BASELINE", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "WEEK 2", as.Date("2021-04-30"), 12.0, NA,
"TEST01", "PAT02", "PARAM02", "SCREENING", as.Date("2021-04-27"), 15.0, NA,
"TEST01", "PAT02", "PARAM02", "BASELINE", as.Date("2021-04-25"), 14.0, NA,
"TEST01", "PAT02", "PARAM02", "WEEK 1", as.Date("2021-04-23"), 15.0, NA,
"TEST01", "PAT02", "PARAM02", "WEEK 1", as.Date("2021-04-27"), 10.0, NA,
"TEST01", "PAT02", "PARAM02", "BASELINE", as.Date("2021-04-30"), 12.0, NA
)
# Last observation
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 × 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Worst observation - Direction = High
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(AVAL, ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 × 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Worst observation - Direction = Lo
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(desc(AVAL), ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 × 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Average observation
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(ADT, desc(AVAL)),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE" & DTYPE == "AVERAGE"
)
#> # A tibble: 20 × 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE Y
#> 2 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 3 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 4 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 5 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA NA
#> 6 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 7 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 8 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 9 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 10 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 11 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 12 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 13 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 15 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 16 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 17 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 18 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
#> 20 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA NA
# OCCURDS Examples
example_ae <- tribble(
~USUBJID, ~AEBODSYS, ~AEDECOD, ~AESEV, ~AESTDY, ~AESEQ,
"1015", "GENERAL DISORDERS", "ERYTHEMA", "MILD", 2, 1,
"1015", "GENERAL DISORDERS", "PRURITUS", "MILD", 2, 2,
"1015", "GI DISORDERS", "DIARRHOEA", "MILD", 8, 3,
"1023", "CARDIAC DISORDERS", "AV BLOCK", "MILD", 22, 4,
"1023", "SKIN DISORDERS", "ERYTHEMA", "MILD", 3, 1,
"1023", "SKIN DISORDERS", "ERYTHEMA", "SEVERE", 5, 2,
"1023", "SKIN DISORDERS", "ERYTHEMA", "MILD", 8, 3
)
# Most severe AE first occurrence per patient
example_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCIFL,
by_vars = exprs(USUBJID),
order = exprs(TEMP_AESEVN, AESTDY, AESEQ),
mode = "first"
) %>%
arrange(USUBJID, AESTDY, AESEQ) %>%
select(USUBJID, AEDECOD, AESEV, AESTDY, AESEQ, AOCCIFL)
#> # A tibble: 7 × 6
#> USUBJID AEDECOD AESEV AESTDY AESEQ AOCCIFL
#> <chr> <chr> <chr> <dbl> <dbl> <chr>
#> 1 1015 ERYTHEMA MILD 2 1 Y
#> 2 1015 PRURITUS MILD 2 2 NA
#> 3 1015 DIARRHOEA MILD 8 3 NA
#> 4 1023 ERYTHEMA MILD 3 1 NA
#> 5 1023 ERYTHEMA SEVERE 5 2 Y
#> 6 1023 ERYTHEMA MILD 8 3 NA
#> 7 1023 AV BLOCK MILD 22 4 NA
# Most severe AE first occurrence per patient (flag all cases)
example_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCIFL,
by_vars = exprs(USUBJID),
order = exprs(TEMP_AESEVN, AESTDY),
mode = "first",
flag_all = TRUE
) %>%
arrange(USUBJID, AESTDY) %>%
select(USUBJID, AEDECOD, AESEV, AESTDY, AOCCIFL)
#> # A tibble: 7 × 5
#> USUBJID AEDECOD AESEV AESTDY AOCCIFL
#> <chr> <chr> <chr> <dbl> <chr>
#> 1 1015 ERYTHEMA MILD 2 Y
#> 2 1015 PRURITUS MILD 2 Y
#> 3 1015 DIARRHOEA MILD 8 NA
#> 4 1023 ERYTHEMA MILD 3 NA
#> 5 1023 ERYTHEMA SEVERE 5 Y
#> 6 1023 ERYTHEMA MILD 8 NA
#> 7 1023 AV BLOCK MILD 22 NA
# Most severe AE first occurrence per patient per body system
example_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCSIFL,
by_vars = exprs(USUBJID, AEBODSYS),
order = exprs(TEMP_AESEVN, AESTDY, AESEQ),
mode = "first"
) %>%
arrange(USUBJID, AESTDY, AESEQ) %>%
select(USUBJID, AEBODSYS, AESEV, AESTDY, AOCCSIFL)
#> # A tibble: 7 × 5
#> USUBJID AEBODSYS AESEV AESTDY AOCCSIFL
#> <chr> <chr> <chr> <dbl> <chr>
#> 1 1015 GENERAL DISORDERS MILD 2 Y
#> 2 1015 GENERAL DISORDERS MILD 2 NA
#> 3 1015 GI DISORDERS MILD 8 Y
#> 4 1023 SKIN DISORDERS MILD 3 NA
#> 5 1023 SKIN DISORDERS SEVERE 5 Y
#> 6 1023 SKIN DISORDERS MILD 8 NA
#> 7 1023 CARDIAC DISORDERS MILD 22 Y