
Add a Variable Flagging the First or Last Observation Within Each By Group
Source:R/derive_var_extreme_flag.R
derive_var_extreme_flag.Rd
Add a variable flagging the first or last observation within each by group
Usage
derive_var_extreme_flag(
dataset,
by_vars,
order,
new_var,
mode,
filter = deprecated(),
check_type = "warning"
)
Arguments
- dataset
Input dataset
The variables specified by the
by_vars
parameter are expected.- by_vars
Grouping variables
Permitted Values: list of variables
- order
Sort order
The first or last observation is determined with respect to the specified order.
Permitted Values: list of variables or functions of variables
- new_var
Variable to add
The specified variable is added to the output dataset. It is set to
"Y"
for the first or last observation (depending on the mode) of each by group.Permitted Values: list of name-value pairs
- mode
Flag mode
Determines of the first or last observation is flagged.
Permitted Values:
"first"
,"last"
- filter
Deprecated, please use
restrict_derivation()
instead (see examples).- check_type
Check uniqueness?
If
"warning"
or"error"
is specified, the specified message is issued if the observations of the input dataset are not unique with respect to the by variables and the order.Default:
"warning"
Permitted Values:
"none"
,"warning"
,"error"
Details
For each group (with respect to the variables specified for the
by_vars
parameter), new_var
is set to "Y" for the first or last observation
(with respect to the order specified for the order
parameter and the flag mode
specified for the mode
parameter). Only observations included by the filter
parameter
are considered for flagging.
Otherwise, new_var
is set to NA
. Thus, the direction of "worst" is considered fixed for
all parameters in the dataset depending on the order
and the mode
, i.e. for every
parameter the first or last record will be flagged across the whole dataset.
See also
General Derivation Functions for all ADaMs that returns variable appended to dataset:
derive_var_confirmation_flag()
,
derive_var_last_dose_amt()
,
derive_var_last_dose_date()
,
derive_var_last_dose_grp()
,
derive_var_merged_cat()
,
derive_var_merged_character()
,
derive_var_merged_exist_flag()
,
derive_var_merged_summary()
,
derive_var_obs_number()
,
derive_var_relative_flag()
,
derive_var_worst_flag()
,
derive_vars_joined()
,
derive_vars_last_dose()
,
derive_vars_merged_lookup()
,
derive_vars_merged()
,
derive_vars_transposed()
,
get_summary_records()
Examples
library(tibble)
library(dplyr, warn.conflicts = FALSE)
library(admiral.test)
data("admiral_vs")
# Flag last value for each patient, test, and visit, baseline observations are ignored
admiral_vs %>%
restrict_derivation(
derivation = derive_var_extreme_flag,
args = params(
by_vars = vars(USUBJID, VSTESTCD, VISIT),
order = vars(VSTPTNUM),
new_var = LASTFL,
mode = "last"
),
filter = VISIT != "BASELINE"
) %>%
arrange(USUBJID, VSTESTCD, VISITNUM, VSTPTNUM) %>%
select(USUBJID, VSTESTCD, VISIT, VSTPTNUM, VSSTRESN, LASTFL)
#> # A tibble: 29,643 x 6
#> USUBJID VSTESTCD VISIT VSTPTNUM VSSTRESN LASTFL
#> <chr> <chr> <chr> <dbl> <dbl> <chr>
#> 1 01-701-1015 DIABP SCREENING 1 815 64 NA
#> 2 01-701-1015 DIABP SCREENING 1 816 83 NA
#> 3 01-701-1015 DIABP SCREENING 1 817 57 Y
#> 4 01-701-1015 DIABP SCREENING 2 815 68 NA
#> 5 01-701-1015 DIABP SCREENING 2 816 59 NA
#> 6 01-701-1015 DIABP SCREENING 2 817 71 Y
#> 7 01-701-1015 DIABP BASELINE 815 56 NA
#> 8 01-701-1015 DIABP BASELINE 816 51 NA
#> 9 01-701-1015 DIABP BASELINE 817 61 NA
#> 10 01-701-1015 DIABP AMBUL ECG PLACEMENT 815 67 NA
#> # … with 29,633 more rows
# Baseline (ABLFL) examples:
input <- tribble(
~STUDYID, ~USUBJID, ~PARAMCD, ~AVISIT, ~ADT, ~AVAL, ~DTYPE,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-27"), 15.0, NA,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-25"), 14.0, NA,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-23"), 15.0, "AVERAGE",
"TEST01", "PAT01", "PARAM01", "WEEK 1", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT01", "PARAM01", "WEEK 2", as.Date("2021-04-30"), 12.0, NA,
"TEST01", "PAT02", "PARAM01", "SCREENING", as.Date("2021-04-27"), 15.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "BASELINE", as.Date("2021-04-25"), 14.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "BASELINE", as.Date("2021-04-23"), 15.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "WEEK 1", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "WEEK 2", as.Date("2021-04-30"), 12.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-27"), 15.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-25"), 14.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-23"), 15.0, NA,
"TEST01", "PAT01", "PARAM02", "BASELINE", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "WEEK 2", as.Date("2021-04-30"), 12.0, NA,
"TEST01", "PAT02", "PARAM02", "SCREENING", as.Date("2021-04-27"), 15.0, NA,
"TEST01", "PAT02", "PARAM02", "BASELINE", as.Date("2021-04-25"), 14.0, NA,
"TEST01", "PAT02", "PARAM02", "WEEK 1", as.Date("2021-04-23"), 15.0, NA,
"TEST01", "PAT02", "PARAM02", "WEEK 1", as.Date("2021-04-27"), 10.0, NA,
"TEST01", "PAT02", "PARAM02", "BASELINE", as.Date("2021-04-30"), 12.0, NA
)
# Last observation
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = vars(USUBJID, PARAMCD),
order = vars(ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Worst observation - Direction = High
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = vars(USUBJID, PARAMCD),
order = vars(AVAL, ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Worst observation - Direction = Lo
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = vars(USUBJID, PARAMCD),
order = vars(desc(AVAL), ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Average observation
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = vars(USUBJID, PARAMCD),
order = vars(ADT, desc(AVAL)),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE" & DTYPE == "AVERAGE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE Y
#> 2 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 3 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 4 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 5 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA NA
#> 6 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 7 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 8 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 9 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 10 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 11 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 12 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 13 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 15 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 16 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 17 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 18 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
#> 20 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA NA
# OCCURDS Examples
data("admiral_ae")
# Most severe AE first occurrence per patient
admiral_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCIFL,
by_vars = vars(USUBJID),
order = vars(TEMP_AESEVN, AESTDY, AESEQ),
mode = "first"
) %>%
arrange(USUBJID, AESTDY, AESEQ) %>%
select(USUBJID, AEDECOD, AESEV, AESTDY, AESEQ, AOCCIFL)
#> # A tibble: 1,191 x 6
#> USUBJID AEDECOD AESEV AESTDY AESEQ AOCCIFL
#> <chr> <chr> <chr> <dbl> <dbl> <chr>
#> 1 01-701-1015 APPLICATION SITE ERYTHEMA MILD 2 1 Y
#> 2 01-701-1015 APPLICATION SITE PRURITUS MILD 2 2 NA
#> 3 01-701-1015 DIARRHOEA MILD 8 3 NA
#> 4 01-701-1023 ERYTHEMA MILD 3 1 NA
#> 5 01-701-1023 ERYTHEMA MODERA… 3 2 Y
#> 6 01-701-1023 ERYTHEMA MILD 3 4 NA
#> 7 01-701-1023 ATRIOVENTRICULAR BLOCK SECOND DEGREE MILD 22 3 NA
#> 8 01-701-1028 APPLICATION SITE ERYTHEMA MILD 3 1 Y
#> 9 01-701-1028 APPLICATION SITE PRURITUS MILD 21 2 NA
#> 10 01-701-1034 APPLICATION SITE PRURITUS MILD 58 1 Y
#> # … with 1,181 more rows
# Most severe AE first occurrence per patient per body system
admiral_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCSIFL,
by_vars = vars(USUBJID, AEBODSYS),
order = vars(TEMP_AESEVN, AESTDY, AESEQ),
mode = "first"
) %>%
arrange(USUBJID, AESTDY, AESEQ) %>%
select(USUBJID, AEBODSYS, AESEV, AESTDY, AOCCSIFL)
#> # A tibble: 1,191 x 5
#> USUBJID AEBODSYS AESEV AESTDY AOCCSIFL
#> <chr> <chr> <chr> <dbl> <chr>
#> 1 01-701-1015 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD 2 Y
#> 2 01-701-1015 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD 2 NA
#> 3 01-701-1015 GASTROINTESTINAL DISORDERS MILD 8 Y
#> 4 01-701-1023 SKIN AND SUBCUTANEOUS TISSUE DISORDERS MILD 3 NA
#> 5 01-701-1023 SKIN AND SUBCUTANEOUS TISSUE DISORDERS MODERA… 3 Y
#> 6 01-701-1023 SKIN AND SUBCUTANEOUS TISSUE DISORDERS MILD 3 NA
#> 7 01-701-1023 CARDIAC DISORDERS MILD 22 Y
#> 8 01-701-1028 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD 3 Y
#> 9 01-701-1028 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD 21 NA
#> 10 01-701-1034 GENERAL DISORDERS AND ADMINISTRATION SIT… MILD 58 Y
#> # … with 1,181 more rows