# Load Packages
library(admiral)
library(dplyr)
library(lubridate)
library(stringr)
library(metacore)
library(metatools)
library(xportr)
library(readr)
library(pharmaversesdtm)
ADPPK Template Walkthrough
The Population PK Analysis Data (ADPPK) follows the CDISC Implementation Guide (https://www.cdisc.org/standards/foundational/adam/basic-data-structure-adam-poppk-implementation-guide-v1-0). Population PK models generally make use of nonlinear mixed effects models that require numeric variables. The data used in the models will include both dosing and concentration records, relative time variables, and numeric covariate variables. A `DV` or dependent variable is often expected. This is equivalent to the ADaM `AVAL` variable and will be included in addition to `AVAL` for ADPPK.
First Load Packages
First we will load the packages required for our project. We will use {admiral} for the creation of analysis data. {admiral} requires {dplyr}, {lubridate} and {stringr}. We will use {metacore} and {metatools} to store and manipulate metadata from our specifications. We will use {xportr} to perform checks on the final data and export to a transport file.
The source SDTM data will come from the CDISC pilot study data stored in {pharmaversesdtm}.
Next Load Specifications for Metacore
We have saved our specifications in an Excel file and will load them into {metacore} with the `spec_to_metacore()` function. The spec file can be found here.
# ---- Load Specs for Metacore ----
# Read the specification workbook and keep only the ADPPK dataset metadata.
# The resulting `metacore` object is passed to the {metatools} and {xportr}
# calls later in this script.
metacore <- spec_to_metacore("pk_spec.xlsx") %>%
  select_dataset("ADPPK")
Load Source Datasets
We will load our SDTM data from {pharmaversesdtm}. The main components of this will be exposure data from `EX` and pharmacokinetic concentration data from `PC`. We will use `ADSL` for baseline characteristics and we will derive additional baselines from vital signs `VS` and laboratory data `LB`.
# ---- Load source datasets ----
# Load PC, EX, VS, LB and ADSL
data("pc")
data("ex")
data("vs")
data("lb")
data("admiral_adsl")

# Use the example ADSL under the name the rest of the script expects
adsl <- admiral_adsl

# Convert blank strings to NA; each dataset is cleaned in place under its
# own name so later steps can rely on is.na() checks.
ex <- convert_blanks_to_na(ex)
pc <- convert_blanks_to_na(pc)
vs <- convert_blanks_to_na(vs)
lb <- convert_blanks_to_na(lb)
Derivations
Derive PC Dates
At this step, it may be useful to join `ADSL` to your `PC` and `EX` domains as well. Only the `ADSL` variables used for derivations are selected at this step. The rest of the relevant `ADSL` variables will be added later.
In this case we will keep `TRTSDT`/`TRTSDTM` for day derivation and `TRT01P`/`TRT01A` for planned and actual treatments.
In this segment we will use `derive_vars_merged()` to join the `ADSL` variables and the following {admiral} functions to derive analysis dates, times and days:
- `derive_vars_dtm()`
- `derive_vars_dtm_to_dt()`
- `derive_vars_dtm_to_tm()`
- `derive_vars_dy()`
We will also create `NFRLT` for `PC` data based on `PCTPTNUM`. We will create an event ID (`EVID`) of 0 for concentration records and 1 for dosing records.
# ---- Derivations ----
# Get list of ADSL vars required for derivations
adsl_vars <- exprs(TRTSDT, TRTSDTM, TRT01P, TRT01A)

# Concentration records: add ADSL variables, derive analysis date/time,
# and flag them as observation events (EVID = 0).
pc_dates <- pc %>%
  # Join ADSL with PC (need TRTSDT for ADY derivation)
  derive_vars_merged(
    dataset_add = adsl,
    new_vars = adsl_vars,
    by_vars = exprs(STUDYID, USUBJID)
  ) %>%
  # Derive analysis date/time
  # Impute missing time to 00:00:00
  derive_vars_dtm(
    new_vars_prefix = "A",
    dtc = PCDTC,
    time_imputation = "00:00:00"
  ) %>%
  # Derive dates and times from date/times
  derive_vars_dtm_to_dt(exprs(ADTM)) %>%
  derive_vars_dtm_to_tm(exprs(ADTM)) %>%
  # Derive event ID and nominal relative time from first dose (NFRLT)
  # Negative PCTPTNUM values are set to a nominal time of 0
  mutate(
    EVID = 0,
    DRUG = PCTEST,
    NFRLT = if_else(PCTPTNUM < 0, 0, PCTPTNUM), .after = USUBJID
  )
Get Dosing Information
Next we will also join `ADSL` data with `EX` and derive dates/times. This section uses the {admiral} functions `derive_vars_merged()`, `derive_vars_dtm()`, and `derive_vars_dtm_to_dt()`. Time is imputed to 00:00:00 here for reasons specific to the sample data. Other imputation times may be used based on study details. Here we create `NFRLT` for `EX` data based on `VISITDY` with the formula `(VISITDY - 1) * 24` using `dplyr::mutate()`.
# ---- Get dosing information ----
# Dosing records: add ADSL variables, derive analysis start/end date-times,
# and flag them as dosing events (EVID = 1).
ex_dates <- ex %>%
  derive_vars_merged(
    dataset_add = adsl,
    new_vars = adsl_vars,
    by_vars = exprs(STUDYID, USUBJID)
  ) %>%
  # Keep records with nonzero dose
  filter(EXDOSE > 0) %>%
  # Add time and set missing end date to start date
  # Impute missing time to 00:00:00
  # Note all times are missing for dosing records in this example data
  # Derive Analysis Start and End Dates
  derive_vars_dtm(
    new_vars_prefix = "AST",
    dtc = EXSTDTC,
    time_imputation = "00:00:00"
  ) %>%
  derive_vars_dtm(
    new_vars_prefix = "AEN",
    dtc = EXENDTC,
    time_imputation = "00:00:00"
  ) %>%
  # Derive event ID and nominal relative time from first dose (NFRLT)
  mutate(
    EVID = 1,
    NFRLT = 24 * (VISITDY - 1), .after = USUBJID
  ) %>%
  # Set missing end dates to start date
  mutate(AENDTM = case_when(
    is.na(AENDTM) ~ ASTDTM,
    TRUE ~ AENDTM
  )) %>%
  # Derive dates from date/times
  derive_vars_dtm_to_dt(exprs(ASTDTM)) %>%
  derive_vars_dtm_to_dt(exprs(AENDTM))
Expand Dosing Records
The {admiral} function `create_single_dose_dataset()` will be used to expand dosing records between the start date and end date. The nominal time will also be expanded based on the values of `EXDOSFRQ`, for example “QD” will result in nominal time being incremented by 24 hours and “BID” will result in nominal time being incremented by 12 hours.
# ---- Expand dosing records between start and end dates ----
# Updated function includes nominal_time parameter
# NFRLT is passed as nominal_time so it is expanded along with the dates.
ex_exp <- ex_dates %>%
  create_single_dose_dataset(
    dose_freq = EXDOSFRQ,
    start_date = ASTDT,
    start_datetime = ASTDTM,
    end_date = AENDT,
    end_datetime = AENDTM,
    nominal_time = NFRLT,
    lookup_table = dose_freq_lookup,
    lookup_column = CDISC_VALUE,
    keep_source_vars = exprs(
      STUDYID, USUBJID, EVID, EXDOSFRQ, EXDOSFRM,
      NFRLT, EXDOSE, EXDOSU, EXTRT, ASTDT, ASTDTM, AENDT, AENDTM,
      VISIT, VISITNUM, VISITDY,
      TRT01A, TRT01P, DOMAIN, EXSEQ, !!!adsl_vars
    )
  ) %>%
  # Derive AVISIT based on nominal relative time
  # Derive AVISITN to nominal time in whole days using integer division
  # Define AVISIT based on nominal day
  mutate(
    AVISITN = NFRLT %/% 24 + 1,
    AVISIT = paste("Day", AVISITN),
    ADTM = ASTDTM,
    DRUG = EXTRT
  ) %>%
  # Derive dates and times from datetimes
  derive_vars_dtm_to_dt(exprs(ADTM)) %>%
  derive_vars_dtm_to_tm(exprs(ADTM)) %>%
  derive_vars_dtm_to_tm(exprs(ASTDTM)) %>%
  derive_vars_dtm_to_tm(exprs(AENDTM))
Find First Dose
We find the first dose for the concentration records using the {admiral} function `derive_vars_merged()`.
# ---- Find first dose per treatment per subject ----
# ---- Join with ADPPK data and keep only subjects with dosing ----
# For each STUDYID/USUBJID/DRUG, take the first expanded dosing record
# (ordered by ADTM, EXSEQ) and attach its date-time and dose.
adppk_first_dose <- pc_dates %>%
  derive_vars_merged(
    dataset_add = ex_exp,
    filter_add = (!is.na(ADTM)),
    new_vars = exprs(FANLDTM = ADTM, EXDOSE_first = EXDOSE),
    order = exprs(ADTM, EXSEQ),
    mode = "first",
    by_vars = exprs(STUDYID, USUBJID, DRUG)
  ) %>%
  # Drop concentration records with no matching first dose
  filter(!is.na(FANLDTM)) %>%
  # Derive AVISIT based on nominal relative time
  # Derive AVISITN to nominal time in whole days using integer division
  # Define AVISIT based on nominal day
  mutate(
    AVISITN = NFRLT %/% 24 + 1,
    AVISIT = paste("Day", AVISITN)
  )
Find Previous Dose
For ADPPK we will find the previous dose with respect to actual time and nominal time. We will use `derive_vars_joined()`.
# ---- Find previous dose ----
# For each record, join the subject's dosing records whose ADTM is strictly
# earlier and keep the last one (by ADTM) as the previous dose.
adppk_prev <- adppk_first_dose %>%
  derive_vars_joined(
    dataset_add = ex_exp,
    by_vars = exprs(USUBJID),
    order = exprs(ADTM),
    new_vars = exprs(
      ADTM_prev = ADTM, EXDOSE_prev = EXDOSE, AVISIT_prev = AVISIT,
      AENDTM_prev = AENDTM
    ),
    join_vars = exprs(ADTM),
    join_type = "all",
    filter_add = NULL,
    filter_join = ADTM > ADTM.join,
    mode = "last",
    check_type = "none"
  )
Find Previous Nominal Dose
# ---- Find previous nominal dose ----
# Same join as the previous-dose step, but ordered and filtered on nominal
# time: keep the last dosing record with NFRLT strictly below the record's.
adppk_nom_prev <- adppk_prev %>%
  derive_vars_joined(
    dataset_add = ex_exp,
    by_vars = exprs(USUBJID),
    order = exprs(NFRLT),
    new_vars = exprs(NFRLT_prev = NFRLT),
    join_type = "all",
    join_vars = exprs(NFRLT),
    filter_add = NULL,
    filter_join = NFRLT > NFRLT.join,
    mode = "last",
    check_type = "none"
  )
Combine PC and EX Data
Here we combine `PC` and `EX` records. We will derive the relative time variables `AFRLT` (Actual Relative Time from First Dose), `APRLT` (Actual Relative Time from Previous Dose), and `NPRLT` (Nominal Relative Time from Previous Dose). Use `derive_vars_duration()` to derive `AFRLT` and `APRLT`. Note we defined `EVID` above with values of 0 for observation records and 1 for dosing records.
# ---- Combine ADPPK and EX data ----
# Derive Relative Time Variables
adppk_aprlt <- bind_rows(adppk_nom_prev, ex_exp) %>%
  group_by(USUBJID, DRUG) %>%
  # Per subject and drug: first dose date-time, smallest nominal time, and
  # the latest date among observation records (EVID == 0)
  mutate(
    FANLDTM = min(FANLDTM, na.rm = TRUE),
    min_NFRLT = min(NFRLT, na.rm = TRUE),
    maxdate = max(ADT[EVID == 0], na.rm = TRUE), .after = USUBJID
  ) %>%
  arrange(USUBJID, ADTM) %>%
  ungroup() %>%
  # Drop records dated after the last observation record
  filter(ADT <= maxdate) %>%
  # Derive Actual Relative Time from First Dose (AFRLT)
  derive_vars_duration(
    new_var = AFRLT,
    start_date = FANLDTM,
    end_date = ADTM,
    out_unit = "hours",
    floor_in = FALSE,
    add_one = FALSE
  ) %>%
  # Derive Actual Relative Time from Reference Dose (APRLT)
  derive_vars_duration(
    new_var = APRLT,
    start_date = ADTM_prev,
    end_date = ADTM,
    out_unit = "hours",
    floor_in = FALSE,
    add_one = FALSE
  ) %>%
  # Derive APRLT and NPRLT: 0 on dosing records; when there is no previous
  # dose, fall back to time from first dose / from the smallest nominal time
  mutate(
    APRLT = case_when(
      EVID == 1 ~ 0,
      is.na(APRLT) ~ AFRLT,
      TRUE ~ APRLT
    ),
    NPRLT = case_when(
      EVID == 1 ~ 0,
      is.na(NFRLT_prev) ~ NFRLT - min_NFRLT,
      TRUE ~ NFRLT - NFRLT_prev
    )
  )
Derive Analysis Variables
The expected analysis variable for ADPPK is `DV` or dependent variable. For this example `DV` is set to the numeric concentration value `PCSTRESN`. We will also include `AVAL` equivalent to `DV` for consistency with CDISC ADaM standards. `MDV` (missing dependent variable) will also be included.
# ---- Derive Analysis Variables ----
# Derive actual dose DOSEA and planned dose DOSEP,
# Derive AVAL and DV
adppk_aval <- adppk_aprlt %>%
  mutate(
    # Derive Actual Dose: own dose on dosing records, otherwise the previous
    # dose, falling back to the first dose when no previous dose exists
    DOSEA = case_when(
      EVID == 1 ~ EXDOSE,
      is.na(EXDOSE_prev) ~ EXDOSE_first,
      TRUE ~ EXDOSE_prev
    ),
    # Derive Planned Dose
    DOSEP = case_when(
      TRT01P == "Xanomeline High Dose" ~ 81,
      TRT01P == "Xanomeline Low Dose" ~ 54,
      TRT01P == "Placebo" ~ 0
    ),
    # Derive PARAMCD
    PARAMCD = case_when(
      EVID == 1 ~ "DOSE",
      TRUE ~ PCTESTCD
    ),
    ALLOQ = PCLLOQ,
    # Derive CMT: 1 on dosing records, 2 on observation records
    CMT = case_when(
      EVID == 1 ~ 1,
      TRUE ~ 2
    ),
    # Derive BLQFL/BLQFN
    BLQFL = case_when(
      PCSTRESC == "<BLQ" ~ "Y",
      TRUE ~ "N"
    ),
    BLQFN = case_when(
      PCSTRESC == "<BLQ" ~ 1,
      TRUE ~ 0
    ),
    # AMT is populated on dosing records only
    AMT = case_when(
      EVID == 1 ~ EXDOSE,
      TRUE ~ NA_real_
    ),
    # Derive DV and AVAL
    DV = PCSTRESN,
    DVID = PCTESTCD,
    AVAL = DV,
    # Log of DV; left missing when DV is 0
    DVL = case_when(
      DV != 0 ~ log(DV),
      TRUE ~ NA_real_
    ),
    # Derive MDV: 1 on dosing records and when DV is missing
    MDV = case_when(
      EVID == 1 ~ 1,
      is.na(DV) ~ 1,
      TRUE ~ 0
    ),
    AVALU = case_when(
      EVID == 1 ~ NA_character_,
      TRUE ~ PCSTRESU
    ),
    RLTU = "h",
    USTRESC = PCSTRESC,
    UDTC = format_ISO8601(ADTM),
    II = if_else(EVID == 1, 1, 0),
    SS = if_else(EVID == 1, 1, 0),
    ADDL = 0,
    OCC = 1
  )
Add ASEQ
We add a sequence variable using the {admiral} function `derive_var_obs_number()`.
# ---- Add ASEQ ----
adppk_aseq <- adppk_aval %>%
  # Calculate ASEQ within STUDYID/USUBJID, ordered by AFRLT then EVID;
  # check_type = "error" is requested for the uniqueness check
  derive_var_obs_number(
    new_var = ASEQ,
    by_vars = exprs(STUDYID, USUBJID),
    order = exprs(AFRLT, EVID),
    check_type = "error"
  ) %>%
  # Project identifiers (constant in this example apart from DRUG)
  mutate(
    PROJID = DRUG,
    PROJIDN = 1,
    PART = 1
  )
Derive Covariates Using Metacore
In this step we will create our numeric covariates using the `create_var_from_codelist()` function from {metatools}.
#---- Derive Covariates ----
# Include numeric values for STUDYIDN, USUBJIDN, SEXN, RACEN etc.
covar <- adsl %>%
  # Covariates decoded via the codelists stored in the metacore object
  create_var_from_codelist(metacore, input_var = STUDYID, out_var = STUDYIDN) %>%
  create_var_from_codelist(metacore, input_var = SEX, out_var = SEXN) %>%
  create_var_from_codelist(metacore, input_var = RACE, out_var = RACEN) %>%
  create_var_from_codelist(metacore, input_var = ETHNIC, out_var = AETHNIC) %>%
  create_var_from_codelist(metacore, input_var = AETHNIC, out_var = AETHNICN) %>%
  create_var_from_codelist(metacore, input_var = ARMCD, out_var = COHORT) %>%
  create_var_from_codelist(metacore, input_var = ARMCD, out_var = COHORTC) %>%
  create_var_from_codelist(metacore, input_var = COUNTRY, out_var = COUNTRYN) %>%
  create_var_from_codelist(metacore, input_var = COUNTRY, out_var = COUNTRYL) %>%
  mutate(
    # Numeric identifiers taken from the "-"-separated parts of USUBJID;
    # note this overwrites the codelist-derived STUDYIDN above
    STUDYIDN = as.numeric(word(USUBJID, 1, sep = fixed("-"))),
    SITEIDN = as.numeric(word(USUBJID, 2, sep = fixed("-"))),
    USUBJIDN = as.numeric(word(USUBJID, 3, sep = fixed("-"))),
    SUBJIDN = as.numeric(SUBJID),
    # NOTE(review): assumes EX holds a single route and a single dose form;
    # more than one unique value would not recycle to nrow(adsl) - confirm
    ROUTE = unique(ex$EXROUTE),
    FORM = unique(ex$EXDOSFRM),
    REGION1 = COUNTRY,
    REGION1N = COUNTRYN,
    SUBJTYPC = "Volunteer"
  ) %>%
  create_var_from_codelist(metacore, input_var = FORM, out_var = FORMN) %>%
  create_var_from_codelist(metacore, input_var = ROUTE, out_var = ROUTEN) %>%
  create_var_from_codelist(metacore, input_var = SUBJTYPC, out_var = SUBJTYP)
Derive Additional Baselines
Next we add additional baselines from vital signs and laboratory data. We will use the {admiral} functions `derive_vars_merged()` and `derive_vars_transposed()` to add these.
#---- Derive additional baselines from VS and LB ----
# Baseline lab records for the selected tests, with the test code suffixed
# by "BL" so the transposed columns become CREATBL, ALTBL, ASTBL, BILIBL.
labsbl <- lb %>%
  filter(LBBLFL == "Y" & LBTESTCD %in% c("CREAT", "ALT", "AST", "BILI")) %>%
  mutate(LBTESTCDB = paste0(LBTESTCD, "BL")) %>%
  select(STUDYID, USUBJID, LBTESTCDB, LBSTRESN)

covar_vslb <- covar %>%
  # Height (no baseline flag filter is applied)
  derive_vars_merged(
    dataset_add = vs,
    filter_add = VSTESTCD == "HEIGHT",
    by_vars = exprs(STUDYID, USUBJID),
    new_vars = exprs(HTBL = VSSTRESN)
  ) %>%
  # Baseline weight
  derive_vars_merged(
    dataset_add = vs,
    filter_add = VSTESTCD == "WEIGHT" & VSBLFL == "Y",
    by_vars = exprs(STUDYID, USUBJID),
    new_vars = exprs(WTBL = VSSTRESN)
  ) %>%
  # One column per baseline lab test
  derive_vars_transposed(
    dataset_merge = labsbl,
    by_vars = exprs(STUDYID, USUBJID),
    key_var = LBTESTCDB,
    value_var = LBSTRESN
  ) %>%
  mutate(
    BMIBL = compute_bmi(height = HTBL, weight = WTBL),
    # BSA must use baseline weight (WTBL); the height variable was
    # previously passed as weight by mistake
    BSABL = compute_bsa(
      height = HTBL,
      weight = WTBL,
      method = "Mosteller"
    ),
    CRCLBL = compute_egfr(
      creat = CREATBL, creatu = "SI", age = AGE, weight = WTBL, sex = SEX,
      method = "CRCL"
    ),
    EGFRBL = compute_egfr(
      creat = CREATBL, creatu = "SI", age = AGE, weight = WTBL, sex = SEX,
      method = "CKD-EPI"
    )
  ) %>%
  rename(TBILBL = BILIBL)
Combine with Covariates
We combine our covariates with the rest of the data
# Combine covariates with ADPPK data
adppk_prefinal <- adppk_aseq %>%
  # The ADSL variables in adsl_vars are already present, so exclude them
  # from the covariate dataset before merging
  derive_vars_merged(
    dataset_add = select(covar_vslb, !!!negate_vars(adsl_vars)),
    by_vars = exprs(STUDYID, USUBJID)
  ) %>%
  arrange(STUDYIDN, USUBJIDN, AFRLT, EVID) %>%
  # Add RECSEQ
  # Exclude records if needed
  mutate(
    RECSEQ = row_number(),
    EXCLFCOM = "None"
  ) %>%
  create_var_from_codelist(metacore, input_var = DVID, out_var = DVIDN) %>%
  create_var_from_codelist(metacore, input_var = EXCLFCOM, out_var = EXCLF)
Check Data With Metacore
We use {metacore} to perform a number of checks on the data. We will drop variables not in the specs and make sure all the variables from the specs are included.
# Final Steps, Select final variables and Add labels
# This process will be based on your metadata, no example given for this reason
# ...

# Output directory used for the transport and RDS files
dir <- "./output"

# Apply metadata and perform associated checks ----
# uses {metatools}
adppk <- adppk_prefinal %>%
  drop_unspec_vars(metacore) %>% # Drop unspecified variables from specs
  check_variables(metacore) %>% # Check all variables specified are present and no more
  check_ct_data(metacore) %>% # Checks all variables with CT only contain values within the CT
  order_cols(metacore) %>% # Orders the columns according to the spec
  sort_by_key(metacore) # Sorts the rows by the sort keys
Apply Labels and Formats with xportr
Using {xportr} we check variable type, assign variable length, add variable labels, add variable formats, and save a transport file.
# Apply type, length, label and format metadata, then write the transport
# file into the output directory.
adppk_xpt <- adppk %>%
  xportr_type(metacore) %>% # Coerce variable type to match spec
  xportr_length(metacore) %>% # Assigns SAS length from a variable level metadata
  xportr_label(metacore) %>% # Assigns variable label from metacore specifications
  xportr_format(metacore) %>% # Assigns variable format from metacore specifications
  xportr_df_label(metacore) %>% # Assigns dataset label from metacore specifications
  xportr_write(file.path(dir, "adppk.xpt")) # Write xpt v5 transport file
Save Final Output
Finally we save the final output. We will also create a CSV file for the modeler.
# ---- Save output ----
# Save the checked dataset as a compressed RDS file
saveRDS(adppk, file = file.path(dir, "adppk.rds"), compress = "bzip2")
# Write CSV
# Build the path from `dir` like the other outputs so all files stay together
write_csv(adppk_xpt, file.path(dir, "adppk.csv"))
Example Scripts
ADaM | Sample Code |
---|---|
ADPPK | ad_adppk_spec.R |