R/check_dm_usubjid_dup.R
check_dm_usubjid_dup.Rd
This check looks for duplicate patient demographics records in DM
check_dm_usubjid_dup(DM)
Boolean value for whether the check passed or failed, with a 'msg' attribute (and, in some cases, a 'data' attribute listing the flagged records) if the check failed
## duplicates and same patient number across sites for 3-part USUBJID
DM <- data.frame(USUBJID = c("GO12345-00000-1000",
"GO12345-11111-1000",
"GO12345-00000-1000",
"GO12345-00000-1001"),
stringsAsFactors = FALSE)
check_dm_usubjid_dup(DM)
#> [1] FALSE
#> attr(,"msg")
#> [1] "Duplicate USUBJID and/or same Patient number across different USUBJIDs"
#> attr(,"data")
#> # A tibble: 3 × 2
#> USUBJID FLAG
#> <chr> <chr>
#> 1 GO12345-00000-1000 Same Patient Number Across Different USUBJID
#> 2 GO12345-11111-1000 Same Patient Number Across Different USUBJID
#> 3 GO12345-00000-1000 Duplicate USUBJID
## no duplicate IDs in the dataframe for 3-part USUBJID
DM2 <- data.frame(USUBJID = c("GO12345-00000-1000",
"GO12345-11111-1001",
"GO12345-11111-1002"),
stringsAsFactors = FALSE)
check_dm_usubjid_dup(DM2)
#> [1] TRUE
## duplicates for 2-part USUBJID
DM3 <- data.frame(USUBJID = c("GO12345-1000",
"GO12345-1000"),
stringsAsFactors = FALSE)
check_dm_usubjid_dup(DM3)
#> [1] FALSE
#> attr(,"msg")
#> [1] "Duplicate USUBJID and/or same Patient number across different USUBJIDs"
#> attr(,"data")
#> # A tibble: 1 × 2
#> USUBJID FLAG
#> <chr> <chr>
#> 1 GO12345-1000 Duplicate USUBJID
## no duplicate IDs in the dataframe for 2-part USUBJID
DM4 <- data.frame(USUBJID = c("GO12345-1000",
"GO12345-1001",
"GO12345-1002"),
stringsAsFactors = FALSE)
check_dm_usubjid_dup(DM4)
#> [1] TRUE
## duplicate USUBJID in a dataframe with additional variables whose values differ across the duplicate records
DM5 <- data.frame(USUBJID = c("GO12345-1000",
"GO12345-1000"),
SEX = c("M", "F"),
AGE = c(18, 60),
stringsAsFactors = FALSE)
check_dm_usubjid_dup(DM5)
#> [1] FALSE
#> attr(,"msg")
#> [1] "Duplicate USUBJID and/or same Patient number across different USUBJIDs"
#> attr(,"data")
#> # A tibble: 1 × 2
#> USUBJID FLAG
#> <chr> <chr>
#> 1 GO12345-1000 Duplicate USUBJID
## dataframe in which USUBJID is not present
DM6 <- data.frame(
STUDYID = c("GO12345"),
SEX = c("M"),
AGE = c(72),
stringsAsFactors = FALSE)
check_dm_usubjid_dup(DM6)
#> [1] FALSE
#> attr(,"msg")
#> [1] "DM is missing the variable: USUBJID"