| Title: | Preprocessing Tools for Clinical Trial Data |
|---|---|
| Description: | A collection of functions to preprocess data and organize them in a format amenable to use by chevron. |
| Authors: | Liming Li [aut] (ORCID: <https://orcid.org/0009-0008-6870-0878>, Original creator of the package), Benoit Falquet [aut] (ORCID: <https://orcid.org/0000-0002-4434-3799>, Original creator of the package), Xiaoli Duan [ctb], Pawel Rucki [ctb], Joe Zhu [cre] (ORCID: <https://orcid.org/0000-0001-7566-2787>), F. Hoffmann-La Roche AG [cph, fnd] |
| Maintainer: | Joe Zhu <[email protected]> |
| License: | Apache License 2.0 |
| Version: | 0.1.12.9000 |
| Built: | 2026-06-02 09:50:29 UTC |
| Source: | https://github.com/insightsengineering/dunlin |
Dunlin Package: A collection of functions to preprocess data and organize them in a format amenable to use by chevron.
Maintainer: Joe Zhu [email protected] (ORCID)
Authors:
Liming Li [email protected] (ORCID) (Original creator of the package)
Benoit Falquet [email protected] (ORCID) (Original creator of the package)
Other contributors:
Xiaoli Duan [email protected] [contributor]
Pawel Rucki [email protected] [contributor]
F. Hoffmann-La Roche AG [copyright holder, funder]
Useful links:
Report bugs at https://github.com/insightsengineering/dunlin/issues
Add whisker values
add_whisker(x)add_whisker(x)
x |
Named ( |
The names of the character vector give the strings to be replaced and the values give the new strings.
invisible NULL. Assign the key-value pair provided as argument in the whisker environment.
my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2") add_whisker(my_whiskers)my_whiskers <- c(Placeholder = "Replacement", Placeholder2 = "Replacement2") add_whisker(my_whiskers)
Convert Rule to List
## S3 method for class 'rule' as.list(x, ...)## S3 method for class 'rule' as.list(x, ...)
x |
( |
... |
not used. |
an object of class list.
x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w")) as.list(x)x <- rule("a" = c("a", "b"), "X" = "x", .to_NA = c("v", "w")) as.list(x)
list of data.frame
Assert that all names are among names of a list of data.frame.
assert_all_tablenames(db, tab, null_ok = TRUE, qualifier = NULL)assert_all_tablenames(db, tab, null_ok = TRUE, qualifier = NULL)
db |
( |
tab |
( |
null_ok |
( |
qualifier |
( |
invisible TRUE or an error message if the criteria are not fulfilled.
lsd <- list( mtcars = mtcars, iris = iris ) assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:")lsd <- list( mtcars = mtcars, iris = iris ) assert_all_tablenames(lsd, c("mtcars", "iris"), qualifier = "first test:")
Assert Nested List can be used as Format Argument in Reformat.
assert_valid_format(object)assert_valid_format(object)
object |
( |
invisible TRUE or an error message if the criteria are not fulfilled.
format <- list( df1 = list( var1 = rule("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = rule(), var2 = rule("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_format(format)format <- list( df1 = list( var1 = rule("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = rule(), var2 = rule("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_format(format)
Assert List can be Converted into a Nested List Compatible with the Format Argument of Reformat.
assert_valid_list_format(object)assert_valid_list_format(object)
object |
( |
invisible TRUE or an error message if the criteria are not fulfilled.
format <- list( df1 = list( var1 = list("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = list(), var2 = list("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_list_format(format)format <- list( df1 = list( var1 = list("X" = "x", "N" = c(NA, "")) ), df2 = list( var1 = list(), var2 = list("f11" = "F11", "NN" = NA) ), df3 = list() ) assert_valid_list_format(format)
Setting the Label Attribute
attr_label(var, label)attr_label(var, label)
var |
( |
label |
( |
object with label attribute.
x <- c(1:10) attr(x, "label") y <- attr_label(x, "my_label") attr(y, "label")x <- c(1:10) attr(x, "label") y <- attr_label(x, "my_label") attr(y, "label")
Setting the Label Attribute to Data Frame Columns
attr_label_df(df, label)attr_label_df(df, label)
df |
( |
label |
( |
data.frame with label attributes.
res <- attr_label_df(mtcars, letters[1:11]) res lapply(res, attr, "label")res <- attr_label_df(mtcars, letters[1:11]) res lapply(res, attr, "label")
Reorder Two Columns Levels Simultaneously
co_relevels(df, primary, secondary, levels_primary)co_relevels(df, primary, secondary, levels_primary)
df |
( |
primary |
( |
secondary |
( |
levels_primary |
( |
The function expects a 1:1 matching between the elements of the two selected columns.
a data.frame with the secondary column converted to factor with reordered levels.
df <- data.frame( SUBJID = 1:3, PARAMCD = factor(c("A", "B", "C")), PARAM = factor(paste("letter", LETTERS[1:3])) ) co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))df <- data.frame( SUBJID = 1:3, PARAMCD = factor(c("A", "B", "C")), PARAM = factor(paste("letter", LETTERS[1:3])) ) co_relevels(df, "PARAMCD", "PARAM", levels_primary = c("C", "A", "B"))
Combine Rules Found in Lists of Rules.
combine_list_rules(x, val, ...)combine_list_rules(x, val, ...)
x |
( |
val |
( |
... |
passed to |
a list of rule objects.
l1 <- list( r1 = rule( "first" = c("overwritten", "OVERWRITTEN"), "almost first" = c(NA, "almost") ), r2 = rule( ANYTHING = "anything" ) ) l2 <- list( r1 = rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ), r3 = rule( SOMETHING = "something" ) ) combine_list_rules(l1, l2)l1 <- list( r1 = rule( "first" = c("overwritten", "OVERWRITTEN"), "almost first" = c(NA, "almost") ), r2 = rule( ANYTHING = "anything" ) ) l2 <- list( r1 = rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ), r3 = rule( SOMETHING = "something" ) ) combine_list_rules(l1, l2)
Combine Two Rules
combine_rules(x, y, ...)combine_rules(x, y, ...)
x |
( |
y |
( |
... |
not used. |
a rule.
The order of the mappings in the resulting rule corresponds to the order of the mappings in x followed by the
mappings that are only present in y.
r1 <- rule( "first" = c("from ori rule", "FROM ORI RULE"), "last" = c(NA, "last"), .to_NA = "X", .drop = TRUE ) r2 <- rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ) combine_rules(r1, r2)r1 <- rule( "first" = c("from ori rule", "FROM ORI RULE"), "last" = c(NA, "last"), .to_NA = "X", .drop = TRUE ) r2 <- rule( "first" = c("F", "f"), "second" = c("S", "s"), "third" = c("T", "t"), .to_NA = "something" ) combine_rules(r1, r2)
Cutting data by group
cut_by_group(df, col_data, col_group, group, cat_col)cut_by_group(df, col_data, col_group, group, cat_col)
df |
( |
col_data |
( |
col_group |
( |
group |
( |
cat_col |
( |
Function used to categorize numeric data stored in long format depending on their group. Intervals are closed on the right (and open on the left).
data.frame with a column containing categorical values.
group <- list( list( "Height", c(-Inf, 150, 170, Inf), c("=<150", "150-170", ">170") ), list( "Weight", c(-Inf, 65, Inf), c("=<65", ">65") ), list( "Age", c(-Inf, 31, Inf), c("=<31", ">31") ), list( "PreCondition", c(-Inf, 1, Inf), c("=<1", "<1") ) ) data <- data.frame( SUBJECT = rep(letters[1:10], 4), PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10), AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)), index = 1:40 ) cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")group <- list( list( "Height", c(-Inf, 150, 170, Inf), c("=<150", "150-170", ">170") ), list( "Weight", c(-Inf, 65, Inf), c("=<65", ">65") ), list( "Age", c(-Inf, 31, Inf), c("=<31", ">31") ), list( "PreCondition", c(-Inf, 1, Inf), c("=<1", "<1") ) ) data <- data.frame( SUBJECT = rep(letters[1:10], 4), PARAM = rep(c("Height", "Weight", "Age", "other"), each = 10), AVAL = c(rnorm(10, 165, 15), rnorm(10, 65, 5), runif(10, 18, 65), rnorm(10, 0, 1)), index = 1:40 ) cut_by_group(data, "AVAL", "PARAM", group, "my_new_categories")
Getting Argument From System, Option or Default
get_arg(opt = NULL, sys = NULL, default = NULL, split = ";")get_arg(opt = NULL, sys = NULL, default = NULL, split = ";")
opt |
( |
sys |
( |
default |
value to return if neither the environment variable nor the option are set. |
split |
( |
if defined, the value of the option (opt), a character from the environment variable (sys) or the
default in this order of priority.
get_arg("my.option", "MY_ARG", "default") withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default")) withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default"))get_arg("my.option", "MY_ARG", "default") withr::with_envvar(c(MY_ARG = "x;y"), get_arg("my.option", "MY_ARG", "default")) withr::with_options(c(my.option = "y"), get_arg("my.option", "MY_ARG", "default"))
Get Log
get_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' get_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' get_log(data, incl = TRUE, incl.adsl = TRUE)get_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' get_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' get_log(data, incl = TRUE, incl.adsl = TRUE)
data |
( |
incl |
( |
incl.adsl |
( |
character or list of character describing the filtering applied to data.
data <- log_filter(iris, Sepal.Length >= 7, "xx") data <- log_filter(data, Sepal.Length < 2) data <- log_filter(data, Sepal.Length >= 2, "yy") get_log(data) data <- log_filter( list(iris1 = iris, iris2 = iris), Sepal.Length >= 7, "iris1", character(0), "Sep" ) get_log(data)data <- log_filter(iris, Sepal.Length >= 7, "xx") data <- log_filter(data, Sepal.Length < 2) data <- log_filter(data, Sepal.Length >= 2, "yy") get_log(data) data <- log_filter( list(iris1 = iris, iris2 = iris), Sepal.Length >= 7, "iris1", character(0), "Sep" ) get_log(data)
adsub to adsl
Join adsub to adsl
join_adsub_adsl( adam_db, keys, continuous_var, categorical_var, continuous_suffix, categorical_suffix, drop_na = TRUE, drop_lvl = TRUE ) ## S3 method for class 'list' join_adsub_adsl( adam_db, keys = c("USUBJID", "STUDYID"), continuous_var = "all", categorical_var = "all", continuous_suffix = "", categorical_suffix = "_CAT", drop_na = TRUE, drop_lvl = FALSE )join_adsub_adsl( adam_db, keys, continuous_var, categorical_var, continuous_suffix, categorical_suffix, drop_na = TRUE, drop_lvl = TRUE ) ## S3 method for class 'list' join_adsub_adsl( adam_db, keys = c("USUBJID", "STUDYID"), continuous_var = "all", categorical_var = "all", continuous_suffix = "", categorical_suffix = "_CAT", drop_na = TRUE, drop_lvl = FALSE )
adam_db |
( |
keys |
( |
continuous_var |
( |
categorical_var |
( |
continuous_suffix |
( |
categorical_suffix |
( |
drop_na |
( |
drop_lvl |
( |
a list of data.frame with new columns in the adsl table.
adsl <- data.frame( USUBJID = c("S1", "S2", "S3", "S4"), STUDYID = "My_study", AGE = c(60, 44, 23, 31) ) adsub <- data.frame( USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"), STUDYID = "My_study", PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"), PARAMCD = c("w", "w", "w", "w", "h", "h", "h"), AVAL = c(98, 75, 70, 71, 182, 155, 152), AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180") ) db <- list(adsl = adsl, adsub = adsub) x <- join_adsub_adsl(adam_db = db) x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")adsl <- data.frame( USUBJID = c("S1", "S2", "S3", "S4"), STUDYID = "My_study", AGE = c(60, 44, 23, 31) ) adsub <- data.frame( USUBJID = c("S1", "S2", "S3", "S4", "S1", "S2", "S3"), STUDYID = "My_study", PARAM = c("weight", "weight", "weight", "weight", "height", "height", "height"), PARAMCD = c("w", "w", "w", "w", "h", "h", "h"), AVAL = c(98, 75, 70, 71, 182, 155, 152), AVALC = c(">80", "<=80", "<=80", "<=80", ">180", "<=180", "<=180") ) db <- list(adsl = adsl, adsub = adsub) x <- join_adsub_adsl(adam_db = db) x <- join_adsub_adsl(adam_db = db, continuous_var = c("w", "h"), categorical_var = "h")
rule
Convert nested list into list of rule
list2rules(obj)list2rules(obj)
obj |
( |
a list of rule objects.
obj <- list( rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"), rule2 = list(Missing = c(NA, "")), rule3 = list(Missing = c(NA, ""), .drop = TRUE), rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy") ) list2rules(obj)obj <- list( rule1 = list("X" = c("a", "b"), "Z" = "c", .to_NA = "xxxx"), rule2 = list(Missing = c(NA, "")), rule3 = list(Missing = c(NA, ""), .drop = TRUE), rule4 = list(Absent = c(NA, ""), .drop = TRUE, .to_NA = "yyyy") ) list2rules(obj)
Filter Data with Log
log_filter(data, condition, ...) ## S3 method for class 'data.frame' log_filter(data, condition, suffix = NULL, ...) ## S3 method for class 'list' log_filter( data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, mode = ifelse(table == "adsl", "all", "unique"), ... )log_filter(data, condition, ...) ## S3 method for class 'data.frame' log_filter(data, condition, suffix = NULL, ...) ## S3 method for class 'list' log_filter( data, condition, table, by = c("USUBJID", "STUDYID"), suffix = NULL, verbose = FALSE, mode = ifelse(table == "adsl", "all", "unique"), ... )
data |
( |
condition |
( |
... |
further arguments to be passed to or from other methods. |
suffix |
( |
table |
( |
by |
( |
verbose |
( |
mode |
( |
log_filter will filter the data.frame / named list of data.frame according to the condition.
All the variables in condition must exist in the data (as variables) or in the parent
frame (e.g., in the global environment).
For a named list of data.frame, set mode = "all" to filter other tables by keys retained in table (using by), or mode = "unique" to leave other tables unchanged.
a data.frame or list of data.frame filtered for the provided conditions.
data <- iris attr(data$Sepal.Length, "label") <- "cm" log_filter(data, Sepal.Length >= 7) log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))data <- iris attr(data$Sepal.Length, "label") <- "cm" log_filter(data, Sepal.Length >= 7) log_filter(list(iris = iris), Sepal.Length >= 7, "iris", character(0))
list of data.frame
Encode Categorical Missing Values in a list of data.frame
ls_explicit_na( data, omit_tables = NULL, omit_columns = NULL, char_as_factor = TRUE, na_level = "<Missing>" )ls_explicit_na( data, omit_tables = NULL, omit_columns = NULL, char_as_factor = TRUE, na_level = "<Missing>" )
data |
( |
omit_tables |
( |
omit_columns |
( |
char_as_factor |
( |
na_level |
( |
This is a helper function to encode missing values (i.e., NA and empty string) of every character and
factor variable found in a list of data.frame. The label attribute of the columns is preserved.
list of data.frame object with explicit missing levels.
df1 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "char2" = c("A", "B", NA, "A", "K", "X"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA) ) df2 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "num" = c(1:5, NA) ) df3 <- data.frame( "char" = c(NA, NA, "A") ) db <- list(df1 = df1, df2 = df2, df3 = df3) ls_explicit_na(db) ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")df1 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "char2" = c("A", "B", NA, "A", "K", "X"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "logi" = c(NA, FALSE, TRUE, NA, FALSE, NA) ) df2 <- data.frame( "char" = c("a", "b", NA, "a", "k", "x"), "fact" = factor(c("f1", "f2", NA, NA, "f1", "f1")), "num" = c(1:5, NA) ) df3 <- data.frame( "char" = c(NA, NA, "A") ) db <- list(df1 = df1, df2 = df2, df3 = df3) ls_explicit_na(db) ls_explicit_na(db, omit_tables = "df3", omit_columns = "char2")
list of data.frame
Unite Columns of a Table in a list of data.frame.
ls_unite(adam_db, tab, cols, sep = ".", new = NULL)ls_unite(adam_db, tab, cols, sep = ".", new = NULL)
adam_db |
( |
tab |
( |
cols |
( |
sep |
( |
new |
( |
list of data.frames object with a united column.
db <- list(mtcars = mtcars, iris = iris) x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION") x$mtcarsdb <- list(mtcars = mtcars, iris = iris) x <- ls_unite(db, "mtcars", c("mpg", "hp"), new = "FUSION") x$mtcars
Transforming data.frame with Multiple Identifying columns into Wide Format
multi_id_pivot_wider( data, id, param_from, value_from, drop_na = FALSE, drop_lvl = FALSE )multi_id_pivot_wider( data, id, param_from, value_from, drop_na = FALSE, drop_lvl = FALSE )
data |
( |
id |
( |
param_from |
( |
value_from |
( |
drop_na |
( |
drop_lvl |
( |
This function allows identifying observations on the basis of several columns. Warning: Instead of nesting duplicated values, the function will throw an error if the same parameter is provided twice for the same observation.
data.frame in a wide format.
test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val") multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val")test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_val = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) multi_id_pivot_wider(test_data, c("the_obs", "the_obs2"), "the_param", "the_val") multi_id_pivot_wider(test_data, "the_obs2", "the_param", "the_val")
Transforming data.frame with Multiple Data Columns into Wide Format
poly_pivot_wider( data, id, param_from, value_from, labels_from = NULL, drop_na = TRUE, drop_lvl = FALSE )poly_pivot_wider( data, id, param_from, value_from, labels_from = NULL, drop_na = TRUE, drop_lvl = FALSE )
data |
( |
id |
( |
param_from |
( |
value_from |
( |
labels_from |
( |
drop_na |
( |
drop_lvl |
( |
This function is adapted to cases where the data are distributed in several columns while the name of the
parameter is in one. Typical example is adsub where numeric data are stored in AVAL while categorical data are
in AVALC.
list of data.frame in a wide format with label attribute attached to each column.
test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_label = c( "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)", "Gender", "Height (cm)", "Height (cm)", "Pre-condition" ), the_val = c(65, 165, NA, 66, NA, 166, 155, NA), the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) x <- poly_pivot_wider( test_data, c("the_obs", "the_obs2"), "the_param", c("the_val", "the_val2"), "the_label" ) x Reduce(function(u, v) merge(u, v, all = TRUE), x)test_data <- data.frame( the_obs = c("A", "A", "A", "B", "B", "B", "C", "D"), the_obs2 = c("Ax", "Ax", "Ax", "Bx", "Bx", "Bx", "Cx", "Dx"), the_param = c("weight", "height", "gender", "weight", "gender", "height", "height", "other"), the_label = c( "Weight (Kg)", "Height (cm)", "Gender", "Weight (Kg)", "Gender", "Height (cm)", "Height (cm)", "Pre-condition" ), the_val = c(65, 165, NA, 66, NA, 166, 155, NA), the_val2 = c(65, 165, "M", 66, "F", 166, 155, TRUE) ) x <- poly_pivot_wider( test_data, c("the_obs", "the_obs2"), "the_param", c("the_val", "the_val2"), "the_label" ) x Reduce(function(u, v) merge(u, v, all = TRUE), x)
Print Log
print_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' print_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' print_log(data, incl = TRUE, incl.adsl = TRUE)print_log(data, incl, incl.adsl) ## S3 method for class 'data.frame' print_log(data, incl = TRUE, incl.adsl = TRUE) ## S3 method for class 'list' print_log(data, incl = TRUE, incl.adsl = TRUE)
data |
( |
incl |
( |
incl.adsl |
( |
NULL. Print a description of the filtering applied to data.
data <- log_filter(iris, Sepal.Length >= 7, "Sep") print_log(data) data <- log_filter( list( adsl = iris, iris2 = iris, mtcars = mtcars, iris3 = iris ), Sepal.Length >= 7, "adsl", character(0), "adsl filter" ) data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter") print_log(data) print_log(data, incl = FALSE) print_log(data, incl.adsl = FALSE, incl = FALSE)data <- log_filter(iris, Sepal.Length >= 7, "Sep") print_log(data) data <- log_filter( list( adsl = iris, iris2 = iris, mtcars = mtcars, iris3 = iris ), Sepal.Length >= 7, "adsl", character(0), "adsl filter" ) data <- log_filter(data, Sepal.Length >= 7, "iris2", character(0), "iris2 filter") print_log(data) print_log(data, incl = FALSE) print_log(data, incl.adsl = FALSE, incl = FALSE)
propagate copies columns from a given table of a list of data.frame to all tables based on other
common columns. If several rows are associated with the same key, the rows will be duplicated in the receiving
tables. In safe mode, the key must be unique in the original table.
propagate(db, from, add, by, safe = TRUE) ## S3 method for class 'list' propagate(db, from, add, by, safe = TRUE)propagate(db, from, add, by, safe = TRUE) ## S3 method for class 'list' propagate(db, from, add, by, safe = TRUE)
db |
( |
from |
( |
add |
( |
by |
( |
safe |
( |
updated list of data.frame.
df1 <- data.frame( id1 = c("a", "a", "c", "d", "e", "f"), id2 = c("A", "B", "A", "A", "A", "A"), int = c(1, 2, 3, 4, 5, 6), bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE) ) df2 <- data.frame( id1 = c("a", "a", "d", "e", "f", "g"), id2 = c("A", "B", "A", "A", "A", "A") ) df3 <- data.frame( id1 = c("a", "c", "d", "e", "f", "x"), id2 = c("A", "A", "A", "A", "B", "A"), int = c(11, 22, 33, 44, 55, 66) ) db <- list(df1 = df1, fd2 = df2, df3 = df3) propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))df1 <- data.frame( id1 = c("a", "a", "c", "d", "e", "f"), id2 = c("A", "B", "A", "A", "A", "A"), int = c(1, 2, 3, 4, 5, 6), bool = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE) ) df2 <- data.frame( id1 = c("a", "a", "d", "e", "f", "g"), id2 = c("A", "B", "A", "A", "A", "A") ) df3 <- data.frame( id1 = c("a", "c", "d", "e", "f", "x"), id2 = c("A", "A", "A", "A", "B", "A"), int = c(11, 22, 33, 44, 55, 66) ) db <- list(df1 = df1, fd2 = df2, df3 = df3) propagate(db, from = "df1", add = c("int", "bool"), by = c("id1", "id2"))
Replaces values in vectors or list of data.frame using user-defined rule or list of rule.
See vignette("Reformatting", package = "dunlin") for a detailed guide on using this function.
reformat(obj, ...) ## Default S3 method: reformat(obj, format, ...) ## S3 method for class 'character' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'factor' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'list' reformat( obj, format, ..., verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE) )reformat(obj, ...) ## Default S3 method: reformat(obj, format, ...) ## S3 method for class 'character' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'factor' reformat(obj, format, ..., verbose = FALSE) ## S3 method for class 'list' reformat( obj, format, ..., verbose = get_arg("dunlin.reformat.verbose", "R_DUNLIN_REFORMAT_VERBOSE", FALSE) )
obj |
( |
... |
for compatibility between methods and pass additional special mapping to transform rules.
|
format |
( |
verbose |
( |
(character, factor or list of data.frame) with remapped values.
When the rule is an empty rule or when values subject to reformatting are absent from the object, no error is
raised. The conversion to factor (if .string_as_fct = TRUE) is still carried out. The conversion of the levels
declared in .to_NA to NA values occurs after the remapping. NA values created this way are not affected by a
rule declaring a remapping of NA values. For factors, level dropping is the last step, hence, levels converted to
NA by the .to_NA argument, will be removed if .drop is TRUE. Arguments passed via reformat override the
ones defined during rule creation.
The variables listed under the all_datasets keyword will be reformatted with the corresponding rule in every
data set except where another rule is specified for the same variable under a specific data set name.
# Reformatting of character. obj <- c("a", "b", "x", NA, "") attr(obj, "label") <- "my label" format <- rule("A" = "a", "NN" = NA) reformat(obj, format) reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL) # Reformatting of factor. obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z")) attr(obj, "label") <- "my label" format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P") reformat(obj, format) reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE) # Reformatting of list of data.frame. df1 <- data.frame( var1 = c("a", "b", NA), var2 = factor(c("F1", "F2", NA)) ) df2 <- data.frame( var1 = c("x", NA, "y"), var2 = factor(c("F11", NA, "F22")) ) db <- list(df1 = df1, df2 = df2) format <- list( df1 = list( var1 = rule("X" = "x", "N" = NA, .to_NA = "b") ), df2 = list( var2 = rule("f11" = "F11", "NN" = NA) ), df_absent = list( var1 = rule("NO" = "no") ), all_datasets = list( var1 = rule("xx" = "x", "aa" = "a") ) ) reformat(db, format)# Reformatting of character. obj <- c("a", "b", "x", NA, "") attr(obj, "label") <- "my label" format <- rule("A" = "a", "NN" = NA) reformat(obj, format) reformat(obj, format, .string_as_fct = FALSE, .to_NA = NULL) # Reformatting of factor. obj <- factor(c("first", "a", "aa", "b", "x", NA), levels = c("first", "x", "b", "aa", "a", "z")) attr(obj, "label") <- "my label" format <- rule("A" = c("a", "aa"), "NN" = c(NA, "x"), "Not_present" = "z", "Not_a_level" = "P") reformat(obj, format) reformat(obj, format, .na_last = FALSE, .to_NA = "b", .drop = FALSE) # Reformatting of list of data.frame. 
df1 <- data.frame( var1 = c("a", "b", NA), var2 = factor(c("F1", "F2", NA)) ) df2 <- data.frame( var1 = c("x", NA, "y"), var2 = factor(c("F11", NA, "F22")) ) db <- list(df1 = df1, df2 = df2) format <- list( df1 = list( var1 = rule("X" = "x", "N" = NA, .to_NA = "b") ), df2 = list( var2 = rule("f11" = "F11", "NN" = NA) ), df_absent = list( var1 = rule("NO" = "no") ), all_datasets = list( var1 = rule("xx" = "x", "aa" = "a") ) ) reformat(db, format)
Remove whisker values
remove_whisker(x)remove_whisker(x)
x |
Named ( |
invisible NULL. Removes x from the whisker environment.
Render whiskers safely
render_safe(x)render_safe(x)
x |
( |
character with substituted placeholders.
The strings enclosed in {} are substituted using the key-value pairs set with add_whisker.
render_safe("Name of {Patient_label}")render_safe("Name of {Patient_label}")
Create rule based on mappings
rule( ..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "" )rule( ..., .lst = list(...), .string_as_fct = TRUE, .na_last = TRUE, .drop = FALSE, .to_NA = "" )
... |
Mapping pairs: the argument name is the transformed value while its values are the original values. |
.lst |
( |
.string_as_fct |
( |
.na_last |
( |
.drop |
( |
.to_NA |
( |
a rule object.
Conversion to NA is the last step of the remapping process.
rule("X" = "x", "Y" = c("y", "z")) rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)rule("X" = "x", "Y" = c("y", "z")) rule("X" = "x", "Y" = c("y", "z"), .drop = TRUE, .to_NA = c("a", "b"), .na_last = FALSE)
Show Whisker Values
show_whisker()show_whisker()
invisible NULL. Prints the values stored in the whisker environment.
show_whisker()show_whisker()
Utility for creating subject-level flags from data frames that are more than one line per subject. For example, use this function to create a flag indicating whether a subject experienced any serious adverse events.
The function works by first creating a logical variable in data_long
indicating whether the condition passed in the ... argument is met.
If a subject has true on any row, then the new variable is added
to data as TRUE, otherwise that subject's value is populated with a FALSE.
subject_level_flag(data, data_long, ..., .key = "USUBJID")subject_level_flag(data, data_long, ..., .key = "USUBJID")
data |
( |
data_long |
( |
... |
( |
.key |
( |
Subject-level data frame
adsl <- tibble::tribble( ~USUBJID, ~SEX, "01-701-1015", "F", "01-701-1023", "M", "01-701-1028", "M" ) adae <- tibble::tribble( ~USUBJID, ~AESER, ~AEACN, "01-701-1015", "Y", "DOSE NOT CHANGED", "01-701-1015", "N", "DOSE NOT CHANGED", "01-701-1028", "N", "DRUG WITHDRAWN" ) subject_level_flag( data = adsl, data_long = adae, ANY_AESER = AESER == "Y", ANY_DRUG_WITHDRAWN = AEACN == "DRUG WITHDRAWN" )adsl <- tibble::tribble( ~USUBJID, ~SEX, "01-701-1015", "F", "01-701-1023", "M", "01-701-1028", "M" ) adae <- tibble::tribble( ~USUBJID, ~AESER, ~AEACN, "01-701-1015", "Y", "DOSE NOT CHANGED", "01-701-1015", "N", "DOSE NOT CHANGED", "01-701-1028", "N", "DRUG WITHDRAWN" ) subject_level_flag( data = adsl, data_long = adae, ANY_AESER = AESER == "Y", ANY_DRUG_WITHDRAWN = AEACN == "DRUG WITHDRAWN" )