How to generate QC-ready result data frames from tables

Disclaimer

This vignette is a work in progress.

Create the example table

First, we need a table from which to retrieve all the necessary information. We borrow one from the vignette about clinical trials.

# Attach rtables; the example dataset `ex_adsl` used below is expected to be
# available once the package is loaded.
library(rtables)
ADSL <- ex_adsl # Example ADSL dataset
# Analysis function for a numeric variable.
# Returns a single cell labelled "Mean (SD)" that holds the mean and the
# standard deviation of `x`. Missing values are not removed, so any NA in `x`
# propagates to both statistics.
mean_sd_custom <- function(x) {
  stats <- c(mean(x, na.rm = FALSE), sd(x, na.rm = FALSE))
  rcell(stats, label = "Mean (SD)", format = "xx.x (xx.x)")
}
# Analysis function for a categorical variable.
# Produces one row per level of `x`; each cell holds the level's count and its
# proportion of all tabulated values (table() excludes NA by default), shown
# with the "xx. (xx.%)" format. Rows are labelled with the level names.
counts_percentage_custom <- function(x) {
  cnts <- table(x)
  total <- sum(cnts) # denominator is the same for every level: compute it once
  out <- lapply(cnts, function(n) {
    rcell(c(n, n / total), format = "xx. (xx.%)")
  })
  # lapply() already returns a named list, so it can be passed on directly
  in_rows(.list = out, .labels = names(cnts))
}

# Table layout: one column per kept treatment arm, with "N=xx" column counts,
# followed by two analyses (AGE summary, then SEX level counts).
# Optional extra splits, left disabled here:
#   split_rows_by("STRATA1", split_fun = keep_split_levels(c("A")))
#   split_cols_by("STRATA2")
kept_arms <- c("A: Drug X", "B: Placebo")
lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>%
  split_cols_by("ARM", split_fun = keep_split_levels(kept_arms)) %>%
  analyze(vars = "AGE", afun = mean_sd_custom) %>%
  analyze(vars = "SEX", afun = counts_percentage_custom)

# Apply the layout to the data, then print the resulting table.
tbl <- build_table(lyt, ADSL)
tbl
#                      A: Drug X    B: Placebo
#                        N=134        N=134   
# ————————————————————————————————————————————
# AGE                                         
#   Mean (SD)          33.8 (6.6)   35.4 (7.9)
# SEX                                         
#   F                   79 (59%)     77 (57%) 
#   M                   51 (38%)     55 (41%) 
#   U                    3 (2%)       2 (1%)  
#   UNDIFFERENTIATED     1 (1%)       0 (0%)

Convert the table to a result data frame

The as_result_df function converts a table to a result data frame: a data frame that contains the results of the summary table and is ready to be used for quality control purposes. The expected format may differ between standards, so let's see how to produce different outputs. The final goal is to have exactly one result per row. Let's play with different options.

# Default conversion: row metadata columns plus one column per table column,
# with each cell holding the raw (unformatted) values.
as_result_df(tbl)
#   avar_name         row_name       label_name row_num is_group_summary
# 1       AGE        Mean (SD)        Mean (SD)       2            FALSE
# 2       SEX                F                F       4            FALSE
# 3       SEX                M                M       5            FALSE
# 4       SEX                U                U       6            FALSE
# 5       SEX UNDIFFERENTIATED UNDIFFERENTIATED       7            FALSE
#   node_class                A: Drug X             B: Placebo
# 1    DataRow      33.768657, 6.553326    35.432836, 7.895414
# 2    DataRow    79.0000000, 0.5895522  77.0000000, 0.5746269
# 3    DataRow      51.000000, 0.380597  55.0000000, 0.4104478
# 4    DataRow   3.00000000, 0.02238806 2.00000000, 0.01492537
# 5    DataRow 1.000000000, 0.007462687                   0, 0

# data_format = "strings": cell values are returned as the formatted strings
# shown in the printed table instead of raw numbers.
as_result_df(tbl, data_format = "strings")
#   avar_name         row_name       label_name row_num is_group_summary
# 1       AGE        Mean (SD)        Mean (SD)       2            FALSE
# 2       SEX                F                F       4            FALSE
# 3       SEX                M                M       5            FALSE
# 4       SEX                U                U       6            FALSE
# 5       SEX UNDIFFERENTIATED UNDIFFERENTIATED       7            FALSE
#   node_class  A: Drug X B: Placebo
# 1    DataRow 33.8 (6.6) 35.4 (7.9)
# 2    DataRow   79 (59%)   77 (57%)
# 3    DataRow   51 (38%)   55 (41%)
# 4    DataRow     3 (2%)     2 (1%)
# 5    DataRow     1 (1%)     0 (0%)
# simplify = TRUE: keep only the row label and the value columns.
as_result_df(tbl, simplify = TRUE)
#         label_name                A: Drug X             B: Placebo
# 1        Mean (SD)      33.768657, 6.553326    35.432836, 7.895414
# 2                F    79.0000000, 0.5895522  77.0000000, 0.5746269
# 3                M      51.000000, 0.380597  55.0000000, 0.4104478
# 4                U   3.00000000, 0.02238806 2.00000000, 0.01492537
# 5 UNDIFFERENTIATED 1.000000000, 0.007462687                   0, 0
# keep_label_rows = TRUE: also include the label rows (AGE, SEX); they carry
# no values, so their cells are NA.
as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE)
#         label_name                A: Drug X             B: Placebo
# 1              AGE                       NA                     NA
# 2        Mean (SD)      33.768657, 6.553326    35.432836, 7.895414
# 3              SEX                       NA                     NA
# 4                F    79.0000000, 0.5895522  77.0000000, 0.5746269
# 5                M      51.000000, 0.380597  55.0000000, 0.4104478
# 6                U   3.00000000, 0.02238806 2.00000000, 0.01492537
# 7 UNDIFFERENTIATED 1.000000000, 0.007462687                   0, 0
# expand_colnames = TRUE: prepend rows that carry the column names and the
# column counts.
as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE, expand_colnames = TRUE)
#                 label_name                A: Drug X             B: Placebo
# 1  <only_for_column_names>                A: Drug X             B: Placebo
# 2 <only_for_column_counts>                      134                    134
# 3                      AGE                       NA                     NA
# 4                Mean (SD)      33.768657, 6.553326    35.432836, 7.895414
# 5                      SEX                       NA                     NA
# 6                        F    79.0000000, 0.5895522  77.0000000, 0.5746269
# 7                        M      51.000000, 0.380597  55.0000000, 0.4104478
# 8                        U   3.00000000, 0.02238806 2.00000000, 0.01492537
# 9         UNDIFFERENTIATED 1.000000000, 0.007462687                   0, 0

Now let's get the final ARD output. This is the one that is ready to be used for quality control purposes.

# make_ard = TRUE: long format, with one row per combination of column (ARM
# level) and table row, and the values collected in the `stat` column.
as_result_df(tbl, make_ard = TRUE)
#    group1 group1_level variable   variable_level   variable_label         stat
# 1     ARM    A: Drug X      AGE        Mean (SD)        Mean (SD) 33.76865....
# 2     ARM    A: Drug X      SEX                F                F 79, 0.58....
# 3     ARM    A: Drug X      SEX                M                M 51, 0.38....
# 4     ARM    A: Drug X      SEX                U                U 3, 0.022....
# 5     ARM    A: Drug X      SEX UNDIFFERENTIATED UNDIFFERENTIATED 1, 0.007....
# 6     ARM   B: Placebo      AGE        Mean (SD)        Mean (SD) 35.43283....
# 7     ARM   B: Placebo      SEX                F                F 77, 0.57....
# 8     ARM   B: Placebo      SEX                M                M 55, 0.41....
# 9     ARM   B: Placebo      SEX                U                U 2, 0.014....
# 10    ARM   B: Placebo      SEX UNDIFFERENTIATED UNDIFFERENTIATED         0, 0