Complete an experimental design.
complete_design(design_table, data_table)
design_table: optree for the experimental design.
data_table: optree for the data.
Value: joined and annotated table optree.
# Worked example: reconcile observed rows against an experimental design
# using an in-memory SQLite database. The whole example is skipped unless
# both DBI and RSQLite are installed.
if (requireNamespace("DBI", quietly = TRUE) &&
      requireNamespace("RSQLite", quietly = TRUE)) {
  con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

  # the experimental design, built from the listed values
  factor_values <- list(nums = 1:3, lets = c("a", "b"))
  design_tbl <- expand_grid(con, factor_values)
  # key columns shared by the design and the data
  key_cols <- column_names(design_tbl)

  # observed data that deliberately does not line up with the design
  observed <- build_frame(
    "nums", "lets" |
    1L    , "a"    |
    1L    , "b"    |
    77L   , "a"    |  # key that is absent from the design
    2L    , "b"    |
    3L    , "a"    |
    3L    , "a"    |  # repeated key
    3L    , "b"    )
  # remember the original position of every observed row
  observed$row_number <- seq_len(nrow(observed))
  observed <- rq_copy_to(con, "data", observed)

  # join the data onto the design; show the operator tree, then store it
  completed <- complete_design(design_tbl, observed)
  cat(format(completed))
  completed <- materialize(con, completed)

  print("completed data design")
  print(execute(con, completed))

  # design keys that occur more than once in the completed table
  # (extend_se() with a partition could be used instead on databases
  # that support window functions)
  print("duplicate key rows:")
  completed %.>%
    project_se(.,
               groupby = key_cols,
               "count" %:=% "SUM(1)") %.>%
    select_rows_se(., "count>1") %.>%
    execute(con, .) %.>%
    print(.)

  # observed rows whose key never appears in the design
  print("data rows not in design:")
  observed %.>%
    natural_join(., completed,
                 jointype = "LEFT",
                 by = key_cols) %.>%
    select_rows_se(., "is.na(row_in_design_table)") %.>%
    execute(con, .) %.>%
    print(.)

  DBI::dbDisconnect(con)
}
#> mk_td("eg_81725467761235468742_0000000000", c(
#> "nums",
#> "lets")) %.>%
#> extend(.,
#> row_in_design_table := 1) %.>%
#> natural_join(.,
#> mk_td("data", c(
#> "nums",
#> "lets",
#> "row_number")) %.>%
#> extend(.,
#> row_in_data_table := 1),
#> jointype = "LEFT", by = c('nums', 'lets')) %.>%
#> null_replace(.; row_in_design_table,
#> row_in_data_table: 0)
#> [1] "completed data design"
#> nums lets row_in_design_table row_number row_in_data_table
#> 1 1 a 1 1 1
#> 2 1 b 1 2 1
#> 3 2 a 1 NA 0
#> 4 2 b 1 4 1
#> 5 3 a 1 5 1
#> 6 3 a 1 6 1
#> 7 3 b 1 7 1
#> [1] "duplicate key rows:"
#> nums lets count
#> 1 3 a 2
#> [1] "data rows not in design:"
#> nums lets row_number row_in_design_table row_in_data_table
#> 1 77 a 3 NA NA