R/normalize.R
normalize_cols.Rd
This is an example of building up a pre-prepared pipeline fragment from relop nodes: the fragment normalizes the named columns by dividing each one by its sum within each partition.
normalize_cols(source, columns, ..., partitionby = NULL, env = parent.frame())
source: relop tree or data.frame source.
columns: character, names of the columns to normalize.
...: forces later arguments to be bound by name.
partitionby: partitioning (window function) column names that define the partitions.
env: environment to look for values in.
# by hand logistic regression example
scale <- 0.237
d <- mk_td("survey_table",
c("subjectID", "surveyCategory", "assessmentTotal"))
optree <- d %.>%
extend(.,
probability %:=%
exp(assessmentTotal * scale)) %.>%
normalize_cols(.,
"probability",
partitionby = 'subjectID') %.>%
pick_top_k(.,
partitionby = 'subjectID',
orderby = c('probability', 'surveyCategory'),
reverse = c('probability')) %.>%
rename_columns(., 'diagnosis' %:=% 'surveyCategory') %.>%
select_columns(., c('subjectID',
'diagnosis',
'probability')) %.>%
orderby(., 'subjectID')
cat(format(optree))
#> mk_td("survey_table", c(
#> "subjectID",
#> "surveyCategory",
#> "assessmentTotal")) %.>%
#> extend(.,
#> probability := exp(assessmentTotal * 0.237)) %.>%
#> extend(.,
#> probability := probability / sum(probability),
#> partitionby = c('subjectID'),
#> orderby = c(),
#> reverse = c()) %.>%
#> extend(.,
#> row_number := row_number(),
#> partitionby = c('subjectID'),
#> orderby = c('probability', 'surveyCategory'),
#> reverse = c('probability')) %.>%
#> select_rows(.,
#> row_number <= 1) %.>%
#> rename_columns(.,
#> c('diagnosis' = 'surveyCategory')) %.>%
#> select_columns(.,
#> c('subjectID', 'diagnosis', 'probability')) %.>%
#> order_rows(.,
#> c('subjectID'),
#> reverse = c(),
#> limit = NULL)