Please see https://win-vector.com/2014/05/30/trimming-the-fat-from-glm-models-in-r/ for discussion.
clean_fit_glm(
outcome,
variables,
data,
...,
family,
intercept = TRUE,
outcome_target = NULL,
outcome_comparator = "==",
weights = NULL,
env = baseenv()
)
character, name of outcome column.
character, names of variable columns.
data.frame, training data.
not used, force later arguments to be used by name
passed to stats::glm()
logical, if TRUE allow an intercept term.
scalar, if not NULL write outcome==outcome_target in formula.
one of "==", "!=", ">=", "<=", ">", "<"; only used if outcome_target is not NULL.
passed to stats::glm()
environment to work in.
list(model=model, summary=summary)
mk_data_example <- function(k) {
  # Build a toy training frame: a fixed 4-row pattern repeated k times.
  # Columns: x1 (character predictor), x2 (numeric predictor),
  # y (numeric outcome), yC (logical outcome for binomial fits).
  base_x1 <- c("a", "a", "b", "b")
  base_x2 <- c(0, 0, 0, 1)
  base_y <- 1:4
  base_yC <- c(FALSE, TRUE, TRUE, TRUE)
  data.frame(
    x1 = rep(base_x1, k),
    x2 = rep(base_x2, k),
    y = rep(base_y, k),
    yC = rep(base_yC, k),
    stringsAsFactors = FALSE)
}
# Fit a logistic regression on the tiny 4-row example data set.
# clean_fit_glm() returns list(model = ..., summary = ...) where the model
# has been stripped of heavyweight references (see the article linked above).
res_glm <- clean_fit_glm("yC", c("x1", "x2"),
mk_data_example(1),
family = binomial)
# Serialized size of the cleaned model fit on 4 rows.
length(serialize(res_glm$model, NULL))
#> [1] 33811
# Refit on 10000x the training data.
res_glm <- clean_fit_glm("yC", c("x1", "x2"),
mk_data_example(10000),
family = binomial)
# Serialized size is unchanged: the cleaned model's size does not grow
# with the number of training rows, showing training data was trimmed.
length(serialize(res_glm$model, NULL))
#> [1] 33811
# The cleaned model still works with predict() on new data.
predict(res_glm$model,
newdata = mk_data_example(1),
type = "response")
#> 1 2 3 4
#> 0.5 0.5 1.0 1.0