## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message = FALSE--------------------------------------------------- library(postcard) withr::local_seed(1395878) withr::local_options(list(postcard.verbose = 0)) ## ----------------------------------------------------------------------------- n_train <- 2000 n_test <- 200 b1 <- 1.2 b2 <- 2.3 b3 <- 1 b4 <- 1.8 train_pois <- dplyr::mutate( glm_data( Y ~ b1*abs(sin(X1))-b2*X2+b3*X3-b4*X2*X3, X1 = runif(n_train, min = -2, max = 2), X2 = rnorm(n_train, mean = 2, sd = 2), X3 = rgamma(n_train, shape = 1), family = gaussian() ), Y = round(abs(Y)) ) test_pois <- dplyr::mutate( glm_data( Y ~ b1*abs(sin(X1))-b2*X2+b3*X3-b4*X2*X3, X1 = runif(n_test, min = -2, max = 2), X2 = rnorm(n_test, mean = 2, sd = 2), X3 = rgamma(n_test, shape = 1), family = gaussian() ), Y = round(abs(Y)) ) ## ----------------------------------------------------------------------------- lrnr <- fit_best_learner(list(mod = Y ~ X1 + X2 + X3), data = train_pois) preds <- dplyr::pull(predict(lrnr, new_data = test_pois)) power_marginaleffect( response = test_pois$Y, predictions = preds, var1 = function(var0) 1.2 * var0, kappa1_squared = 2, estimand_fun = "rate_ratio", target_effect = 1.3, exposure_prob = 1/2 ) ## ----------------------------------------------------------------------------- # Generate some data b0 <- 1 b1 <- 1.6 b2 <- 1.4 b3 <- 2 b4 <- 0.8 train <- glm_data( Y ~ b0+b1*abs(sin(X1))+b2*X2+b3*X3+b4*X2*X3, X1 = runif(n_train, min = -2, max = 2), X2 = rnorm(n_train, mean = 2, sd = 2), X3 = rbinom(n_train, 1, 0.5) ) test <- glm_data( Y ~ b0+b1*abs(sin(X1))+b2*X2+b3*X3+b4*X2*X3, X1 = runif(n_test, min = -2, max = 2), X2 = rnorm(n_test, mean = 2, sd = 2), X3 = rbinom(n_test, 1, 0.5) ) ## ----------------------------------------------------------------------------- lrnr <- fit_best_learner( data = train, preproc = list(mod = Y ~ .), cv_folds = 10, verbose = 0 ) test <- dplyr::bind_cols(test, predict(lrnr, test)) ## ----------------------------------------------------------------------------- var_bound_ancova <- variance_ancova(Y ~ X1 + X2 + X3, data = test) var_bound_prog <- variance_ancova(Y ~ X1 + X2 + X3 + .pred, data = test) ## ----------------------------------------------------------------------------- desired_power <- 0.9 n_from <- 10 n_to <- 250 iterate_power <- function(variance) { power_ancova <- sapply(n_from:n_to, FUN = function(n) power_gs( n = n, variance = variance, r = 1, ate = .8, margin = 0 ) ) data.frame(n = n_from:n_to, power = power_ancova) } data_power <- dplyr::bind_rows( iterate_power(var_bound_ancova) %>% dplyr::mutate( n_desired = samplesize_gs( variance = var_bound_ancova, power = desired_power, r = 1, ate = .8, margin = 0 ), model = "ancova", model_label = "ANCOVA" ), iterate_power(var_bound_prog) %>% dplyr::mutate( n_desired = samplesize_gs( variance = var_bound_prog, power = desired_power, r = 1, ate = .8, margin = 0 ), model = "prog", model_label = "ANCOVA with prognostic score") ) ## ----------------------------------------------------------------------------- model_cols <- c(ancova = "darkorange1", prog = "dodgerblue4") show_npower <- function(data, coords) { line <- grid::segmentsGrob( x0 = coords$x, x1 = coords$x, y0 = 0, y1 = coords$y, gp = grid::gpar( lty = "dashed", col = data$colour )) group <- unique(data$group) if (group == 1) y_pos <- grid::unit(coords$y, "npc") - grid::unit(2, "mm") else y_pos <- grid::unit(0.55, "npc") label <- grid::textGrob( label = paste0(data$model_label, ": ", ceiling(data$x)), x = grid::unit(coords$x, "npc") + grid::unit(2, "mm"), y = y_pos, just = c(0, 1), gp = grid::gpar(col = data$colour) ) grid::grobTree(line, label) } data_power %>% ggplot2::ggplot(ggplot2::aes(x = n, y = power, color = model)) + ggplot2::geom_line(linewidth = 1.2, alpha = 0.8, show.legend = FALSE) + ggplot2::geom_hline( yintercept = desired_power, color = "grey40", linetype = "dashed" ) + gggrid::grid_group( show_npower, ggplot2::aes(x = n_desired, y = desired_power, model_label = model_label) ) + ggplot2::scale_color_manual( name = "", values = model_cols) + ggplot2::scale_y_continuous( breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = function(x) paste0(x*100, "%") ) + ggplot2::labs(x = "Total sample size", y = "Power", title = "Guenther Schouten approximation of power") + ggplot2::theme(plot.title = ggplot2::element_text( face = "bold", size = 16 )) + ggplot2::theme_minimal()