## ----echo = FALSE, message = FALSE---------------------------------------------------------------- knitr::opts_chunk$set(comment = "") options(width = 100, max.print = 100) set.seed(123456) library(simTool) library(dplyr) library(tidyr) library(tibble) library(ggplot2) library(broom) library(boot) ## ------------------------------------------------------------------------------------------------- regData <- function(n, SD) { x <- seq(0, 1, length = n) y <- 10 + 2 * x + rnorm(n, sd = SD) tibble(x = x, y = y) } eval_tibbles( expand_tibble(fun = "regData", n = 5L, SD = 1:2), expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")), post_analyze = broom::tidy, summary_fun = list(mean = mean, sd = sd), group_for_summary = "term", replications = 3 ) ## ------------------------------------------------------------------------------------------------- print(dg <- dplyr::bind_rows( expand_tibble(fun = "rexp", n = c(10L, 20L), rate = 1:2), expand_tibble(fun = "rnorm", n = c(10L, 20L), mean = 1:2) )) ## ------------------------------------------------------------------------------------------------- print(pg <- dplyr::bind_rows( expand_tibble(proc = "min"), expand_tibble(proc = "mean", trim = c(0.1, 0.2)) )) ## ----eval=FALSE----------------------------------------------------------------------------------- # 1. convert dg to R-functions {g_1, ..., g_k} # 2. convert pg to R-functions {f_1, ..., f_L} # 3. initialize result object # 4. append dg and pg to the result object # 5. t1 = current.time() # 6. for g in {g_1, ..., g_k} # 7. for r in 1:replications (optionally in a parallel manner) # 8. data = g() # 9. for f in {f_1, \ldots, f_L} # 10. append f(data) to the result object (optionally apply a post-analyze-function) # 11. optionally append data to the result object # 12. optionally summarize the result object over all # replications but separately for f_1, ..., f_L (and optional group variables) # 13. t2 = current.time() # 14. Estimate the number of replications per hour from t1 and t2 ## ------------------------------------------------------------------------------------------------- dg <- expand_tibble(fun = "rnorm", n = 10, mean = 1:2) pg <- expand_tibble(proc = "min") eg <- eval_tibbles(data_grid = dg, proc_grid = pg, replications = 2) eg ## ------------------------------------------------------------------------------------------------- eg <- eval_tibbles(data_grid = dg, proc_grid = pg, replications = 3) eg ## ------------------------------------------------------------------------------------------------- eg <- eval_tibbles(data_grid = dg, proc_grid = pg, replications = 1) eg$simulation eg$generated_data ## ------------------------------------------------------------------------------------------------- dg <- expand_tibble(fun = "runif", n = c(10, 20, 30)) pg <- expand_tibble(proc = c("min", "max")) eval_tibbles( data_grid = dg, proc_grid = pg, replications = 1000, summary_fun = list(mean = mean) ) eval_tibbles( data_grid = dg, proc_grid = pg, replications = 1000, summary_fun = list(mean = mean, sd = sd) ) ## ------------------------------------------------------------------------------------------------- eval_tibbles( expand_tibble(fun = "regData", n = 5L, SD = 1:2), expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")), replications = 2 ) ## ------------------------------------------------------------------------------------------------- eval_tibbles( expand_tibble(fun = "regData", n = 5L, SD = 1:2), expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")), post_analyze = purrr::compose(function(mat) mat["(Intercept)", "Estimate"], coef, summary.lm), replications = 2 ) ## ------------------------------------------------------------------------------------------------- presever_rownames <- function(mat) { rn <- rownames(mat) ret <- tibble::as_tibble(mat) ret$term <- rn ret } eval_tibbles( expand_tibble(fun = "regData", n = 5L, SD = 1:2), expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")), post_analyze = purrr::compose(presever_rownames, coef, summary), replications = 3 ) ## ------------------------------------------------------------------------------------------------- eval_tibbles( expand_tibble(fun = "regData", n = 5L, SD = 1:2), expand_tibble(proc = "lm", formula = c("y~x", "y~I(x^2)")), post_analyze = purrr::compose(presever_rownames, coef, summary), summary_fun = list(mean = mean, sd = sd), group_for_summary = "term", replications = 3 ) ## ------------------------------------------------------------------------------------------------- eval_tibbles( data_grid = dg, proc_grid = pg, replications = 10, ncpus = 2, summary_fun = list(mean = mean) ) ## ------------------------------------------------------------------------------------------------- library(parallel) cl <- makeCluster(rep("localhost", 2), type = "PSOCK") eval_tibbles( data_grid = dg, proc_grid = pg, replications = 10, cluster = cl, summary_fun = list(mean = mean) ) stopCluster(cl) ## ------------------------------------------------------------------------------------------------- library(boot) ratio <- function(d, w) sum(d$x * w) / sum(d$u * w) city.boot <- boot(city, ratio, R = 999, stype = "w", sim = "ordinary" ) boot.ci(city.boot, conf = c(0.90, 0.95), type = c("norm", "basic", "perc", "bca") ) ## ------------------------------------------------------------------------------------------------- returnCity <- function() { city } bootConfInt <- function(data) { city.boot <- boot(data, ratio, R = 999, stype = "w", sim = "ordinary" ) boot.ci(city.boot, conf = c(0.90, 0.95), type = c("norm", "basic", "perc", "bca") ) } ## ------------------------------------------------------------------------------------------------- dg <- expand_tibble(fun = "returnCity") pg <- expand_tibble(proc = "bootConfInt") eval_tibbles(dg, pg, replications = 10, ncpus = 2, cluster_libraries = c("boot"), cluster_global_objects = c("ratio") ) ## ------------------------------------------------------------------------------------------------- # masking summary from the base package summary <- function(x) tibble(sd = sd(x)) g <- function(x) tibble(q0.1 = quantile(x, 0.1)) someFunc <- function() { summary <- function(x) tibble(sd = sd(x), mean = mean(x)) dg <- expand_tibble(fun = "runif", n = 100) pg <- expand_tibble(proc = c("summary", "g")) # the standard is to use the global # environment, hence summary defined outside # of someFunc() will be used print(eval_tibbles(dg, pg)) cat("--------------------------------------------------\n") # will use the local defined summary, but g # from the global environment, because # g is not locally defined. print(eval_tibbles(dg, pg, envir = environment())) } someFunc() ## ------------------------------------------------------------------------------------------------- dg <- expand_tibble(fun = c("rnorm"), mean = c(1,1000), sd = c(1,10), n = c(10L, 100L)) pg <- expand_tibble(proc = "quantile", probs = 0.975) post_ana <- function(q_est, .truth){ tibble::tibble(bias = q_est - stats::qnorm(0.975, mean = .truth$mean, sd = .truth$sd)) } eval_tibbles(dg, pg, replications = 10^3, discard_generated_data = TRUE, ncpus = 2, post_analyze = post_ana, summary_fun = list(mean = mean)) ## ------------------------------------------------------------------------------------------------- dg <- dplyr::bind_rows( expand_tibble(fun = c("rnorm"), mean = 0, n = c(10L, 100L), .truth = qnorm(0.975)), expand_tibble(fun = c("rexp"), rate = 1, n = c(10L, 100L), .truth = qexp(0.975, rate = 1)), expand_tibble(fun = c("runif"), max = 2, n = c(10L, 100L), .truth = qunif(0.975, max = 2)) ) pg <- expand_tibble(proc = "quantile", probs = 0.975) post_ana <- function(q_est, .truth){ ret <- q_est - .truth names(ret) <- "bias" ret } eval_tibbles(dg, pg, replications = 10^3, discard_generated_data = TRUE, ncpus = 2, post_analyze = post_ana, summary_fun = list(mean = mean)) ## ------------------------------------------------------------------------------------------------- dg <- dplyr::bind_rows( expand_tibble(fun = c("rnorm"), mean = 0, n = c(10L, 1000L), .truth = list(function(prob) qnorm(prob, mean = 0))), expand_tibble(fun = c("rexp"), rate = 1, n = c(10L, 1000L), .truth = list(function(prob) qexp(prob, rate = 1))), expand_tibble(fun = c("runif"), max = 2, n = c(10L, 1000L), .truth = list(function(prob) qunif(prob, max = 2))) ) bias_quantile <- function(x, prob, .truth) { est <- quantile(x, probs = prob) ret <- est - .truth[[1]](prob) names(ret) <- "bias" ret } pg <- expand_tibble(proc = "bias_quantile", prob = c(0.9, 0.975)) eval_tibbles(dg, pg, replications = 10^3, discard_generated_data = TRUE, ncpus = 1, summary_fun = list(mean = mean)) ## ----eval = FALSE--------------------------------------------------------------------------------- # 6. for g in {g_1, ..., g_k} # 7. for r in 1:replications (optionally in a parallel manner) # 8. data = g() # 9. for f in {f_1, \ldots, f_L} # 10. append f(data) to the result object (optionally apply a post-analyze-function) ## ------------------------------------------------------------------------------------------------- EVAL <- FALSE if (Sys.getenv("NOT_CRAN") == "true") { EVAL <- TRUE } ## ----eval = EVAL---------------------------------------------------------------------------------- dg <- dplyr::bind_rows( expand_tibble(fun = c("rnorm"), mean = 1, n = c(10L, 100L)), expand_tibble(fun = c("rexp"), rate = 1, n = c(10L, 100L)) ) dg ## ----eval = EVAL---------------------------------------------------------------------------------- pg <- expand_tibble(proc = c("mean", "median")) pg ## ----eval = EVAL---------------------------------------------------------------------------------- et <- eval_tibbles(dg, pg, replications = 10^4, ncpus = 2) et$simulation %>% ggplot(aes(x = results, color = interaction(fun, n), fill = interaction(fun, n))) + geom_density(alpha = 0.3) + facet_wrap(~ proc) ## ----eval = EVAL---------------------------------------------------------------------------------- bootstrap_ci <- function(x, conf.level, R = 999) { b <- boot::boot(x, function(d, i) { n <- length(i) c( mean = mean(d[i]), variance = (n - 1) * var(d[i]) / n^2 ) }, R = R) boot::boot.ci(b, conf = conf.level, type = "all") } ## ----eval = EVAL---------------------------------------------------------------------------------- post_analyze <- function(o, .truth) { if (class(o) == "htest") { #post-process the object returned by t.test ci <- o$conf.int return(tibble::tibble( type = "t.test", aspect = c("covered", "ci_length"), value = c(ci[1] <= .truth && .truth <= ci[2], ci[2] - ci[1]) )) } else if (class(o) == "bootci") { #post-process the object returned by boot.ci method = c("normal", "basic", "student", "percent", "bca") ret = o[method] lower = unlist(purrr::map(ret, ~dplyr::nth(.x[1,], -2))) upper = unlist(purrr::map(ret, ~dplyr::nth(.x[1,], -1))) type = paste("boot", method, sep = "_") return( dplyr::bind_rows( tibble::tibble( type = type, aspect = "covered", value = as.integer(lower <= .truth & .truth <= upper)), tibble::tibble( type = type, aspect = "ci_length", value = upper - lower)) ) } } ## ----eval = EVAL---------------------------------------------------------------------------------- dg <- dplyr::bind_rows( simTool::expand_tibble(fun = "rnorm", n = 10L, mean = 0, sd = sqrt(3), .truth = 0), simTool::expand_tibble(fun = "runif", n = 10L, max = 6, .truth = 3), simTool::expand_tibble(fun = "rexp", n = 10L, rate = 1 / sqrt(3), .truth = sqrt(3)) ) dg ## ----eval = EVAL---------------------------------------------------------------------------------- pg <- simTool::expand_tibble( proc = c("t.test","bootstrap_ci"), conf.level = c(0.9, 0.95) ) pg ## ----eval = EVAL---------------------------------------------------------------------------------- et <- eval_tibbles(dg, pg, replications = 10^3, ncpus = 2, cluster_global_objects = "post_analyze", post_analyze = post_analyze, summary_fun = list(mean = mean), group_for_summary = c("aspect", "type") ) et ## ----eval = EVAL, fig.width=13-------------------------------------------------------------------- et$simulation %>% ggplot(aes(x = fun, y = value, group = type, fill = type, label = round(value, 2))) + geom_col(position = "dodge") + geom_label(position = position_dodge(0.9), size = 3) + theme(legend.position = "bottom") + facet_grid(aspect ~ conf.level, scales = "free") ## ----eval = EVAL---------------------------------------------------------------------------------- t_test = function(x, conf.level){ tt <- t.test(x, conf.level = conf.level) # unify return tibble::tibble(type = "t.test", lower = tt$conf.int[1], upper = tt$conf.int[2]) } bootstrap_ci <- function(x, conf.level, R = 999) { b <- boot::boot(x, function(d, i) { n <- length(i) c( mean = mean(d[i]), variance = (n - 1) * var(d[i]) / n^2 ) }, R = R) ci <- boot::boot.ci(b, conf = conf.level, type = "all") method = c("normal", "basic", "student", "percent", "bca") ret = ci[method] lower = unlist(purrr::map(ret, ~dplyr::nth(.x[1,], -2))) upper = unlist(purrr::map(ret, ~dplyr::nth(.x[1,], -1))) type = paste("boot", method, sep = "_") # unify return tibble::tibble(type = type, lower = lower, upper = upper) } dg <- dplyr::bind_rows( simTool::expand_tibble(fun = "rnorm", n = 10L, mean = 0, sd = sqrt(3), .truth = 0), simTool::expand_tibble(fun = "runif", n = 10L, max = 6, .truth = 3), simTool::expand_tibble(fun = "rexp", n = 10L, rate = 1 / sqrt(3), .truth = sqrt(3)) ) pg <- simTool::expand_tibble( proc = c("t_test","bootstrap_ci"), conf.level = c(0.9, 0.95) ) %>% mutate(R = ifelse(proc == "bootstrap_ci", 100, NA)) et <- eval_tibbles(dg, pg, replications = 10^2, ncpus = 2 ) et ## ----eval = EVAL---------------------------------------------------------------------------------- grps <- et$simulation %>% select(-replications) %>% select(fun:type) %>% names et$simulation %>% mutate(covered = lower <= .truth & .truth <= upper, ci_length = upper - lower) %>% group_by(.dots = grps) %>% summarise(coverage = mean(covered), ci_length = mean(ci_length))