## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) ## ----------------------------------------------------------------------------- # # generate data # set.seed(1) # n <- 200 # p <- 20 # X <- matrix(rnorm(n * p), ncol = p) # colnames(X) <- paste0("X", 1:p) # y <- 1 + 0.4 * rowSums(X[, c(5, 10, 15, 20)]) + rnorm(n) ## ----------------------------------------------------------------------------- # library(bigstep) # data <- prepare_data(y, X) ## ----------------------------------------------------------------------------- # results <- stepwise(data, crit = aic) # results$model # summary(results) # get_model(results) ## ----------------------------------------------------------------------------- # forward(data, crit = aic) ## ----------------------------------------------------------------------------- # data %>% # forward(aic) %>% # forward(aic) %>% # forward(aic) %>% # backward(bic) ## ----------------------------------------------------------------------------- # # generate data # set.seed(1) # n <- 1e3 # p <- 1e4 # X <- matrix(rnorm(p * n), ncol = p) # colnames(X) <- paste0("X", 1:p) # Xadd <- matrix(rnorm(5 * n), n, 5) # additional variables # colnames(Xadd) <- paste0("Xadd", 1:5) # y <- 0.2 * rowSums(X[, 1000 * (1:10)]) + Xadd[, 1] - 0.1 * Xadd[, 3] + rnorm(n) ## ----------------------------------------------------------------------------- # data <- prepare_data(y, X, Xadd = Xadd) # data %>% # reduce_matrix(minpv = 0.15) %>% # stepwise(mbic) -> # results # summary(results) # # data %>% # reduce_matrix(0.15) %>% # stepwise(bic) # bad idea... ## ----------------------------------------------------------------------------- # data %>% # reduce_matrix() %>% # fast_forward() %>% # multi_backward() %>% # stepwise() ## ----------------------------------------------------------------------------- # Xbig <- read.big.matrix("X.txt", sep = " ", header = TRUE, # backingfile = "X.bin", descriptorfile = "X.desc") # # Xbig <- attach.big.matrix("X.desc") # much faster # y <- read.table("y.txt") # # data <- prepare_data(y, Xbig) # slow because of checking NA # data <- prepare_data(y, Xbig, na = FALSE) # set if you know that you do not have NA # data %>% # reduce_matrix(minpv = 0.001) %>% # fast_forward(crit = bic, maxf = 50) %>% # multi_backward(crit = mbic) %>% # stepwise(crit = mbic) -> m # summary(m) ## ----------------------------------------------------------------------------- # my_crit <- function(loglik, k, n, c1 = 0.5, c2 = 8) { # -c1*loglik + 10*sqrt(k*c2) # } # m <- reduce_matrix(data, minpv = 0.15) # data from the paragraph "Bigger data" # stepwise(m, crit = my_crit) # stepwise(m, crit = function(loglik, k, n) -0.4*loglik + 10*sqrt(k*8)) ## ----------------------------------------------------------------------------- # # Poisson model # set.seed(1) # n <- 50 # p <- 1000 # X <- matrix(runif(n * p), ncol = p) # colnames(X) <- paste0("X", 1:p) # mu <- rowSums(X[, 100 * (1:5)]) # y <- rpois(n, exp(mu)) # data1 <- prepare_data(y, X, type = "linear") # data2 <- prepare_data(y, X, type = "poisson") # data1 %>% # reduce_matrix() %>% # stepwise() # did not see any variables # data2 %>% # reduce_matrix() %>% # stepwise() # # # logistic model # set.seed(2) # n <- 100 # X <- matrix(runif(n * p, -5, 5), ncol = p) # colnames(X) <- paste0("X", 1:p) # mu <- 0.8 * rowSums(X[, 100 * (1:5)]) # prob <- 1 /( 1 + exp(-mu)) # y <- rbinom(n, 1, prob) # data1 <- prepare_data(y, X, type = "linear") # data2 <- prepare_data(y, X, type = "logistic") # data1 %>% # reduce_matrix() %>% # stepwise() # data2 %>% # reduce_matrix() %>% # stepwise()