## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(pminternal)
library(Hmisc)

getHdata("gusto")
gusto <- gusto[, c("sex", "age", "hyp", "htn", "hrt", "pmi", "ste", "day30")]
gusto$y <- gusto$day30; gusto$day30 <- NULL

set.seed(234)
gusto <- gusto[sample(1:nrow(gusto), size = 4000), ]

## -----------------------------------------------------------------------------
stepglm <- function(data, ...){
  m <- glm(y~., data=data, family="binomial")
  step(m, trace = 0)
}

steppred <- function(model, data, ...){
  predict(model, newdata = data, type = "response")
}

validate(data = gusto, outcome = "y",
         model_fun = stepglm, pred_fun = steppred,
         method = "cv_opt", B = 10)

## -----------------------------------------------------------------------------
#library(glmnet)

ridge_fun <- function(data, ...){
  y <- data$y
  x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
  x$sex <- as.numeric(x$sex == "male")
  x$pmi <- as.numeric(x$pmi == "yes")
  x <- as.matrix(x)

  cv <- glmnet::cv.glmnet(x=x, y=y, alpha=0, nfolds = 10, family="binomial")
  lambda <- cv$lambda.min

  glmnet::glmnet(x=x, y=y, alpha = 0, lambda = lambda, family="binomial")
}

ridge_predict <- function(model, data, ...){
  # note: this is identical to lasso_predict from the "pminternal" vignette
  x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
  x$sex <- as.numeric(x$sex == "male")
  x$pmi <- as.numeric(x$pmi == "yes")
  x <- as.matrix(x)
  plogis(glmnet::predict.glmnet(model, newx = x)[, 1])
}

validate(method = "cv_optimism", data = gusto,
         outcome = "y", model_fun = ridge_fun,
         pred_fun = ridge_predict, B = 10)

# using package::function within user-defined functions
# is especially important if you want to run
# boot_* or .632 in parallel via the cores argument
# e.g.
# validate(method = ".632", data = gusto,
#          outcome = "y", model_fun = ridge_fun,
#          pred_fun = ridge_predict, B = 100, cores = 4)

## ----eval = F-----------------------------------------------------------------
# lognet_fun <- function(data, ...){
#
#   dots <- list(...)
#   if ("alpha" %in% names(dots)){
#     alpha <- dots[["alpha"]]
#   } else{
#     alpha <- 0
#   }
#
#   y <- data$y
#   x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
#   x$sex <- as.numeric(x$sex == "male")
#   x$pmi <- as.numeric(x$pmi == "yes")
#   x <- as.matrix(x)
#
#   cv <- glmnet::cv.glmnet(x=x, y=y, alpha = alpha, nfolds = 10, family="binomial")
#   lambda <- cv$lambda.min
#
#   glmnet::glmnet(x=x, y=y, alpha = alpha, lambda = lambda, family="binomial")
# }
#
# validate(method = "cv_optimism", data = gusto,
#          outcome = "y", model_fun = lognet_fun,
#          pred_fun = ridge_predict, B = 10, alpha = 0.5)

## ----eval = F-----------------------------------------------------------------
# enet_fun <- function(data, ...){
#
#   dots <- list(...)
# if ("nalpha" %in% names(dots)){ # nalpha <- dots[["nalpha"]] # } else{ # nalpha <- 21 # 0 to 1 in steps of 0.05 # } # # y <- data$y # x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')] # x$sex <- as.numeric(x$sex == "male") # x$pmi <- as.numeric(x$pmi == "yes") # x <- as.matrix(x) # # # run 10 fold CV for each alpha # alphas <- seq(0, 1, length.out = nalpha) # res <- lapply(alphas, function(a){ # cv <- glmnet::cv.glmnet(x=x, y=y, alpha = a, nfolds = 10, family="binomial") # list(lambda = cv$lambda.min, bin.dev = min(cv$cvm)) # }) # # select result with min binomial deviance # j <- which.min(sapply(res, function(x) x$bin.dev)) # # produce 'final' model with alpha and lambda # glmnet::glmnet(x=x, y=y, alpha = alphas[j], lambda = res[[j]][["lambda"]], family="binomial") # } # # validate(method = "cv_optimism", data = gusto, # outcome = "y", model_fun = enet_fun, # pred_fun = ridge_predict, B = 10) # ## ----------------------------------------------------------------------------- rf_fun <- function(data, ...){ dots <- list(...) num.trees <- if ("num.trees" %in% names(dots)) dots[["num.trees"]] else 500 max.depth <- if ("max.depth" %in% names(dots)) dots[["max.depth"]] else NULL min.node.size <- if ("min.node.size" %in% names(dots)) dots[["min.node.size"]] else 1 # best to make sure y is a factor where '1' is level 2 data$y <- factor(data$y, levels = 0:1) ranger::ranger(y~., data = data, probability = TRUE, num.trees = num.trees, max.depth = max.depth, min.node.size = min.node.size) } rf_predict <- function(model, data, ...){ predict(model, data = data)$predictions[, 2] } validate(method = "cv_optimism", data = gusto, outcome = "y", model_fun = rf_fun, pred_fun = rf_predict, B = 10) # instead of unlimited tree depth... validate(method = "cv_optimism", data = gusto, outcome = "y", model_fun = rf_fun, pred_fun = rf_predict, B = 10, max.depth = 3)