## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(pminternal) library(Hmisc) getHdata("gusto") gusto <- gusto[, c("sex", "age", "hyp", "htn", "hrt", "pmi", "ste", "day30")] gusto$y <- gusto$day30; gusto$day30 <- NULL mean(gusto$y) # outcome rate set.seed(234) gusto <- gusto[sample(1:nrow(gusto), size = 4000),] mod <- glm(y ~ ., data = gusto, family = "binomial") mod_iv <- validate(mod, B = 20) mod_iv ## ----fig.height=5, fig.width=6------------------------------------------------ # prediction stability plot with 95% 'stability interval' prediction_stability(mod_iv, bounds = .95) # calibration stability # (using default calibration curve arguments: see pminternal:::cal_defaults()) calibration_stability(mod_iv) # mean absolute prediction error (mape) stability # mape = average difference between boot model predictions # for original data and original model mape <- mape_stability(mod_iv) mape$average_mape ## ----fig.height=5, fig.width=6------------------------------------------------ # find 100 equally spaced points # between the lowest and highest risk prediction p <- predict(mod, type="response") p_range <- seq(min(p), max(p), length.out=100) mod_iv2 <- validate(mod, B = 20, calib_args = list( eval=p_range, smooth="rcs", nk=5) ) mod_iv2 calp <- cal_plot(mod_iv2) ## ----fig.height=5, fig.width=6------------------------------------------------ head(calp) library(ggplot2) ggplot(calp, aes(x=predicted)) + geom_abline(lty=2) + geom_line(aes(y=apparent, color="Apparent")) + geom_line(aes(y=bias_corrected, color="Bias-Corrected")) + geom_histogram(data = data.frame(p = p), aes(x=p, y=after_stat(density)*.01), binwidth = .001, inherit.aes = F, alpha=1/2) + labs(x="Predicted Risk", y="Estimated Risk", color=NULL) ## ----------------------------------------------------------------------------- (mod_iv2 <- confint(mod_iv2, method = "shifted", R=100)) ## ----fig.height=5, fig.width=6------------------------------------------------ cal_plot(mod_iv2, bc_col = "red") ## ----eval=F------------------------------------------------------------------- # ### generalized boosted model with gbm # library(gbm) # # syntax y ~ . does not work with gbm # mod <- gbm(y ~ sex + age + hyp + htn + hrt + pmi + ste, # data = gusto, distribution = "bernoulli", interaction.depth = 2) # # (gbm_iv <- validate(mod, B = 20)) # # ### generalized additive model with mgcv # library(mgcv) # # mod <- gam(y ~ sex + s(age) + hyp + htn + hrt + pmi + ste, # data = gusto, family = "binomial") # # (gam_iv <- validate(mod, B = 20)) # # ### rms implementation of logistic regression # mod <- rms::lrm(y ~ ., data = gusto) # # not loading rms to avoid conflict with rms::validate... # # (lrm_iv <- validate(mod, B = 20)) # ## ----------------------------------------------------------------------------- #library(glmnet) lasso_fun <- function(data, ...){ y <- data$y x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')] x$sex <- as.numeric(x$sex == "male") x$pmi <- as.numeric(x$pmi == "yes") x <- as.matrix(x) cv <- glmnet::cv.glmnet(x=x, y=y, alpha=1, nfolds = 10, family="binomial") lambda <- cv$lambda.min glmnet::glmnet(x=x, y=y, alpha = 1, lambda = lambda, family="binomial") } lasso_predict <- function(model, data, ...){ x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')] x$sex <- as.numeric(x$sex == "male") x$pmi <- as.numeric(x$pmi == "yes") x <- as.matrix(x) plogis(glmnet::predict.glmnet(model, newx = x)[,1]) } ## ----------------------------------------------------------------------------- lasso_app <- lasso_fun(gusto) lasso_p <- lasso_predict(model = lasso_app, data = gusto) ## ----fig.height=5, fig.width=6------------------------------------------------ # for calibration plot eval <- seq(min(lasso_p), max(lasso_p), length.out=100) iv_lasso <- validate(method = "cv_optimism", data = gusto, outcome = "y", model_fun = lasso_fun, pred_fun = lasso_predict, B = 10, calib_args=list(eval=eval)) iv_lasso cal_plot(iv_lasso) ## ----------------------------------------------------------------------------- sens_spec <- function(y, p, ...){ # this function supports an optional # arg: threshold (set to .5 if not specified) dots <- list(...) if ("threshold" %in% names(dots)){ thresh <- dots[["threshold"]] } else{ thresh <- .5 } # make sure y is 1/0 if (is.logical(y)) y <- as.numeric(y) # predicted 'class' pcla <- as.numeric(p > thresh) sens <- sum(y==1 & pcla==1)/sum(y==1) spec <- sum(y==0 & pcla==0)/sum(y==0) scores <- c(sens, spec) names(scores) <- c("Sensitivity", "Specificity") return(scores) } ## ----------------------------------------------------------------------------- validate(fit = mod, score_fun = sens_spec, threshold=.2, method = "cv_optimism", B = 10)