## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(pminternal)
library(Hmisc)

getHdata("gusto")
gusto <- gusto[, c("sex", "age", "hyp", "htn", "hrt", "pmi", "ste", "day30")]

gusto$y <- gusto$day30; gusto$day30 <- NULL
mean(gusto$y) # outcome rate

set.seed(234)
gusto <- gusto[sample(1:nrow(gusto), size = 4000),]

mod <- glm(y ~ ., data = gusto, family = "binomial")

mod_iv <- validate(mod, B = 20)
mod_iv


## ----fig.height=5, fig.width=6------------------------------------------------
# prediction stability plot with 95% 'stability interval'
prediction_stability(mod_iv, bounds = .95)

# calibration stability 
# (using default calibration curve arguments: see pminternal:::cal_defaults())
calibration_stability(mod_iv)

# mean absolute prediction error (mape) stability 
# mape = average difference between boot model predictions
# for original data and original model
mape <- mape_stability(mod_iv)
mape$average_mape


## ----fig.height=5, fig.width=6------------------------------------------------
# find 100 equally spaced points 
# between the lowest and highest risk prediction
p <- predict(mod, type="response")

p_range <- seq(min(p), max(p), length.out=100)

mod_iv2 <- validate(mod, B = 20, 
                    calib_args = list(
                      eval=p_range, 
                      smooth="rcs", 
                      nk=5)
                    )
mod_iv2

calp <- cal_plot(mod_iv2)

## ----fig.height=5, fig.width=6------------------------------------------------
head(calp)

library(ggplot2)

ggplot(calp, aes(x=predicted)) +
  geom_abline(lty=2) +
  geom_line(aes(y=apparent, color="Apparent")) +
  geom_line(aes(y=bias_corrected, color="Bias-Corrected")) +
  geom_histogram(data = data.frame(p = p), aes(x=p, y=after_stat(density)*.01),
                 binwidth = .001, inherit.aes = F, alpha=1/2) +
  labs(x="Predicted Risk", y="Estimated Risk", color=NULL)


## -----------------------------------------------------------------------------
(mod_iv2 <- confint(mod_iv2, method = "shifted", R=100))

## ----fig.height=5, fig.width=6------------------------------------------------
cal_plot(mod_iv2, bc_col = "red")

## ----eval=F-------------------------------------------------------------------
# ### generalized boosted model with gbm
# library(gbm)
# # syntax y ~ . does not work with gbm
# mod <- gbm(y ~ sex + age + hyp + htn + hrt + pmi + ste,
#            data = gusto, distribution = "bernoulli", interaction.depth = 2)
# 
# (gbm_iv <- validate(mod, B = 20))
# 
# ### generalized additive model with mgcv
# library(mgcv)
# 
# mod <- gam(y ~ sex + s(age) + hyp + htn + hrt + pmi + ste,
#            data = gusto, family = "binomial")
# 
# (gam_iv <- validate(mod, B = 20))
# 
# ### rms implementation of logistic regression
# mod <- rms::lrm(y ~ ., data = gusto)
# # not loading rms to avoid conflict with rms::validate...
# 
# (lrm_iv <- validate(mod, B = 20))
# 

## -----------------------------------------------------------------------------
#library(glmnet)

lasso_fun <- function(data, ...){
  y <- data$y
  x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
  x$sex <- as.numeric(x$sex == "male")
  x$pmi <- as.numeric(x$pmi == "yes")
  x <- as.matrix(x)
  
  cv <- glmnet::cv.glmnet(x=x, y=y, alpha=1, nfolds = 10, family="binomial")
  lambda <- cv$lambda.min
  
  glmnet::glmnet(x=x, y=y, alpha = 1, lambda = lambda, family="binomial")
}

lasso_predict <- function(model, data, ...){
  x <- data[, c('sex', 'age', 'hyp', 'htn', 'hrt', 'pmi', 'ste')]
  x$sex <- as.numeric(x$sex == "male")
  x$pmi <- as.numeric(x$pmi == "yes")
  x <- as.matrix(x)
  
  plogis(glmnet::predict.glmnet(model, newx = x)[,1])
}

## -----------------------------------------------------------------------------
lasso_app <- lasso_fun(gusto)
lasso_p <- lasso_predict(model = lasso_app, data = gusto)

## ----fig.height=5, fig.width=6------------------------------------------------
# for calibration plot
eval <- seq(min(lasso_p), max(lasso_p), length.out=100)

iv_lasso <- validate(method = "cv_optimism", data = gusto, 
                     outcome = "y", model_fun = lasso_fun, 
                     pred_fun = lasso_predict, B = 10, 
                     calib_args=list(eval=eval))

iv_lasso

cal_plot(iv_lasso)


## -----------------------------------------------------------------------------
sens_spec <- function(y, p, ...){
  # this function supports an optional
  # arg: threshold (set to .5 if not specified)
  dots <- list(...)
  if ("threshold" %in% names(dots)){
    thresh <- dots[["threshold"]]
  } else{
    thresh <- .5
  }
  # make sure y is 1/0
  if (is.logical(y)) y <- as.numeric(y)
  # predicted 'class'
  pcla <- as.numeric(p > thresh) 

  sens <- sum(y==1 & pcla==1)/sum(y==1)
  spec <- sum(y==0 & pcla==0)/sum(y==0)

  scores <- c(sens, spec)
  names(scores) <- c("Sensitivity", "Specificity")
  
  return(scores)
}

## -----------------------------------------------------------------------------
validate(fit = mod, score_fun = sens_spec, threshold=.2,
         method = "cv_optimism", B = 10)