## ----Load necessary packages--------------------------------------------------
library(tidyverse)
library(caret)
library(iml)
library(rmlnomogram)

## ----Example 1 - Train a prediction model, eval=FALSE-------------------------
#  # Load dataset
#  data("mtcars")
#  
#  # Preprocess for training a classifier
#  mtcars$am <- factor(mtcars$am, levels = c(0, 1), labels = c("Auto", "Manual"))
#  
#  # Convert some numerical features to categorical for demonstration
#  mtcars$cyl <- factor(mtcars$cyl)
#  mtcars$vs <- factor(mtcars$vs)
#  mtcars$qsec <- factor(as.integer(mtcars$qsec >= 18))
#  
#  # Using tidyverse to filter only factor columns
#  mtcars <- select(mtcars, where(is.factor))
#  
#  # Create dummy variables for all factor variables, excluding the target variable
#  dummy_vars <- dummyVars(am ~ ., data = mtcars) |>
#    predict(newdata = mtcars)
#  
#  # Combine binarized predictors with the target variable
#  mtcars_binarized <- as.data.frame(dummy_vars) |>
#    mutate_all(as.factor) |>
#    mutate(am = mtcars$am) |>
#    select(-vs.0, -cyl.4, -qsec.0)
#  
#  # Train a random forest model using the full dataset
#  set.seed(1)
#  model <- train(
#    am ~ .,
#    data = mtcars_binarized,
#    method = "rf",
#    trControl = trainControl(method = "none", classProbs = TRUE)
#  )

## ----Example 1 - Entire dataset of combinations of the selected features, eval=FALSE----
#  # Extract feature names used in the trained model from caret model object
#  caret_features <- str_remove_all(model$finalModel$xNames, "1$")
#  
#  # Modify the training dataset to include only the model features
#  mtcars_selected <- mtcars_binarized[, caret_features]
#  
#  # Generate all possible feature combinations for nomogram
#  nomogram_features <- expand.grid(lapply(mtcars_selected, unique))

## ----Example 1 - Load subs 1 to avoid multi-threading, include=FALSE----------
data(nomogram_features)

## ----table-1, echo=FALSE------------------------------------------------------
head(nomogram_features)

## ----Example 1 - Prediction outputs for the entire dataset, eval=FALSE--------
#  nomogram_outputs <- predict(model, nomogram_features, type = "prob") |>
#    select(output = levels(mtcars_binarized$am)[2])

## ----Example 1 - Load subs 2 to avoid multi-threading, include=FALSE----------
data(nomogram_outputs)

## ----table-2, echo=FALSE------------------------------------------------------
head(nomogram_outputs)

## ----Example 1 - Create nomogram----------------------------------------------
nomogram <- create_nomogram(nomogram_features, nomogram_outputs)

## ----figure-1, echo=FALSE, fig.height=3, fig.width=12-------------------------
nomogram

## ----Example 1 - Compute SHAP values for the entire dataset, eval=FALSE-------
#  # Prepare data and model for SHAP value calculation using iml
#  X <- mtcars_binarized[, -which(names(mtcars_binarized) == "am")]
#  
#  # Create a predictor object
#  predictor <- Predictor$new(model = model, data = X)
#  
#  # Calculate SHAP values
#  nomogram_shaps <- list()
#  
#  for(i in seq(nrow(nomogram_features))){
#    shapley <-  Shapley$new(predictor, x.interest = nomogram_features[i, ])
#    nomogram_shaps[[i]] <- shapley$results
#  }
#  
#  names(nomogram_shaps) <- seq(nrow(nomogram_features))
#  
#  nomogram_shaps <- reduce(imap(nomogram_shaps, ~ mutate(.x, i = .y)), rbind) |>
#    filter(class == levels(mtcars_binarized$am)[2]) |>
#    select(i, feature, phi) |>
#    spread(feature, phi) |>
#    arrange(as.numeric(i)) |>
#    column_to_rownames(var = "i")

## ----Example 1 - Load subs 3 to avoid multi-threading, include=FALSE----------
data(nomogram_shaps)

## ----table-3, echo=FALSE------------------------------------------------------
head(nomogram_shaps)

## ----Example 2 - Create nomogram----------------------------------------------
nomogram2 <- create_nomogram(nomogram_features, nomogram_outputs, prob = TRUE)

## ----figure-2, echo=FALSE, fig.height=4.5, fig.width=10-----------------------
nomogram2

## ----Example 2 - Create nomogram with SHAP------------------------------------
nomogram2_with_shap <-
  create_nomogram(
    nomogram_features, nomogram_outputs, nomogram_shaps
    , prob = TRUE
  )

## ----figure-3, echo=FALSE, fig.height=6, fig.width=12-------------------------
nomogram2_with_shap

## ----Example 3 - Train a prediction model, eval=FALSE-------------------------
#  # Reload original dataset
#  data("mtcars")
#  
#  # Round to 0 decimal to reduce possible combinations later
#  mtcars <- mutate(mtcars, qsec = round(qsec, 0))
#  
#  # Add single numerical predictor to binarized predictors with the target variable
#  mtcars_mixed <- cbind(mtcars["qsec"], select(mtcars_binarized, -qsec.1))
#  
#  # Train a random forest model using the full dataset
#  set.seed(1)
#  model2 <- train(
#    am ~ .,
#    data = mtcars_mixed,
#    method = "rf",
#    trControl = trainControl(method = "none", classProbs = TRUE)
#  )

## ----Example 3 - Entire dataset of combinations of the selected features, eval=FALSE----
#  # Extract feature names used in the trained model from caret model object
#  caret_features2 <- str_remove_all(model2$finalModel$xNames, "1$")
#  
#  # Modify the training dataset to include only the model features
#  mtcars_selected2 <- mtcars_mixed[, caret_features2]
#  
#  # Generate all possible feature combinations for nomogram
#  nomogram_features2 <-
#    select_if(mtcars_selected2, is.numeric) |>
#    lapply(\(x) seq(min(x), max(x))) |>
#    c(lapply(select_if(mtcars_selected2, is.factor), unique)) |>
#    expand.grid()

## ----Example 3 - Load subs 1 to avoid multi-threading, include=FALSE----------
data(nomogram_features2)

## ----table-4, echo=FALSE------------------------------------------------------
head(nomogram_features2)

## ----Example 3 - Prediction outputs for the entire dataset, eval=FALSE--------
#  nomogram_outputs2 <- predict(model2, nomogram_features2, type = "prob") |>
#    select(output = levels(mtcars_mixed$am)[2])

## ----Example 3 - Load subs 2 to avoid multi-threading, include=FALSE----------
data(nomogram_outputs2)

## ----Example 3 - Create nomogram----------------------------------------------
nomogram3 <- create_nomogram(nomogram_features2, nomogram_outputs2, prob = TRUE)

## ----figure-4, echo=FALSE, fig.height=7.5, fig.width=12-----------------------
nomogram3

## ----Example 3 - Compute SHAP values for the entire dataset, eval=FALSE-------
#  # Prepare data and model for SHAP value calculation using iml
#  X2 <- mtcars_mixed[, -which(names(mtcars_mixed) == "am")]
#  
#  # Create a predictor object
#  predictor2 <- Predictor$new(model = model2, data = X2)
#  
#  # Calculate SHAP values
#  nomogram_shaps2 <- list()
#  
#  for(i in seq(nrow(nomogram_features2))){
#    shapley2 <-  Shapley$new(predictor2, x.interest = nomogram_features2[i, ])
#    nomogram_shaps2[[i]] <- shapley2$results
#  }
#  
#  names(nomogram_shaps2) <- seq(nrow(nomogram_features2))
#  
#  nomogram_shaps2 <- reduce(imap(nomogram_shaps2, ~ mutate(.x, i = .y)), rbind) |>
#    filter(class == levels(mtcars_mixed$am)[2]) |>
#    select(i, feature, phi) |>
#    spread(feature, phi) |>
#    arrange(as.numeric(i)) |>
#    column_to_rownames(var = "i")

## ----Example 3 - Load subs 3 to avoid multi-threading, include=FALSE----------
data(nomogram_shaps2)

## ----Example 3 - Create nomogram with SHAP------------------------------------
nomogram3_with_shap <-
  create_nomogram(
    nomogram_features2, nomogram_outputs2, nomogram_shaps2
    , prob = TRUE
  )

## ----figure-5, echo=FALSE, fig.height=7.5, fig.width=12-----------------------
nomogram3_with_shap

## ----Example 4 - Train a prediction model, eval=FALSE-------------------------
#  # Load dataset
#  data("mtcars")
#  
#  # Preprocess for training a regressor
#  outcome <- mtcars$wt
#  
#  # Convert some numerical features to categorical for demonstration
#  mtcars$cyl <- factor(mtcars$cyl)
#  mtcars$vs <- factor(mtcars$vs)
#  mtcars$qsec <- factor(as.integer(mtcars$qsec >= 18))
#  
#  # Using tidyverse to filter only factor columns
#  mtcars <- cbind(select(mtcars, where(is.factor)), select(mtcars, wt))
#  
#  # Create dummy variables for all factor variables, excluding the target variable
#  dummy_vars2 <- dummyVars(wt ~ ., data = mtcars) |>
#    predict(newdata = mtcars)
#  
#  # Combine binarized predictors with the target variable
#  mtcars_binarized2 <- as.data.frame(dummy_vars2) |>
#    mutate_all(as.factor) |>
#    mutate(wt = outcome) |>
#    select(-vs.0, -cyl.4, -qsec.0)
#  
#  # Train a random forest model using the full dataset
#  set.seed(1)
#  model3 <- train(
#    wt ~ .,
#    data = mtcars_binarized2,
#    method = "rf",
#    trControl = trainControl(method = "none")
#  )

## ----Example 4 - Entire dataset of combinations of the selected features, eval=FALSE----
#  # Extract feature names used in the trained model from caret model object
#  caret_features3 <- str_remove_all(model3$finalModel$xNames, "1$")
#  
#  # Modify the training dataset to include only the model features
#  mtcars_selected3 <- mtcars_binarized2[, caret_features3]
#  
#  # Generate all possible feature combinations for nomogram
#  nomogram_features3 <- expand.grid(lapply(mtcars_selected3, unique))

## ----Example 4 - Load subs 1 to avoid multi-threading, include=FALSE----------
data(nomogram_features3)

## ----Example 4 - Prediction outputs for the entire dataset, eval=FALSE--------
#  nomogram_outputs3 <- data.frame(output = predict(model3, nomogram_features3))

## ----Example 4 - Load subs 2 to avoid multi-threading, include=FALSE----------
data(nomogram_outputs3)

## ----table-5, echo=FALSE------------------------------------------------------
head(nomogram_outputs3)

## ----Example 4 - Create nomogram----------------------------------------------
nomogram4 <- create_nomogram(nomogram_features3, nomogram_outputs3, est = TRUE)

## ----figure-6, echo=FALSE, fig.height=4.5, fig.width=10-----------------------
nomogram4

## ----Example 4 - Compute SHAP values for the entire dataset, eval=FALSE-------
#  # Prepare data and model for SHAP value calculation using iml
#  X3 <- mtcars_binarized2[, -which(names(mtcars_binarized2) == "wt")]
#  
#  # Create a predictor object
#  predictor3 <- Predictor$new(model = model3, data = X3)
#  
#  # Calculate SHAP values
#  nomogram_shaps3 <- list()
#  
#  for(i in seq(nrow(nomogram_features3))){
#    shapley3 <-  Shapley$new(predictor3, x.interest = nomogram_features3[i, ])
#    nomogram_shaps3[[i]] <- shapley3$results
#  }
#  
#  names(nomogram_shaps3) <- seq(nrow(nomogram_features3))
#  
#  nomogram_shaps3 <- reduce(imap(nomogram_shaps3, ~ mutate(.x, i = .y)), rbind) |>
#    select(i, feature, phi) |>
#    spread(feature, phi) |>
#    arrange(as.numeric(i)) |>
#    column_to_rownames(var = "i")

## ----Example 4 - Load subs 3 to avoid multi-threading, include=FALSE----------
data(nomogram_shaps3)

## ----Example 4 - Create nomogram with SHAP------------------------------------
nomogram4_with_shap <-
  create_nomogram(
    nomogram_features3, nomogram_outputs3, nomogram_shaps3
    , est = TRUE
  )

## ----figure-7, echo=FALSE, fig.height=7.5, fig.width=12-----------------------
nomogram4_with_shap

## ----Example 5 - Train a prediction model, eval=FALSE-------------------------
#  # Reload original dataset
#  data("mtcars")
#  
#  # Round to 0 decimal to reduce possible combinations later
#  mtcars <- mutate(mtcars, qsec = round(qsec, 0))
#  
#  # Add single numerical predictor to binarized predictors with the target variable
#  mtcars_mixed2 <- cbind(mtcars["qsec"], select(mtcars_binarized2, -qsec.1))
#  
#  # Train a random forest model using the full dataset
#  set.seed(1)
#  model4 <- train(
#    wt ~ .,
#    data = mtcars_mixed2,
#    method = "rf",
#    trControl = trainControl(method = "none")
#  )

## ----Example 5 - Entire dataset of combinations of the selected features, eval=FALSE----
#  # Extract feature names used in the trained model from caret model object
#  caret_features4 <- str_remove_all(model4$finalModel$xNames, "1$")
#  
#  # Modify the training dataset to include only the model features
#  mtcars_selected4 <- mtcars_mixed2[, caret_features4]
#  
#  # Generate all possible feature combinations for nomogram
#  nomogram_features4 <-
#    select_if(mtcars_selected4, is.numeric) |>
#    lapply(\(x) seq(min(x), max(x))) |>
#    c(lapply(select_if(mtcars_selected4, is.factor), unique)) |>
#    expand.grid()

## ----Example 5 - Load subs 1 to avoid multi-threading, include=FALSE----------
data(nomogram_features4)

## ----Example 5 - Prediction outputs for the entire dataset, eval=FALSE--------
#  nomogram_outputs4 <- data.frame(output = predict(model4, nomogram_features4))

## ----Example 5 - Load subs 2 to avoid multi-threading, include=FALSE----------
data(nomogram_outputs4)

## ----Example 5 - Create nomogram----------------------------------------------
nomogram5 <- create_nomogram(nomogram_features4, nomogram_outputs4, est = TRUE)

## ----figure-8, echo=FALSE, fig.height=7.5, fig.width=12-----------------------
nomogram5

## ----Example 5 - Compute SHAP values for the entire dataset, eval=FALSE-------
#  # Prepare data and model for SHAP value calculation using iml
#  X4 <- mtcars_mixed2[, -which(names(mtcars_mixed2) == "wt")]
#  
#  # Create a predictor object
#  predictor4 <- Predictor$new(model = model4, data = X4)
#  
#  # Calculate SHAP values
#  nomogram_shaps4 <- list()
#  
#  for(i in seq(nrow(nomogram_features4))){
#    shapley4 <-  Shapley$new(predictor4, x.interest = nomogram_features4[i, ])
#    nomogram_shaps4[[i]] <- shapley4$results
#  }
#  
#  names(nomogram_shaps4) <- seq(nrow(nomogram_features4))
#  
#  nomogram_shaps4 <- reduce(imap(nomogram_shaps4, ~ mutate(.x, i = .y)), rbind) |>
#    select(i, feature, phi) |>
#    spread(feature, phi) |>
#    arrange(as.numeric(i)) |>
#    column_to_rownames(var = "i")

## ----Example 5 - Load subs 3 to avoid multi-threading, include=FALSE----------
data(nomogram_shaps4)

## ----Example 5 - Create nomogram with SHAP------------------------------------
nomogram4_with_shap <-
  create_nomogram(
    nomogram_features4, nomogram_outputs4, nomogram_shaps4
    , est = TRUE
  )

## ----figure-9, echo=FALSE, fig.height=7.5, fig.width=12-----------------------
nomogram4_with_shap

## ----session-info, echo=TRUE--------------------------------------------------
sessionInfo()