## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----message = FALSE----------------------------------------------------------
library("DynForest")
data(pbc2)
head(pbc2)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  set.seed(1234)
#  id <- unique(pbc2$id)
#  id_sample <- sample(id, length(id)*2/3)
#  id_row <- which(pbc2$id %in% id_sample)
#  pbc2_train <- pbc2[id_row,]
#  pbc2_pred <- pbc2[-id_row,]

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  timeData_train <- pbc2_train[,c("id","time",
#                                  "serBilir","SGOT",
#                                  "albumin","alkaline")]
#  fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")])

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  timeVarModel <- list(serBilir = list(fixed = serBilir ~ time,
#                                       random = ~ time),
#                       SGOT = list(fixed = SGOT ~ time + I(time^2),
#                                   random = ~ time + I(time^2)),
#                       albumin = list(fixed = albumin ~ time,
#                                      random = ~ time),
#                       alkaline = list(fixed = alkaline ~ time,
#                                       random = ~ time))

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  Y <- list(type = "surv",
#            Y = unique(pbc2_train[,c("id","years","event")]))

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  res_dyn <- dynforest(timeData = timeData_train,
#                       fixedData = fixedData_train,
#                       timeVar = "time", idVar = "id",
#                       timeVarModel = timeVarModel, Y = Y,
#                       ntree = 200, mtry = 3, nodesize = 2, minsplit = 3,
#                       cause = 2, ncores = 7, seed = 1234)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  summary(res_dyn)
#  
#  dynforest executed for survival (competing risk) outcome
#  	Splitting rule: Fine & Gray statistic test
#  	Out-of-bag error type: Integrated Brier Score
#  	Leaf statistic: Cumulative incidence function
#  ----------------
#  Input
#  	Number of subjects: 208
#  	Longitudinal: 4 predictor(s)
#  	Numeric: 1 predictor(s)
#  	Factor: 2 predictor(s)
#  ----------------
#  Tuning parameters
#  	mtry: 3
#  	nodesize: 2
#  	minsplit: 3
#  	ntree: 200
#  ----------------
#  ----------------
#  dynforest summary
#  	Average depth per tree: 6.62
#  	Average number of leaves per tree: 27.68
#  	Average number of subjects per leaf: 4.78
#  	Average number of events of interest per leaf: 1.95
#  ----------------
#  Computation time
#  	Number of cores used: 7
#  	Time difference of 3.18191 mins
#  ----------------

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  head(get_tree(dynforest_obj = res_dyn, tree = 1))
#  
#            type id_node var_split feature  threshold   N depth
#  1 Longitudinal       1         4       2 -0.6931377 123     1
#  2      Numeric       2         2      NA -1.4960589  25     2
#  3      Numeric       3         1      NA -1.0052000  98     2
#  4 Longitudinal       4         3       1  0.3332725   3     3
#  5       Factor       5         2      NA         NA  22     3
#  6 Longitudinal       6         5       2  0.7550109  10     3

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  tail(get_tree(dynforest_obj = res_dyn, tree = 1))
#  
#              type id_node var_split feature     threshold N depth
#  156         Leaf   14522        NA      NA            NA 1    14
#  157 Longitudinal   14523         1       1  8.894535e-03 3    14
#  158 Longitudinal   29046         6       1 -1.385950e-07 2    15
#  159         Leaf   29047        NA      NA            NA 1    15
#  160         Leaf   58092        NA      NA            NA 1    16
#  161         Leaf   58093        NA      NA            NA 1    16

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  plot(res_dyn, tree = 1, nodes = 251)

## ----fig.cap = "Figure 1: Estimated cumulative incidence functions in tree 1 and node 251.", eval = TRUE, echo = FALSE, out.width="70%"----
knitr::include_graphics("Figures/DynForestR_surv_CIF.png")

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  plot(res_dyn, id = 104, max_tree = 9)

## ----DynForestRCIF, fig.cap = "Figure 2: Estimated cumulative incidence functions for subject 104 over 9 trees.", eval = TRUE, echo = FALSE, out.width="70%"----
knitr::include_graphics("Figures/DynForestR_surv_CIF9.png")

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  res_dyn_OOB <- compute_ooberror(dynforest_obj = res_dyn)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  res_dyn_OOB
#  
#  [1] 0.1265053

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  id_pred <- unique(pbc2_pred$id[which(pbc2_pred$years>4)])
#  pbc2_pred_tLM <- pbc2_pred[which(pbc2_pred$id %in% id_pred),]
#  timeData_pred <- pbc2_pred_tLM[,c("id","time",
#                                    "serBilir","SGOT",
#                                    "albumin","alkaline")]
#  fixedData_pred <- unique(pbc2_pred_tLM[,c("id","age","drug","sex")])
#  pred_dyn <- predict(object = res_dyn,
#                      timeData = timeData_pred,
#                      fixedData = fixedData_pred,
#                      idVar = "id", timeVar = "time",
#                      t0 = 4)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  plot(pred_dyn, id = c(102, 260))

## ----DynForestRpredCIF, fig.cap = "Figure 3: Predicted cumulative incidence function for subjects 102 and 260 from landmark time of 4 years (represented by the dashed vertical line)", eval = TRUE, echo = FALSE, out.width="70%"----
knitr::include_graphics("Figures/DynForestR_surv_CIF9.png")

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, seed = 123)

## ----eval = FALSE, echo = TRUE, fig.show='hide'-------------------------------
#  p1 <- plot(res_dyn_VIMP, PCT = TRUE)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  group <- list(group1 = c("serBilir","SGOT"),
#                group2 = c("albumin","alkaline"))
#  res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn,
#                                 group = group, seed = 123)

## ----eval = FALSE, echo = TRUE, fig.show='hide'-------------------------------
#  p2 <- plot(res_dyn_gVIMP, PCT = TRUE)

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  plot_grid(p1, p2, labels = c("A", "B"))

## ----DynForestRVIMPgVIMP, fig.cap = "Figure 4: (A) VIMP statistic and (B) grouped-VIMP statistic displayed as a percentage of loss in OOB error of prediction. group1 includes serBilir and SGOT; group2 includes albumin and alkaline.", eval = TRUE, echo = FALSE, out.width="70%"----
knitr::include_graphics("Figures/DynForestR_surv_VIMP_gVIMP.png")

## ----eval = FALSE, echo = TRUE------------------------------------------------
#  res_dyn_max <- dynforest(timeData = timeData_train,
#                           fixedData = fixedData_train,
#                           timeVar = "time", idVar = "id",
#                           timeVarModel = timeVarModel, Y = Y,
#                           ntree = 200, mtry = 7, nodesize = 2, minsplit = 3,
#                           cause = 2, ncores = 7, seed = 1234)

## ----echo = TRUE, eval = FALSE, fig.show='hide'-------------------------------
#  depth_dyn <- compute_vardepth(dynforest_obj = res_dyn_max)
#  p1 <- plot(depth_dyn, plot_level = "predictor")
#  p2 <- plot(depth_dyn, plot_level = "feature")

## ----echo = TRUE, eval = FALSE------------------------------------------------
#  plot_grid(p1, p2, labels = c("A", "B"))

## ----DynForestRmindepth, fig.cap = "Figure 5: Average minimal depth level by predictor (A) and feature (B).", eval = TRUE, echo = FALSE, out.width="70%"----
knitr::include_graphics("Figures/DynForestR_surv_mindepth.png")

## ----DynForestRmtrytuned, fig.cap = "Figure 6: OOB error according to mtry hyperparameter", eval = TRUE, echo = FALSE, out.width="70%"----
knitr::include_graphics("Figures/DynForestR_surv_mtrytuned.png")