---
title: "Exploring The Variables Importance"
author: "Gabriele Pittarello"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Exploring The Variables Importance}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---


```{r setup, include=FALSE}
# Set the knitr chunk default `echo = TRUE` for all chunks in this vignette.
knitr::opts_chunk$set(echo = TRUE)
```


```{r instancepackage, include=FALSE}
library(ReSurv)
```

# Introduction 

Machine learning models can capture interactions between covariates. They are often black boxes, but they can be interpreted with SHAP values. We generate two data sets, one from scenario Alpha and one from scenario Delta, and use them to illustrate the plotting functionalities of the ReSurv package.

```{r eval=FALSE, include=TRUE}

# Input data scenario Alpha

# Simulate the claims data (`scenario = 0`) with a fixed seed.
input_data_0 <- data_generator(
  random_seed = 1, scenario = 0,
  time_unit = 1 / 360, years = 4, yearly_exposure = 200
)

# Pre-process the simulated data: "AP" is used both as the accident period
# and as a continuous feature; input granularity is days, output is quarters.
individual_data_0 <- IndividualDataPP(
  data = input_data_0, id = NULL,
  categorical_features = "claim_type", continuous_features = "AP",
  accident_period = "AP", calendar_period = "RP",
  input_time_granularity = "days", output_time_granularity = "quarters",
  years = 4
)

```


```{r eval=FALSE, include=TRUE}
# Input data scenario Delta

# Simulate the claims data (`scenario = 3`) with a fixed seed.
input_data3 <- data_generator(
  random_seed = 1, scenario = 3,
  time_unit = 1 / 360, years = 4, yearly_exposure = 200
)

# Pre-process the simulated data: "AP" is used both as the accident period
# and as a continuous feature; input granularity is days, output is quarters.
individual_data_3 <- IndividualDataPP(
  data = input_data3, id = NULL,
  categorical_features = "claim_type", continuous_features = "AP",
  accident_period = "AP", calendar_period = "RP",
  input_time_granularity = "days", output_time_granularity = "quarters",
  years = 4
)

```


Here we fit neural network (NN) and XGBoost (XGB) models on both data sets. To keep this vignette simple, we provide the optimal hyperparameters in advance.

```{r eval=FALSE, include=TRUE}

# Pre-tuned XGB hyperparameters for scenario Alpha.
hp_scenario_alpha_xgb <- list(
  # Booster settings, nested under `params`.
  params = list(
    booster = "gbtree",
    eta = 0.9887265, subsample = 0.7924135,
    alpha = 10.85342, lambda = 6.213317,
    min_child_weight = 3.042204, max_depth = 1
  ),
  # Remaining top-level settings (rounds, early stopping, verbosity).
  print_every_n = 0, nrounds = 3000,
  verbose = FALSE, early_stopping_rounds = 500
)

# Pre-tuned NN hyperparameters for scenario Alpha.
# `batch_size` used to be listed twice with the same value; the duplicate is
# removed so that every element name in the list is unique.
hp_scenario_alpha_nn <- list(
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  num_layers = 2,
  num_nodes = 10,
  optim = "SGD",
  lr = 0.3023043,
  xi = 0.426443,
  eps = 0,
  activation = "SELU",
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  # Kept as an explicit NULL entry: `list()` retains NULL-valued elements.
  network_structure = NULL
)

# Pre-tuned XGB hyperparameters for scenario Delta.
hp_scenario_delta_xgb <- list(
  # Booster settings, nested under `params`.
  params = list(
    booster = "gbtree",
    eta = 0.2717736,
    subsample = 0.9043068,
    alpha = 7.789214,
    lambda = 12.09398,
    min_child_weight = 22.4837,
    max_depth = 4
  ),
  # Remaining top-level settings (rounds, early stopping, verbosity).
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned NN hyperparameters for scenario Delta.
# `batch_size` used to be listed twice with the same value; the duplicate is
# removed so that every element name in the list is unique.
hp_scenario_delta_nn <- list(
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  num_layers = 2,
  num_nodes = 2,
  optim = "Adam",
  lr = 0.3542422,
  xi = 0.1803953,
  eps = 0,
  activation = "LeakyReLU",
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  # Kept as an explicit NULL entry: `list()` retains NULL-valued elements.
  network_structure = NULL
)

```

```{r eval=FALSE, include=TRUE}

# Scenario Alpha: fit an XGB and an NN hazard model on the same data.
resurv_model_xgb_A <- ReSurv(
  individual_data_0,
  hazard_model = "XGB",
  hparameters = hp_scenario_alpha_xgb
)

resurv_model_nn_A <- ReSurv(
  individual_data_0,
  hazard_model = "NN",
  hparameters = hp_scenario_alpha_nn
)

# Scenario Delta: same two model types, with the Delta hyperparameters.
resurv_model_xgb_D <- ReSurv(
  individual_data_3,
  hazard_model = "XGB",
  hparameters = hp_scenario_delta_xgb
)

resurv_model_nn_D <- ReSurv(
  individual_data_3,
  hazard_model = "NN",
  hparameters = hp_scenario_delta_nn
)


```


## SHAP values (XGB)

```{r eval=FALSE, include=TRUE}
# Plot the fitted XGB model for scenario Alpha.
plot(resurv_model_xgb_A)
```

```{r eval=FALSE, include=TRUE}
# Plot the fitted XGB model for scenario Delta.
plot(resurv_model_xgb_D)
```

## SHAP values (NN)

```{r eval=FALSE, include=TRUE}
# Plot the fitted NN model for scenario Alpha.
# NOTE(review): `nsamples` presumably controls how many observations are
# sampled for the SHAP computation - confirm in the ReSurv plot documentation.
plot(resurv_model_nn_A, nsamples = 10000)
```

```{r eval=FALSE, include=TRUE}
# Plot the fitted NN model for scenario Delta.
# NOTE(review): `nsamples` presumably controls how many observations are
# sampled for the SHAP computation - confirm in the ReSurv plot documentation.
plot(resurv_model_nn_D, nsamples = 10000)
```