## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)


## ----include = FALSE----------------------------------------------------------
library(knitr)
library (kableExtra)
knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

## ----echo=TRUE----------------------------------------------------------------
library(BioPred)

kable_styling(kable(x=head(tutorial_data), format="html", caption = "The first 6 subjects"),
              bootstrap_options="striped",font_size=16)

rownames(tutorial_data)=NULL

## ----echo=TRUE----------------------------------------------------------------
X_feature=tutorial_data[,c("x1", "x2" ,"x3" ,"x4","x5","x6","x7","x8","x9","x10")]
y_label=tutorial_data$y.con
# Convert treatment variable to (1 -1) format (1 for treatment, -1 for control)
trt=ifelse(tutorial_data$treatment==1, 1, -1)
true_label=tutorial_data$subgroup_label
# Estimate the propensity score using logistic regression
data_logit=tutorial_data[,c("treatment","x1", "x2" ,"x3" ,"x4" , "x5", "x6" , "x7","x8","x9","x10")]
logit_model <- glm(treatment~ ., data = data_logit, family = binomial)
pi <- predict(logit_model, type = "response")
# Train the XGBoostSub_con model using the specified parameters
model <- XGBoostSub_con(X_feature, y_label, trt ,pi,Loss_type = "A_learning", params = list(learning_rate = 0.005, max_depth = 1, lambda = 5, tree_method = 'hist'), nrounds = 200, disable_default_eval_metric = 0, verbose = FALSE)

## ----echo=TRUE----------------------------------------------------------------
cat("Evaluation loss:\n")
print(model$evaluation_log)

## ----echo=TRUE----------------------------------------------------------------
eval_metric_test <- eval_metric_con(model, X_feature, y_label, pi, trt, Loss_type = "A_learning")
cat("Testing Evaluation Result:\n")
print(eval_metric_test$value)


## ----echo=TRUE----------------------------------------------------------------
biomarker_imp=predictive_biomarker_imp(model)
kable_styling(kable(x=biomarker_imp, format="html", caption = "Biomarker importance table"),
              bootstrap_options="striped",font_size=16)

## ----echo=TRUE----------------------------------------------------------------
subgroup_results=get_subgroup_results(model, X_feature, subgroup_label=true_label, cutoff = 0.5)
kable_styling(kable(x=head(subgroup_results$assignment), format="html", caption = "The first 6 subjects subgroup assigentment"),
              bootstrap_options="striped",font_size=16)
cat("Prediction accuracy of true subgroup label:\n")
cat(subgroup_results$acc)



## ----echo=TRUE,fig.height=6, fig.width=6--------------------------------------
cdf_plot (xvar="x2", data=tutorial_data, y.int=5, xlim=NULL, xvar.display="Biomarker_X2", group=NULL)

## ----echo=TRUE,fig.height=6, fig.width=8--------------------------------------
scat_cont_plot(
  yvar='y.con',
  xvar='x2',
  data=tutorial_data,
  ybreaks = NULL,
  xbreaks = NULL,
  yvar.display = 'y',
  xvar.display = "Biomarker_X2"
)

## ----echo=TRUE,fig.height=6, fig.width=6--------------------------------------
cutoff_result_con=fixcut_con(yvar='y.con', xvar="x2", dir=">", cutoffs=c(-0.1,-0.3,-0.5,0.1,0.3,0.5), data=tutorial_data, method="t.test", yvar.display='y.con', xvar.display='biomarker x2', vert.x=F)

cutoff_result_con$fig


## ----echo=TRUE----------------------------------------------------------------
res=cut_perf (yvar="y.con", censorvar=NULL, xvar="x2", cutoff=c(0.5), dir="<=", xvars.adj=NULL, data=tutorial_data, type='c', yvar.display='y.con', xvar.display="biomarker x2")
kable_styling(kable(x=res$comp.res.display[,-2:-4], format="html", caption = "Performace at optimal cutoff"),
              bootstrap_options="striped",font_size=16)


## ----echo=TRUE,fig.height=10, fig.width=15------------------------------------
tutorial_data$biogroup=ifelse(tutorial_data$x2<=0.5,'biomarker_positive','biomarker_negative')

res = subgrp_perf_pred(yvar="y.time", censorvar="y.event", grpvar="biogroup", grpname=c("biomarker_positive",'biomarker_negative'),trtvar="treatment_categorical", trtname=c("Placebo" , "Treatment"), xvars.adj=NULL,data=tutorial_data, type="s")
kable_styling(kable(x=res$group.res.display, format="html", caption = "BioSubgroup Stat"),
              bootstrap_options="striped",font_size=16)
res$fig


## ----echo=TRUE----------------------------------------------------------------
y_label=tutorial_data$y.bin
model <- XGBoostSub_bin(X_feature, y_label, trt ,pi,Loss_type = "A_learning", params = list(learning_rate = 0.01, max_depth = 1, lambda = 5, tree_method = 'hist'), nrounds = 300, disable_default_eval_metric = 0, verbose = FALSE)

## ----echo=TRUE----------------------------------------------------------------
biomarker_imp=predictive_biomarker_imp(model)
kable_styling(kable(x=biomarker_imp, format="html", caption = "Biomarker importance table"),
              bootstrap_options="striped",font_size=16)

## ----echo=TRUE----------------------------------------------------------------
subgroup_results=get_subgroup_results(model, X_feature, subgroup_label=true_label, cutoff = 0.5)
kable_styling(kable(x=head(subgroup_results$assignment), format="html", caption = "The first 6 subjects subgroup assigentment"),
              bootstrap_options="striped",font_size=16)
cat("Prediction accuracy of true subgroup label:\n")
cat(subgroup_results$acc)

## ----echo=TRUE,fig.height=6, fig.width=6--------------------------------------
roc_bin_plot (yvar='y.bin', xvars="x10", dirs="auto", data=tutorial_data, yvar.display='y.bin', xvars.display="Biomarker x10")

## ----echo=TRUE,fig.height=6, fig.width=6--------------------------------------
roc_bin_plot (yvar='y.bin', xvars="x2", dirs="auto", data=tutorial_data, yvar.display='y.bin', xvars.display="Biomarker x2")

## ----echo=TRUE,fig.height=6, fig.width=9--------------------------------------
cutoff_result_bin=fixcut_bin (yvar='y.bin', xvar="x10", dir=">", cutoffs=c(-0.1,-0.3,-0.5,0.1,0.3,0.5), data=tutorial_data, method="Fisher", yvar.display='Response_Y', xvar.display='Biomarker_X10', vert.x=F)
cutoff_result_bin$fig

## ----echo=TRUE----------------------------------------------------------------
res=cut_perf (yvar="y.con", censorvar=NULL, xvar="x10", cutoff=c(0.5), dir=">", xvars.adj=NULL, data=tutorial_data, type='c', yvar.display='y.con', xvar.display="biomarker x2")
kable_styling(kable(x=res$comp.res.display[,-2:-4], format="html", caption = "caption"),
              bootstrap_options="striped",font_size=16)


## ----echo=TRUE,fig.height=10, fig.width=15------------------------------------
tutorial_data$biogroup=ifelse(tutorial_data$x10>0.5,'biomarker_positive','biomarker_negative')

res = subgrp_perf_pred(yvar="y.time", censorvar="y.event", grpvar="biogroup", grpname=c("biomarker_positive",'biomarker_negative'),trtvar="treatment_categorical", trtname=c("Placebo" , "Treatment"), xvars.adj=NULL,data=tutorial_data, type="s")
kable_styling(kable(x=res$group.res.display, format="html", caption = "BioSubgroup Stat"),
              bootstrap_options="striped",font_size=16)
res$fig


## ----echo=TRUE----------------------------------------------------------------
# Prepare the data for training the XGBoostSub_sur
y_label=tutorial_data$y.time
delta=tutorial_data$y.event

# Train the XGBoostSub_sur model using the specified parameters
model <- XGBoostSub_sur(X_feature, y_label, trt ,pi,delta,Loss_type = "A_learning", params=list(learning_rate = 0.005, max_depth = 1,lambda = 9, gamma=1, min_child_weight=1,
max_bin=128, tree_method = 'exact',subsample=0.8), nrounds = 250, disable_default_eval_metric = 1, verbose = FALSE)

## ----echo=TRUE----------------------------------------------------------------
biomarker_imp=predictive_biomarker_imp(model)
kable_styling(kable(x=biomarker_imp, format="html", caption = "Biomarker importance table"),
              bootstrap_options="striped",font_size=16)

## ----echo=TRUE----------------------------------------------------------------
subgroup_results=get_subgroup_results(model, X_feature, subgroup_label=true_label, cutoff = 0.5)
kable_styling(kable(x=head(subgroup_results$assignment), format="html", caption = "The first 6 subjects subgroup assigentment"),
              bootstrap_options="striped",font_size=16)
cat("Prediction accuracy of true subgroup label:\n")
cat(subgroup_results$acc)

## ----echo=TRUE,fig.height=10, fig.width=10------------------------------------
res = cat_summary(yvar="risk_category", yname=c("High Risk","Intermediate Risk" ,"Low Risk"), xvars="treatment_categorical", xname.list=list(c("Placebo" , "Treatment")), data=tutorial_data)

kable_styling(kable(x=res$cont.display, format="html", caption = "Contingency Table"),
              bootstrap_options="striped",font_size=16)

## ----echo=TRUE,fig.height=10, fig.width=15------------------------------------
res = subgrp_perf_pred(yvar="y.time", censorvar="y.event", grpvar="risk_category", grpname=c("High Risk","Intermediate Risk" ,"Low Risk"),trtvar="treatment_categorical", trtname=c("Placebo" , "Treatment"), xvars.adj=NULL,data=tutorial_data, type="s")
kable_styling(kable(x=res$group.res.display, format="html", caption = "Subgroup Stat"),
              bootstrap_options="striped",font_size=16)
res$fig