## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  class.output="scroll-100",
  cache.path = "cached/"
)
# Check if 'priorityelasticnet' is available
if (!requireNamespace("priorityelasticnet", quietly = TRUE)) {
  message("The 'priorityelasticnet' package is not installed. Please install it to fully reproduce this vignette.")
} else {
  library(priorityelasticnet)
}

## ----eval=FALSE---------------------------------------------------------------
# install.packages("priorityelasticnet")

## -----------------------------------------------------------------------------
# Simulate some data
set.seed(123)
n <- 100  # Number of observations
p <- 50   # Number of predictors

## -----------------------------------------------------------------------------
# Create a matrix of predictors
X <- matrix(rnorm(n * p), n, p)

## -----------------------------------------------------------------------------
# Generate a response vector based on a linear combination of some predictors
beta <- rnorm(10)  # Coefficients for the first 10 predictors
Y <- X[, 1:10] %*% beta + rnorm(n)  # Linear model with added noise

## -----------------------------------------------------------------------------
# Define predictor blocks
blocks <- list(
  block1 = 1:10,    # First block includes the first 10 predictors
  block2 = 11:30,   # Second block includes the next 20 predictors
  block3 = 31:50    # Third block includes the last 20 predictors
)

## -----------------------------------------------------------------------------
# Fit a priorityelasticnet model
fit <- priorityelasticnet(
  X = X, 
  Y = Y, 
  family = "gaussian", 
  blocks = blocks, 
  type.measure = "mse",
  alpha = 0.5
)

## -----------------------------------------------------------------------------
fit$lambda.ind

## -----------------------------------------------------------------------------
fit$lambda.type

## -----------------------------------------------------------------------------
fit$lambda.min

## -----------------------------------------------------------------------------
fit$min.cvm

## -----------------------------------------------------------------------------
fit$nzero

## -----------------------------------------------------------------------------
fit$glmnet.fit

## -----------------------------------------------------------------------------
fit$coefficients

## -----------------------------------------------------------------------------
head(cbind.data.frame(pred = fit$pred[,1], observed = fit$actuals))

## -----------------------------------------------------------------------------
# Set seed for reproducibility
set.seed(123)

# Number of observations and predictors
n <- 50  # Number of observations
p <- 300  # Number of predictors

# Number of non-zero coefficients
nzc <- trunc(p / 10)

# Simulate predictor matrix
x <- matrix(rnorm(n * p), n, p)

# Simulate regression coefficients for non-zero predictors
beta <- rnorm(nzc)

# Calculate linear predictor
fx <- x[, seq(nzc)] %*% beta / 3

# Calculate hazard function
hx <- exp(fx)

# Simulate survival times using exponential distribution
ty <- rexp(n, hx)

# Generate censoring indicator (30% censoring probability)
tcens <- rbinom(n = n, prob = .3, size = 1)

# Load survival library and create survival object
library(survival)
y <- Surv(ty, 1 - tcens)

## -----------------------------------------------------------------------------
blocks <- list(
  bp1 = 1:20,    # First block with predictors 1 to 20
  bp2 = 21:200,  # Second block with predictors 21 to 200
  bp3 = 201:300  # Third block with predictors 201 to 300
)

## -----------------------------------------------------------------------------
# Fit Cox model using priorityelasticnet
fit_cox <- priorityelasticnet(
  x, 
  y, 
  family = "cox", 
  alpha = 0.5, 
  type.measure = "deviance", 
  blocks = blocks,
  block1.penalization = TRUE,
  lambda.type = "lambda.min",
  standardize = TRUE,
  nfolds = 10,
  cvoffset = TRUE
  
)

## -----------------------------------------------------------------------------
fit_cox$min.cvm

## -----------------------------------------------------------------------------
fit_cox$coefficients

## -----------------------------------------------------------------------------
fit_cox$lambda.min

## ----eval = requireNamespace("glmSparseNet", quietly = TRUE)------------------

library(glmSparseNet)

# Extract coefficients from the fitted Cox model
chosen.btas <- fit_cox$coefficients
y <- data.frame(
  time = ty,          # Survival times
  status = 1 - tcens  # Event indicator
)

# Group patients and plot Kaplan-Meier survival curves
separate2GroupsCox(
  chosen.btas = chosen.btas,  # Coefficients from the model
  xdata = x,                  # Predictor matrix (xdata)
  ydata = y,                  # Survival data (ydata as Surv object)
  probs = c(0.4, 0.6),        # Median split (adjust if necessary)
  no.plot = FALSE,            # Plot the Kaplan-Meier curve
  plot.title = "Survival Curves",  # Plot title
  xlim = NULL,                # Automatic x-axis limits
  ylim = NULL,                # Automatic y-axis limits
  expand.yzero = FALSE,       # Don't force y-axis to start at zero
  legend.outside = FALSE      # Keep legend inside the plot
)


## -----------------------------------------------------------------------------

# Check if 'priorityelasticnet' is available
if (!requireNamespace("priorityelasticnet", quietly = TRUE)) {
  message("The 'priorityelasticnet' package is not installed. Please install it to fully reproduce this vignette.")
} else {
  library(priorityelasticnet)
  # Load the dataset only if the package is available
  data("Pen_Data", package = "priorityelasticnet")
  
}



## -----------------------------------------------------------------------------
dim(Pen_Data)



## -----------------------------------------------------------------------------
blocks <- list(
  block1 = 1:5,     # Block 1: First 5 predictors
  block2 = 6:179,   # Block 2: Next 174 predictors
  block3 = 180:324  # Block 3: Next 145 predictors
  
)

## -----------------------------------------------------------------------------
set.seed(123)

fit_bin <- priorityelasticnet(
  X = as.matrix(Pen_Data[, 1:324]), 
  Y = Pen_Data[, 325],
  family = "binomial", 
  alpha = 0.5, 
  type.measure = "auc",
  blocks = blocks,
  standardize = FALSE
)

## -----------------------------------------------------------------------------
predictions <- predict(fit_bin, type = "response")
head(predictions)


## -----------------------------------------------------------------------------
predictions <- predict(fit_bin, newdata = as.matrix(Pen_Data[, 1:324]), type = "response")
head(predictions)

## -----------------------------------------------------------------------------
library(pROC)
roc_curve <- roc(Pen_Data[, 325], predictions[,1])
plot(roc_curve, col = "red", main = "ROC Curve for Binomial Model")
text(0.1, 0.1, labels = paste("AUC =", round(roc_curve$auc, 2)), col = "black", cex = 1.2)



## -----------------------------------------------------------------------------
# Set seed for reproducibility
set.seed(123)

# Number of observations and predictors
n <- 100  # Number of observations
p <- 50   # Number of predictors
k <- 3    # Number of classes

# Simulate a matrix of predictors
x <- matrix(rnorm(n * p), n, p)

# Simulate a response vector with three classes
y <- factor(sample(1:k, n, replace = TRUE))

## -----------------------------------------------------------------------------
blocks <- list(
  block1 = 1:10,   # First block with predictors 1 to 10
  block2 = 11:30,  # Second block with predictors 11 to 30
  block3 = 31:50   # Third block with predictors 31 to 50
)

## -----------------------------------------------------------------------------
fit_multinom <- priorityelasticnet(
  X = x, 
  Y = y, 
  family = "multinomial", 
  alpha = 0.5, 
  type.measure = "class", 
  blocks = blocks,
  block1.penalization = TRUE,
  lambda.type = "lambda.min",
  standardize = TRUE,
  nfolds = 5
)

## -----------------------------------------------------------------------------
fit_multinom$min.cvm

## -----------------------------------------------------------------------------
fit_multinom$coefficients

## -----------------------------------------------------------------------------
fit_multinom$lambda.min

## -----------------------------------------------------------------------------
fit_no_penalty <-
  priorityelasticnet(
    X,
    Y,
    family = "gaussian",
    type.measure = "mse",
    blocks = blocks,
    block1.penalization = FALSE
  )

## -----------------------------------------------------------------------------
fit_no_penalty

## -----------------------------------------------------------------------------
mcontrol <-missing.control(handle.missingdata = "impute.offset", nfolds.imputation = 5)

fit_missing <- priorityelasticnet(
  X,
  Y,
  family = "gaussian",
  type.measure = "mse",
  blocks = blocks,
  mcontrol = mcontrol
)

## -----------------------------------------------------------------------------
fit_missing

## -----------------------------------------------------------------------------
blocks1 <- list(1:10, 11:30, 31:50)
blocks2 <- list(1:5, 6:20, 21:50)

fit_cvm <-
  cvm_priorityelasticnet(
    X,
    Y,
    blocks.list = list(blocks1, blocks2),
    family = "gaussian",
    type.measure = "mse",
    weights = NULL,
    foldid = NULL
  )

## -----------------------------------------------------------------------------
fit_cvm

## ----eval = FALSE-------------------------------------------------------------
# weightedThreshold(object = fit_bin)

## -----------------------------------------------------------------------------
coef(fit_bin)

## -----------------------------------------------------------------------------
set.seed(123)
X_new <- matrix(rnorm(406 * 324), 406, 324)

predictions < predict(fit_bin, newdata = X_new, type = "response")
head(predictions)

## -----------------------------------------------------------------------------
# Set the random seed for reproducibility
set.seed(1234)

# Simulate high-dimensional data
n <- 200  # Number of observations
p <- 100  # Number of predictors
n_strong <- 10  # Number of strong predictors
n_weak <- 20  # Number of weak predictors

# Design matrix (predictors)
X <- matrix(rnorm(n * p), nrow = n, ncol = p)

# Generate coefficients: strong predictors with large effects, weak with small effects
beta <- c(rep(2, n_strong), rep(0.5, n_weak), rep(0, p - n_strong - n_weak))

# Generate response with Gaussian noise
Y <- X %*% beta + rnorm(n)

## -----------------------------------------------------------------------------
# Define blocks of predictors for the model
blocks <- list(
  strong_block = 1:n_strong,               # Strong predictors
  weak_block = (n_strong + 1):(n_strong + n_weak),  # Weak predictors
  noise_block = (n_strong + n_weak + 1):p  # Noise (irrelevant predictors)
)

## -----------------------------------------------------------------------------
# Run priorityelasticnet with Adaptive Elastic Net
result <- priorityelasticnet(X = X, 
                             Y = Y, 
                             family = "gaussian", 
                             alpha = 0.5, 
                             type.measure = "mse", 
                             blocks = blocks, 
                             adaptive = TRUE,
                             initial_global_weight = FALSE, 
                             verbose = TRUE)

## -----------------------------------------------------------------------------
# Examine the coefficients
cat("Final model coefficients:")
result$coefficients

## -----------------------------------------------------------------------------
# Examine the adaptive weights
cat("Adaptive weights for each predictor:")
result$adaptive_weights

## -----------------------------------------------------------------------------
plot(result$glmnet.fit[[1]], xvar = "lambda", label = TRUE, main = "Coefficient Paths for Strong Block")

## -----------------------------------------------------------------------------
plot(result$glmnet.fit[[2]], xvar = "lambda", label = TRUE, main = "Coefficient Paths for Weak Block")

## -----------------------------------------------------------------------------
plot(result$glmnet.fit[[3]], xvar = "lambda", label = TRUE, main = "Coefficient Paths for Noise Block")

## -----------------------------------------------------------------------------
# Set seed for reproducibility
set.seed(123)

# Number of observations and predictors
n <- 50  # Number of observations
p <- 300  # Number of predictors

# Number of non-zero coefficients
nzc <- trunc(p / 10)

# Simulate predictor matrix
x <- matrix(rnorm(n * p), n, p)

# Simulate regression coefficients for non-zero predictors
beta <- rnorm(nzc)

# Calculate linear predictor
fx <- x[, seq(nzc)] %*% beta / 3

# Calculate hazard function
hx <- exp(fx)

# Simulate survival times using exponential distribution
ty <- rexp(n, hx)

# Generate censoring indicator (30% censoring probability)
tcens <- rbinom(n = n, prob = .3, size = 1)

# Load survival library and create survival object
library(survival)
y <- Surv(ty, 1 - tcens)

## -----------------------------------------------------------------------------
blocks <- list(
  bp1 = 1:20,    # First block with predictors 1 to 20
  bp2 = 21:200,  # Second block with predictors 21 to 200
  bp3 = 201:300  # Third block with predictors 201 to 300
)

## -----------------------------------------------------------------------------
# Fit Cox model using priorityelasticnet
result_cox <- priorityelasticnet(
  x, 
  y, 
  family = "cox", 
  alpha = 1, 
  type.measure = "deviance", 
  blocks = blocks,
  block1.penalization = TRUE,
  lambda.type = "lambda.min",
  standardize = TRUE,
  nfolds = 5,
  adaptive = TRUE,
  initial_global_weight = FALSE
)

## -----------------------------------------------------------------------------
# Examine the coefficients
cat("Final model coefficients:")
result_cox$coefficients


## -----------------------------------------------------------------------------
result_cox$initial_coeff

## -----------------------------------------------------------------------------
# Examine the adaptive weights
cat("Adaptive weights for each predictor:")
result_cox$adaptive_weights

## -----------------------------------------------------------------------------
# Run priorityelasticnet with Adaptive Elastic Net
result_bin <- priorityelasticnet(X = as.matrix(Pen_Data[, 1:324]), Y = Pen_Data[, 325],
                             family = "binomial", alpha = 0.5, type.measure = "auc",
                             blocks = list(bp1 = 1:5, bp2 = 6:179, bp3 = 180:324),
                             standardize = FALSE,
                             adaptive = TRUE,
                             initial_global_weight = FALSE, 
                             verbose = TRUE)

## -----------------------------------------------------------------------------
result_bin$nzero

## -----------------------------------------------------------------------------
result_bin$min.cvm

## -----------------------------------------------------------------------------
result_bin$lambda.min

## -----------------------------------------------------------------------------
result_bin$adaptive_weights

## -----------------------------------------------------------------------------
result_bin$coefficients

## -----------------------------------------------------------------------------
predictions <- predict(result_bin, newdata = as.matrix(Pen_Data[, 1:324]), type = "response")
head(predictions)

## -----------------------------------------------------------------------------
library(pROC)
roc_curve <- roc(Pen_Data[, 325], predictions[,1])
plot(roc_curve, col = "red", main = "ROC Curve for Binomial Model")
text(0.1, 0.1, labels = paste("AUC =", round(roc_curve$auc, 2)), col = "black", cex = 1.2)



## -----------------------------------------------------------------------------
# Set seed for reproducibility
set.seed(123)

# Number of observations and predictors
n <- 100  # Number of observations
p <- 50   # Number of predictors
k <- 3    # Number of classes

# Simulate a matrix of predictors
x <- matrix(rnorm(n * p), n, p)

# Simulate a response vector with three classes
y <- factor(sample(1:k, n, replace = TRUE))

## -----------------------------------------------------------------------------
blocks <- list(
  block1 = 1:10,   # First block with predictors 1 to 10
  block2 = 11:30,  # Second block with predictors 11 to 30
  block3 = 31:50   # Third block with predictors 31 to 50
)

## -----------------------------------------------------------------------------

# Run priorityelasticnet
result_multinom <- priorityelasticnet(
  X = x, 
  Y = y, 
  family = "multinomial", 
  alpha = 0.5, 
  type.measure = "class", 
  blocks = blocks,
  block1.penalization = TRUE,
  lambda.type = "lambda.min",
  standardize = TRUE,
  nfolds = 10,
  adaptive = TRUE,
  initial_global_weight = FALSE
  
)


## -----------------------------------------------------------------------------
result_multinom$coefficients

## -----------------------------------------------------------------------------
result_multinom$adaptive

## -----------------------------------------------------------------------------
result_multinom$adaptive_weights

## -----------------------------------------------------------------------------
result_multinom$glmnet.fit

## -----------------------------------------------------------------------------
result_multinom$min.cvm

## -----------------------------------------------------------------------------
result_multinom$lambda.min