## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----installation, eval=FALSE-------------------------------------------------
# if (!requireNamespace("devtools", quietly = TRUE)) {
#   install.packages("devtools")
# }
# devtools::install_github("Chukyhenry/PosiR")

## ----eval=FALSE---------------------------------------------------------------
# install.packages("pbapply")
# install.packages("glmnet")

## -----------------------------------------------------------------------------
if (!requireNamespace("dplyr", quietly = TRUE)) {
  stop("Please install the 'dplyr' package to run this vignette.")
}
library(PosiR)
library(dplyr)
library(stats)

## ----message=FALSE------------------------------------------------------------
set.seed(123) # for reproducibility
X <- matrix(rnorm(200 * 2), ncol = 2, dimnames = list(NULL, c("Age", "Income")))
y <- 0.5 * X[, "Age"] + rnorm(200)  # the true intercept is 0, the true coefficient for Age is 0.5, and the true coefficient for Income is 0.

## -----------------------------------------------------------------------------
Q <- list(
  Age_only = 1,              # Model with Intercept + Age
  Age_Income = c(1, 2)       # Model with Intercept + Age + Income
)
# Indices refer to columns of X: 1="Age", 2="Income"

## ----warning=FALSE, message=FALSE---------------------------------------------
result <- simultaneous_ci(X, y, Q, B = 1000, cores = 2, verbose = FALSE)

## -----------------------------------------------------------------------------
# Show the calculated critical value
print(paste("Calculated K_alpha:", round(result$K_alpha, 3)))

# Display the intervals data frame
print(result$intervals)

## -----------------------------------------------------------------------------
plot(result, las.labels = 2, col.ci = "darkblue", main = "Simultaneous Confidence Intervals")

## -----------------------------------------------------------------------------
data(mtcars)
X_mtcars <- as.matrix(mtcars[, c("hp", "wt", "qsec")])
y_mtcars <- mtcars$mpg

## -----------------------------------------------------------------------------
Q_mtcars <- list(
  HP_only = 1,         # Model: mpg ~ Intercept + hp
  WT_only = 2,         # Model: mpg ~ Intercept + wt
  QSEC_only = 3,       # Model: mpg ~ Intercept + qsec
  Full_Model = 1:3     # Model: mpg ~ Intercept + hp + wt + qsec
)
# Indices: 1="hp", 2="wt", 3="qsec"

## -----------------------------------------------------------------------------
result_mtcars <- simultaneous_ci(X = X_mtcars, y = y_mtcars, Q_universe = Q_mtcars,
                                 B = 2000, cores = 2, seed = 123, verbose = FALSE)

# Display the critical value
print(paste("Calculated K_alpha:", round(result_mtcars$K_alpha, 3)))

## ----message=FALSE------------------------------------------------------------
z_alpha_2 <- stats::qnorm(0.975) 

intervals_comparison <- result_mtcars$intervals %>%
  mutate(
    naive_lower = estimate - z_alpha_2 * se_nqj,
    naive_upper = estimate + z_alpha_2 * se_nqj
  ) %>%
  select(model_id, coefficient_name, estimate, lower, upper, naive_lower, naive_upper) %>%
  arrange(model_id, coefficient_name) # Ensure consistent ordering

print(intervals_comparison, digits = 3)

## -----------------------------------------------------------------------------
plot(result_mtcars, las.labels = 2, main = "mtcars Data: Simultaneous CIs")

## -----------------------------------------------------------------------------
set.seed(123)
n_lasso <- 100
p_lasso <- 50
X_lasso <- matrix(rnorm(n_lasso * p_lasso), n_lasso, p_lasso,
                  dimnames = list(NULL, paste0("X", 1:p_lasso)))

# True coefficients: 1 for first 5, 0 for rest
beta_true <- c(rep(1.0, 5), rep(0, p_lasso - 5))
true_intercept <- 0.5

# Generate response (linear model)
y_lasso <- drop(true_intercept + X_lasso %*% beta_true + rnorm(n_lasso))

## -----------------------------------------------------------------------------
# This chunk only runs if glmnet is installed
if (requireNamespace("glmnet", quietly = TRUE)) {
  library(glmnet)
  # Code using glmnet
} else {
  message("glmnet is not available. Skipping this example.")
}
library(glmnet)

# Use cv.glmnet to find optimal lambda
cv_lasso_fit <- cv.glmnet(X_lasso, y_lasso, alpha = 1) # alpha=1 for Lasso

# Get coefficients at lambda.min, excluding the intercept
lasso_coeffs <- coef(cv_lasso_fit, s = "lambda.min")[-1, 1]

# Identify indices of selected variables (non-zero coefficients)
# Indices are relative to the columns of X_lasso (1 to p_lasso)
select_var_index <- which(lasso_coeffs != 0)

# Get names of selected variables
select_var_names <- names(select_var_index)

cat("Lasso selected variables (indices in X_lasso):", paste(select_var_index, collapse = ", "), "\n")
cat("Lasso selected variables (names):", paste(select_var_names, collapse = ", "), "\n")

# If no variables selected (unlikely here), handle gracefully for Q definition
if (length(select_var_index) == 0) {
    warning("Lasso selected no variables. Defining Q with intercept-only and full model.")
    select_var_index1 <- 1 # Just intercept index for design matrix
} else {
    # IMPORTANT: Need indices relative to the design matrix used in simultaneous_ci
    # which includes intercept as column 1. Add 1 to Lasso indices.
    select_var_index1 <- c(1, select_var_index + 1)
}

## -----------------------------------------------------------------------------
# Fallback if glmnet is not installed
if (!requireNamespace("glmnet", quietly = TRUE)) {
  warning("glmnet package not found. Using arbitrary selection for vignette.", call. = FALSE)
  select_var_index <- c(1, 2, 3, 4, 5, 9, 13, 18, 22, 24, 26, 29, 33, 36, 41)
  select_var_names <- paste0("X", select_var_index)
  select_var_index1 <- c(1, select_var_index + 1)
}

## -----------------------------------------------------------------------------
# Indices for X in simultaneous_ci context (assuming add_intercept=TRUE)
# Intercept is 1, X1 is 2, ..., Xp is p+1
full_model_indices <- 1:(p_lasso + 1)

Q_lasso <- list(
  # Model selected by Lasso (Intercept + selected X's)
  LassoSelected = select_var_index1,
  # Full model (Intercept + All X's) - optional, but common for context
  FullModel = full_model_indices
)

# Check if Lasso selection was empty
if (length(select_var_index1) == 1 && select_var_index1 == 1) {
    Q_lasso$LassoSelected <- 1 # Ensure it's just the intercept index
}

print("Models in Q_lasso (indices refer to design matrix with intercept):")
# Print only first few indices for brevity if models are large
print(lapply(Q_lasso, function(idx) if(length(idx)>10) c(idx[1:5], "...", idx[length(idx)]) else idx))


## -----------------------------------------------------------------------------
# Note: We pass the original X matrix (without intercept) to simultaneous_ci
# It will add the intercept based on add_intercept=TRUE
result_lasso <- simultaneous_ci(
  X_lasso, y_lasso, Q_lasso,
  B = 2000, # Recommend B=2000+ for high-dim
  cores = 2, seed = 123, verbose = FALSE
)

## -----------------------------------------------------------------------------
intervals_lasso_comp <- result_lasso$intervals %>%
  filter(model_id == "LassoSelected") %>% # Focus on the Lasso model
  mutate(
    naive_lower = estimate - qnorm(0.975) * se_nqj,
    naive_upper = estimate + qnorm(0.975) * se_nqj
  ) %>%
  select(coefficient_name, estimate, lower, upper, naive_lower, naive_upper) %>%
  arrange(match(coefficient_name, c("(Intercept)", paste0("X", 1:p_lasso)))) # Order numerically

print(intervals_lasso_comp, digits = 3)

## -----------------------------------------------------------------------------
# Plot only coefficients from the Lasso-selected model
selected_plot <- result_lasso$intervals %>%
                            filter(model_id == "LassoSelected") %>%
                            pull(coefficient_name)

plot(result_lasso, subset_pars = selected_plot,
     main = "Simultaneous CIs for Lasso-Selected Model", las.labels = 1)