## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(message = FALSE,warning = FALSE) library(PPLasso) library(ggplot2) library(cvCovEst) set.seed(123456) ## ----generate Sigma----------------------------------------------------------- p <- 50 # number of variables d <- 10 # number of actives n <- 50 # number of samples actives <- 1:d nonacts <- c(1:p)[-actives] Sigma <- matrix(0, p, p) Sigma[actives, actives] <- 0.3 Sigma[-actives, actives] <- 0.5 Sigma[actives, -actives] <- 0.5 Sigma[-actives, -actives] <- 0.7 diag(Sigma) <- rep(1,p) actives_pred <- 1:5 ## ----X------------------------------------------------------------------------ X_bm <- MASS::mvrnorm(n = n, mu=rep(0,p), Sigma, tol = 1e-6, empirical = FALSE) colnames(X_bm) <- paste0("X",(1:p)) n1=n2=n/2 # 1:1 randomized beta1 <- rep(0,p) beta1[actives] <- 1 beta2 <- beta1 beta2[actives_pred] <- 2 beta <- c(beta1, beta2) TRT1 <- c(rep(1,n1), rep(0,n2)) TRT2 <- c(rep(0,n1), rep(1,n2)) Y <- cbind(X_bm*TRT1,X_bm*TRT2)%*%beta+TRT2+rnorm(n,0,1) ## ----est Sigma---------------------------------------------------------------- cv_cov_est_out <- cvCovEst( dat = X_bm, estimators = c( linearShrinkLWEst, denseLinearShrinkEst, thresholdingEst, poetEst, sampleCovEst ), estimator_params = list( thresholdingEst = list(gamma = c(0.2, 0.4)), poetEst = list(lambda = c(0.1, 0.2), k = c(1L, 2L)) ), cv_loss = cvMatrixFrobeniusLoss, cv_scheme = "v_fold", v_folds = 5 ) Sigma_est <- cov2cor(cv_cov_est_out$estimate) ## ----WLasso model, warning = FALSE, message = FALSE--------------------------- mod <- ProgPredLasso(X1 = X_bm[1:n1, ], X2 = X_bm[(n1+1):n, ], Y = Y, cor_matrix = Sigma_est) ## ----------------------------------------------------------------------------- #alpha1 mod$beta.min[1] ## ----------------------------------------------------------------------------- #alpha2 mod$beta.min[2] ## ----variable selection, figures-side, fig.show="hold", out.width="50%",echo=FALSE,fig.cap="\\label{fig:fig1}Left: Identified prognostic biomarkers. Right: Identified predictive biomarkers."---- beta_min <- mod$beta.min[-c(1,2)] df_beta <- data.frame(beta_est=beta_min, Status = ifelse(c(beta1, beta2-beta1)==0, "non-active", "active")) df_prog <- data.frame(beta_est=beta_min[1:p], Status = ifelse(beta1==0, "false positive", "true prognostic"), index=c(1:p)) df_pred <- data.frame(beta_est=beta_min[(p+1):(2*p)], Status = ifelse(c(beta2-beta1)==0, "false positive", "true predictive"), index=c(1:p)) df_plot_prog <- df_prog[which(df_prog$beta_est!=0), ] df_plot_pred <- df_pred[which(df_pred$beta_est!=0), ] ggplot2::ggplot(data=df_plot_prog, mapping=aes(y=beta_est, x=index, color=Status))+geom_point()+ theme_bw()+ylab("Estimated coefficients")+xlab("Indices of selected variables") ggplot2::ggplot(data=df_plot_pred, mapping=aes(y=beta_est, x=index, color=Status))+geom_point()+ theme_bw()+ylab("Estimated coefficients")+xlab("Indices of selected variables") ## ----------------------------------------------------------------------------- which(beta_min[1:p]!=0) ## ----------------------------------------------------------------------------- which(beta_min[(p+1):(2*p)]!=0)