## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ConformalSmallest) ## ----------------------------------------------------------------------------- df=3 d = 1 x_test <- seq(0,5,by=0.2) alpha=0.1 n <- 500 #number of training samples nrep <- 1 #number of independent trials nrep2 <- 100 rho <- 0.5 evaluations <- expand.grid(1:nrep, n, x_test,"CQR") no_eval <- nrow(evaluations) up_mat <- lo_mat <- width_mat <- cov_mat <- data.frame(number = rep(0, no_eval), rep = evaluations[,1], nset = evaluations[,2], X_test = evaluations[,3], method = evaluations[,4]) colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method") set.seed(1) X=as.matrix(runif(n,0,5)) eps1=rnorm(n) eps2=rnorm(n) Y=rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01)*eps2 #X0=as.matrix(x_test) X0 = runif(nrep2,0,5) X0 = as.matrix(X0[order(X0)]) eps01=rnorm(nrep2) eps02=rnorm(nrep2) Y0=rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01)*eps02 beta_fixed = 0.05 mtry_fixed = 1 ntree_fixed = 100 tmp = try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha)) while (class(tmp)=="try-error"){ tmp = try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE) } width_vec_cqr = tmp$pred_set(X0, Y0)[[2]] cov_vec_cqr = tmp$pred_set(X0, Y0)[[1]] y_lo_cqr <- tmp$pred_set(X0, Y0)[[3]] y_up_cqr <- tmp$pred_set(X0, Y0)[[4]] quant_lo_cqr <- tmp$pred_set(X0, Y0)[[5]] quant_hi_cqr <- tmp$pred_set(X0, Y0)[[6]] #source('cqr_function_conditional.r') method = "efficient" split <- 1/2 beta_grid <- seq(0.01, 0.4, by=0.01) mtry_grid <- 1 ntree_grid <- seq(50, 400, by = 50) tmp = try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha)) while (class(tmp)=="try-error"){ tmp = try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE) } width_vec_efcp = tmp$pred_set(X0, Y0)[[2]] cov_vec_efcp = tmp$pred_set(X0, Y0)[[1]] y_lo_efcp <- tmp$pred_set(X0, Y0)[[3]] y_up_efcp <- tmp$pred_set(X0, Y0)[[4]] quant_lo_efcp <- tmp$pred_set(X0, Y0)[[5]] quant_hi_efcp <- tmp$pred_set(X0, Y0)[[6]] method = "valid" split <- c(1/2,1/2) tmp = try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha)) while (class(tmp)=="try-error"){ tmp = try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE) } width_vec_vfcp = tmp$pred_set(X0, Y0)[[2]] cov_vec_vfcp = tmp$pred_set(X0, Y0)[[1]] y_lo_vfcp <- tmp$pred_set(X0, Y0)[[3]] y_up_vfcp <- tmp$pred_set(X0, Y0)[[4]] quant_lo_vfcp <- tmp$pred_set(X0, Y0)[[5]] quant_hi_vfcp <- tmp$pred_set(X0, Y0)[[6]] ## ----plot--------------------------------------------------------------------- options(repr.plot.width=18, repr.plot.height=6) par(mfrow=c(1,3)) plot(X0,Y0,pch=1,col="black",main="CQR: predicted intervals",ylim=c(-2.5,7.5), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) a = data.frame(x = X0, y = y_lo_cqr) b = data.frame(x = c(1:100), y = y_up_cqr) c1 <- rgb(173,216,230,max = 255, alpha = 200, names = "lt.blue") polygon(c(a$x, rev(a$x)), c(b$y ,rev(a$y)), col = c1,border=NA ) points(X0, quant_lo_cqr,col="blue",pch=20) points(X0, quant_hi_cqr,col="red",pch=20) plot(X0,Y0,pch=1,col="black",main="EFCP: predicted intervals",ylim=c(-2.5,7.5), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) a = data.frame(x = X0, y = y_lo_efcp) b = data.frame(x = c(1:100), y = y_up_efcp) c1 <- rgb(173,216,230,max = 255, alpha = 200, names = "lt.blue") polygon(c(a$x, rev(a$x)), c(b$y ,rev(a$y)), col = c1,border=NA ) points(X0, quant_lo_efcp,col="blue",pch=20) points(X0, quant_hi_efcp,col="red",pch=20) plot(X0,Y0,pch=1,col="black",main="VFCP: predicted intervals",ylim=c(-2.5,7.5), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) a = data.frame(x = X0, y = y_lo_vfcp) b = data.frame(x = c(1:100), y = y_up_vfcp) c1 <- rgb(173,216,230,max = 255, alpha = 200, names = "lt.blue") polygon(c(a$x, rev(a$x)), c(b$y ,rev(a$y)), col = c1,border=NA ) points(X0, quant_lo_vfcp,col="blue",pch=20) points(X0, quant_hi_vfcp,col="red",pch=20) print(paste0("CQR average coverage: ",mean(cov_vec_cqr), " average width: ",mean(width_vec_cqr) )) print(paste0("EFCP average coverage: ",mean(cov_vec_efcp), " average width: ",mean(width_vec_efcp) )) print(paste0("VFCP average coverage: ",mean(cov_vec_vfcp), " average width: ",mean(width_vec_vfcp) )) ## ----eval=FALSE--------------------------------------------------------------- # n <- 400 # x_test = seq(0,5,by=0.2) # alpha <- 0.1 #miscoverage level # nrep <- 10 #number of independent trials # nrep2 <- 100 #number of y's for each test point x # # evaluations <- expand.grid(1:nrep, n, x_test, c("efficient", "valid","CQR")) # no_eval <- nrow(evaluations) # ntree_mat <- beta_mat <- cqr_method_mat <- width_mat <- cov_mat <- data.frame(number = rep(0, no_eval), # rep = evaluations[,1], # nset = evaluations[,2], # X_test = evaluations[,3], # method = evaluations[,4]) # colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method") # # for(idx in 1:nrow(evaluations)){ # set.seed(evaluations[idx, 1]) # # # x0 = evaluations[idx, 3] # # # X <- as.matrix(runif(n,0,5)) # eps1 <- rnorm(n) # eps2 <- rnorm(n) # Y <- rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01*eps2) # # X0 <- as.matrix(rep(x0,nrep2)) # eps01 <- rnorm(nrep2) # eps02 <- rnorm(nrep2) # Y0 <- rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01*eps02) # # width_mat[idx,3] <- cov_mat[idx, 3] <- n # method <- evaluations[idx, 4] # # if (method =="CQR"){ # beta_fixed <- 0.05 # mtry_fixed <- 1 # ntree_fixed <- 100 # # tmp <- try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha)) # # while (class(tmp)=="try-error"){ # # tmp <- try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE) # # } # width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[2]]) # cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]]) # }else{ if(method == "valid"){ # split <- c(1/2, 1/2) # } else { # split <- 1/2 # } # # beta_grid <- seq(1e-03, 4, length = 20)*alpha # mtry_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d)) # ntree_grid <- seq(50, 400, by = 50) # # tmp <- try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha)) # # while (class(tmp)=="try-error"){ # tmp <- try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE) # } # # beta_mat[idx,1] = tmp$beta # ntree_mat[idx,1] = tmp$ntree # cqr_method_mat[idx, 1]= tmp$cqr_method # width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[2]]) # cov_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]]) # } # } # # pois_n400_reps100=list(x_test, n,nrep,width_mat, cov_mat,beta_mat, ntree_mat, cqr_method_mat, evaluations, alpha) # save(pois_n400_reps100, file = "pois_n400_reps100.rda" ) ## ----------------------------------------------------------------------------- data(pois_n400_reps100,package = "ConformalSmallest") x_test=pois_n400_reps100[[1]] width_mat=pois_n400_reps100[[4]] cov_mat=pois_n400_reps100[[5]] beta_mat=pois_n400_reps100[[6]] ntree_mat=pois_n400_reps100[[7]] cqr_method_mat=pois_n400_reps100[[8]] evaluations=pois_n400_reps100[[9]] alpha=pois_n400_reps100[[10]] width_cqr <- sd_width_cqr <- width_efcp <- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL for(x in x_test){ TMP <- width_mat[evaluations[,4] == "efficient", ] TMP_prime <- TMP[TMP[,4] == x,] TMP <- width_mat[evaluations[,4] == "valid", ] TMP_prime_vfcp <- TMP[TMP[,4] == x,] TMP <- width_mat[evaluations[,4] == "CQR", ] TMP_prime_cqr <- TMP[TMP[,4] == x,] width_efcp <- c(width_efcp, mean(TMP_prime[,1] )) width_vfcp <- c(width_vfcp, mean(TMP_prime_vfcp[,1])) width_cqr <- c(width_cqr, mean(TMP_prime_cqr[,1])) #width_efcp <- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1])) #sd_width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep)) #width_vfcp <- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1])) #sd_width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep)) #width_cqr <- c(width_cqr, mean(TMP_prime_cqr[,1] / TMP_prime_vfcp[,1])) #sd_width_cqr <- c(sd_width_cqr, sd(TMP_prime_cqr[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep)) } cov_cqr <-sd_cov_cqr <-cov_efcp <- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL for(x in x_test){ TMP <- cov_mat[evaluations[,4] == "efficient", ] TMP_prime <- TMP[TMP[,4] == x,] cov_efcp <- c( cov_efcp, mean(TMP_prime[,1] ) ) sd_cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep)) TMP <- cov_mat[evaluations[,4] == "valid", ] TMP_prime <- TMP[TMP[,4] == x,] cov_vfcp <- c(cov_vfcp, mean(TMP_prime[,1])) sd_cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep)) TMP <- cov_mat[evaluations[,4] == "CQR", ] TMP_prime <- TMP[TMP[,4] == x,] cov_cqr <- c(cov_cqr, mean(TMP_prime[,1])) sd_cov_cqr <- c(sd_cov_cqr, sd(TMP_prime[,1])/sqrt(nrep)) } ## ----------------------------------------------------------------------------- c1 <- rgb(173,216,230,max = 255, alpha = 80, names = "lt.blue") c2 <- rgb(255,192,203, max = 255, alpha = 80, names = "lt.pink") oldpar=par(mfrow=c(1,2)) plot(factor(cqr_method_mat[evaluations[,4] == "valid",1]),col=c1, main="EFCP",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,xlab="Method",ylab="Frequency") #legend("right", legend=c("EFCP", "VFCP"),fill=c(c1, c2)) plot(factor(cqr_method_mat[evaluations[,4] == "efficient",1]),col=c2, main="VFCP",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,xlab="Method",ylab="Frequency") ## ----------------------------------------------------------------------------- oldpar=par(mfrow=c(1,2)) hist(ntree_mat[evaluations[,4] == "efficient",1],col=c1,breaks=ntree_grid,main="EFCP",xlab="ntree",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) hist(ntree_mat[evaluations[,4] == "valid",1],col=c2,breaks=ntree_grid,main="VFCP",xlab="ntree",cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) #legend("left", legend=c("EFCP", "VFCP"), fill=c(c1, c2)) ## ----------------------------------------------------------------------------- oldpar=par(mfrow=c(1,2)) hist(beta_mat[evaluations[,4] == "efficient",1],col=c1,breaks=seq(range(beta_mat[,1])[1],range(beta_mat[,1])[2],0.1),main="EFCP",xlab=expression(beta),cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) hist(beta_mat[evaluations[,4] == "valid",1],col=c2,breaks=seq(range(beta_mat[,1])[1],range(beta_mat[,1])[2],0.1),main="VFCP",xlab=expression(beta),cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) #legend("top", legend=c("EFCP", "VFCP"),fill=c(c1, c2)) ## ----------------------------------------------------------------------------- oldpar=par(mfrow=c(1,2)) plot(x_test, width_efcp, type = 'l', ylim = c(0,10), lwd = 2,ylab="Width", cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) #lines(x_test, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2) #lines(x_test, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2) lines(x_test, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red") #lines(x_test, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red") #lines(x_test, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red") lines(x_test, width_cqr, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "blue") legend("topright", legend=c("EFCP", "VFCP","CQR"), col=c("black","red", "blue"), lty=1, lwd=2) plot(x_test, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2,ylab="Conditional Coverage", cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5) lines(x_test, cov_vfcp, type = 'l', col = "red", lwd = 2) lines(x_test, cov_cqr, type = 'l', col = "blue", lwd = 2) legend(0,0.2, legend=c("EFCP", "VFCP","CQR"), col=c("black","red", "blue"), lty=1) abline(h = 1-alpha,lty=2) print(paste0("CQR average coverage: ",mean(cov_cqr), " average width: ",mean(width_cqr) ) ) print(paste0("EFCP average coverage: ",mean(cov_efcp), " average width: ",mean(width_efcp) ) ) print(paste0("VFCP average coverage: ",mean(cov_vfcp), " average width: ",mean(width_vfcp) ) ) par(oldpar)