### R code from vignette source 'RCAL-LATE-vig.Rnw'

###################################################
### code chunk number 1: read.iv.data
###################################################
library(RCAL)

data(simu.iv.data)


###################################################
### code chunk number 2: RCAL-LATE-vig.Rnw:80-81
###################################################
simu.iv.data[1:10, 1:6]


###################################################
### code chunk number 3: RCAL-LATE-vig.Rnw:87-95
###################################################
n <- dim(simu.iv.data)[1]
p <- 100 # include the first 100 covariates due to CRAN time constraint

y <- simu.iv.data[,1]
tr <- simu.iv.data[,2]
iv <- simu.iv.data[,3]
x <- simu.iv.data[,3+1:p]
x <- scale(x)


###################################################
### code chunk number 4: RCAL-LATE-vig.Rnw:104-109
###################################################
par(mfrow=c(3,2))
par(mar=c(4,4,2,2))
for (j in 1:6) {
  boxplot(x[,j] ~ iv, ylab=paste("covariate x", j, sep=""), xlab="instrument")
}


###################################################
### code chunk number 5: RCAL-LATE-vig.Rnw:118-123
###################################################
par(mfrow=c(3,2))
par(mar=c(4,4,2,2))
for (j in 1:6) {
  boxplot(x[,j] ~ tr, ylab=paste("covariate x", j, sep=""), xlab="treatment")
}


###################################################
### code chunk number 6: RCAL-LATE-vig.Rnw:132-137
###################################################
par(mfrow=c(3,2))
par(mar=c(4,4,2,2))
for (j in 1:6) {
  plot(x[tr==1,j], y[tr==1], ylab="y", xlab=paste("covariate x", j, sep=""))
}


###################################################
### code chunk number 7: RCAL-LATE-vig.Rnw:144-152
###################################################
pi    <- mean(iv)
del   <- iv/pi-(1-iv)/(1-pi);
del.y <- mean(del*y);
del.tr<- mean(del*tr);
(w<- del.y/del.tr)  # point estimate

g  <- mean((-iv/pi^2-(1-iv)/(1-pi)^2)*(y-w*tr))
sqrt(mean((del*(y-w*tr)-g*(pi-iv))^2)/del.tr^2/n) # standard error


###################################################
### code chunk number 8: RCAL-LATE-vig.Rnw:193-214
###################################################
## regularized calibrated estimation
RNGversion('3.5.0')
set.seed(0) # this affects random split of data in cross validation
late.cv.rcal <- 
late.regu.cv(fold=5*c(1,1,1), nrho=(1+10)*c(1,1,1), rho.seq=NULL,
y, tr, iv, fx=x, gx=x, hx=x, arm=2, d1=1, d2=3, ploss="cal", yloss="gaus")

matrix(unlist(late.cv.rcal$est), ncol=2, byrow=TRUE,
dimnames=list(c("ipw", "or", "est", "var", "ze",
"late.est", "late.var", "late.ze"), c("theta1", "theta0")))

## For comparison, we use the same set of models 
## in regularized maximum likelihood estimation
set.seed(0)
late.cv.rml <- 
late.regu.cv(fold=5*c(1,1,1), nrho=(1+10)*c(1,1,1), rho.seq=NULL,
y, tr, iv, fx=x, gx=x, hx=x, arm=2, d1=1, d2=3, ploss="ml", yloss="gaus")

matrix(unlist(late.cv.rml$est), ncol=2, byrow=TRUE,
dimnames=list(c("ipw", "or", "est", "var", "ze",
"late.est", "late.var", "late.ze"), c("theta1", "theta0")))


###################################################
### code chunk number 9: RCAL-LATE-vig.Rnw:231-305
###################################################
## Computation of standardized calibration differences 
##               within iv=1 group
fips.raw <- rep(mean(iv), n)   #constant propensity scores
fips1.cv.rcal<-late.cv.rcal$mfp[,2]
fips1.cv.rml <-late.cv.rml$mfp[,2]

check1.raw <- mn.ipw(x, iv, fips.raw)
check1.cv.rcal <- mn.ipw(x, iv, fips1.cv.rcal)
check1.cv.rml <- mn.ipw(x, iv, fips1.cv.rml)

par(mfrow=c(2,2))
par(mar=c(4,4,2,2))
plot(check1.raw$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="Raw") 
abline(h=0)

plot(check1.cv.rml$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RML") 
abline(h=0)
abline(h=max(abs(check1.cv.rml$est)) *c(-1,1), lty=2)

nz1.rml <- which(late.cv.rml$ips[[2]]$sel.bet[,1]!=0)-1
text(nz1.rml, -0.3, "x", cex=1)
length(nz1.rml)

plot(check1.cv.rcal$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RCAL") 
abline(h=0)
abline(h=max(abs(check1.cv.rcal$est)) *c(-1,1), lty=2)

nz1.rcal <- which(late.cv.rcal$ips[[2]]$sel.bet[,1]!=0)-1
text(nz1.rcal, -0.3, "x", cex=1)
length(nz1.rcal)

plot(fips1.cv.rml[iv==1], fips1.cv.rcal[iv==1], xlim=c(0,1), ylim=c(0,1), 
     xlab="RML", ylab="RCAL", main="fitted probabilities")
abline(c(0,1))

## Computation of standardized calibration differences 
##            within iv=0 group
fips0.cv.rcal<-late.cv.rcal$mfp[,1]
fips0.cv.rml <-late.cv.rml$mfp[,1]

check0.raw <- mn.ipw(x, 1-iv, fips.raw)
check0.cv.rcal <- mn.ipw(x, 1-iv, fips0.cv.rcal)
check0.cv.rml <- mn.ipw(x, 1-iv, fips0.cv.rml)

par(mfrow=c(2,2))
par(mar=c(4,4,2,2))
plot(check0.raw$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="Raw") 
abline(h=0)

plot(check0.cv.rml$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RML") 
abline(h=0)
abline(h=max(abs(check0.cv.rml$est)) *c(-1,1), lty=2)

nz0.rml <- which(late.cv.rml$ips[[2]]$sel.bet[,1]!=0)-1
text(nz0.rml, -0.3, "x", cex=1)
length(nz0.rml)

plot(check0.cv.rcal$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RCAL") 
abline(h=0)
abline(h=max(abs(check0.cv.rcal$est)) *c(-1,1), lty=2)

nz0.rcal <- which(late.cv.rcal$ips[[1]]$sel.bet[,1]!=0)-1
text(nz0.rcal, -0.3, "x", cex=1)
length(nz0.rcal)

plot(fips0.cv.rml[iv==0], fips0.cv.rcal[iv==0], xlim=c(0,1), ylim=c(0,1), 
     xlab="RML", ylab="RCAL", main="fitted probabilities")
abline(c(0,1))


###################################################
### code chunk number 10: RCAL-LATE-vig.Rnw:312-338
###################################################

par(mfrow=c(2,2))
par(mar=c(4,4,2,2))
plot(check1.raw$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="Raw") 
abline(h=0)

plot(check1.cv.rml$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RML") 
abline(h=0)
abline(h=max(abs(check1.cv.rml$est)) *c(-1,1), lty=2)

nz=which(late.cv.rml$ips[[2]]$sel.bet[,1]!=0)-1
text(nz, -0.3, "x", cex=1)

plot(check1.cv.rcal$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RCAL") 
abline(h=0)
abline(h=max(abs(check1.cv.rcal$est)) *c(-1,1), lty=2)

nz=which(late.cv.rcal$ips[[2]]$sel.bet[,1]!=0)-1
text(nz, -0.3, "x", cex=1)

plot(fips1.cv.rml[iv==1], fips1.cv.rcal[iv==1], xlim=c(0,1), ylim=c(0,1), 
     xlab="RML", ylab="RCAL", main="fitted probabilities")
abline(c(0,1))


###################################################
### code chunk number 11: RCAL-LATE-vig.Rnw:347-372
###################################################
par(mfrow=c(2,2))
par(mar=c(4,4,2,2))
plot(check0.raw$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="Raw") 
abline(h=0)

plot(check0.cv.rml$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RML") 
abline(h=0)
abline(h=max(abs(check0.cv.rml$est)) *c(-1,1), lty=2)

nz=which(late.cv.rml$ips[[2]]$sel.bet[,1]!=0)-1
text(nz, -0.3, "x", cex=1)

plot(check0.cv.rcal$est, xlim=c(0,p), ylim=c(-.3,.3), 
     xlab="", ylab="std diff", main="RCAL") 
abline(h=0)
abline(h=max(abs(check0.cv.rcal$est)) *c(-1,1), lty=2)

nz=which(late.cv.rcal$ips[[1]]$sel.bet[,1]!=0)-1
text(nz, -0.3, "x", cex=1)

plot(fips0.cv.rml[iv==0], fips0.cv.rcal[iv==0], xlim=c(0,1), ylim=c(0,1), 
     xlab="RML", ylab="RCAL", main="fitted probabilities")
abline(c(0,1))


###################################################
### code chunk number 12: RCAL-LATE-vig.Rnw:380-434
###################################################
set.seed(0)
late.path.rcal <- 
late.regu.path(fold=5*c(1,1,1), nrho=(1+10)*c(1,1,1), rho.seq=NULL,
y, tr, iv, fx=x, gx=x, hx=x, arm=1, d1=1, d2=3, ploss="cal", yloss="gaus")

set.seed(0)
late.path.rml <- 
late.regu.path(fold=5*c(1,1,1), nrho=(1+10)*c(1,1,1), rho.seq=NULL,
y, tr, iv, fx=x, gx=x, hx=x, arm=1, d1=1, d2=3, ploss="ml", yloss="gaus")
## Computation of standardized calibration differences 
##    along regularization path within iv=1 group
fips1.path.rcal <- late.path.rcal$mfp[[2]]
mdiff1.path.rcal <- rep(NA, dim(fips1.path.rcal)[2])
rvar1.path.rcal <- rep(NA, dim(fips1.path.rcal)[2])
for (j in 1:dim(fips1.path.rcal)[2]) {
  check1.path.rcal <- mn.ipw(x, iv, fips1.path.rcal[,j])
  mdiff1.path.rcal[j] <- max(abs(check1.path.rcal$est))
  rvar1.path.rcal[j] <- 
  var(1/fips1.path.rcal[iv==1,j])/mean(1/fips1.path.rcal[iv==1,j])^2
}

fips1.path.rml <- late.path.rml$mfp[[2]]
mdiff1.path.rml <- rep(NA, dim(fips1.path.rml)[2])
rvar1.path.rml <- rep(NA, dim(fips1.path.rml)[2])
for (j in 1:dim(fips1.path.rml)[2]) {
  check1.path.rml <- mn.ipw(x, iv, fips1.path.rml[,j])
  mdiff1.path.rml[j] <- max(abs(check1.path.rml$est))
  rvar1.path.rml[j] <- 
  var(1/fips1.path.rml[iv==1,j])/mean(1/fips1.path.rml[iv==1,j])^2
}

par(mfrow=c(1,2))
par(mar=c(4,4,2,2))
plot(late.path.rml$ips[[2]]$nz.all, mdiff1.path.rml, 
     xlim=c(0,p), ylim=c(0,.4), xlab="# nonzero", ylab="std diff")
lines(late.path.rml$ips[[2]]$nz.all[!late.path.rml$ips[[2]]$non.conv],
       mdiff1.path.rml, lty=3)

points(late.path.rcal$ips[[2]]$nz.all[!late.path.rcal$ips[[2]]$non.conv],
       mdiff1.path.rcal, pch=4)
lines(late.path.rcal$ips[[2]]$nz.all[!late.path.rcal$ips[[2]]$non.conv],
       mdiff1.path.rcal, lty=3)

legend(120,.4, c("RML","RCAL"), pch=c(1,4), cex=.6)

#
plot(rvar1.path.rml, mdiff1.path.rml, 
     xlim=c(0,1), ylim=c(0,.4), xlab="rel var", ylab="std diff")
lines(rvar1.path.rml, mdiff1.path.rml, lty=3)

points(rvar1.path.rcal, mdiff1.path.rcal, pch=4)
lines(rvar1.path.rcal, mdiff1.path.rcal, lty=3)

legend(0.6,.4, c("RML","RCAL"), pch=c(1,4), cex=.6)


###################################################
### code chunk number 13: RCAL-LATE-vig.Rnw:437-483 (eval = FALSE)
###################################################
## set.seed(0)
## late.path.rcal <- 
## late.regu.path(fold=5*c(1,1,1), nrho=(1+10)*c(1,1,1), rho.seq=NULL,
## y, tr, iv, fx=x, gx=x, hx=x, arm=0, d1=1, d2=3, ploss="cal", yloss="gaus")
## 
## set.seed(0)
## late.path.rml <- 
## late.regu.path(fold=5*c(1,1,1), nrho=(1+10)*c(1,1,1), rho.seq=NULL,
## y, tr, iv, fx=x, gx=x, hx=x, arm=0, d1=1, d2=3, ploss="ml", yloss="gaus")
## ## Computation of standardized calibration differences 
## ##    along regularization path within iv=0 group
## fips0.path.rcal <- late.path.rcal$mfp[[1]]
## mdiff0.path.rcal <- rep(NA, dim(fips0.path.rcal)[2])
## rvar0.path.rcal <- rep(NA, dim(fips0.path.rcal)[2])
## for (j in 1:dim(fips0.path.rcal)[2]) {
##   check0.path.rcal <- mn.ipw(x, 1-iv, fips0.path.rcal[,j])
##   mdiff0.path.rcal[j] <- max(abs(check0.path.rcal$est))
##   rvar0.path.rcal[j] <- 
##   var(1/fips0.path.rcal[iv==0,j])/mean(1/fips0.path.rcal[iv==0,j])^2
## }
## fips0.path.rml <- late.path.rml$mfp[[1]]
## mdiff0.path.rml <- rep(NA, dim(fips0.path.rml)[2])
## rvar0.path.rml <- rep(NA, dim(fips0.path.rml)[2])
## for (j in 1:dim(fips0.path.rml)[2]) {
##   check0.path.rml <- mn.ipw(x, 1-iv, fips0.path.rml[,j])
##   mdiff0.path.rml[j] <- max(abs(check0.path.rml$est))
##   rvar0.path.rml[j] <- 
##   var(1/fips0.path.rml[iv==0,j])/mean(1/fips0.path.rml[iv==0,j])^2
## }
## par(mfrow=c(1,2))
## par(mar=c(4,4,2,2))
## plot(late.path.rml$ips[[2]]$nz.all, mdiff0.path.rml, 
##      xlim=c(0,p), ylim=c(0,.4), xlab="# nonzero", ylab="std diff")
## lines(late.path.rml$ips[[2]]$nz.all[!late.path.rml$ips[[2]]$non.conv],
##        mdiff0.path.rml, lty=3)
## points(late.path.rcal$ips[[1]]$nz.all[!late.path.rcal$ips[[1]]$non.conv],
##        mdiff0.path.rcal, pch=4)
## lines(late.path.rcal$ips[[1]]$nz.all[!late.path.rcal$ips[[1]]$non.conv],
##        mdiff0.path.rcal, lty=3)
## legend(120,.4, c("RML","RCAL"), pch=c(1,4), cex=.6)
## plot(rvar0.path.rml, mdiff0.path.rml, 
##      xlim=c(0,2.5), ylim=c(0,.4), xlab="rel var", ylab="std diff")
## lines(rvar0.path.rml, mdiff0.path.rml, lty=3)
## points(rvar0.path.rcal, mdiff0.path.rcal, pch=4)
## lines(rvar0.path.rcal, mdiff0.path.rcal, lty=3)
## legend(0.6,.4, c("RML","RCAL"), pch=c(1,4), cex=.6)


###################################################
### code chunk number 14: RCAL-LATE-vig.Rnw:491-514
###################################################
par(mfrow=c(1,2))
par(mar=c(4,4,2,2))
plot(late.path.rml$ips[[2]]$nz.all, mdiff1.path.rml, 
     xlim=c(0,p), ylim=c(0,.4), xlab="# nonzero", ylab="std diff")
lines(late.path.rml$ips[[2]]$nz.all[!late.path.rml$ips[[2]]$non.conv],
       mdiff1.path.rml, lty=3)

points(late.path.rcal$ips[[2]]$nz.all[!late.path.rcal$ips[[2]]$non.conv],
       mdiff1.path.rcal, pch=4)
lines(late.path.rcal$ips[[2]]$nz.all[!late.path.rcal$ips[[2]]$non.conv],
       mdiff1.path.rcal, lty=3)

legend(120,.4, c("RML","RCAL"), pch=c(1,4), cex=.6)

#
plot(rvar1.path.rml, mdiff1.path.rml, 
     xlim=c(0,1), ylim=c(0,.4), xlab="rel var", ylab="std diff")
lines(rvar1.path.rml, mdiff1.path.rml, lty=3)

points(rvar1.path.rcal, mdiff1.path.rcal, pch=4)
lines(rvar1.path.rcal, mdiff1.path.rcal, lty=3)

legend(0.6,.4, c("RML","RCAL"), pch=c(1,4), cex=.6)