## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, prompt = TRUE, comment = "" ) ## ----setup, include = FALSE--------------------------------------------------- library(COMPoissonReg) set.seed(1235) ## ----------------------------------------------------------------------------- control = get.control( ymax = 100000, hybrid.tol = 1e-2, truncate.tol = 1e-6 ) ## ----------------------------------------------------------------------------- control = getOption("COMPoissonReg.control") control$ymax control$hybrid.tol control$truncate.tol ## ----------------------------------------------------------------------------- options(COMPoissonReg.control = control) ## ----------------------------------------------------------------------------- ncmp(lambda = 1.5, nu = 1.2) ncmp(lambda = 1.5, nu = 1.2, log = TRUE) ncmp(lambda = 1.5, nu = 1.2, log = TRUE, control = get.control(hybrid.tol = 1e10)) ncmp(lambda = 1.5, nu = 1.2, log = TRUE, control = get.control(hybrid.tol = 1e-10)) ## ----------------------------------------------------------------------------- print_warning = function(x) { print(strwrap(x), quote = FALSE) } ## ----------------------------------------------------------------------------- nu_seq = c(1, 0.5, 0.2, 0.1, 0.05, 0.03) tryCatch({ tcmp(lambda = 1.5, nu = nu_seq) }, warning = print_warning) ## ----------------------------------------------------------------------------- tcmp(lambda = 1.5, nu = nu_seq, control = get.control(ymax = 3e6)) ## ----------------------------------------------------------------------------- tcmp(lambda = 1.2, nu = 0.03, control = get.control(ymax = 1200)) ## ----prompt = FALSE----------------------------------------------------------- library(ggplot2) nu_seq = seq(0.03, 1.5, length.out = 20) nc1 = ncmp(lambda = 0.5, nu = nu_seq, log = TRUE) nc2 = ncmp(lambda = 1.05, nu = nu_seq, log = TRUE) nc3 = ncmp(lambda = 1.20, nu = nu_seq, log = TRUE) ## ----fig.width = 5, fig.height = 3, fig.align = "center", prompt = FALSE, fig.cap = "Log of normalizing constant for $\\lambda = 0.5$ ($\\circ$), $\\lambda = 1.05$ ($\\Delta$), and $\\lambda = 1.20$ ($+$)."---- ggplot() + geom_point(data = data.frame(x = nu_seq, y = nc1), aes(x = x, y = y), pch = 1) + geom_point(data = data.frame(x = nu_seq, y = nc2), aes(x = x, y = y), pch = 2) + geom_point(data = data.frame(x = nu_seq, y = nc3), aes(x = x, y = y), pch = 3) + xlab("nu") + ylab("log of normalizing constant") + theme_bw() ## ----------------------------------------------------------------------------- dcmp(0, lambda = 10, nu = 0.9) dcmp(0:17, lambda = 10, nu = 0.9, log = TRUE) dcmp(c(0, 1, 2), lambda = c(10, 11, 12), nu = c(0.9, 1.0, 1.1), log = TRUE) ## ----------------------------------------------------------------------------- rcmp(50, lambda = 10, nu = 0.9) ## ----------------------------------------------------------------------------- pcmp(0:17, lambda = 10, nu = 0.9) ## ----------------------------------------------------------------------------- qq = seq(0, 0.95, length.out = 10) qcmp(qq, lambda = 10, nu = 0.9) ## ----------------------------------------------------------------------------- tryCatch({ rcmp(1, lambda = 2, nu = 0.01) }, warning = print_warning) ## ----------------------------------------------------------------------------- tryCatch({ qcmp(0.9999999, lambda = 1.5, nu = 0.5) }, warning = print_warning) ## ----fig.width = 3, fig.height = 3, fig.align = "center", prompt = FALSE, fig.show = "hold"---- library(ggplot2) n = 100000 lambda = 0.5 nu = 0.1 x = rcmp(n, lambda, nu) xx = seq(-1, max(x)) ## Include -1 to ensure it gets probability zero qq = seq(0, 0.99, length.out = 100) fx = dcmp(xx, lambda, nu) px = pcmp(xx, lambda, nu) qx = qcmp(qq, lambda, nu) qx_emp = quantile(x, probs = qq) ## ----fig.width = 3, fig.height = 3, prompt = FALSE, fig.cap = "Empirical density of draws (histogram) versus density computed via the dcmp function (points)."---- ggplot() + geom_bar(data = data.frame(x = x), aes(x = x, y = ..prop..), fill = "NA", col = "black") + geom_point(data = data.frame(x = xx[-1], fx = fx[-1]), aes(x, fx)) + ylab("Density") + theme_bw() ## ----fig.width = 3, fig.height = 3, prompt = FALSE, fig.cap = "Empirical CDF of draws (solid line) versus CDF computed via the pcmp function (points)."---- ggplot() + stat_ecdf(data = data.frame(x = x), aes(x), geom = "step") + geom_point(data = data.frame(x = xx, px = px), aes(x, px)) + ylab("Probability") + theme_bw() ## ----fig.width = 3, fig.height = 3, prompt = FALSE, fig.cap = "Empirical quantiles of draws (`o`) versus quantiles computed via the qcmp function (`+`)."---- ggplot() + geom_point(data = data.frame(x = qq, qx_emp = qx_emp), aes(qq, qx_emp), pch = 1) + geom_point(data = data.frame(x = qq, qx = qx), aes(qq, qx), pch = 3) + xlab("Probability") + ylab("Quantile") + theme_bw() ## ----------------------------------------------------------------------------- ecmp(lambda = 10, nu = 1.2) ecmp(lambda = 1.5, nu = 0.5) ecmp(lambda = 1.5, nu = 0.05) ecmp(lambda = 1.5, nu = 0.05, control = get.control(hybrid.tol = 1e-10)) ecmp(lambda = 1.5, nu = 0.05, control = get.control(hybrid.tol = 1e10)) ## ----------------------------------------------------------------------------- vcmp(lambda = 10, nu = 1.2) vcmp(lambda = 1.5, nu = 0.5) vcmp(lambda = 1.5, nu = 0.05) vcmp(lambda = 1.5, nu = 0.05, control = get.control(hybrid.tol = 1e-10)) vcmp(lambda = 1.5, nu = 0.05, control = get.control(hybrid.tol = 1e10)) ## ----------------------------------------------------------------------------- M = tcmp(lambda = 1.5, nu = 0.05) print(M) xx = seq(0, M) sum(xx^3 * dcmp(xx, lambda, nu)) # E(X^3) sum(xx^4 * dcmp(xx, lambda, nu)) # E(X^4) ## ----------------------------------------------------------------------------- qq = seq(0, 0.95, length.out = 20) rzicmp(20, lambda = 1.5, nu = 0.2, p = 0.25) dzicmp(c(0, 1, 2), lambda = 1.5, nu = 0.2, p = 0.25) pzicmp(c(0, 1, 2), lambda = 1.5, nu = 0.2, p = 0.25) qzicmp(qq, lambda = 1.5, nu = 0.2, p = 0.25) ## ----------------------------------------------------------------------------- tryCatch({ qzicmp(0.9999999, lambda = 1.5, nu = 0.5, p = 0.5) }, warning = print_warning) ## ----fig.width = 3, fig.height = 3, fig.align = "center", prompt = FALSE, fig.show = "hold"---- library(ggplot2) n = 100000 lambda = 0.5 nu = 0.1 p = 0.5 x = rzicmp(n, lambda, nu, p) xx = seq(-1, max(x)) ## Include -1 to ensure it gets probability zero qq = seq(0, 0.99, length.out = 100) fx = dzicmp(xx, lambda, nu, p) px = pzicmp(xx, lambda, nu, p) qx = qzicmp(qq, lambda, nu, p) qx_emp = quantile(x, probs = qq) ## ----fig.width = 3, fig.height = 3, prompt = FALSE, fig.cap = "Empirical density of draws (histogram) versus density computed via the dzicmp function (points)."---- ggplot() + geom_bar(data = data.frame(x = x), aes(x = x, y = ..prop..), fill = "NA", col = "black") + geom_point(data = data.frame(x = xx[-1], fx = fx[-1]), aes(x, fx)) + ylab("Density") + theme_bw() ## ----fig.width = 3, fig.height = 3, prompt = FALSE, fig.cap = "Empirical CDF of draws (solid line) versus CDF computed via the pzicmp function (points)."---- ggplot() + stat_ecdf(data = data.frame(x = x), aes(x), geom = "step") + geom_point(data = data.frame(x = xx, px = px), aes(x, px)) + ylab("Probability") + theme_bw() ## ----fig.width = 3, fig.height = 3, prompt = FALSE, fig.cap = "Empirical quantiles of draws (`o`) versus quantiles computed via the qzicmp function (`+`)."---- ggplot() + geom_point(data = data.frame(x = qq, qx_emp = qx_emp), aes(qq, qx_emp), pch = 1) + geom_point(data = data.frame(x = qq, qx = qx), aes(qq, qx), pch = 3) + xlab("Probability") + ylab("Quantile") + theme_bw() ## ----------------------------------------------------------------------------- ezicmp(lambda = 1.5, nu = 0.5, p = 0.1) ezicmp(lambda = 1.5, nu = 0.5, p = c(0.1, 0.2, 0.5)) ## ----------------------------------------------------------------------------- vzicmp(lambda = 1.5, nu = 0.5, p = 0.1) vzicmp(lambda = 1.5, nu = 0.5, p = c(0.1, 0.2, 0.5)) ## ----eval = FALSE, prompt = FALSE--------------------------------------------- # out = glm.cmp(formula.lambda, formula.nu = ~ 1, formula.p = NULL, # data = NULL, init = NULL, fixed = NULL, control = NULL, ...) ## ----------------------------------------------------------------------------- get.init(beta = c(1, 2, 3), gamma = c(-1, 1), zeta = c(-2, -1)) get.init.zero(d1 = 3, d2 = 2, d3 = 2) ## ----------------------------------------------------------------------------- get.fixed(beta = c(1L, 2L), gamma = c(1L)) ## ----eval=FALSE--------------------------------------------------------------- # model.matrix(formula.lambda, data = data) # model.matrix(formula.nu, data = data) # model.matrix(formula.p, data = data) ## ----------------------------------------------------------------------------- control = getOption("COMPoissonReg.control") control$optim.method control$optim.control ## ----------------------------------------------------------------------------- data(freight) print(freight) ## ----------------------------------------------------------------------------- glm.out = glm(broken ~ transfers, data = freight, family = poisson) summary(glm.out) ## ----------------------------------------------------------------------------- cmp.out = glm.cmp(broken ~ transfers, data = freight) print(cmp.out) ## ----------------------------------------------------------------------------- cmp2.out = glm.cmp(broken ~ transfers, formula.nu = ~ transfers, data = freight) print(cmp2.out) ## ----------------------------------------------------------------------------- control = get.control(optim.control = list(maxit = 5, trace = 3, REPORT = 1)) cmp3.out = glm.cmp(broken ~ transfers, data = freight, control = control) ## ----results = 'hide'--------------------------------------------------------- y = freight$broken x = freight$transfers glm.cmp(y ~ x) ## ----results = 'hide'--------------------------------------------------------- freight$offx = 13 freight$offs = 1 glm.cmp(broken ~ transfers + offset(offx), data = freight) glm.cmp(broken ~ transfers + offset(offx), formula.nu = ~1 + offset(offs), data = freight) ## ----results = 'hide'--------------------------------------------------------- y = freight$broken X = model.matrix(~ transfers, data = freight) S = model.matrix(~ 1, data = freight) offs = get.offset(x = rep(13, nrow(freight)), s = rep(1, nrow(freight))) cmp.raw.out = glm.cmp.raw(y, X, S, offset = offs) ## ----------------------------------------------------------------------------- logLik(cmp.out) ## Log-likelihood evaluated at MLE. AIC(cmp.out) ## AIC evaluated at MLE. BIC(cmp.out) ## BIC evaluated at MLE. coef(cmp.out) ## Estimates of theta as a flat vector coef(cmp.out, type = "list") ## Estimates of theta as a named list vcov(cmp.out) ## Estimated covariance matrix of theta hat sdev(cmp.out) ## Standard deviations from vcov(...) diagonals sdev(cmp.out, type = "list") ## Standard deviations as a named list ## ----------------------------------------------------------------------------- predict(cmp.out) predict(cmp.out, type = "link") ## ----fig.width = 3, fig.height = 3, fig.align = "center", prompt=FALSE-------- # Prepare new data to fit by formula interface new.df = data.frame(transfers = 0:10) # Prepare new data to fit by raw interface X = model.matrix(~ transfers, data = new.df) S = model.matrix(~ 1, data = new.df) new.data = get.modelmatrix(X = X, S = S) # Pass new data to model from by formula interface y.hat.new = predict(cmp.out, newdata = new.df) # Pass new data to model from by raw interface y.hat.new = predict(cmp.raw.out, newdata = new.data) # Compute predictions for links predict.out = predict(cmp.out, newdata = new.df, type = "link") # Plot predictions ggplot() + geom_point(data = new.df, aes(transfers, y.hat.new)) + xlab("Number of transfers") + ylab("Predicted number broken") + theme_bw() ## ----------------------------------------------------------------------------- print(y.hat.new) print(predict.out) ## ----------------------------------------------------------------------------- leverage(cmp.out) ## ----------------------------------------------------------------------------- res.raw = residuals(cmp.out) res.qtl = residuals(cmp.out, type = "quantile") ## ----------------------------------------------------------------------------- link.hat = predict(cmp.out, type = "link") vv = vcmp(link.hat$lambda, link.hat$nu) hh = leverage(cmp.out) res.pearson = res.raw / sqrt(vv*(1-hh)) ## ----fig.width = 3, fig.height = 3, fig.align = "center", prompt = FALSE, fig.show = "hold"---- plot.fit.res = function(y.hat, res) { ggplot(data.frame(y = y.hat, res = res)) + geom_point(aes(y, res)) + xlab("Fitted Value") + ylab("Residual Value") + theme_bw() + theme(plot.title = element_text(size = 10)) } plot.qq.res = function(res) { ggplot(data.frame(res = res), aes(sample = res)) + stat_qq() + stat_qq_line() + theme_bw() + theme(plot.title = element_text(size = 10)) } y.hat = predict(cmp.out) plot.fit.res(y.hat, res.raw) + ggtitle("Fitted Values vs. Raw Residuals") plot.qq.res(res.raw) + ggtitle("Q-Q Plot of Raw Residuals") plot.fit.res(y.hat, res.pearson) + ggtitle("Fitted Values vs. Pearson Residuals") plot.qq.res(res.pearson) + ggtitle("Q-Q Plot of Pearson Residuals") plot.fit.res(y.hat, res.qtl) + ggtitle("Fitted Values vs. Quantile Residuals") plot.qq.res(res.qtl) + ggtitle("Q-Q Plot of Quantile Residuals") ## ----------------------------------------------------------------------------- mean(res.raw^2) ## ----------------------------------------------------------------------------- equitest(cmp.out) ## ----------------------------------------------------------------------------- deviance(cmp.out) ## ----------------------------------------------------------------------------- cmp.boot = parametric.bootstrap(cmp.out, reps = 100) head(cmp.boot) ## ----------------------------------------------------------------------------- t(apply(cmp.boot, 2, quantile, c(0.025,0.975))) ## ----prompt=FALSE------------------------------------------------------------- set.seed(1234) n = 200 x = rnorm(n, 500, 10) X = cbind(intercept = 1, slope = x) S = matrix(1, n, 1) beta_true = c(-0.05, 0.05) gamma_true = 2 lambda_true = exp(X %*% beta_true) nu_true = exp(S %*% gamma_true) y = rcmp(n, lambda_true, nu_true) ## ----------------------------------------------------------------------------- summary(x) summary(y) ## ----------------------------------------------------------------------------- tryCatch({ glm.cmp(y ~ x, formula.nu = ~ 1) }, error = print_warning) ## ----------------------------------------------------------------------------- glm.cmp(y ~ scale(x), formula.nu = ~ 1) ## ----------------------------------------------------------------------------- glm.cmp(y ~ log(x), formula.nu = ~ 1) ## ----------------------------------------------------------------------------- control = get.control(optim.method = "BFGS", optim.control = list(maxit = 200)) suppressWarnings({ cmp.out = glm.cmp(y ~ x, formula.nu = ~ 1, control = control) print(cmp.out) }) ## ----prompt=FALSE------------------------------------------------------------- set.seed(1234) n = 200 x = runif(n, 1, 2) X = cbind(intercept = 1, slope = x) S = matrix(1, n, 1) beta_true = c(1, 1) gamma_true = -0.95 lambda_true = exp(X %*% beta_true) nu_true = exp(S %*% gamma_true) y = rcmp(n, lambda_true, nu_true) ## ----------------------------------------------------------------------------- summary(x) summary(y) ## ----------------------------------------------------------------------------- tryCatch({ glm.cmp(y ~ x, formula.nu = ~ 1) }, error = print_warning) ## ----------------------------------------------------------------------------- init = get.init(beta = beta_true, gamma = gamma_true) glm.cmp(y ~ x, formula.nu = ~ 1, init = init) ## ----------------------------------------------------------------------------- control = get.control(optim.method = "Nelder-Mead", optim.control = list(maxit = 1000)) glm.cmp(y ~ x, formula.nu = ~ 1, control = control) ## ----------------------------------------------------------------------------- data(couple) head(couple) ## ----------------------------------------------------------------------------- glm.out = glm(UPB ~ EDUCATION + ANXIETY, data = couple, family = poisson) summary(glm.out) ## ----------------------------------------------------------------------------- zicmp0.out = glm.cmp(UPB ~ EDUCATION + ANXIETY, formula.nu = ~ 1, formula.p = ~ EDUCATION + ANXIETY, data = couple) print(zicmp0.out) ## ----------------------------------------------------------------------------- pred.out = predict(zicmp0.out, type = "link") summary(pred.out$lambda) ## ----prompt=FALSE------------------------------------------------------------- library(numDeriv) g = function(gamma0) { -ncmp(lambda = exp(-0.25), nu = exp(gamma0), log = TRUE) } dat = data.frame(gamma0 = seq(0, -13), g = NA, d_g = NA, d2_g = NA) for (j in 1:nrow(dat)) { gamma0 = dat$gamma0[j] dat$g[j] = g(gamma0) dat$d_g[j] = numDeriv::grad(func = g, x = gamma0) dat$d2_g[j] = numDeriv::hessian(func = g, x = gamma0) } ## ----------------------------------------------------------------------------- print(dat) ## ----prompt=FALSE------------------------------------------------------------- init = coef(zicmp0.out, type = "list") y = couple$UPB X = model.matrix(~ EDUCATION + ANXIETY, data = couple) S = model.matrix(~ 1, data = couple) W = model.matrix(~ EDUCATION + ANXIETY, data = couple) control = get.control(optim.method = "BFGS") zicmp.out = glm.zicmp.raw(y, X, S, W, init = get.init(beta = c(-1,0,0), gamma = -Inf, zeta = c(-1,0,0)), fixed = get.fixed(gamma = 1L), control = control) ## ----------------------------------------------------------------------------- print(zicmp.out) ## ----------------------------------------------------------------------------- logLik(zicmp.out) ## Log-likelihood evaluated at MLE AIC(zicmp.out) ## AIC evaluated at MLE BIC(zicmp.out) ## BIC evaluated at MLE coef(zicmp.out) ## Estimates of theta as a flat vector coef(zicmp.out, type = "list") ## Estimates of theta as a named list vcov(zicmp.out) ## Estimated covariance matrix of theta hat sdev(zicmp.out) ## Standard deviations from vcov(...) diagonals sdev(zicmp.out, type = "list") ## Standard deviations as a named list equitest(zicmp0.out) ## Likelihood ratio test for H_0: gamma = 0 tryCatch({ ## An error is thrown for model with fixed gamma equitest(zicmp.out) }, error = print_warning) ## ----------------------------------------------------------------------------- y.hat = predict(zicmp.out) ## Fitted values based on ecmp link.hat = predict(zicmp.out, type = "link") head(y.hat) head(link.hat) ## ----fig.width = 3, fig.height = 3, fig.align = "center", prompt = FALSE, fig.show = "hold"---- res.raw = residuals(zicmp.out, type = "raw") res.qtl = residuals(zicmp.out, type = "quantile") plot.fit.res(y.hat, res.raw) + ggtitle("Fitted Values vs. Raw Residuals") plot.qq.res(res.raw) + ggtitle("Q-Q Plot of Raw Residuals") plot.fit.res(y.hat, res.qtl) + ggtitle("Fitted Values vs. Quantile Residuals") plot.qq.res(res.qtl) + ggtitle("Q-Q Plot of Quantile Residuals") ## ----prompt = FALSE----------------------------------------------------------- new.df = data.frame(EDUCATION = round(1:20 / 20), ANXIETY = seq(-3,3, length.out = 20)) X.new = model.matrix(~ EDUCATION + ANXIETY, data = new.df) S.new = model.matrix(~ 1, data = new.df) W.new = model.matrix(~ EDUCATION + ANXIETY, data = new.df) new.data = get.modelmatrix(X.new, S.new, W.new) # For model fit using raw interface, use get.modelmatrix to prepare new design # matrices, offsets, etc y.hat.new = predict(zicmp.out, newdata = new.data) # For models fit with the formula interface, pass a data.frame with the same # structure as used in the fit. y.hat.new = predict(zicmp0.out, newdata = new.df) ## ----------------------------------------------------------------------------- print(y.hat.new) ## ----eval = FALSE------------------------------------------------------------- # zicmp.boot = parametric.bootstrap(zicmp.out, reps = 100) # head(zicmp.boot) # apply(zicmp.boot, 2, quantile, c(0.025,0.975))