## -----------------------------------------------------------------------------
#| label: preliminaries
#| include: false
library("betareg")

knitr::opts_chunk$set(
  engine = "R",
  collapse = TRUE,
  comment = "##",
  message = FALSE,
  warning = FALSE,
  echo = TRUE
)
options(width = 70, prompt = "R> ", continue = "+  ", useFancyQuotes = FALSE, digits = 5)

combine <- function(x, sep, width) {
  cs <- cumsum(nchar(x))
  remaining <- if (any(cs[-1] > width)) combine(x[c(FALSE, cs[-1] > width)], sep, width)
  c(paste(x[c(TRUE, cs[-1] <= width)], collapse= sep), remaining)
}
prettyPrint <- function(x, sep = " ", linebreak = "\n\t", width = getOption("width")) {
  x <- strsplit(x, sep)[[1]]
  paste(combine(x, sep, width), collapse = paste(sep, linebreak, collapse = ""))
}
cache <- FALSE
enumerate <- function(x) paste(paste(x[-length(x)], collapse = ", "), x[length(x)], sep = " and ")
betamix_methods <- enumerate(paste("`", gsub("\\.betamix", "", as.character(methods(class = "betamix"))), "`", sep = ""))


## ----eval=FALSE---------------------------------------------------------------
# betareg(formula, data, subset, na.action, weights, offset,
#   link = "logit", link.phi = NULL, type = c("ML", "BC", "BR"),
#   control = betareg.control(...), model = TRUE, y = TRUE, x = FALSE, ...)


## ----eval=FALSE---------------------------------------------------------------
# betatree(formula, partition, data, subset, na.action, weights, offset,
#   link = "logit", link.phi = "log", control = betareg.control(), ...)


## ----eval=FALSE---------------------------------------------------------------
# betamix(formula, data, k, fixed, subset, na.action,
#   link = "logit", link.phi = "log", control = betareg.control(...),
#   FLXconcomitant = NULL, extra_components,
#   verbose = FALSE, ID, nstart = 3, FLXcontrol = list(), cluster = NULL,
#   which = "BIC", ...)


## ----include=FALSE------------------------------------------------------------
data("ReadingSkills", package = "betareg")
mean_accuracy <-
  format(round(with(ReadingSkills, tapply(accuracy, dyslexia, mean)), digits = 3),
         nsmall = 3)
mean_iq <-
  format(round(with(ReadingSkills, tapply(iq, dyslexia, mean)), digits = 3),
         nsmall = 3)


## -----------------------------------------------------------------------------
#| echo: false
#| fig-width: 6
#| fig-height: 5.5
#| out-width: 100%
#| label: fig-ReadingSkills
#| fig-cap: "Reading skills data from @betareg:Smithson+Verkuilen:2006. Linearly transformed reading accuracy by IQ score and dyslexia status (control, blue vs. dyslexic, red). Fitted curves correspond to beta regression (solid) and OLS regression with logit-transformed dependent variable (dashed)."
data("ReadingSkills", package = "betareg")
rs_ols <- lm(qlogis(accuracy) ~ dyslexia * iq, data = ReadingSkills)
rs_beta <- betareg(accuracy ~ dyslexia * iq | dyslexia + iq,
  data = ReadingSkills, hessian = TRUE)
cl1 <- hcl(c(260, 0), 90, 40)
cl2 <- hcl(c(260, 0), 10, 95)
plot(accuracy ~ iq, data = ReadingSkills, col = cl2[as.numeric(dyslexia)],
  main = "Reading skills data", xlab = "IQ score", ylab = "Reading accuracy",
  pch = c(19, 17)[as.numeric(dyslexia)], cex = 1.5)
points(accuracy ~ iq, data = ReadingSkills, cex = 1.5,
  pch = (1:2)[as.numeric(dyslexia)], col = cl1[as.numeric(dyslexia)])
nd <- data.frame(dyslexia = "no", iq = -30:30/10)
lines(nd$iq, predict(rs_beta, nd), col = cl1[1], lwd = 2)
lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[1], lty = 2, lwd = 2)
nd <- data.frame(dyslexia = "yes", iq = -30:30/10)
lines(nd$iq, predict(rs_beta, nd), col = cl1[2], lwd = 2)
lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[2], lty = 2, lwd = 2)
legend("topleft", c("control", "dyslexic", "betareg", "lm"),
  lty = c(NA, NA, 1:2), pch = c(19, 17, NA, NA), lwd = 2,
  col = c(cl2, 1, 1), bty = "n")
legend("topleft", c("control", "dyslexic", "betareg", "lm"),
  lty = c(NA, NA, 1:2), pch = c(1, 2, NA, NA),
  col = c(cl1, NA, NA), bty = "n")


## -----------------------------------------------------------------------------
#| label: ReadingSkills-bias
data("ReadingSkills", package = "betareg")
rs_f <- accuracy ~ dyslexia * iq | dyslexia * iq
rs_ml <- betareg(rs_f, data = ReadingSkills, type = "ML")
rs_bc <- betareg(rs_f, data = ReadingSkills, type = "BC")
rs_br <- betareg(rs_f, data = ReadingSkills, type = "BR")


## -----------------------------------------------------------------------------
#| label: ReadingSkills-bias-table
#| echo: false
#| results: asis
rs_list <- list(rs_ml, rs_bc, rs_br)
cf <- paste("$", sapply(round(sapply(rs_list, coef), digits = 3), format, nsmall = 3), "$", sep = "")
se <- paste("$", format(round(sapply(rs_list, function(x) sqrt(diag(vcov(x)))), digits = 3), nsmall = 3), "$", sep = "")
ll <- paste("$", format(round(sapply(rs_list, logLik), digits = 3), nsmall = 3), "$", sep = "")
cfse <- matrix(as.vector(rbind(cf, se)), ncol = 3)
cfse <- cbind(
  c("Mean", rep("", 7), "Precision", rep("", 7)),
  rep(as.vector(rbind(c("(Intercept)", "`dyslexia`", "`iq`", "`dyslexia:iq`"), "")), 2),
  cfse)
cfse <- rbind(cfse, c("Log-likelihood", "", ll))
knitr::kable(cfse, align = c("l", "l", "r", "r", "r"), col.names = c("", "", "Maximum likelihood", "Bias correction", "Bias reduction"))


## -----------------------------------------------------------------------------
#| echo: false
#| fig-height: 6.5
#| fig-width: 6.5
#| out-width: 100%
#| label: fig-readingskillsbias
#| fig-cap: "Scatterplots of the logarithm of the estimated precision parameters $\\log(\\phi_i)$ based on the maximum likelihood, bias-corrected and bias-reduced estimates. The dashed black line is the main diagonal, the solid red line is a scatterplot smoother."
pr_phi <- sapply(list("Maximum likelihood" = rs_ml,
                      "Bias correction" = rs_bc,
                      "Bias reduction" = rs_br), predict, type = "precision")
pairs(log(pr_phi), panel = function(x, y, ...) {
  panel.smooth(x, y, ...)
  abline(0, 1, lty = 2)
  })


## -----------------------------------------------------------------------------
#| label: ReadingSkills-noise
#| echo: true
suppressWarnings(RNGversion("3.5.0"))
set.seed(1071)
n <- nrow(ReadingSkills)
ReadingSkills$x1 <- rnorm(n)
ReadingSkills$x2 <- runif(n)
ReadingSkills$x3 <- factor(sample(0:1, n, replace = TRUE))


## -----------------------------------------------------------------------------
#| label: ReadingSkills-tree
rs_tree <- betatree(accuracy ~ iq | iq, ~ dyslexia + x1 + x2 + x3,
  data = ReadingSkills, minsize = 10)


## -----------------------------------------------------------------------------
#| label: ReadingSkills-tree2
#| echo: true
#| eval: false
# rs_tree <- betatree(accuracy ~ iq | iq | dyslexia + x1 + x2 + x3,
#   data = ReadingSkills, minsize = 10)


## -----------------------------------------------------------------------------
#| label: ReadingSkills-tree3
#| eval: false
# plot(rs_tree)


## -----------------------------------------------------------------------------
#| echo: false
#| fig-height: 7
#| fig-width: 10
#| out-width: 100%
#| label: fig-betatree
#| fig-cap: "Partitioned beta regression model for the `ReadingSkills` data."
plot(rs_tree)


## -----------------------------------------------------------------------------
#| label: ReadingSkills-tree-coef
coef(rs_tree)


## -----------------------------------------------------------------------------
#| label: ReadingSkills-tree4
#| echo: true
rs_tree


## -----------------------------------------------------------------------------
#| label: ReadingSkills-tree-sctest
library("strucchange")
sctest(rs_tree)


## -----------------------------------------------------------------------------
#| label: ReadingSkills-mix
rs_mix <- betamix(accuracy ~ iq, data = ReadingSkills, k = 3,
  extra_components = extraComponent(type = "uniform",
    coef = 0.99, delta = 0.01), nstart = 10)


## -----------------------------------------------------------------------------
#| label: ReadingSkills-mix3
rs_mix


## -----------------------------------------------------------------------------
#| label: ReadingSkills-mix4
summary(rs_mix)


## -----------------------------------------------------------------------------
#| echo: false
#| fig-height: 5.5
#| fig-width: 10
#| out-width: 100%
#| label: fig-betamix
#| fig-cap: "Fitted regression lines for the mixture model with three components and the observations shaded according to their posterior probabilities (left). Fitted regression lines for the partitioned beta regression model with shading according to the observed `dyslexic` variable where nondyslexic and dyslexic children are in blue and red, respectively (right)."
par(mfrow = c(1, 2))
ix <- as.numeric(ReadingSkills$dyslexia)
prob <- 2 * (posterior(rs_mix)[cbind(seq_along(ix), clusters(rs_mix))] - 0.5)
col3 <- hcl(c(0, 260, 130), 65, 45, fixup = FALSE)
col1 <- col3[clusters(rs_mix)]
col2 <- hcl(c(0, 260, 130)[clusters(rs_mix)], 65 * abs(prob)^1.5, 95 - 50 * abs(prob)^1.5, fixup = FALSE)
plot(accuracy ~ iq, data = ReadingSkills, col = col2, pch = 19, cex = 1.5,
  xlim = c(-2, 2), main = "Mixture model (dyslexia unobserved)")
points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = 1, col = col1)
iq <- -30:30/10
cf <- rbind(coef(rs_mix, model = "mean", component = 1:2), c(qlogis(0.99), 0))
for(i in 1:3) lines(iq, plogis(cf[i, 1] + cf[i, 2] * iq), lwd = 2, col = col3[i])

ix <- as.numeric(ReadingSkills$dyslexia)
col1 <- hcl(c(260, 0), 90, 40)[ix]
col2 <- hcl(c(260, 0), 10, 95)[ix]
plot(accuracy ~ iq, data = ReadingSkills, col = col2, pch = 19,
  cex = 1.5, xlim = c(-2, 2), main = "Partitioned model (dyslexia observed)")
points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = 1, col = col1)

cf <- coef(rs_tree, model = "mean")
col3 <- hcl(c(260, 0), 90, 40)
for(i in 1:2) lines(iq, plogis(cf[i, 1] + cf[i, 2] * iq), lwd = 2, col = col3[i])


## -----------------------------------------------------------------------------
#| label: ReadingSkills-mix5
table(clusters(rs_mix), ReadingSkills$dyslexia)


## -----------------------------------------------------------------------------
#| label: GasolineYield-bias
data("GasolineYield", package = "betareg")
gy <- lapply(c("ML", "BC", "BR"), function(x)
  betareg(yield ~ batch + temp, data = GasolineYield, type = x))


## -----------------------------------------------------------------------------
#| label: GasolineYield-bias-table
#| echo: false
#| results: asis
cf <- matrix(paste("$", sapply(round(sapply(gy, coef), digits = 5), format, nsmall = 5), "$", sep = ""), ncol = 3)
se <- matrix(gsub(" ", "",
  paste("$", format(round(sapply(gy, function(x) sqrt(diag(vcov(x)))), digits = 5), nsmall = 5), "$", sep = ""),
  fixed = TRUE), ncol = 3)
cfse <- cbind(c(paste("$\\beta_{", 1:11, "}$", sep = ""), "$\\phi$"), cf[,1], se[,1], cf[,2], se[,2], cf[,3], se[,3])
knitr::kable(cfse, align = c("l", rep("r", 6)), col.names = c("", "Maximum likelihood", "", "Bias correction", "", "Bias reduction", ""))


## -----------------------------------------------------------------------------
#| label: GasolineYield-phi
sapply(gy, coef, model = "precision")


## -----------------------------------------------------------------------------
#| label: GasolineYield-phi-loglik
sapply(gy, logLik)


## -----------------------------------------------------------------------------
#| label: GasolineYield-bias2
data("GasolineYield", package = "betareg")
gy2 <- lapply(c("ML", "BC", "BR"), function(x)
  betareg(yield ~ batch + temp | 1, data = GasolineYield, type = x))
sapply(gy2, logLik)


## -----------------------------------------------------------------------------
#| label: GasolineYield-bias-table2
#| echo: false
#| results: asis
cf <- matrix(paste("$", sapply(round(sapply(gy2, coef), digits = 5), format, nsmall = 5), "$", sep = ""), ncol = 3)
se <- matrix(gsub(" ", "",
  paste("$", format(round(sapply(gy2, function(x) sqrt(diag(vcov(x)))), digits = 5), nsmall = 5), "$", sep = ""),
  fixed = TRUE), ncol = 3)
cfse <- cbind(c(paste("$\\beta_{", 1:11, "}$", sep = ""), "$\\log\\phi$"), cf[,1], se[,1], cf[,2], se[,2], cf[,3], se[,3])
knitr::kable(cfse, align = c("l", rep("r", 6)), col.names = c("", "Maximum likelihood", "", "Bias correction", "", "Bias reduction", ""))