---
title: "Updated JSS code"
authors: "Adam Loy"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Updated JSS code}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown_notangle}
editor_options: 
  chunk_output_type: console
---

```{r, include = FALSE}
is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_",
             "_R_CHECK_LICENSE_") %in% names(Sys.getenv()))

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.show = "hide",
  eval = !is_check
)
```

# Preliminaries

```{r}
library(HLMdiag)
library(ggplot2)
library(lme4)
```


# Exam data

```{r}
data("Exam", package = "mlmRev")
head(Exam)
```


# Residual analysis

```{r}
# Model fm1 on page 6
(fm1 <- lmer(normexam ~ standLRT + (1 | school), Exam, REML = FALSE))

# Extract level-1 residuals
# Residual plot from page 7
resid_fm1 <- hlm_resid(fm1)
head(resid_fm1)

ggplot(resid_fm1, aes(x = standLRT, y = .ls.resid)) + 
  geom_point() + 
  geom_smooth() +
  labs(y = "LS level-1 residuals")
```

```{r}
# Model fm2 on page 7
fm2 <- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) + 
              (1 | school), Exam, REML = FALSE)


resid_fm2 <- hlm_resid(fm2)


# Figure 2 page 8
ggplot(resid_fm2, aes(x = `I(standLRT^2)`, y = .ls.resid)) + 
  geom_hline(yintercept = 0, color = "blue") + 
  geom_point() +
  labs( y = "LS residuals", x = "standLRT2")
```


`ggplot_qqnorm()` function is now defunct, Q-Q plots are now much easier to create in **ggplot2** directly than when the package was first created.

```{r}
ggplot(resid_fm2, aes(sample = .ls.resid)) +
  geom_qq() + 
  geom_qq_line() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")
```

A better alternative if found in the **qqplotr** package
```{r}
library(qqplotr)
ggplot(resid_fm2, aes(sample = .ls.resid)) +
  stat_qq_band() +
  stat_qq_line() + 
  stat_qq_point() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")
```

EB residuals are now called `.ranef.` residuals

```{r}
# Model fm3, page 11
fm3 <- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) + sex +
              (standLRT | school), Exam, REML = FALSE)

## Extract level-2 EB residuals
resid_fm3 <- hlm_resid(fm3, level = "school")
resid_fm3
```

```{r message=FALSE}
## Construct school-level data set
library(dplyr)
SchoolExam <- Exam %>%
  group_by(school) %>%
  dplyr::summarize(
    size = length(school),
    schgend = unique(schgend),
    schavg = unique(schavg),
    type = unique(type), 
    schLRT = mean(standLRT)
  )

SchoolExam <- SchoolExam %>% left_join(resid_fm3, by = "school")
SchoolExam

## Left panel -- figure 5
ggplot(
  SchoolExam, 
  aes(
    x = reorder(schgend, .ranef.intercept, median), 
    y = .ranef.intercept)
  ) +
  geom_boxplot() +
  labs(x = "school gender", y = "level-2 residual (Intercept)")
  
## Right panel -- figure 5
ggplot(SchoolExam, aes(x = schavg, y = .ranef.intercept)) +
  geom_point() +
  geom_smooth() +
  labs(x = "average intake score", y = "level-2 residual (Intercept)")
```

```{r}
## Model fm4, page 12
fm4 <- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) +
              sex + schgend + schavg + (standLRT | school), 
            data = Exam, REML = FALSE)

resid_fm4 <- hlm_resid(fm4, level = "school", include.ls = FALSE)

## Figure 6, page 13
ggplot(resid_fm4, aes(sample = .ranef.intercept)) +
  stat_qq_band() +
  stat_qq_line() + 
  stat_qq_point() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

ggplot(resid_fm4, aes(sample = .ranef.stand_lrt)) +
  stat_qq_band() +
  stat_qq_line() + 
  stat_qq_point() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

```


# Influence analysis

We can now use `hlm_influence` to pull off all of the case-deletion diagnostics for the fixed effects at the specified level:

```{r}
# Calculating influence diagnostics for model fm4
influence_fm4 <- hlm_influence(fm4, level = "school")
influence_fm4

dotplot_diag(influence_fm4$cooksd, cutoff = "internal", name = "cooks.distance")
dotplot_diag(influence_fm4$cooksd, cutoff = "internal", name = "cooks.distance", modify = "dotplot")
```

```{r}
beta_cdd <- cooks.distance(fm4, level = "school", include.attr = TRUE)
beta_cdd[25,]
```


## Diagnostics for variance components

To calculate the relative variance change, we use `hlm_influence()`, but `approx = FALSE` must be specified:

```{r eval=FALSE}
hlm_influence(fm4, approx = FALSE)
```