---
title: "Tutorial: Repeated Measures"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Tutorial: Repeated Measures}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette: merge each chunk's source and output
# into a single block, and prefix output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

This vignette documents how the `dabestr` package can generate estimation plots for experiments with repeated-measures designs. With `dabestr`, you can calculate and plot effect sizes for:

- Comparing each group to a shared control (control vs. group i; `baseline`)
- Comparing each measurement to the one directly preceding it (group i vs. group i+1; `sequential`)

This is an improved version of the paired-data plotting available in previous versions, which only supported comparisons between one test group and one control group.

To use these features, set the `paired` argument to either "sequential" or "baseline" when calling the `load()` function. You must also use the `id_col` argument to pass the column in the dataset that identifies which subject each observation belongs to.

```{r setup, warning = FALSE, message = FALSE}
library(dabestr)
```

## Create dataset for demo
```{r}
set.seed(12345) # Fix the seed so the results are reproducible.
N <- 20 # The number of samples taken from each population

# Create samples. Keep the `rnorm()` calls in this order: each call advances
# the random number stream, so reordering them would change the data.
c1 <- rnorm(N, mean = 3, sd = 0.4)
c2 <- rnorm(N, mean = 3.5, sd = 0.75)
c3 <- rnorm(N, mean = 3.25, sd = 0.4)

t1 <- rnorm(N, mean = 3.5, sd = 0.5)
t2 <- rnorm(N, mean = 2.5, sd = 0.6)
t3 <- rnorm(N, mean = 3, sd = 0.75)
t4 <- rnorm(N, mean = 3.5, sd = 0.75)
t5 <- rnorm(N, mean = 3.25, sd = 0.4)
t6 <- rnorm(N, mean = 3.25, sd = 0.4)

# Add a `gender` column for coloring the data
# (first half "Male", second half "Female").
gender <- rep(c("Male", "Female"), each = N / 2)

# Add an `id` column for paired data plotting.
id <- seq_len(N)

# Combine samples, gender and id into a wide tibble with one row per ID.
df <- tibble::tibble(
  `Control 1` = c1, `Control 2` = c2, `Control 3` = c3,
  `Test 1` = t1, `Test 2` = t2, `Test 3` = t3,
  `Test 4` = t4, `Test 5` = t5, `Test 6` = t6,
  Gender = gender, ID = id
)

# Reshape to long format: one row per (ID, Group) pair, with the group name in
# `Group` and its value in `Measurement`. `pivot_longer()` replaces the
# superseded `gather()`; rows come out ordered by ID rather than by group, but
# the order of IDs within each group is unchanged.
df <- df %>%
  tidyr::pivot_longer(
    cols = -c(ID, Gender),
    names_to = "Group",
    values_to = "Measurement"
  )
```

## Loading Data
```{r, warning = FALSE}
# Load the two groups named in `idx` as paired data, matched through the `ID`
# column, and request the "sequential" comparison.
two_groups_paired_sequential <- load(
  df,
  x = Group,
  y = Measurement,
  idx = c("Control 1", "Test 1"),
  paired = "sequential",
  id_col = ID
)

print(two_groups_paired_sequential)
```

```{r, warning = FALSE}
# Same two groups and `ID` pairing as before, this time requesting the
# "baseline" comparison.
two_groups_paired_baseline <- load(
  df,
  x = Group,
  y = Measurement,
  idx = c("Control 1", "Test 1"),
  paired = "baseline",
  id_col = ID
)

print(two_groups_paired_baseline)
```

When only 2 paired data groups are involved, assigning either "baseline" or "sequential" to `paired` will give you the same numerical results. 
```{r}
# Compute the mean difference for each of the two loaded objects.
two_groups_paired_sequential.mean_diff <- two_groups_paired_sequential %>%
  mean_diff()
two_groups_paired_baseline.mean_diff <- two_groups_paired_baseline %>%
  mean_diff()
```

```{r}
# Print the mean-difference result computed with `paired = "sequential"`.
print(two_groups_paired_sequential.mean_diff)
```

```{r}
# Print the mean-difference result computed with `paired = "baseline"`.
print(two_groups_paired_baseline.mean_diff)
```

For paired data, we use [slopegraphs](http://www.edwardtufte.com/notes-sketches/?msg_id=0003nk) (another innovation from Edward Tufte) to connect paired observations. Both Gardner-Altman and Cumming plots support this.

```{r}
# Plot the two-group "sequential" result with custom raw-marker size and alpha.
dabest_plot(
  two_groups_paired_sequential.mean_diff,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3
)
```

```{r, warning = FALSE, eval = FALSE}
# Same result, drawn with `float_contrast = FALSE` and explicit y-limits for
# the contrast axis.
dabest_plot(
  two_groups_paired_sequential.mean_diff,
  float_contrast = FALSE,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3,
  contrast_ylim = c(-0.3, 1.3)
)
```

```{r, warning = FALSE, echo = FALSE}
# Build the non-floating plot for the "sequential" result.
pp_plot <- dabest_plot(
  two_groups_paired_sequential.mean_diff,
  float_contrast = FALSE,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3,
  contrast_ylim = c(-0.3, 1.3)
)

# Place the plot in the middle cell of a one-row, three-column grid, with
# NULL placeholders on either side (relative widths 2.5 : 5 : 2.5).
cowplot::plot_grid(
  NULL, pp_plot, NULL,
  nrow = 1,
  ncol = 3,
  rel_widths = c(2.5, 5, 2.5)
)
```

```{r, warning = FALSE}
# Plot the two-group "baseline" result with custom raw-marker size and alpha.
dabest_plot(
  two_groups_paired_baseline.mean_diff,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3
)
```

```{r, warning = FALSE, eval = FALSE}
# Same result, drawn with `float_contrast = FALSE` and explicit y-limits for
# the contrast axis.
dabest_plot(
  two_groups_paired_baseline.mean_diff,
  float_contrast = FALSE,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3,
  contrast_ylim = c(-0.3, 1.3)
)
```

```{r, warning = FALSE, echo = FALSE}
# Build the non-floating plot for the "baseline" result.
pp_plot <- dabest_plot(
  two_groups_paired_baseline.mean_diff,
  float_contrast = FALSE,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3,
  contrast_ylim = c(-0.3, 1.3)
)

# Place the plot in the middle cell of a one-row, three-column grid, with
# NULL placeholders on either side (relative widths 2.5 : 5 : 2.5).
cowplot::plot_grid(
  NULL, pp_plot, NULL,
  nrow = 1,
  ncol = 3,
  rel_widths = c(2.5, 5, 2.5)
)
```

You can also create repeated-measures plots with multiple test groups. In this case, declaring `paired` to be "sequential" or "baseline" will generate the same slopegraph, reflecting the repeated-measures experimental design, but different contrast plots, to show the "sequential" or "baseline" comparison:

```{r, warning = FALSE}
# Four repeated measures, paired through `ID`; "sequential" compares each
# measurement to the one directly preceding it in `idx`.
sequential_repeated_measures.mean_diff <- load(
  df,
  x = Group,
  y = Measurement,
  idx = c("Control 1", "Test 1", "Test 2", "Test 3"),
  paired = "sequential",
  id_col = ID
) %>%
  mean_diff()

print(sequential_repeated_measures.mean_diff)
```

```{r, warning = FALSE}
# Plot the four-group "sequential" result.
dabest_plot(
  sequential_repeated_measures.mean_diff,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3
)
```

```{r, warning = FALSE}
# Same four repeated measures, paired through `ID`; "baseline" compares each
# group to the shared control, the first entry of `idx`.
baseline_repeated_measures.mean_diff <- load(
  df,
  x = Group,
  y = Measurement,
  idx = c("Control 1", "Test 1", "Test 2", "Test 3"),
  paired = "baseline",
  id_col = ID
) %>%
  mean_diff()

print(baseline_repeated_measures.mean_diff)
```

```{r, warning = FALSE}
# Plot the four-group "baseline" result.
dabest_plot(
  baseline_repeated_measures.mean_diff,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3
)
```

Just as with unpaired data, the `dabestr` package enables you to perform complex visualizations and statistics for paired data.

```{r, warning = FALSE}
# Two repeated-measures series in one analysis: `idx` is a list with one
# character vector of group names per series, all paired through `ID` with
# the "baseline" comparison.
multi_baseline_repeated_measures.mean_diff <- load(
  df,
  x = Group,
  y = Measurement,
  idx = list(
    c("Control 1", "Test 1", "Test 2", "Test 3"),
    c("Control 2", "Test 4", "Test 5", "Test 6")
  ),
  paired = "baseline",
  id_col = ID
) %>%
  mean_diff()

dabest_plot(
  multi_baseline_repeated_measures.mean_diff,
  raw_marker_size = 0.5,
  raw_marker_alpha = 0.3
)
```