---
title: "Customising Violin Plots with Formula Input"
author: "Tom Kelly"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{vioplot: Customising Violin Plots with Formula Input}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

Since boxplots have become the _de facto_ standard for plotting the distribution of data, most users are familiar with them and with the formula input for data frames. However, this input is not available in the standard `vioplot` package. Thus it has been restored here for enhanced backwards compatibility with `boxplot`.

As shown below for the `iris` dataset, violin plots can show distribution information using the formula input that `boxplot` implements but the original `vioplot` does not. This vignette reproduces the customisation shown in [the main vioplot vignette using vioplot syntax](violin_customisation.html) with the formula method commonly used for `boxplot`, `t.test`, and `lm`.

```{r}
library("vioplot")
```

```{r, message=FALSE, eval=FALSE}
# Base R boxplot with formula input: one box per species
data("iris")
boxplot(Sepal.Length ~ Species, data = iris)
```

```{r, message=FALSE, echo=FALSE}
# Evaluated version of the boxplot above, with a title added
data("iris")
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length"
)
```

However, the same formula call does not work with `vioplot` (0.2).

```{r, message=FALSE, eval=FALSE}
# Install the 0.2 release, then try the same formula call as for boxplot
devtools::install_version("vioplot", version = "0.2")
library(vioplot)
vioplot(Sepal.Length ~ Species, data = iris)
```

```
Error in min(data) : invalid 'type' (language) of argument
```

## Plot Defaults

```{r, message=FALSE, eval=FALSE}
# Formula input: one violin per species
vioplot(Sepal.Length ~ Species, data = iris)
```

```{r, message=FALSE, echo=FALSE}
# Hidden chunk: same plot with a title and a magenta fill
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  col = "magenta"
)
```

Another concern we see here is that the `vioplot` defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

```{r}
# No colour arguments: the plot uses the package's default colours
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length"
)
```

## Plot colours: Violin Fill

Plot colours can be further customised as with the original vioplot package using the `col` argument:

```{r}
# A single `col` value fills every violin with the same colour
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  col = "lightblue"
)
```

### Vectorisation

However the `vioplot` (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3):

```{r}
# One fill colour per violin, given as a vector to `col`
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  col = c("lightgreen", "lightblue", "palevioletred")
)
# Legend mapping each species to its fill colour
legend(
  "topleft",
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("lightgreen", "lightblue", "palevioletred"),
  cex = 0.5
)
```


## Plot colours: Violin Lines and Boxplot

Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments:

```{r}
# `col` sets the violin fill, `border` sets the violin outline
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  col = "lightblue",
  border = "royalblue"
)
```

Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

```{r}
# Colours of the inner boxplot: rectangle fill and outline
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  rectCol = "palevioletred",
  lineCol = "violetred"
)
```
 
 The same applies to the colour of the median point with `colMed`:
 
```{r}
# `colMed` sets the colour of the median point
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  colMed = "violet"
)
```
 
 ### Combined customisation
 
These customised colours can be combined:
 
```{r}
# All colour arguments from the previous sections in a single call
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  col = "lightblue",
  border = "royalblue",
  rectCol = "palevioletred",
  lineCol = "violetred",
  colMed = "violet"
)
```
 
### Vectorisation

These colour and shape settings can also be customised separately for each violin:
```{r}
# Every colour argument, and the median symbol, given one value per violin
vioplot(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  col = c("lightgreen", "lightblue", "palevioletred"),
  border = c("darkolivegreen4", "royalblue4", "violetred4"),
  rectCol = c("forestgreen", "blue", "palevioletred3"),
  lineCol = c("darkolivegreen", "royalblue", "violetred4"),
  colMed = c("green", "cyan", "magenta"),
  pchMed = c(15, 17, 19)
)
```
 
### Enhanced Annotation

Here we demonstrate additional annotation features to display outliers and group sizes.

#### Labelling group size

Note that the y-axis limits need to be adjusted to avoid overlaying text.

```{r, fig.align = 'center', fig.height = 4, fig.width = 8, fig.keep = 'last'}
data("iris")
# Columns are referenced through `iris$` instead of attach(): attaching leaves
# a copy of the data on the search path that masks objects in later chunks.
# The y-axis starts at 0 so the labels do not overlay the violins.
vioplot(Sepal.Length ~ Species, data = iris, main = "Sepal Length", ylab = "",
        col = c("lightgreen", "lightblue", "palevioletred"),
        ylim = c(0, max(iris$Sepal.Length) * 1.1))
legend("bottomright", legend = c("setosa", "versicolor", "virginica"),
       fill = c("lightgreen", "lightblue", "palevioletred"), cex = 0.8)
# label the groups (Sepal.Length is already a plain vector, so no unlist())
add_labels(iris$Sepal.Length, iris$Species, height = 0.5, cex = 0.8)
```
#### Plotting outliers and medians

Here we add outliers and show annotation features.

```{r, warning=FALSE}
# add outliers to demo data
# The extra rows are built as a data frame so that every measurement column
# stays numeric. Binding a mixed vector such as c(7, 1, 0, 0, "setosa") would
# turn the numeric columns into character columns.
outlier_rows <- data.frame(
  Sepal.Length = c(7, 1, 9, 2, 10, 12),
  Sepal.Width = c(1, 10, 2, 12, 1, 7),
  Petal.Length = 0,
  Petal.Width = 0,
  # two outliers per species, using the same factor levels as `iris`
  Species = factor(
    rep(c("setosa", "versicolor", "virginica"), each = 2),
    levels = levels(iris$Species)
  )
)
# a single rbind() instead of growing the data frame one row at a time
iris2 <- rbind(iris, outlier_rows)
table(iris2$Species)
```

This adds outliers to the plot.

```{r, fig.align = 'center', fig.height = 4, fig.width = 8, fig.keep = 'last'}
# Columns are referenced through `iris2$` instead of attach(), so this chunk
# does not depend on, or add to, the search path.
vioplot(Sepal.Length ~ Species, data = iris2, main = "Sepal Length",
        col = c("lightgreen", "lightblue", "palevioletred"),
        ylim = c(min(iris2$Sepal.Length) * 0.9, max(iris2$Sepal.Length) * 1.1))
# One median per species, in factor-level order. split() guarantees this
# order, whereas unique() follows the order of appearance in the data.
Sepal.medians <- vapply(split(iris2$Sepal.Length, iris2$Species),
                        median, numeric(1))
# highlights medians
points(x = seq_along(Sepal.medians), y = Sepal.medians,
       pch = 21, cex = 1.25, lwd = 2,
       col = "white", bg = c("forestgreen", "lightblue4", "palevioletred4"))
# plots outliers above 2 SD
add_outliers(iris2$Sepal.Length, iris2$Species, cutoff = 2,
             col = "black", bars = "grey85", lwd = 2,
             fill = c("palegreen3", "lightblue3", "palevioletred3"))
legend("bottomright", legend = c("setosa", "versicolor", "virginica"),
       fill = c("lightgreen", "lightblue", "palevioletred"), cex = 0.6)
add_labels(iris2$Sepal.Length, iris2$Species, height = 0.5, cex = 0.8)
```
Annotations on split violins are shown here. See the split violin plot vignette for details on these parameters.

```{r, fig.align = 'center', fig.height = 4, fig.width = 8, fig.keep = 'last'}
data(iris)
summary(iris2$Sepal.Width)
# Split the observations at the mean sepal width: wide sepals are drawn on
# the right half of each violin, narrow sepals on the left half.
width_cutoff <- mean(iris2$Sepal.Width)
table(iris2$Sepal.Width > width_cutoff)
iris_large <- iris2[iris2$Sepal.Width > width_cutoff, ]
iris_small <- iris2[iris2$Sepal.Width <= width_cutoff, ]

# Both halves share axis limits computed from the full data. Columns are
# referenced through `$` instead of attach(), so each call states explicitly
# which subset it uses.
species_names <- c("setosa", "versicolor", "virginica")
y_limits <- c(min(iris2$Sepal.Length) * 0.9, max(iris2$Sepal.Length) * 1.1)

# right half: large sepal width
vioplot(Sepal.Length ~ Species, data = iris_large, plotCentre = "line",
        side = "right", col = c("lightgreen", "lightblue", "palevioletred"),
        ylim = y_limits, names = species_names)
# one median per species, in factor-level order
Sepal.medians <- vapply(split(iris_large$Sepal.Length, iris_large$Species),
                        median, numeric(1))
# highlights medians
points(x = seq_along(Sepal.medians), y = Sepal.medians,
       pch = 21, cex = 1.25, lwd = 2,
       col = "white", bg = c("forestgreen", "lightblue4", "palevioletred4"))
# plots outliers above 2 SD; values and groups both come from `iris_large`
# so the two vectors have the same length
add_outliers(iris_large$Sepal.Length, iris_large$Species, cutoff = 2,
             col = c("palegreen3", "lightblue3", "palevioletred3"),
             bars = "grey85", lwd = 2, fill = "grey85")

# left half: small sepal width, added to the existing plot
vioplot(Sepal.Length ~ Species, data = iris_small, plotCentre = "line",
        side = "left", add = TRUE,
        col = c("palegreen1", "lightblue1", "palevioletred1"),
        ylim = y_limits, names = species_names)
Sepal.medians <- vapply(split(iris_small$Sepal.Length, iris_small$Species),
                        median, numeric(1))
# highlights medians
points(x = seq_along(Sepal.medians), y = Sepal.medians,
       pch = 21, cex = 1.25, lwd = 2,
       col = "white", bg = c("forestgreen", "lightblue4", "palevioletred4"))
# plots outliers above 2 SD, using the `iris_small` subset to mirror the
# right half
add_outliers(iris_small$Sepal.Length, iris_small$Species, cutoff = 2,
             col = c("palegreen3", "lightblue3", "palevioletred3"),
             bars = "grey85", lwd = 2, fill = "grey50")

# Species legend and labels are drawn once, after both halves, so nothing
# is drawn over them.
legend("bottomright", legend = species_names,
       fill = c("lightgreen", "lightblue", "palevioletred"), cex = 0.6)
add_labels(iris2$Sepal.Length, iris2$Species, height = 0.5, cex = 0.8)

# add legend and titles
legend("topleft", fill = c("lightblue2", "lightblue3"),
       legend = c("small", "large"), title = "Sepal Width")
title(xlab = "Species", ylab = "Sepal Length")
```