## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(superspreading)
library(fitdistrplus)
library(ggplot2)

## ----prep-data----------------------------------------------------------------
# total number of cases (i.e. individuals in transmission chain)
n <- 152

# number of secondary cases for all cases
all_cases_transmission <- c(
  1, 2, 2, 5, 14, 1, 4, 4, 1, 3, 3, 8, 2, 1, 1, 4, 9, 9, 1, 1, 17, 2, 1,
  1, 1, 4, 3, 3, 4, 2, 5, 1, 2, 2, 1, 9, 1, 3, 1, 2, 1, 1, 2
)

# add zeros for each cases that did not cause a secondary case
# (i.e. cases that did not transmit)
all_cases <- c(
  all_cases_transmission,
  rep(0, n - length(all_cases_transmission))
)

## ----fit-dists----------------------------------------------------------------
# fit a standard set of offspring distribution models:
# - Poisson
# - Geometric
# - Negative Binomial

pois_fit <- fitdist(data = all_cases, distr = "pois")
geom_fit <- fitdist(data = all_cases, distr = "geom")
nbinom_fit <- fitdist(data = all_cases, distr = "nbinom")

## ----fit-table----------------------------------------------------------------
model_tbl <- ic_tbl(pois_fit, geom_fit, nbinom_fit)
col.names <- gsub(
  pattern = "^Delta", replacement = "$\\\\Delta$", x = colnames(model_tbl)
)
col.names <- gsub(pattern = "^w", replacement = "$w$", x = col.names)
knitr::kable(model_tbl, col.names = col.names, row.names = FALSE, digits = 1)

## ----nbinom-estim-------------------------------------------------------------
nbinom_fit$estimate

## ----prep-plot-nbinom---------------------------------------------------------
# tally cases
tally <- table(all_cases)

# pad with zeros when number of cases not in tally
num_cases <- rep(0, 21)
names(num_cases) <- as.character(seq(0, 20, 1))
num_cases[names(tally)] <- tally

# convert cases to proportional of total cases to plot on the same scale as
# the density
prop_num_cases <- num_cases / sum(num_cases)

# create data frame with proportion of cases, density and number of secondary
# cases
nbinom_data <- data.frame(
  x = seq(0, 20, 1),
  prop_num_cases = prop_num_cases,
  density = dnbinom(
    x = seq(0, 20, 1),
    size = nbinom_fit$estimate[["size"]],
    mu = nbinom_fit$estimate[["mu"]]
  )
)

## ----plot-nbinom, fig.cap="Number of secondary cases from the empirical data (bar plot) and the density of the negative binomial with the maximum likelihood estimates when fit to the empirical data (points and line). This plot is reproduced from Althaus (2015) figure 1.", fig.width = 8, fig.height = 5----
# make plot
ggplot(data = nbinom_data) +
  geom_col(
    mapping = aes(x = x, y = prop_num_cases),
    fill = "cyan3",
    colour = "black"
  ) +
  geom_point(
    mapping = aes(x = x, y = density),
    colour = "#f58231",
    size = 2
  ) +
  geom_line(
    mapping = aes(x = x, y = density),
    colour = "#f58231"
  ) +
  scale_x_continuous(name = "Number of secondary cases") +
  scale_y_continuous(name = "Frequency") +
  theme_bw()

## ----split-data---------------------------------------------------------------
index_case_transmission <- c(2, 17, 5, 1, 8, 2, 14)
secondary_case_transmission <- c(
  1, 2, 1, 4, 4, 1, 3, 3, 1, 1, 4, 9, 9, 1, 2, 1, 1, 1, 4, 3, 3, 4, 2,
  5, 1, 2, 2, 1, 9, 1, 3, 1, 2, 1, 1, 2
)

# Format data into index and non-index cases

# total non-index cases
n_non_index <- sum(c(index_case_transmission, secondary_case_transmission))
# transmission from all non-index cases
non_index_cases <- c(
  secondary_case_transmission,
  rep(0, n_non_index - length(secondary_case_transmission))
)

## ----fit-nbinom---------------------------------------------------------------
# Estimate R and k for index and non-index cases
param_index <- fitdist(
  data = index_case_transmission,
  distr = "nbinom"
)
param_index
param_non_index <- fitdist(
  data = non_index_cases,
  distr = "nbinom"
)
param_non_index

## ----fit-extra-dists----------------------------------------------------------
# fit an expanded set of offspring distribution models:
# - Poisson
# - Geometric
# - Negative Binomial
# - Poisson-lognormal compound
# - Poisson-Weibull compound

pois_fit <- fitdist(data = all_cases, distr = "pois")
geom_fit <- fitdist(data = all_cases, distr = "geom")
nbinom_fit <- fitdist(data = all_cases, distr = "nbinom")
poislnorm_fit <- fitdist(
  data = all_cases,
  distr = "poislnorm",
  start = list(meanlog = 1, sdlog = 1)
)
poisweibull_fit <- fitdist(
  data = all_cases,
  distr = "poisweibull",
  start = list(shape = 1, scale = 1)
)

## ----fit-extra-table----------------------------------------------------------
model_tbl <- ic_tbl(
  pois_fit, geom_fit, nbinom_fit, poislnorm_fit, poisweibull_fit
)
col.names <- gsub(
  pattern = "^Delta", replacement = "$\\\\Delta$", x = colnames(model_tbl)
)
col.names <- gsub(pattern = "^w", replacement = "$w$", x = col.names)
knitr::kable(model_tbl, col.names = col.names, row.names = FALSE, digits = 1)

## ----prep-plot-dists----------------------------------------------------------
# create data frame with proportion of cases, density of each distribution
dist_compare_data <- data.frame(
  x = seq(0, 20, 1),
  prop_num_cases = c(prop_num_cases, rep(0, 21)),
  dens = c(
    dnbinom(
      x = seq(0, 20, 1),
      size = nbinom_fit$estimate[["size"]],
      mu = nbinom_fit$estimate[["mu"]]
    ),
    poisweibull_density = dpoisweibull(
      x = seq(0, 20, 1),
      shape = poisweibull_fit$estimate[["shape"]],
      scale = poisweibull_fit$estimate[["scale"]]
    )
  ),
  dist = c(rep("Negative Binomial", 21), rep("Poisson-Weibull", 21))
)

## ----plot-dists, fig.cap="Number of secondary cases from the empirical data (bar plot) and the density of the negative binomial (orange) and poisson-Weibull (pink) with the maximum likelihood estimates when fit to the empirical data (points and line).", fig.width = 8, fig.height = 5----
# make plot
ggplot(data = dist_compare_data) +
  geom_col(
    mapping = aes(x = x, y = prop_num_cases),
    fill = "cyan3",
    colour = "black"
  ) +
  geom_point(
    mapping = aes(
      x = x,
      y = dens,
      colour = dist
    ),
    size = 2
  ) +
  geom_line(mapping = aes(x = x, y = dens, colour = dist)) +
  scale_x_continuous(name = "Number of secondary cases") +
  scale_y_continuous(name = "Frequency") +
  scale_colour_manual(
    name = "Distribution",
    values = c("#f58231", "#ffe119")
  ) +
  theme_bw() +
  theme(legend.position = "top")