## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(superspreading) library(fitdistrplus) library(ggplot2) ## ----prep-data---------------------------------------------------------------- # total number of cases (i.e. individuals in transmission chain) n <- 152 # number of secondary cases for all cases all_cases_transmission <- c( 1, 2, 2, 5, 14, 1, 4, 4, 1, 3, 3, 8, 2, 1, 1, 4, 9, 9, 1, 1, 17, 2, 1, 1, 1, 4, 3, 3, 4, 2, 5, 1, 2, 2, 1, 9, 1, 3, 1, 2, 1, 1, 2 ) # add zeros for each cases that did not cause a secondary case # (i.e. cases that did not transmit) all_cases <- c( all_cases_transmission, rep(0, n - length(all_cases_transmission)) ) ## ----fit-dists---------------------------------------------------------------- # fit a standard set of offspring distribution models: # - Poisson # - Geometric # - Negative Binomial pois_fit <- fitdist(data = all_cases, distr = "pois") geom_fit <- fitdist(data = all_cases, distr = "geom") nbinom_fit <- fitdist(data = all_cases, distr = "nbinom") ## ----fit-table---------------------------------------------------------------- model_tbl <- ic_tbl(pois_fit, geom_fit, nbinom_fit) col.names <- gsub( pattern = "^Delta", replacement = "$\\\\Delta$", x = colnames(model_tbl) ) col.names <- gsub(pattern = "^w", replacement = "$w$", x = col.names) knitr::kable(model_tbl, col.names = col.names, row.names = FALSE, digits = 1) ## ----nbinom-estim------------------------------------------------------------- nbinom_fit$estimate ## ----prep-plot-nbinom--------------------------------------------------------- # tally cases tally <- table(all_cases) # pad with zeros when number of cases not in tally num_cases <- rep(0, 21) names(num_cases) <- as.character(seq(0, 20, 1)) num_cases[names(tally)] <- tally # convert cases to proportional of total cases to plot on the same scale as # the density prop_num_cases <- num_cases / sum(num_cases) # create data frame with proportion of cases, density and number of secondary # cases nbinom_data <- data.frame( x = seq(0, 20, 1), prop_num_cases = prop_num_cases, density = dnbinom( x = seq(0, 20, 1), size = nbinom_fit$estimate[["size"]], mu = nbinom_fit$estimate[["mu"]] ) ) ## ----plot-nbinom, fig.cap="Number of secondary cases from the empirical data (bar plot) and the density of the negative binomial with the maximum likelihood estimates when fit to the empirical data (points and line). This plot is reproduced from Althaus (2015) figure 1.", fig.width = 8, fig.height = 5---- # make plot ggplot(data = nbinom_data) + geom_col( mapping = aes(x = x, y = prop_num_cases), fill = "cyan3", colour = "black" ) + geom_point( mapping = aes(x = x, y = density), colour = "#f58231", size = 2 ) + geom_line( mapping = aes(x = x, y = density), colour = "#f58231" ) + scale_x_continuous(name = "Number of secondary cases") + scale_y_continuous(name = "Frequency") + theme_bw() ## ----split-data--------------------------------------------------------------- index_case_transmission <- c(2, 17, 5, 1, 8, 2, 14) secondary_case_transmission <- c( 1, 2, 1, 4, 4, 1, 3, 3, 1, 1, 4, 9, 9, 1, 2, 1, 1, 1, 4, 3, 3, 4, 2, 5, 1, 2, 2, 1, 9, 1, 3, 1, 2, 1, 1, 2 ) # Format data into index and non-index cases # total non-index cases n_non_index <- sum(c(index_case_transmission, secondary_case_transmission)) # transmission from all non-index cases non_index_cases <- c( secondary_case_transmission, rep(0, n_non_index - length(secondary_case_transmission)) ) ## ----fit-nbinom--------------------------------------------------------------- # Estimate R and k for index and non-index cases param_index <- fitdist( data = index_case_transmission, distr = "nbinom" ) param_index param_non_index <- fitdist( data = non_index_cases, distr = "nbinom" ) param_non_index ## ----fit-extra-dists---------------------------------------------------------- # fit an expanded set of offspring distribution models: # - Poisson # - Geometric # - Negative Binomial # - Poisson-lognormal compound # - Poisson-Weibull compound pois_fit <- fitdist(data = all_cases, distr = "pois") geom_fit <- fitdist(data = all_cases, distr = "geom") nbinom_fit <- fitdist(data = all_cases, distr = "nbinom") poislnorm_fit <- fitdist( data = all_cases, distr = "poislnorm", start = list(meanlog = 1, sdlog = 1) ) poisweibull_fit <- fitdist( data = all_cases, distr = "poisweibull", start = list(shape = 1, scale = 1) ) ## ----fit-extra-table---------------------------------------------------------- model_tbl <- ic_tbl( pois_fit, geom_fit, nbinom_fit, poislnorm_fit, poisweibull_fit ) col.names <- gsub( pattern = "^Delta", replacement = "$\\\\Delta$", x = colnames(model_tbl) ) col.names <- gsub(pattern = "^w", replacement = "$w$", x = col.names) knitr::kable(model_tbl, col.names = col.names, row.names = FALSE, digits = 1) ## ----prep-plot-dists---------------------------------------------------------- # create data frame with proportion of cases, density of each distribution dist_compare_data <- data.frame( x = seq(0, 20, 1), prop_num_cases = c(prop_num_cases, rep(0, 21)), dens = c( dnbinom( x = seq(0, 20, 1), size = nbinom_fit$estimate[["size"]], mu = nbinom_fit$estimate[["mu"]] ), poisweibull_density = dpoisweibull( x = seq(0, 20, 1), shape = poisweibull_fit$estimate[["shape"]], scale = poisweibull_fit$estimate[["scale"]] ) ), dist = c(rep("Negative Binomial", 21), rep("Poisson-Weibull", 21)) ) ## ----plot-dists, fig.cap="Number of secondary cases from the empirical data (bar plot) and the density of the negative binomial (orange) and poisson-Weibull (pink) with the maximum likelihood estimates when fit to the empirical data (points and line).", fig.width = 8, fig.height = 5---- # make plot ggplot(data = dist_compare_data) + geom_col( mapping = aes(x = x, y = prop_num_cases), fill = "cyan3", colour = "black" ) + geom_point( mapping = aes( x = x, y = dens, colour = dist ), size = 2 ) + geom_line(mapping = aes(x = x, y = dens, colour = dist)) + scale_x_continuous(name = "Number of secondary cases") + scale_y_continuous(name = "Frequency") + scale_colour_manual( name = "Distribution", values = c("#f58231", "#ffe119") ) + theme_bw() + theme(legend.position = "top")