## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, warning=FALSE, message=FALSE--------------------------------------
library(NobBS)
library(dplyr)

data(denguedat)

# Define the current date and window size for nowcasting
now <- as.Date("1994-09-26")
win_size <- 8

# Original data for nowcasting
denguedat1 <- denguedat %>% filter(onset_week <= now)

# Altered data: Move some report dates forward without indicating batch reporting
denguedat2 <- denguedat1 %>%
  mutate(report_week = if_else(onset_week == as.Date("1994-08-29") &
                               report_week == as.Date("1994-09-05"),
                               as.Date("1994-09-26"), report_week))

# Altered data with batch reporting indicated
denguedat3 <- denguedat2 %>%
  mutate(batched = denguedat1$onset_week == "1994-08-29" & denguedat1$report_week == "1994-09-05")

## ----nowcasting, warning=FALSE, message=FALSE---------------------------------
# Helper function to run nowcasting
run_nowcast <- function(data, now, win_size) {
  NobBS(data, now, units = "1 week", onset_date = "onset_week", 
        report_date = "report_week", moving_window = win_size)
}

# Run nowcasts for all scenarios
nowcast1 <- run_nowcast(denguedat1, now, win_size)
nowcast2 <- run_nowcast(denguedat2, now, win_size)
nowcast3 <- run_nowcast(denguedat3, now, win_size)

## ----fig.width=7, fig.height=4------------------------------------------------
# Function to extract the probability distribution of the reporting delay (betas) from nowcast results
extract_reporting_delay_distribution <- function(nowcast, win_size) {
  sapply(0:(win_size - 1), function(i) {
    mean(exp(nowcast$params.post[[paste0("Beta ", i)]]))
  })
}

# Extract probability distribution of the reporting delay for each scenario
betas1 <- extract_reporting_delay_distribution(nowcast1, win_size)
betas2 <- extract_reporting_delay_distribution(nowcast2, win_size)
betas3 <- extract_reporting_delay_distribution(nowcast3, win_size)

# Plot the delay distribution
barplot(rbind(betas1, betas2, betas3), beside = TRUE,
        main = 'Estimated Reporting Delay Distribution',
        col = c('blue', 'red', 'green'),
        names.arg = seq(0, win_size - 1),
        xlab = 'Delay (weeks)', ylab = 'Probability of Reporting Delay')

legend('topright',
       legend = c('Original Data', 'Altered - Batch Not Indicated', 'Altered - Batch Indicated'),
       fill = c('blue', 'red', 'green'))

## ----fig.width=7, fig.height=4------------------------------------------------
# Extract nowcasting estimates
estimates1 <- nowcast1$estimates %>% mutate(scenario = "Original")
estimates2 <- nowcast2$estimates %>% mutate(scenario = "Altered - Batch Not Indicated")
estimates3 <- nowcast3$estimates %>% mutate(scenario = "Altered - Batch Indicated")

# Combine estimates for visualization
estimates <- bind_rows(estimates1, estimates2, estimates3)

# Eventual cases (true counts)
cases_per_date <- denguedat1 %>%
  group_by(onset_week) %>%
  summarize(count = n()) %>%
  filter(onset_week %in% estimates1$onset_date)

# Plot nowcasting estimates
plot(estimates1$onset_date, estimates1$estimate, col = 'blue', lwd=2, type = 'l',
     ylim = range(c(estimates$q_0.025, estimates$q_0.975)), 
     xlab = 'Onset Date', ylab = 'Cases', main = 'Incidence Estimates')
lines(estimates2$onset_date, estimates2$estimate, lwd=2, col = 'red')
lines(estimates3$onset_date, estimates3$estimate, lwd=2, col = 'green')
# Add 95% PI shaded regions
polygon(c(estimates1$onset_date, rev(estimates1$onset_date)),
        c(estimates1$q_0.025, rev(estimates1$q_0.975)),
        col = rgb(0, 0, 1, 0.2), border = NA)
polygon(c(estimates2$onset_date, rev(estimates2$onset_date)),
        c(estimates2$q_0.025, rev(estimates2$q_0.975)),
        col = rgb(1, 0, 0, 0.2), border = NA)
polygon(c(estimates3$onset_date, rev(estimates3$onset_date)),
        c(estimates3$q_0.025, rev(estimates3$q_0.975)),
        col = rgb(0, 1, 0, 0.2), border = NA)
points(cases_per_date$onset_week, cases_per_date$count, col = 'black', pch = 20)

legend('topleft',
       legend = c('Original Data', 
                  'Altered - Batch Not Indicated', 
                  'Altered - Batch Indicated', 
                  '95% PI (Original Data)', 
                  '95% PI (Altered - Batch Not Indicated)',
                  '95% PI (Altered - Batch Indicated)',
                  'Eventual Cases'),
       col = c('blue', 'red', 'green', rgb(0, 0, 1, 0.2), rgb(1, 0, 0, 0.2), rgb(0, 1, 0, 0.2), 'black'), 
       lwd = c(2, 2, 2, NA, NA, NA, NA), 
       lty = c(1, 1, 1, NA, NA, NA, NA), 
       pch = c(NA, NA, NA, 15, 15, 15, 20),
       cex = 0.9)