## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = (4/6)*7
)

## ----setup--------------------------------------------------------------------
library(QAEnsemble)

## ----eval=TRUE----------------------------------------------------------------
library(svMisc)
library(coda)
library(expm)
library(diagram)

## ----eval=TRUE----------------------------------------------------------------
set.seed(10)

## ----eval=TRUE----------------------------------------------------------------
names_diagram = c("W", "S", "D")

#Note: byrow = true is not used in the matrix functions in order to have the 
#direction of the arrows correctin the diagram
diagram_matrix = matrix(c("q_WW", "q_WS", "q_WD", "q_SW", "q_SS", "q_SD", 0, 0, "q_DD"), nrow = 3, ncol = 3)

curve_matrix = matrix(c(0.1, 0.1, 0, 0.1, 0.1, 0, 0, 0, 0.1), nrow = 3, ncol = 3)

diagram::plotmat(diagram_matrix, pos = c(2, 1), curve = curve_matrix, name = names_diagram, lwd = 2, box.lwd = 2, cex.txt = 0.8, self.lwd = 2, self.cex = 0.5, self.shiftx = c(-0.1, 0.1, -0.1), box.type = "circle", box.prop = 0.5, arr.type = "triangle")

## ----eval=TRUE----------------------------------------------------------------
num_cycles = 20

## ----eval=TRUE----------------------------------------------------------------
#well people that become sick per day
q_WS = 5e-2

#sick people that become well per day
q_SW = 1e-3

#well people that die per day
q_WD = 1e-7

#sick people that die per day
q_SD = 5e-4

q_WW = -1*(q_WS + q_WD)
q_SS = -1*(q_SW + q_SD)
q_DD = 0

## ----eval=TRUE----------------------------------------------------------------
MarkovModel_sol <- function(param)
{
  q_WS_use = param[1]
  q_SW_use = param[2]
  q_WD_use = param[3]
  q_SD_use = param[4]

  q_WW_use = -1*(q_WS_use + q_WD_use)
  q_SS_use = -1*(q_SW_use + q_SD_use)
  q_DD_use = 0

  Q_transition_matrix_use = matrix(c(q_WW_use, q_WS_use, q_WD_use, q_SW_use, q_SS_use, q_SD_use, 0, 0, q_DD_use), nrow = 3, ncol = 3, byrow = TRUE)

  num_cycles = 20

  num_states = 3
  name_states = c("W", "S", "D")

  #People over time begin day 0
  cohort_over_time = matrix(NA, nrow = (num_cycles+1), ncol = num_states, dimnames = list(0:num_cycles, name_states))
  cohort_0day = c(W = 3995, S = 5, D = 0)
  cohort_over_time[1,] = cohort_0day
  rownames(cohort_over_time) = paste("Day", c(0:(num_cycles)), sep = "_")

  for(t in 1:num_cycles)
  {
    cohort_over_time[t+1, ] = cohort_over_time[t, ] %*% expm::expm(Q_transition_matrix_use*(1))
  }

  return(cohort_over_time)
}

## ----eval=TRUE----------------------------------------------------------------
time_use = c(1:num_cycles)

cohort_over_time = MarkovModel_sol(c(q_WS, q_SW, q_WD, q_SD))

plot(time_use, diff(cohort_over_time[,2]), xlab="Days (t)", ylab="New daily sick people", type = "l", col = "red")

plot(time_use, diff(cohort_over_time[,3]), xlab="Days (t)", ylab="New daily deaths", type = "l", col = "red")

## ----eval=TRUE----------------------------------------------------------------
p1 = 0.1
p2 = 0.3

data_nbin_new_S = matrix(NA, nrow = 1, ncol = num_cycles)
data_nbin_new_D = matrix(NA, nrow = 1, ncol = num_cycles)

new_S_true = diff(cohort_over_time[,2])
new_D_true = diff(cohort_over_time[,3])

for(t in 1:num_cycles)
{
  data_nbin_new_S[t] = rnbinom(1, size = (new_S_true[t]*p1)/(1 - p1), prob = p1)

  if(new_D_true[t] == 0)
  {
    data_nbin_new_D[t] = 0
  }
  else
  {
    data_nbin_new_D[t] = rnbinom(1, size = (new_D_true[t]*p2)/(1 - p2), prob = p2)
  }
}

plot(time_use, data_nbin_new_S, xlab="Days (t)", ylab="New daily sick people")

plot(time_use, data_nbin_new_D, xlab="Days (t)", ylab="New daily deaths")

## ----eval=TRUE----------------------------------------------------------------
logp <- function(param)
{
  q_WS_use = param[1]
  q_SW_use = param[2]
  q_SD_use = param[3]
  p1_use = param[4]
  p2_use = param[5]

  logp_val = dunif(q_WS_use, min = 1e-4, max = 10, log = TRUE) + dunif(q_SW_use, min = 1e-7, max = 1e-1, log = TRUE) + dunif(q_SD_use, min = 1e-6, max = 1e-2, log = TRUE) + dunif(p1_use, min = 1e-4, max = 1, log = TRUE) + dunif(p2_use, min = 1e-4, max = 1, log = TRUE)

  return(logp_val)
}

logl <- function(param)
{
  cohort_over_time_use = MarkovModel_sol(c(param[1], param[2], q_WD, param[3]))
  
  new_S_sol = diff(cohort_over_time_use[,2])
  
  new_D_sol = diff(cohort_over_time_use[,3])

  issue_flag = 0

  for(t in 1:num_cycles)
  {
    if(new_S_sol[t] < 0 || new_D_sol[t] < 0)
    {
      issue_flag = 1
    }

    if(issue_flag == 1)
    {
      break
    }

    if(new_S_sol[t] == 0)
    {
      new_S_sol[t] = 1e-4;
    }

    if(new_D_sol[t] == 0)
    {
      new_D_sol[t] = 1e-4;
    }
  }

  if(issue_flag == 0)
  {
    logl_val = sum(dnbinom(data_nbin_new_S, size = ((new_S_sol)*param[4])/(1 - param[4]), prob = param[4], log = TRUE)) + sum(dnbinom(data_nbin_new_D, size = ((new_D_sol)*param[5])/(1 - param[5]), prob = param[5], log = TRUE))
  }
  else
  {
    logl_val = -Inf
  }

  return(logl_val)
}

logfuns = list(logp = logp, logl = logl)

## ----eval=TRUE----------------------------------------------------------------
num_par = 5
num_chains = 2*num_par

theta0 = matrix(0, nrow = num_par, ncol = num_chains)

temp_val = 0
j = 0

while(j < num_chains)
{
  initial = c(runif(1, 1e-4, 10), runif(1, 1e-7, 1e-1), runif(1, 1e-6, 1e-2), runif(1, 1e-4, 1), runif(1, 1e-4, 1))
  temp_val = logl(initial) + logp(initial)

  while(is.na(temp_val) || is.infinite(temp_val))
  {
    initial = c(runif(1, 1e-4, 10), runif(1, 1e-7, 1e-1), runif(1, 1e-6, 1e-2), runif(1, 1e-4, 1), runif(1, 1e-4, 1))
    temp_val = logl(initial) + logp(initial)
  }

  j = j + 1

  message(paste('j:', j))
  
  theta0[1,j] = initial[1]
  theta0[2,j] = initial[2]
  theta0[3,j] = initial[3]
  theta0[4,j] = initial[4]
  theta0[5,j] = initial[5]

}

## ----eval=TRUE----------------------------------------------------------------
num_chain_iterations = 1.5e4
thin_val_par = 10

floor(num_chain_iterations/thin_val_par)*num_chains

## ----eval=TRUE----------------------------------------------------------------

Dis_Model_Quad_result = ensemblealg(theta0, logfuns, T_iter = num_chain_iterations, Thin_val = thin_val_par, ShowProgress = TRUE, ReturnCODA = TRUE)


## ----eval=TRUE----------------------------------------------------------------
my_samples = Dis_Model_Quad_result$theta_sample
my_log_prior = Dis_Model_Quad_result$log_prior_sample
my_log_like = Dis_Model_Quad_result$log_like_sample
my_mcmc_list_coda = Dis_Model_Quad_result$mcmc_list_coda

## ----eval=TRUE----------------------------------------------------------------
varnames(my_mcmc_list_coda) = c("q_WS", "q_SW", "q_SD", "p1", "p2")

plot(my_mcmc_list_coda)

## ----eval=TRUE----------------------------------------------------------------
my_samples_burn_in = my_samples[,,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]

my_log_prior_burn_in = my_log_prior[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]

my_log_like_burn_in = my_log_like[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]

## ----eval=TRUE----------------------------------------------------------------
diagnostic_result = psrfdiagnostic(my_samples_burn_in, 0.05)

diagnostic_result$p_s_r_f_vec

## ----eval=TRUE----------------------------------------------------------------
log_un_post_vec = as.vector(my_log_prior_burn_in + my_log_like_burn_in)
k1 = as.vector(my_samples_burn_in[1,,])
k2 = as.vector(my_samples_burn_in[2,,])
k3 = as.vector(my_samples_burn_in[3,,])
k4 = as.vector(my_samples_burn_in[4,,])
k5 = as.vector(my_samples_burn_in[5,,])

## ----eval=TRUE----------------------------------------------------------------
#q_WS posterior median and 95% credible interval
median(k1)
hpdparameter(k1, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of q_WS, 5e-2)

#q_SW posterior median and 95% credible interval
median(k2)
hpdparameter(k2, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of q_SW, 1e-3)

#q_SD posterior median and 95% credible interval
median(k3)
hpdparameter(k3, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of q_SD, 5e-4)

#p1 posterior median and 95% credible interval
median(k4)
hpdparameter(k4, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of p1, 0.1)

#p2 posterior median and 95% credible interval
median(k5)
hpdparameter(k5, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of p2, 0.3)

#q_WS, q_SW, q_SD, p1, and p2 maximum posterior
k1[which.max(log_un_post_vec)]
k2[which.max(log_un_post_vec)]
k3[which.max(log_un_post_vec)]
k4[which.max(log_un_post_vec)]
k5[which.max(log_un_post_vec)]

## ----eval=TRUE----------------------------------------------------------------
plot(k1, exp(log_un_post_vec), xlab="q_WS", ylab="unnormalized posterior density")

## ----eval=TRUE----------------------------------------------------------------
plot(k2, exp(log_un_post_vec), xlab="q_SW", ylab="unnormalized posterior density")

## ----eval=TRUE----------------------------------------------------------------
plot(k3, exp(log_un_post_vec), xlab="q_SD", ylab="unnormalized posterior density")

## ----eval=TRUE----------------------------------------------------------------
plot(k4, exp(log_un_post_vec), xlab="p1", ylab="unnormalized posterior density")

## ----eval=TRUE----------------------------------------------------------------
plot(k5, exp(log_un_post_vec), xlab="p2", ylab="unnormalized posterior density")

## ----eval=TRUE----------------------------------------------------------------
num_samples = 10000

new_S_predict = matrix(NA, nrow = num_samples, ncol = length(time_use))
new_D_predict = matrix(NA, nrow = num_samples, ncol = length(time_use))

discrepancy_new_S = matrix(NA, nrow = num_samples, ncol = 1)
discrepancy_new_S_pred = matrix(NA, nrow = num_samples, ncol = 1)
ind_pred_exceeds_new_S = matrix(NA, nrow = num_samples, ncol = 1)

discrepancy_new_D = matrix(NA, nrow = num_samples, ncol = 1)
discrepancy_new_D_pred = matrix(NA, nrow = num_samples, ncol = 1)
ind_pred_exceeds_new_D = matrix(NA, nrow = num_samples, ncol = 1)

for (i in (length(log_un_post_vec) - num_samples + 1):length(log_un_post_vec))
{
  l = i - (length(log_un_post_vec) - num_samples)

  my_sol = MarkovModel_sol(c(k1[i], k2[i], 1e-7, k3[i]))

  my_new_S_mean = diff(my_sol[,2])

  my_new_D_mean = diff(my_sol[,3])

  for(j in 1:length(time_use))
  {
    new_S_predict[l,j] = rnbinom(1, size = ((my_new_S_mean[j])*k4[i])/(1 - k4[i]), prob = k4[i])

    new_D_predict[l,j] = rnbinom(1, size = ((my_new_D_mean[j])*k5[i])/(1 - k5[i]), prob = k5[i])
  }

  discrepancy_new_S[l,1] = sum((data_nbin_new_S - my_new_S_mean)^2)
  discrepancy_new_S_pred[l,1] = sum((new_S_predict[l,] - my_new_S_mean)^2)

  discrepancy_new_D[l,1] = sum((data_nbin_new_D - my_new_D_mean)^2)
  discrepancy_new_D_pred[l,1] = sum((new_D_predict[l,] - my_new_D_mean)^2)

  if(discrepancy_new_S_pred[l,1] > discrepancy_new_S[l,1])
  {
    ind_pred_exceeds_new_S[l,1] = 1
  }
  else
  {
    ind_pred_exceeds_new_S[l,1] = 0
  }

  if(discrepancy_new_D_pred[l,1] > discrepancy_new_D[l,1])
  {
    ind_pred_exceeds_new_D[l,1] = 1
  }
  else
  {
    ind_pred_exceeds_new_D[l,1] = 0
  }

  svMisc::progress(l, num_samples, progress.bar = TRUE, init = (l == 1))
}

## ----eval=TRUE----------------------------------------------------------------
Bayes_p_value = sum(c(ind_pred_exceeds_new_S, ind_pred_exceeds_new_D))/length(c(ind_pred_exceeds_new_S, ind_pred_exceeds_new_D))

Bayes_p_value

## ----eval=TRUE----------------------------------------------------------------
new_S_predict_lower = matrix(NA, nrow = 1, ncol = length(time_use))
new_S_predict_upper = matrix(NA, nrow = 1, ncol = length(time_use))

new_D_predict_lower = matrix(NA, nrow = 1, ncol = length(time_use))
new_D_predict_upper = matrix(NA, nrow = 1, ncol = length(time_use))

for(j in 1:length(time_use))
{
  new_S_predict_lower[j] = quantile(new_S_predict[,j], probs = 0.025)
  new_S_predict_upper[j] = quantile(new_S_predict[,j], probs = 0.975)

  new_D_predict_lower[j] = quantile(new_D_predict[,j], probs = 0.025)
  new_D_predict_upper[j] = quantile(new_D_predict[,j], probs = 0.975)
}

## ----eval=TRUE----------------------------------------------------------------
plot(time_use, data_nbin_new_S, main="New daily sicknesses (Disease Markov Model Example)", cex.main = 1,
     xlab="Days (t)", ylab="New daily sick people", type = "p", lty = 0, ylim = c(0, 500))
lines(time_use, diff(cohort_over_time[,2]), type = "l", lty = 1, col = "red")
lines(time_use, colMeans(new_S_predict), type = "l", lty = 1, col = "blue")
lines(time_use, new_S_predict_lower, type = "l", lty = 2, col = "blue")
lines(time_use, new_S_predict_upper, type = "l", lty = 2, col = "blue")

legend("topleft", legend = c("Data", "True model", "Posterior predictive mean", "95% prediction interval"), lty=c(0,1,1,2), col = c("black", "red", "blue", "blue"), pch=c(1,NA,NA,NA))

## ----eval=TRUE----------------------------------------------------------------
plot(time_use, data_nbin_new_D, main="New daily deaths (Disease Markov Model Example)", cex.main = 1,
     xlab="Days (t)", ylab="New daily deaths", type = "p", lty = 0, ylim = c(0, 20))
lines(time_use, diff(cohort_over_time[,3]), type = "l", lty = 1, col = "red")
lines(time_use, colMeans(new_D_predict), type = "l", lty = 1, col = "blue")
lines(time_use, new_D_predict_lower, type = "l", lty = 2, col = "blue")
lines(time_use, new_D_predict_upper, type = "l", lty = 2, col = "blue")

legend("topleft", legend = c("Data", "True model", "Posterior predictive mean", "95% prediction interval"), lty=c(0,1,1,2), col = c("black", "red", "blue", "blue"), pch=c(1,NA,NA,NA))