## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 7,
  fig.align = "center",
  cache.path = ".sampling_cache/"
)

## ----setup, message=FALSE-----------------------------------------------------
library(mixvlmc)
library(ggplot2)
library(data.table) ## for frollapply

## -----------------------------------------------------------------------------
dts <- sample(0:1, 500, replace = TRUE)

## -----------------------------------------------------------------------------
best_vlmc <- tune_vlmc(dts)
draw(as_vlmc(best_vlmc))

## -----------------------------------------------------------------------------
dts_sample <- simulate(as_vlmc(best_vlmc), 600)[-(1:100)]

## -----------------------------------------------------------------------------
dts_sample_2 <- simulate(as_vlmc(best_vlmc), 10, init = c(0L, 0L))

## -----------------------------------------------------------------------------
dts_sample_2

## -----------------------------------------------------------------------------
dts_sample_3 <- simulate(as_vlmc(best_vlmc), 10, burnin = 20)

## -----------------------------------------------------------------------------
CAC_raw <- as.data.frame(EuStockMarkets)$CAC

## -----------------------------------------------------------------------------
CAC_rel_evol <- diff(CAC_raw) / CAC_raw[-length(CAC_raw)]
CAC_dts <- factor(
  ifelse(CAC_rel_evol >= 0.005, "Up",
    ifelse(CAC_rel_evol <= -0.005, "Down", "Stay")
  ),
  levels = c("Down", "Stay", "Up")
)

## ----cache=TRUE---------------------------------------------------------------
CAC_vlmc <- tune_vlmc(CAC_dts, criterion = "AIC")
CAC_model <- as_vlmc(CAC_vlmc)

## ----fig.height=4-------------------------------------------------------------
CAC_rle <- rle(as.integer(CAC_dts))
CAC_rle_df <- data.frame(value = CAC_rle$values, length = CAC_rle$lengths)
ggplot(CAC_rle_df, aes(x = length)) +
  geom_bar() +
  labs(
    title = "Distribution of the lengths of constant subsequences",
    x = "Length",
    y = "Count"
  )

## -----------------------------------------------------------------------------
long_sequence <- function(dts) {
  dts_int <- as.integer(dts)
  ## RLE cannot be used directly as we need to account for overlapping
  ## patterns
  dts_freq <- frollapply(dts_int, 10, \(x) max(rle(x)$length) >= 5)
  mean(dts_freq, na.rm = TRUE)
}

## ----cache=TRUE---------------------------------------------------------------
bootstrap_samples <- vector(100, mode = "list")
burn_in_time <- 50 * depth(CAC_model)
for (b in seq_along(bootstrap_samples)) {
  bootstrap_samples[[b]] <- simulate(CAC_model,
    length(CAC_dts),
    burin = burn_in_time,
    seed = b
  )
}

## ----cache=TRUE---------------------------------------------------------------
bootstrap_ls <- sapply(bootstrap_samples, long_sequence)

## ----fig.height=4-------------------------------------------------------------
ggplot(mapping = aes(x = bootstrap_ls)) +
  geom_density() +
  geom_rug() +
  labs(
    title = "Bootstrap distribution of the probability of long constant subsequences",
    x = "Probability"
  ) +
  geom_vline(xintercept = long_sequence(CAC_dts), color = "red")

## ----cache=TRUE---------------------------------------------------------------
CAC_covariates <- as.data.frame(EuStockMarkets)[c("DAX", "SMI", "FTSE")][-1, ]
CAC_covlmc <- tune_covlmc(CAC_dts, CAC_covariates, criterion = "AIC")
CAC_comodel <- as_covlmc(CAC_covlmc)

## -----------------------------------------------------------------------------
draw(CAC_comodel, model = "full", with_state = TRUE)

## ----cache=TRUE---------------------------------------------------------------
co_sample <- simulate(CAC_comodel,
  length(CAC_dts),
  seed = 0,
  init = CAC_dts[1:depth(CAC_comodel)],
  covariate = CAC_covariates
)

## ----fig.height=5-------------------------------------------------------------
pc_week <- powerconsumption[powerconsumption$week == 12, ]
elec <- pc_week$active_power
ggplot(pc_week, aes(x = date_time, y = active_power)) +
  geom_line() +
  xlab("Date") +
  ylab("Activer power (kW)")

## -----------------------------------------------------------------------------
elec_dts <- cut(elec, breaks = c(0, 1.2, 8), labels = c("background", "active"))
elec_cov <- data.frame(day = (pc_week$hour >= 7 & pc_week$hour <= 17))

## -----------------------------------------------------------------------------
elec_covlmc_tune <- tune_covlmc(elec_dts, elec_cov, criterion = "AIC")
best_elec_covlmc <- as_covlmc(elec_covlmc_tune)
draw(best_elec_covlmc, model = "full", time_sep = " | ", p_value = FALSE)

## -----------------------------------------------------------------------------
day_only <- simulate(best_elec_covlmc,
  length(elec_dts),
  seed = 0,
  covariate = data.frame(day = rep(TRUE, length(elec_dts)))
)

## -----------------------------------------------------------------------------
night_only <- simulate(best_elec_covlmc,
  length(elec_dts),
  seed = 0,
  covariate = data.frame(day = rep(FALSE, length(elec_dts)))
)