## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup,echo=FALSE,message=FALSE,warning=FALSE----------------------------- library(knitr) library(tsaux) library(xts) library(zoo) library(kableExtra) function_groups <- data.frame( Group = c( "Anomalies", "Calendar/Time", "Metrics", "Simulation", "Transformations", "Miscellaneous" ), Description = c( "Anomaly detection", "Inference and conversion utilities for time/dates", "Performance metrics for point and distributional forecasts", "Simulation by parts including Trend, Seasonal, ARMA, Irregular and Anomalies", "Data transformations including Box-Cox, Logit, Softplus-Logit and Sigmoid", "Miscellaneous functions" ) ) function_groups |> kable(format = "html", escape = FALSE, align = "l", caption = "Function Groups") |> kable_styling(font_size = 10, bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE) ## ----highlight=TRUE,fig.width=6, fig.height=3--------------------------------- x <- rep(0, 100) x[50] <- 1 oldpar <- par(mfrow = c(1,1)) par(mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) plot(filter(x, filter = 0, method = "recursive", init = 0), ylab = "", xlab = "") lines(filter(x, filter = 0.5, method = "recursive", init = 0), col = 2, lty = 2) lines(filter(x, filter = 0.8, method = "recursive", init = 0), col = "orange", lty = 2) lines(filter(x, filter = 1, method = "recursive", init = 0), col = 3, lty = 3, lwd = 2) grid() legend("topleft", c("Additive Outlier (alpha = 0.0)", "Temporary Change (alpha = 0.5)", "Temporary Change (alpha = 0.8)", "Permanent Shift (alpha = 1.0)"), cex = 0.8, lty = c(1,2,2,3), bty = "n", col = c(1,2,"orange",3)) par(oldpar) ## ----highlight=T,fig.width=6, fig.height=4------------------------------------ set.seed(154) r <- rnorm(300, mean = 0, sd = 6) d <- seq(as.Date("2000-01-01"), by = "months", length.out = 300) mod <- initialize_simulator(r, index = d, sampling = "months", model = "issm") mod <- mod |> add_polynomial(order = 2, alpha = 0.22, beta = 0.01, b0 = 1, l0 = 140) mod <- mod |> add_seasonal(frequency = 12, gamma = 0.7, init_scale = 2) y <- xts(mod$simulated, mod$index) oldpar <- par(mfrow = c(1,1)) par(mfrow = c(2, 2), mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) plot(as.zoo(y), main = "Original", ylab = "", xlab = "", col = "blue") grid() y_contaminated <- y |> additive_outlier(time = 25, parameter = 1) plot(as.zoo(y_contaminated), main = "+Additive Outlier[t=25]", ylab = "", xlab = "", col = "green") grid() y_contaminated <- y_contaminated |> temporary_change(time = 120, parameter = 0.75, alpha = 0.7) plot(as.zoo(y_contaminated), main = "+Temporary Change[t=120]", ylab = "", xlab = "", col = "purple") grid() y_contaminated <- y_contaminated |> level_shift(time = 200, parameter = -0.25) plot(as.zoo(y_contaminated), main = "+Level Shift[t=200]", ylab = "", xlab = "", col = "red") grid() par(oldpar) ## ----------------------------------------------------------------------------- xreg <- auto_regressors(y_contaminated, frequency = 12, return_table = TRUE, method = "full") xreg[,time := which(index(y_contaminated) %in% xreg$date)] print(xreg) ## ----highlight=T,fig.width=6, fig.height=4------------------------------------ x <- cbind(additive_outlier(y_contaminated, time = xreg$time[1], add = FALSE), temporary_change(y_contaminated, time = xreg$time[2], alpha = xreg$filter[2], add = FALSE), level_shift(y_contaminated, time = xreg$time[3], add = FALSE)) x <- x %*% xreg$coef y_clean <- y_contaminated - x oldpar <- par(mfrow = c(1,1)) par(mfrow = c(2,1), mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) plot(as.zoo(y_contaminated), main = "Anomaly Decomtanination", xlab = "", ylab = "", ylim = c(90, 700)) lines(as.zoo(y_clean), col = 2, lty = 2, lwd = 2) lines(as.zoo(y), col = 3, lty = 3, lwd = 2.5) grid() legend("topleft", c("Contaminated", "Clean", "Original"), col = 1:3, bty = "n", lty = 1:3, cex = 0.8) plot(zoo(x, index(y)), main = "Anomalies", xlab = "", ylab = "") par(oldpar) ## ----echo=FALSE, results='asis'----------------------------------------------- library(knitr) # Create data frame for comparison transformations <- data.frame( Transformation = c("Sigmoid", "Softplus Logit", "Box-Cox", "Logit"), # Input range (domain of the transformation) Range = c( "$(-\\infty, \\infty)$", "$(\\text{lower}, \\text{upper})$", "$(0, \\infty)$", "$(\\text{lower}, \\text{upper})$" ), # Output range (codomain of the transformation) Output_Range = c( "$(0,1)$", "$(\\text{lower}, \\text{upper})$", "$(\\mathbb{R})$", "$(\\mathbb{R})$" ), # Transformation formulas Formula = c( "$\\frac{1}{1 + e^{-x}}$", "$\\log(1 + e^{\\log(\\frac{x - \\text{lower}}{\\text{upper} - x})})$", "$\\begin{cases} \\log(x), & \\text{if } \\lambda = 0; \\\\ \\frac{x^\\lambda - 1}{\\lambda}, & \\text{otherwise}. \\end{cases}$", "$\\log \\left( \\frac{x - \\text{lower}}{\\text{upper} - x} \\right)$" ), # Inverse transformation formulas Inverse = c( "$\\log \\left( \\frac{x}{1 - x} \\right)$", "$\\frac{\\text{lower} + \\text{upper} (e^x - 1)}{e^x}$", "$\\begin{cases} e^{x}, & \\text{if } \\lambda = 0; \\\\ (\\lambda x + 1)^{\\frac{1}{\\lambda}}, & \\text{otherwise}. \\end{cases}$", "$\\frac{\\text{lower} + \\text{upper} (e^x)}{1 + e^x}$" ), # Growth behavior of each transformation Growth = c( "Saturates at 0 and 1", "Can grow exponentially near upper limit", "Varies (linear for $\\lambda = 1$)", "Linear growth" ), # Typical use cases for each transformation Use_Cases = c( "Probability estimation, logistic regression, neural networks", "Ensuring positivity, constrained optimization, Bayesian models", "Variance stabilization, handling nonlinearity", "Probability modeling, transformations for bounded variables" ) ) transformations |> kable(format = "html", escape = FALSE, align = "l", caption = "Transformations") |> kable_styling(font_size = 10, bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE) ## ----highlight=TRUE,fig.width=6, fig.height=4--------------------------------- set.seed(42) # Generate data based on DGP assumptions boxcox_data <- rgamma(1000, shape = 2, scale = 2) # Strictly positive logit_data <- rbeta(1000, shape1 = 2, shape2 = 5) # In (0,1) softplus_logit_data <- runif(1000, min = 0.1, max = 10) # Bounded but flexible sigmoid_data <- rnorm(1000, mean = 0, sd = 2) # Real-valued lower <- 0 upper <- 1 B <- box_cox(lambda = 0.5) boxcox_transformed <- B$transform(boxcox_data) boxcox_recovered <- B$inverse(boxcox_transformed, lambda = 0.5) L <- logit(lower = lower, upper = upper) logit_transformed <- L$transform(logit_data) logit_recovered <- L$inverse(logit_transformed) SPL <- softlogit(lower = 0.1, upper = 10) softlogit_transformed <- SPL$transform(softplus_logit_data) softlogit_recovered <- SPL$inverse(softlogit_transformed) SIG <- sigmoid(lower = -1, upper = 1) sigmoid_transformed <- SIG$transform(sigmoid_data) sigmoid_recovered <- SIG$inverse(sigmoid_transformed) oldpar <- par(mfrow = c(1,1)) par(mfrow = c(2, 2), mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) hist(boxcox_transformed, main = "Box-Cox Transformed Data", col = "blue", breaks = 30) hist(logit_transformed, main = "Logit Transformed Data", col = "green", breaks = 30) hist(softlogit_transformed, main = "Softplus-Logit Transformed Data", col = "purple", breaks = 30) hist(sigmoid_transformed, main = "Sigmoid Transformed Data", col = "red", breaks = 30) par(oldpar) ## ----echo=FALSE, results='asis'----------------------------------------------- metrics_table <- data.frame( Function = c("mape", "rmape", "smape", "mase", "mslre", "mis", "msis", "bias", "wape", "wslre", "wse", "pinball", "crps"), Equation = c( "$\\frac{1}{n} \\sum_{t=1}^{n} \\left| \\frac{A_t - P_t}{A_t} \\right|$", "Box-Cox transformation of MAPE", "$\\frac{2}{n} \\sum_{t=1}^{n} \\frac{|A_t - P_t|}{|A_t| + |P_t|}$", "$\\frac{\\frac{1}{n} \\sum_{t=1}^{n} |P_t - A_t|}{\\frac{1}{N-s} \\sum_{t=s+1}^{N} |A_t - A_{t-s}|}$", "$\\frac{1}{n} \\sum_{t=1}^{n} \\left( \\log(1 + A_t) - \\log(1 + P_t) \\right)^2$", "$\\frac{1}{n} \\sum_{t=1}^{n} (U_t - L_t) + \\frac{2}{\\alpha} [(L_t - A_t) I(A_t < L_t) + (A_t - U_t) I(A_t > U_t)]$", "$\\frac{1}{h} \\sum_{t=1}^{h} \\frac{(U_t - L_t) + \\frac{2}{\\alpha} [(L_t - A_t) I(A_t < L_t) + (A_t - U_t) I(A_t > U_t)]}{\\frac{1}{N-s} \\sum_{t=s+1}^{N} |A_t - A_{t-s}|}$", "$\\frac{1}{n} \\sum_{t=1}^{n} (P_t - A_t)$", "$\\mathbf{w}^T \\left( \\frac{|P - A|}{A} \\right)$", "$\\mathbf{w}^T \\left( \\log \\left( \\frac{P}{A} \\right) \\right)^2$", "$\\mathbf{w}^T \\left( \\frac{P}{A} \\right)^2$", "$\\frac{1}{n} \\sum_{t=1}^{n} [ \\tau (A_t - Q^\\tau_t) I(A_t \\geq Q^\\tau_t) + (1 - \\tau) (Q^\\tau_t - A_t) I(A_t \\lt Q^\\tau_t)]$", "$\\frac{1}{n} \\sum_{t=1}^{n} \\int_{-\\infty}^{\\infty} (F_t(y) - I(y \\geq A_t))^2 dy$" ), Scope = c("U", "U", "U", "U", "U", "U", "U", "U", "M", "M", "M", "U", "U"), Description = c( "Measures the average percentage deviation of predictions from actual values.", "Rescaled MAPE using a Box-Cox transformation for scale invariance.", "An alternative to MAPE that symmetrizes the denominator.", "Compares the absolute error to the mean absolute error of a naive seasonal forecast.", "Measures squared log relative errors to penalize large deviations.", "Evaluates the accuracy of prediction intervals.", "A scaled version of MIS, dividing by the mean absolute seasonal error.", "Measures systematic overestimation or underestimation.", "A weighted version of MAPE for multivariate data.", "A weighted version of squared log relative errors.", "A weighted version of squared errors.", "A scoring rule used for quantile forecasts.", "A measure of probabilistic forecast accuracy." ) ) metrics_table |> kable(format = "html", escape = FALSE, align = "l", caption = "Forecast Performance Metrics") |> kable_styling(font_size = 10, bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE) |> column_spec(2, width = "30%") # Adjust equation column width ## ----------------------------------------------------------------------------- set.seed(42) time_points <- 100 actual <- cumsum(rnorm(time_points, mean = 0.5, sd = 1)) + 50 # Increasing trend # Generate "Good" Forecast (small errors) good_forecast <- actual + rnorm(time_points, mean = 0, sd = 1) # Small deviations # Generate "Bad" Forecast (biased and larger errors) bad_forecast <- actual * 1.2 + rnorm(time_points, mean = 5, sd = 5) # Large bias good_distribution <- t(replicate(1000, good_forecast + rnorm(time_points, mean = 0, sd = 1))) bad_distribution <- t(replicate(1000, bad_forecast + rnorm(time_points, mean = 0, sd = 3))) # Compute Metrics metrics <- function(actual, forecast, distribution) { list( MAPE = mape(actual, forecast), BIAS = bias(actual, forecast), MASE = mase(actual, forecast, original_series = actual, frequency = 1), RMAPE = rmape(actual, forecast), SMAPE = smape(actual, forecast), MSLRE = mslre(actual, forecast), MIS = mis(actual, lower = forecast - 2, upper = forecast + 2, alpha = 0.05), MSIS = msis(actual, lower = forecast - 2, upper = forecast + 2, original_series = actual, frequency = 1, alpha = 0.05), CRPS = crps(actual, distribution) ) } # Compute metrics for both cases good_metrics <- metrics(actual, good_forecast, good_distribution) bad_metrics <- metrics(actual, bad_forecast, bad_distribution) # Convert to a clean table comparison_table <- data.frame( Metric = names(good_metrics), Good_Forecast = unlist(good_metrics), Bad_Forecast = unlist(bad_metrics) ) rownames(comparison_table) <- NULL comparison_table |> kable(format = "html", escape = FALSE, align = "c", caption = "Comparison of Forecast Metrics for Good vs. Bad Forecasts") |> kable_styling(font_size = 10, bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE) |> column_spec(2, width = "30%") # Adjust equation column width ## ----echo=FALSE, results='asis'----------------------------------------------- simulator_functions <- data.frame( Function = c("initialize_simulator", "add_polynomial", "add_seasonal", "add_arma", "add_regressor", "add_transform", "add_anomaly", "add_custom", "tsdecompose", "plot", "lines", "mixture_modelspec", "tsensemble"), Details = c( "Initializes the simulation model with an error component.", "Adds a polynomial trend component with optional dampening.", "Adds a seasonal component with a specified frequency.", "Adds an ARMA process to the simulation.", "Adds a regressor component with given coefficients.", "Applies a transformation such as Box-Cox or logit.", "Adds an anomaly component like an additive outlier or level shift.", "Adds a custom user-defined component.", "Decomposes the simulated series into its state components.", "Plots the simulated time series or its components.", "Adds line segments to an existing plot.", "Creates an ensemble setup for multiple simulation models.", "Aggregates multiple simulated series using a weighting scheme." ), stringsAsFactors = FALSE ) simulator_functions |> kable(format = "html", escape = FALSE, align = "l", caption = "Simulator Functions") |> kable_styling(font_size = 10, bootstrap_options = c("striped", "hover", "responsive"), full_width = FALSE) |> column_spec(2, width = "30%") # Adjust equation column width ## ----highlight=TRUE,fig.width=6, fig.height=6--------------------------------- set.seed(154) r <- rnorm(100, mean = 0, sd = 5) d <- seq(as.Date("2000-01-01"), by = "months", length.out = 100) mod <- initialize_simulator(r, index = d, sampling = "months", model = "issm") oldpar <- par(mfrow = c(1,1)) par(mfrow = c(3,2), mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) plot(zoo(mod$simulated, mod$index), ylab = "", xlab = "", main = "Step 1: Error Component", col = "black") mod <- mod |> add_polynomial(order = 2, alpha = 0.22, beta = 0.01, b0 = 1, l0 = 140) plot(zoo(mod$simulated, mod$index), ylab = "", xlab = "", main = "Step 2: + Level Component", col = "blue") mod <- mod |> add_seasonal(frequency = 12, gamma = 0.7, init_scale = 2) plot(zoo(mod$simulated, mod$index), ylab = "", xlab = "", main = "Step 3: + Seasonal Component", col = "green") mod <- mod |> add_arma(order = c(2,1), ar = c(0.5, -0.3), ma = 0.4) plot(zoo(mod$simulated, mod$index), ylab = "", xlab = "", main = "Step 4: + ARMA Component", col = "purple") mod <- mod |> add_anomaly(time = 50, delta = 0.5, ratio = 1) plot(zoo(mod$simulated, mod$index), ylab = "", xlab = "", main = "Step 5: + Anomaly Component", col = "red") par(oldpar) ## ----highlight=TRUE,fig.width=6, fig.height=5--------------------------------- decomp <- tsdecompose(mod) oldpar <- par(mfrow = c(1,1)) par(mfrow = c(1,1), mar = c(1,3,1,1), cex.main = 0.9, cex.axis = 0.8) plot(as.zoo(decomp), main = "Decomposition", col = c("blue","green","purple","black"), xlab = "", cex.lab = 0.7) par(oldpar) ## ----highlight=TRUE,fig.width=6, fig.height=4--------------------------------- set.seed(123) r <- rnorm(200, mean = 0, sd = 5) d <- seq(as.Date("2000-01-01"), by = "months", length.out = 200) mod1 <- initialize_simulator(r, index = d, sampling = "months", model = "issm") |> add_polynomial(order = 2, alpha = 0.22, beta = 0.01, b0 = 1, l0 = 140) |> add_seasonal(frequency = 12, gamma = 0.7, init_scale = 2) |> add_arma(order = c(2,1), ar = c(0.5, -0.3), ma = 0.4) |> add_transform(method = "box-cox", lambda = 1) mod2 <- initialize_simulator(r, index = d, sampling = "months", model = "issm") |> add_polynomial(order = 2, alpha = 0.22, beta = 0.01, b0 = 1, l0 = 140) |> add_seasonal(frequency = 12, gamma = 0.7, init_scale = 2) |> add_arma(order = c(2,1), ar = c(0.5, -0.3), ma = 0.4) |> add_transform(method = "box-cox", lambda = 0.5) ensemble <- mixture_modelspec(mod1, mod2) t <- 1:200 weights1 <- exp(-t / 50) weights2 <- 1 - weights1 weights <- cbind(weights1, weights2) ens_sim <- tsensemble(ensemble, weights = weights, difference = FALSE) oldpar <- par(mfrow = c(1,1)) par(mfrow = c(3,1), mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) plot(mod1, main = "Simulation [lambda = 1]", col = "blue") plot(mod2, main = "Simulation [lambda = 0.5]", col = "green") plot(as.zoo(ens_sim), main = "Ensembled Simulation", ylab = "", xlab = "", col = "red") par(oldpar) ## ----highlight=TRUE,fig.width=6, fig.height=3--------------------------------- set.seed(200) r <- rnorm(300, mean = 0, sd = 5) d <- seq(as.Date("2000-01-01"), by = "months", length.out = 300) mod <- initialize_simulator(r, index = d, sampling = "months", model = "issm") mod <- mod |> add_polynomial(order = 2, alpha = 0.22, beta = 0.01, b0 = 1, l0 = 140) mod <- mod |> add_seasonal(frequency = 12, gamma = 0.7, init_scale = 2) y <- xts(mod$simulated, mod$index) train <- y train[251:300] <- NA colnames(train) <- "train" colnames(y) <- "y" estimated_model <- tslinear(train, trend = TRUE, seasonal = TRUE, frequency = 12) oldpar <- par(mfrow = c(1,1)) par(mfrow = c(1,1), mar = c(2,2,1,1), cex.main = 0.8, cex.axis = 0.8, cex.lab = 0.8) plot(as.zoo(y), ylab = "", xlab = "", type = "l", main = "tslinear") lines(zoo(estimated_model$fitted.values, d), col = "blue") lines(zoo(tail(estimated_model$fitted.values, 50), tail(d, 50)), col = "red") grid() legend("topleft",c("Actual","Fitted","Predicted"), col = c("black","blue","red"), lty = 1, bty = "n", cex = 0.9) par(oldpar)