## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----fig.show='hold'----------------------------------------------------------
library(HDclust)

## -----------------------------------------------------------------------------
# variable structure with one variable block of 3 variables and two mixture components, which corresponds to GMM model
Vb<- vb(1, dim=3, numst=2)
show(Vb)

## -----------------------------------------------------------------------------
# variable structure with two blocks. Dimensionality of data is 40. First block contains 10 variable with 3 mixture components. Second block has 30 variables with 5 mixture components. Variable order is natural.
Vb <- vb(2, dim=40, bdim=c(10,30), numst=c(3,5), varorder=list(c(1:10),c(11:40)))
show(Vb)

## ----results='hide'-----------------------------------------------------------
# by default number of states in each block is varied from 1 to 9
data("faithful")
Vb <- vb(1, dim=2, numst=1)
set.seed(12345)
modelBIC <- hmmvbBIC(faithful, VbStructure=Vb)

## -----------------------------------------------------------------------------
show(modelBIC)

## ----fig1, fig.height = 5, fig.width = 5--------------------------------------
plot(modelBIC, xlab='# mixture components per block', ylab='BIC')

## ----results='hide'-----------------------------------------------------------
# user-provided configurations for the number of states in each block
data("sim3")
Vb <- vb(2, dim=40, bdim=c(10,30), numst=c(1,1), varorder=list(c(1:10),c(11:40)))
set.seed(12345)
configs = list(c(1,2), c(3,5))
modelBIC <- hmmvbBIC(sim3[,1:40], VbStructure=Vb, configList=configs)

## -----------------------------------------------------------------------------
show(modelBIC)

## -----------------------------------------------------------------------------
# we just illustrate the loglikelihood of the first 10 data points beacause the data set is huge
getLoglikehd(modelBIC)[1:10]

## -----------------------------------------------------------------------------
data("sim3")
set.seed(12345)
Vb <- vb(2, dim=40, bdim=c(10,30), numst=c(3,5), varorder=list(c(1:10),c(11:40)))
hmmvb <- hmmvbTrain(sim3[1:40], VbStructure=Vb)
show(hmmvb)

## -----------------------------------------------------------------------------
Vb <- getVb(hmmvb) # variable block structure
hmmChain <- getHmmChain(hmmvb) # list with HMM models
diagCov <- getDiagCov(hmmvb) # indicator whether covariance matrices are diagonal 
bic <- getBIC(hmmvb) # BIC value

# below we show HMM-VB parameters for the first variable block
numst <- getNumst(hmmChain[[1]]) # number of mixture components in variable block
prenumst <- getPrenumst(hmmChain[[1]]) # number of mixture components in the previous variable block
hmmParam <- getHmmParam(hmmChain[[1]]) # list with priors, transition probabilities, means, covariance matrices and other parameters of all states of HMM


## -----------------------------------------------------------------------------
data("sim2")
set.seed(12345)
hmmvb <- hmmvbTrain(sim2[1:5], searchControl=vbSearchControl(nperm=5), nthread=1)
show(hmmvb)

## ----results='hide'-----------------------------------------------------------
modelBIC <- hmmvbBIC(sim2[1:5], VbStructure=getVb(hmmvb), numst=1:15, nthread=1)

## -----------------------------------------------------------------------------
show(modelBIC)

## ----fig.show='hold', results='hide'------------------------------------------
data("faithful")
VbStructure <- vb(nb=1, dim=2, numst=1)
set.seed(12345)
modelBIC <- hmmvbBIC(faithful, VbStructure)

## -----------------------------------------------------------------------------
clust <- hmmvbClust(faithful, bicObj=modelBIC)
show(clust)

## ----fig2, fig.height = 5, fig.width = 5--------------------------------------
plot(faithful[,1], faithful[,2], xlab='eruptions', ylab='waiting', col=getClsid(clust))

## -----------------------------------------------------------------------------
clustParam <- getClustParam(clust)
mode <- clustParam$mode # a matrix with cluster modes
vseq <- clustParam$vseq # A list with integer vectors representing distinct Viterbi sequences for the dataset

## ----fig.show='hold', results='hide'------------------------------------------
# If variable block structure is unknown
data("sim2")
set.seed(12345)

# find variable block structure
hmmvb <- hmmvbTrain(sim2[,1:5], searchControl=vbSearchControl(nperm=5), nthread=1)

# refine number of states in variable block structure by model selection
modelBIC <- hmmvbBIC(sim2[,1:5], VbStructure=getVb(hmmvb), numst=1:15, nthread=1)

## -----------------------------------------------------------------------------
clust <- hmmvbClust(data=sim2[,1:5], bicObj=modelBIC)
show(clust)

## ----fig3, fig.height = 5, fig.width = 5--------------------------------------
palette(c(palette(), "purple", "brown")) # extend palette to show all 10 clusters
plot(clust)

## ----fig4, fig.height = 5, fig.width = 5--------------------------------------
plot(clust, method='PCA')

## ----fig.show='hold'----------------------------------------------------------
data("sim3")

# split data set in two halves
X1 = sim3[1:500,]
X2 = sim3[501:1000,]

set.seed(12345)
Vb <- vb(2, 40, c(10,30), c(3,5), list(c(1:10),c(11:40)))

# train HMM-VB on dataset X1 and cluster data
hmmvb <- hmmvbTrain(X1[1:40], VbStructure=Vb)
clust1 <- hmmvbClust(X1[1:40], model=hmmvb)
show(clust1)

# cluster data set X2 using HMM-VB and cluster parameters for dataset X1
clust2 <- hmmvbClust(X2[1:40], model=hmmvb, rfsClust=getClustParam(clust1))

show(clust2)

## ----fig.show='hold'----------------------------------------------------------
clust2 <- hmmvbClust(X2[1:40], model=hmmvb, rfsClust=getClustParam(clust1), control=clustControl(getlikelh = TRUE))
# we just illustrate the loglikelihood of the first 10 data points beacause the data set is huge
getLoglikehd(clust2)[1:10]

## ----fig.show='hold'----------------------------------------------------------
data("sim3")

set.seed(12345)
Vb <- vb(2, 40, c(10,30), c(3,5), list(c(1:10),c(11:40)))

# train HMM-VB
hmmvb <- hmmvbTrain(sim3[1:40], VbStructure=Vb)

# find all density modes
modes <- hmmvbFindModes(sim3[1:40], model=hmmvb)
show(modes)

# cluster density modes
mergedModes <- clustModes(modes, cutree.args = list(h=1.0))

show(mergedModes)

## ----fig5, fig.height = 5, fig.width = 5--------------------------------------
plot(mergedModes)