## ----include = FALSE----------------------------------------------------------
# Default knitr options applied to every chunk of the vignette.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", out.width = "60%")

## ----table, echo=FALSE, results='asis'----------------------------------------
# Overview table of the functions covered in this document, one row per
# function. All columns are parallel character vectors of length 11; empty
# strings are used where a cell repeats the value above it or does not apply.
# The table is emitted as HTML with escape = FALSE so that the backticks and
# the `@key` citation markers in the cells are passed through unescaped.
knitr::kable(
  data.frame(
    clusType = c("Partitioning (K-centroids)", "", "", "", "", "", "", "Model-based", "", "", ""),
    Funtype = c("distance", "", "", "centroid", "", "", "wrapper", "driver", "", "", ""),
    # Function names must match the identifiers actually called further down
    # in this script (e.g. `FLXMCregbetabinom` in the ordinal model chunk).
    Fun = c(
      "`distSimMatch`",
      "`distGDM2`",
      "`distGower`",
      "`centMode`",
      "`centMin`",
      "`centOptimNA`",
      "`kccaExtendedFamily`",
      "`FLXMCregnorm`",
      "`FLXMCregmultinom`",
      "`FLXMCregbinom`",
      "`FLXMCregbetabinom`"
    ),
    Method = c(
      "Simple Matching Distance", 
      "GDM2 distance for ordinal data",
      "Gower's distance",
      "Mode as centroid",
      "Factor level with minimal distance as centroid",
      "Centroid calculation by general purpose optimizer",
      "Creates a `kccaFamily` object pre-configured for kModes-, kGDM2- or kGower clustering",
      "Regularized multivariate normal distribution",
      "Regularized multivariate multinomial distribution",
      "Regularized multivariate binomial distribution",
      "Regularized multivariate beta-binomial distribution"
    ),
    Scale = c(
      "nominal", 
      "ordinal",
      "mixed-with-ordinal",
      "nominal", 
      "nominal/ordinal",
      "numeric",
      "",
      "numeric", 
      "nominal",
      "ordinal",
      "ordinal"
    ),
    NAs = c(
      "not implemented", 
      "not implemented",
      "upweighing of present variables",
      "not implemented", 
      "not implemented",
      "complete-case analysis",
      "",
      "not implemented", 
      "not implemented",
      "not implemented", 
      "not implemented"
    ),
    Source = c(
      "@kaufman_finding_1990, p. 19",
      "@walesiak_finding_2010; @ernst_ordinal_2025",
      "@kaufman_finding_1990, p. 32-37", 
      "@weihs_klaR_2005; @leisch_toolbox_2006",
      "@ernst_ordinal_2025",
      "@leisch_toolbox_2006",
      "",
      "@fraley2007bayesian; @ernst_ordinal_2025",
      "@galindo2006avoiding; @ernst_ordinal_2025",
      "@ernst_ordinal_2025",
      "@kondofersky2008; @ernst_ordinal_2025"
    )
  ), 
  format = "html", 
  escape = FALSE, 
  # Display headers, in the same order as the data.frame columns above.
  col.names = c(
    "Clustering Type", "Function Type", "Function Name", 
    "Method", "Scale Assumptions", "NA Handling", "Source"
  )
)

## ----setup, message=FALSE-----------------------------------------------------
# Attach the packages used below; the attach order is kept as-is because it
# determines which package masks which.
library(flexord)
library(flexclust)
library(flexmix)

# Fix the random number generator state so that every call below that draws
# random numbers (e.g. the sample() call in the mixed-data chunk) is repeatable.
set.seed(1111)

## ----nominal_1----------------------------------------------------------------
# Turn the aggregated `Titanic` contingency table into one row per person.
# data.frame() on the table yields the four factor columns plus a `Freq` count.
titanic_df <- data.frame(Titanic)

# Repeat every row index `Freq` times, then drop the fifth column (`Freq`).
# seq_len() replaces 1:nrow() so a zero-row input cannot produce the
# backwards sequence c(1, 0).
titanic_df <- titanic_df[rep(seq_len(nrow(titanic_df)), titanic_df$Freq), -5]
str(titanic_df)

## ----nominal_p2---------------------------------------------------------------
# Partition the nominal data into four clusters using the ready-made
# "kModes" family.
kcca(x = titanic_df, k = 4, family = kccaExtendedFamily("kModes"))

## ----nominal_p3---------------------------------------------------------------
# Same number of clusters, but with a hand-assembled family: simple matching
# distance combined with centMin() as the centroid function, which is given
# the same distance and a column-wise `xrange`.
kcca(
  x = titanic_df,
  k = 4,
  family = kccaFamily(
    dist = distSimMatch,
    cent = \(y) centMin(y, dist = distSimMatch, xrange = "columnwise")
  )
)

## ----nominal_p4---------------------------------------------------------------
# Numeric matrix version of the data; reused by the calls below.
titanic_dm <- data.matrix(titanic_df)

# Run the kModes family once (nrep = 1) for each k in 2, 3, 4.
stepFlexclust(
  titanic_dm,
  k = 2:4,
  nrep = 1,
  family = kccaExtendedFamily("kModes")
)

## ----nominal_p5---------------------------------------------------------------
# Bootstrap version of the same comparison with five bootstrap samples; the
# result is stored and then printed.
nom <- bootFlexclust(
  titanic_dm,
  k = 2:4,
  nrep = 1,
  nboot = 5,
  family = kccaExtendedFamily("kModes")
)
nom

## ----nominal_p6---------------------------------------------------------------
plot(nom)

## ----nominal_m2---------------------------------------------------------------
# Column-wise maximum of the numeric matrix, passed to the driver as `r`.
titanic_ncats <- apply(titanic_dm, MARGIN = 2, FUN = max)

# Three-component mixture with the multinomial driver at its default settings.
flexmix(
  formula = titanic_dm ~ 1,
  k = 3,
  model = FLXMCregmultinom(r = titanic_ncats)
)

## ----nominal_m3---------------------------------------------------------------
# Same model, this time with the driver's `alpha` argument set to 1.
flexmix(
  titanic_dm ~ 1,
  k = 3,
  model = FLXMCregmultinom(r = titanic_ncats, alpha = 1)
)

## ----nominal_m4---------------------------------------------------------------
# Fit the model for k = 2, 3, 4; the result replaces `nom` and is printed.
nom <- stepFlexmix(
  titanic_dm ~ 1,
  k = 2:4,
  nrep = 1, # please increase for real-life use
  model = FLXMCregmultinom(r = titanic_ncats)
)
nom

## ----nominal_m5---------------------------------------------------------------
plot(nom)

## ----ordinal_1----------------------------------------------------------------
# Load the `risk` data set shipped with flexord and inspect its structure
# and column names.
data(list = "risk", package = "flexord")
str(risk)
colnames(risk)

## ----ordinal_p2---------------------------------------------------------------
# Three clusters with the pre-configured "kGower" family.
kcca(x = risk, k = 3, family = kccaExtendedFamily("kGower"))

## ----ordinal_p3---------------------------------------------------------------
# Same family, but with centMedian supplied as the centroid function.
kcca(
  x = risk,
  k = 3,
  family = kccaExtendedFamily("kGower", cent = centMedian)
)

## ----ordinal_p4---------------------------------------------------------------
# Three clusters with the pre-configured "kGDM2" family.
kcca(x = risk, k = 3, family = kccaExtendedFamily("kGDM2"))

## ----ordinal_p5---------------------------------------------------------------
# kGDM2 clustering with an explicitly supplied response range instead of the
# family default.
# The range is c(1, 5): the model-based chunk below shifts the same data by
# one and uses `size = 4`, i.e. it treats the responses as 1..5. The former
# upper bound of 6 contradicted that.
# NOTE(review): confirm the 1-5 scale against the `risk` data documentation.
kcca(risk, k = 3,
     family = kccaExtendedFamily('kGDM2', xrange = c(1, 5)))

## ----ordinal_m2---------------------------------------------------------------
# Shift every response down by one before fitting.
# NOTE(review): presumably so the lowest category is 0 and `size = 4` matches
# the largest shifted value — confirm against the driver documentation.
risk1 <- risk - 1

# Three-component mixture with the binomial driver.
flexmix(
  risk1 ~ 1,
  k = 3,
  model = FLXMCregbinom(size = 4)
)

# Three-component mixture with the beta-binomial driver and alpha = 1.
flexmix(
  risk1 ~ 1,
  k = 3,
  model = FLXMCregbetabinom(size = 4, alpha = 1)
)

## ----numerical----------------------------------------------------------------
# Build the parameter object for the normal driver from the unshifted data,
# with kappa_p = 0.1 and the same k that is used in the fit below.
params <- FLXMCregnorm_defaults(risk, kappa_p = 0.1, k = 3)

# Three-component mixture using those parameters.
flexmix(
  risk ~ 1,
  k = 3,
  model = FLXMCregnorm(params = params)
)

## ----mixed_1------------------------------------------------------------------
# Load the data set from flexclust; the code below uses both `vacmot` and
# `vacmotdesc`.
data(list = "vacmot", package = "flexclust")

# Append the `vacmot` columns, converted to logical, to the descriptive
# variables.
vacmot2 <- cbind(vacmotdesc, apply(vacmot, 2, as.logical))

# Keep five fixed descriptive columns plus three `vacmot` columns drawn at
# random without replacement.
desc_cols <- c(
  "Gender", "Age", "Income2", "Relationship.Status", "Vacation.Behaviour"
)
motive_cols <- sample(colnames(vacmot), 3, replace = FALSE)
vacmot2 <- vacmot2[, c(desc_cols, motive_cols)]

# Make sure Income2 is an ordered factor.
vacmot2$Income2 <- as.ordered(vacmot2$Income2)
str(vacmot2)

# Percentage of missing values per column.
100 * colMeans(is.na(vacmot2))

## ----mixed_2------------------------------------------------------------------
# Three clusters on the mixed-type data with the default "kGower" family,
# limited to five iterations.
kcca(
  x = vacmot2,
  k = 3,
  family = kccaExtendedFamily("kGower"),
  control = list(iter.max = 5)
)

## ----mixed_3------------------------------------------------------------------
# Print the column order for reference.
colnames(vacmot2)
# One distance name per column of `vacmot2` (eight in total): simple matching
# once, Manhattan three times, simple matching once, Euclidean three times.
# NOTE(review): assuming these are matched to columns by position, column 4
# (Relationship.Status) gets distManhattan and column 5 (Vacation.Behaviour)
# gets distSimMatch — confirm this is the intended assignment.
xmthds <- rep(
  c("distSimMatch", "distManhattan", "distSimMatch", "distEuclidean"),
  times = c(1, 3, 1, 3)
)
# Same clustering as with the default "kGower" family, but with the
# per-variable distances overridden through `xmethods`; again limited to five
# iterations.
kcca(
  x = vacmot2,
  k = 3,
  family = kccaExtendedFamily("kGower", xmethods = xmthds),
  control = list(iter.max = 5)
)