## ----include = FALSE----------------------------------------------------------
# Chunk options applied to every chunk of the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  out.width = "60%",
  comment = "#>"
)

## ----table, echo=FALSE, results='asis'----------------------------------------
# Overview table of the functions covered below. One row per function; an
# empty string in the first two columns continues the group started above it.
# All column vectors must stay the same length (11 entries).
knitr::kable(
  data.frame(
    clusType = c(
      "Partitioning (K-centroids)", "", "", "", "", "", "",
      "Model-based", "", "", ""
    ),
    Funtype = c(
      "distance", "", "",
      "centroid", "", "",
      "wrapper",
      "driver", "", "", ""
    ),
    # The beta-binomial driver is listed under the name that is actually
    # called in the `ordinal_m2` chunk (`FLXMCregbetabinom`).
    Fun = c(
      "`distSimMatch`",
      "`distGDM2`",
      "`distGower`",
      "`centMode`",
      "`centMin`",
      "`centOptimNA`",
      "`kccaExtendedFamily`",
      "`FLXMCregnorm`",
      "`FLXMCregmultinom`",
      "`FLXMCregbinom`",
      "`FLXMCregbetabinom`"
    ),
    Method = c(
      "Simple Matching Distance",
      "GDM2 distance for ordinal data",
      "Gower's distance",
      "Mode as centroid",
      "Factor level with minimal distance as centroid",
      "Centroid calculation by general purpose optimizer",
      "Creates a `kccaFamily` object pre-configured for kModes-, kGDM2- or kGower clustering",
      "Regularized multivariate normal distribution",
      "Regularized multivariate multinomial distribution",
      "Regularized multivariate binomial distribution",
      "Regularized multivariate beta-binomial distribution"
    ),
    Scale = c(
      "nominal",
      "ordinal",
      "mixed-with-ordinal",
      "nominal",
      "nominal/ordinal",
      "numeric",
      "",
      "numeric",
      "nominal",
      "ordinal",
      "ordinal"
    ),
    NAs = c(
      "not implemented",
      "not implemented",
      "upweighing of present variables",
      "not implemented",
      "not implemented",
      "complete-case analysis",
      "",
      "not implemented",
      "not implemented",
      "not implemented",
      "not implemented"
    ),
    Source = c(
      "@kaufman_finding_1990, p. 19",
      "@walesiak_finding_2010; @ernst_ordinal_2025",
      "@kaufman_finding_1990, p. 32-37",
      "@weihs_klaR_2005; @leisch_toolbox_2006",
      "@ernst_ordinal_2025",
      "@leisch_toolbox_2006",
      "",
      "@fraley2007bayesian; @ernst_ordinal_2025",
      "@galindo2006avoiding; @ernst_ordinal_2025",
      "@ernst_ordinal_2025",
      "@kondofersky2008; @ernst_ordinal_2025"
    )
  ),
  format = "html",
  escape = FALSE,
  col.names = c(
    "Clustering Type", "Function Type", "Function Name", "Method",
    "Scale Assumptions", "NA Handling", "Source"
  )
)

## ----setup, message=FALSE-----------------------------------------------------
library("flexord")
library("flexclust")
library("flexmix")
# Fixed seed so that the random initialisations and the column sampling in
# the `mixed_1` chunk are reproducible.
set.seed(1111)

## ----nominal_1----------------------------------------------------------------
# Expand the aggregated contingency table to one row per observation: each
# row is repeated `Freq` times, then the `Freq` column itself is dropped.
titanic_df <- data.frame(Titanic)
titanic_df <- titanic_df[
  rep(seq_len(nrow(titanic_df)), titanic_df$Freq),
  names(titanic_df) != "Freq"
]
str(titanic_df)

## ----nominal_p2---------------------------------------------------------------
kcca(titanic_df, k = 4, family = kccaExtendedFamily("kModes"))

## ----nominal_p3---------------------------------------------------------------
# Same distance as above, but with a hand-built family that uses `centMin`
# as the centroid function.
kcca(
  titanic_df,
  k = 4,
  family = kccaFamily(
    dist = distSimMatch,
    cent = \(y) centMin(y, dist = distSimMatch, xrange = "columnwise")
  )
)

## ----nominal_p4---------------------------------------------------------------
# Integer-coded matrix version of the data, reused by the remaining
# nominal-data chunks.
titanic_dm <- data.matrix(titanic_df)
stepFlexclust(
  titanic_dm,
  k = 2:4,
  nrep = 1,
  family = kccaExtendedFamily("kModes")
)

## ----nominal_p5---------------------------------------------------------------
(nom <- bootFlexclust(
  titanic_dm,
  k = 2:4,
  nrep = 1,
  nboot = 5,
  family = kccaExtendedFamily("kModes")
))

## ----nominal_p6---------------------------------------------------------------
plot(nom)

## ----nominal_m2---------------------------------------------------------------
# Largest integer code per column, passed to the driver as `r`.
titanic_ncats <- apply(titanic_dm, 2, max)
flexmix(
  formula = titanic_dm ~ 1,
  k = 3,
  model = FLXMCregmultinom(r = titanic_ncats)
)

## ----nominal_m3---------------------------------------------------------------
flexmix(
  titanic_dm ~ 1,
  k = 3,
  model = FLXMCregmultinom(r = titanic_ncats, alpha = 1)
)

## ----nominal_m4---------------------------------------------------------------
(nom <- stepFlexmix(
  titanic_dm ~ 1,
  k = 2:4,
  nrep = 1, # please increase for real-life use
  model = FLXMCregmultinom(r = titanic_ncats)
))

## ----nominal_m5---------------------------------------------------------------
plot(nom)

## ----ordinal_1----------------------------------------------------------------
data("risk", package = "flexord")
str(risk)
colnames(risk)

## ----ordinal_p2---------------------------------------------------------------
kcca(risk, k = 3, family = kccaExtendedFamily("kGower"))

## ----ordinal_p3---------------------------------------------------------------
kcca(risk, k = 3, family = kccaExtendedFamily("kGower", cent = centMedian))

## ----ordinal_p4---------------------------------------------------------------
kcca(risk, k = 3, family = kccaExtendedFamily("kGDM2"))

## ----ordinal_p5---------------------------------------------------------------
# NOTE(review): the binomial example in `ordinal_m2` models `risk - 1` with
# `size = 4`, i.e. five response levels; confirm that an upper bound of 6 is
# intended for `xrange` here.
kcca(risk, k = 3, family = kccaExtendedFamily("kGDM2", xrange = c(1, 6)))

## ----ordinal_m2---------------------------------------------------------------
# Shift the responses by one so that they start at 0 for the (beta-)binomial
# drivers.
risk1 <- risk - 1
flexmix(risk1 ~ 1, k = 3, model = FLXMCregbinom(size = 4))
flexmix(risk1 ~ 1, k = 3, model = FLXMCregbetabinom(size = 4, alpha = 1))

## ----numerical----------------------------------------------------------------
params <- FLXMCregnorm_defaults(risk, kappa_p = 0.1, k = 3)
flexmix(risk ~ 1, k = 3, model = FLXMCregnorm(params = params))

## ----mixed_1------------------------------------------------------------------
# Build a mixed-type data set: five descriptor columns plus three randomly
# chosen motive columns converted to logical.
data("vacmot", package = "flexclust")
vacmot2 <- cbind(vacmotdesc, apply(vacmot, 2, as.logical))
vacmot2 <- vacmot2[, c(
  "Gender", "Age", "Income2", "Relationship.Status", "Vacation.Behaviour",
  sample(colnames(vacmot), 3, replace = FALSE)
)]
vacmot2$Income2 <- as.ordered(vacmot2$Income2)
str(vacmot2)
# Percentage of missing values per column.
colMeans(is.na(vacmot2)) * 100

## ----mixed_2------------------------------------------------------------------
kcca(
  vacmot2,
  k = 3,
  family = kccaExtendedFamily("kGower"),
  control = list(iter.max = 5)
)

## ----mixed_3------------------------------------------------------------------
colnames(vacmot2)
# One distance per column, in the column order printed above. The variable
# types of the descriptor columns are taken from the flexclust documentation
# of `vacmotdesc`; check them against the `str(vacmot2)` output.
xmthds <- c(
  "distSimMatch",            # Gender (nominal)
  rep("distManhattan", 2),   # Age (numeric), Income2 (ordered)
  "distSimMatch",            # Relationship.Status (nominal)
  "distManhattan",           # Vacation.Behaviour (numeric)
  rep("distEuclidean", 3)    # three sampled logical motive columns
)
kcca(
  vacmot2,
  k = 3,
  family = kccaExtendedFamily("kGower", xmethods = xmthds),
  control = list(iter.max = 5)
)