## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the whole document: collapse source and output into a
# single block and prefix printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----echo = FALSE-------------------------------------------------------------
# Switch off coloured console output (presumably so that no ANSI escape codes
# end up in the rendered document).
options(
    cli.num_colors = 0,
    crayon.enabled = FALSE
)

## -----------------------------------------------------------------------------
# Load the package
library(metasnf)

# Bundle the five input data frames into one data list. Every entry supplies
# the data frame itself, a name for it, the domain it belongs to, and its
# variable type. `uid` names the identifier column ("unique_id").
dl <- data_list(
    list(
        data = subc_v,
        name = "subcortical_volume",
        domain = "neuroimaging",
        type = "continuous"
    ),
    list(
        data = income,
        name = "household_income",
        domain = "demographics",
        type = "continuous"
    ),
    list(
        data = pubertal,
        name = "pubertal_status",
        domain = "demographics",
        type = "continuous"
    ),
    list(
        data = anxiety,
        name = "anxiety",
        domain = "behaviour",
        type = "ordinal"
    ),
    list(
        data = depress,
        name = "depressed",
        domain = "behaviour",
        type = "ordinal"
    ),
    uid = "unique_id"
)

# Seed the RNG immediately before building the config so that the result is
# reproducible from run to run.
set.seed(42)
sc <- snf_config(dl = dl, n_solutions = 5, max_k = 40)

## -----------------------------------------------------------------------------
# Available functions: the clustering functions stored in the config
sc$"clust_fns_list"

# Which functions will be used: the `clust_alg` column of the config's
# settings data frame (presumably one value per row, referring to an entry of
# the list printed above -- confirm against the metasnf documentation)
sc$"settings_df"$"clust_alg"

## -----------------------------------------------------------------------------
# Variant 1: only the default clustering functions
sc <- snf_config(
    dl = dl,
    n_solutions = 5,
    use_default_clust_fns = TRUE
)

sc$"clust_fns_list"

# Two algorithms provided by the package, shared by the next two variants
extra_clust_fns <- list(
    "two_cluster_spectral" = spectral_two,
    "five_cluster_spectral" = spectral_five
)

# Variant 2: the package-provided algorithms added on top of the defaults
sc <- snf_config(
    dl = dl,
    n_solutions = 5,
    use_default_clust_fns = TRUE,
    clust_fns = extra_clust_fns
)

# This list holds the default algorithms as well as the newly added ones
sc$"clust_fns_list"

# Variant 3: `use_default_clust_fns` is omitted, so the list has only the
# newly added algorithms
sc <- snf_config(
    dl = dl,
    n_solutions = 5,
    clust_fns = extra_clust_fns
)

sc$"clust_fns_list"

## -----------------------------------------------------------------------------
# Print the most recently built config
sc

## ----eval = FALSE-------------------------------------------------------------
# sol_df <- batch_snf(
#     dl = dl,
#     sc = sc
# )

## ----eval = FALSE-------------------------------------------------------------
# # Default clustering algorithm #1
# spectral_eigen <- function(similarity_matrix) {
#     estimated_n <- estimate_nclust_given_graph(
#         W = similarity_matrix,
#         NUMC = 2:10
#     )
#     nclust_estimate <- estimated_n$`Eigen-gap best`
#     solution <- SNFtool::spectralClustering(
#         similarity_matrix,
#         nclust_estimate
#     )
#     return(solution)
# }
# 
# # Default clustering algorithm #2
# spectral_rot <- function(similarity_matrix) {
#     estimated_n <- estimate_nclust_given_graph(
#         W = similarity_matrix,
#         NUMC = 2:10
#     )
#     nclust_estimate <- estimated_n$`Rotation cost best`
#     solution <- SNFtool::spectralClustering(
#         similarity_matrix,
#         nclust_estimate
#     )
#     return(solution)
# }

## -----------------------------------------------------------------------------
# Run SNF with the current data list and config, asking for the similarity
# matrices to be returned along with the solutions.
sol_df <- batch_snf(
    dl = dl,
    sc = sc,
    return_sim_mats = TRUE
)

# Extract the similarity matrices from the result as a list
similarity_matrices <- sim_mats_list(sol_df)

# How many matrices came back
length(similarity_matrices)

# Dimensions of the first one
dim(similarity_matrices[[1]])

# Your manual clustering goes here...

## ----eval = FALSE-------------------------------------------------------------
# library(dbscan)
# ## Example 1: use dbscan on the iris data set
# data(iris)
# iris <- as.matrix(iris[, 1:4])
# iris_dist <- dist(iris)
# 
# ## Find suitable DBSCAN parameters:
# ## 1. We use minPts = dim + 1 = 5 for iris. A larger value can also be used.
# ## 2. We inspect the k-NN distance plot for k = minPts - 1 = 4
# kNNdistplot(iris, minPts = 5)
# 
# ## Noise seems to start around a 4-NN distance of .7
# abline(h=.7, col = "red", lty = 2)
# 
# results <- dbscan(iris_dist, eps = 0.7, minPts = 5)
# 
# # The 1 is added to ensure that those with no cluster (cluster 0) are still
# # plotted.
# pairs(iris, col = results$cluster + 1)

## ----fig.width = 5, fig.height = 4.5------------------------------------------
library(dbscan)
library(ggplot2)

# Data list made of the gene expression and gene methylation data frames,
# with "patient_id" as the identifier column.
dl <- data_list(
    list(
        data = expression_df,
        name = "genes_1_and_2_exp",
        domain = "gene_expression",
        type = "continuous"
    ),
    list(
        data = methylation_df,
        name = "genes_1_and_2_meth",
        domain = "gene_methylation",
        type = "continuous"
    ),
    uid = "patient_id"
)

# Seed the RNG right before the config is generated
set.seed(42)
sc <- snf_config(dl = dl, n_solutions = 5)

# Run SNF and keep the similarity matrices for manual clustering
sol_df <- batch_snf(dl = dl, sc = sc, return_sim_mats = TRUE)

similarity_matrices <- sim_mats_list(sol_df)

# The first matrix is the one clustered with DBSCAN below
representative_sm <- similarity_matrices[[1]]

representative_sms <- similarity_matrices[c(1, 2)]

# DBSCAN works on distances: convert each similarity to a distance by
# subtracting it from the largest similarity in the matrix.
largest_similarity <- max(representative_sm)
distance_matrix1 <- as.dist(largest_similarity - representative_sm)

# DBSCAN parameters, defined once so that the diagnostic plot, its guide line
# and the clustering call stay in agreement.
dbscan_min_pts <- 10
dbscan_eps <- 0.4872

kNNdistplot(distance_matrix1, minPts = dbscan_min_pts)
## Tentative cut-off on the k-NN distance curve, reused as eps below
abline(h = dbscan_eps, col = "red", lty = 2)

dbscan_fit <- dbscan(
    distance_matrix1,
    eps = dbscan_eps,
    minPts = dbscan_min_pts
)
dbscan_results <- dbscan_fit$"cluster"

# Take the first solution, transpose it, and keep the second column of the
# transposed result as the spectral clustering assignments.
first_solution <- sol_df[1, ]
spectral_results <- t(first_solution)[, 2]

# One row per observation: DBSCAN label next to spectral clustering label
dbscan_vs_spectral <- data.frame(
    dbscan = dbscan_results,
    spectral = spectral_results
)

# Jitter the points slightly so that observations sharing the same pair of
# labels do not sit exactly on top of one another.
ggplot(
    data = dbscan_vs_spectral,
    mapping = aes(x = dbscan, y = spectral)
) +
    geom_jitter(height = 0.1, width = 0.1, alpha = 0.5) +
    theme_bw()

## ----eval = FALSE-------------------------------------------------------------
# for (i in seq(0.485, 0.488, by = 0.0001)) {
#     results <- dbscan(distance_matrix1, eps = i, minPts = 10)
#     if (length(unique(results$"cluster")) == 3) {
#         print(i)
#     }
# }