## ----setup, class.source='fold-hide', warning=FALSE---------------------------
#install.packages("knitr")
library(knitr)
opts_chunk$set(echo = TRUE)

# To ensure reproducibility
set.seed(12)

## ----packages_installation, eval=FALSE, class.source='fold-hide'--------------
# pkgs <- c("ggplot2", "ggrepel", "DT", "psych", "ConsensusOPLS")
# sapply(pkgs, function(x) {
#   if (!requireNamespace(x, quietly = TRUE)) {
#     install.packages(x)
#   }
# })
# if (!require("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
#   BiocManager::install("ComplexHeatmap")
# }

## ----packages_load, warning=FALSE, message=FALSE, class.source='fold-hide'----
library(ggplot2)        # to make beautiful graphs
library(ggrepel)        # to annotate ggplot2 graph
library(DT)             # to make interactive data tables
library(psych)          # to make specific quantitative summaries
library(ComplexHeatmap) # to make heatmap with density plot
library(ConsensusOPLS)  # to load ConsensusOPLS

## ----theme_ggplot2------------------------------------------------------------
require(ggplot2)
theme_graphs <- theme_bw() + theme(strip.text = element_text(size=14),
                                   axis.title = element_text(size=16),
                                   axis.text = element_text(size=14),
                                   plot.title = element_text(size=16),
                                   legend.title = element_text(size=14))

## ----import_demo_3_Omics------------------------------------------------------
data(demo_3_Omics, package = "ConsensusOPLS")

## ----check_dims, class.source='fold-hide'-------------------------------------
# Check dimension
BlockNames <- c("MetaboData", "MicroData", "ProteoData")
nbrBlocs <- length(BlockNames)
dims <- lapply(X=demo_3_Omics[BlockNames], FUN=dim)
names(dims) <- BlockNames
dims

# Remove unuseful object for the next steps
rm(dims)

## ----check_orders_and_names, class.source='fold-hide'-------------------------
# Check rows names in any order
row_names <- lapply(X=demo_3_Omics[BlockNames], FUN=rownames)
rns <- do.call(cbind, row_names)
rns.unique <- apply(rns, 1, function(x) length(unique(x)))
if (max(rns.unique) > 1) {
  stop("Rows names are not identical between blocks.")
}

# Check order of samples
check_row_names <- all(sapply(X=row_names, FUN=identical, y = row_names[[1]]))
if (!check_row_names && max(rns.unique) == 1) {
  print("Rows names are not in the same order for all blocks.")
}

# Remove unuseful object for the next steps
rm(row_names, rns, rns.unique, check_row_names)

## ----describe_data_by_Y_function, class.source='fold-hide'--------------------
require(psych)
require(DT)
describe_data_by_Y <- function(data, group) {
  bloc_by_Y <- describeBy(x = data, group = group,
                          mat = TRUE)[, c("group1", "n", "mean", "sd",
                                          "median", "min", "max", "range",
                                          "se")]
  bloc_by_Y[3:ncol(bloc_by_Y)] <- round(bloc_by_Y[3:ncol(bloc_by_Y)], 
                                        digits = 2)
  return (datatable(bloc_by_Y))
}

## ----describe_data_by_Y_bloc1-------------------------------------------------
describe_data_by_Y(data = demo_3_Omics[[BlockNames[1]]],
                   group = demo_3_Omics$ObsNames[,1])

## ----describe_data_by_Y_bloc2-------------------------------------------------
describe_data_by_Y(data = demo_3_Omics[[BlockNames[2]]],
                   group = demo_3_Omics$ObsNames[,1])

## ----describe_data_by_Y_bloc3-------------------------------------------------
describe_data_by_Y(data = demo_3_Omics[[BlockNames[3]]],
                   group = demo_3_Omics$ObsNames[,1])

## ----scale_data, class.source='fold-hide'-------------------------------------
# Save not scaled data
demo_3_Omics_not_scaled <- demo_3_Omics

# Scaling data
demo_3_Omics[BlockNames] <- lapply(X = demo_3_Omics[BlockNames], 
                                   FUN = function(x){
                                     scale(x, center = TRUE, scale = TRUE)
                                   }
)

## ----heatmap_function, message = FALSE, class.source='fold-hide'--------------
heatmap_data <- function(data, bloc_name, factor = NULL){
  if(!is.null(factor)){
    ht <- Heatmap(
      matrix = data, name = "Values",
      row_dend_width = unit(3, "cm"),
      column_dend_height = unit(3, "cm"),
      column_title = paste0("Heatmap of ", bloc_name),
      row_split = factor,
      row_title = "Y = %s",
      row_title_rot = 0
    )
  } else{
    ht <- Heatmap(
      matrix = data, name = "Values",
      row_dend_width = unit(3, "cm"),
      column_dend_height = unit(3, "cm"),
      column_title = paste0("Heatmap of ", bloc_name)
    )
  }
  return(ht)
}

## ----heatmap_no_scale, message = FALSE, class.source='fold-hide'--------------
# Heat map for each data block
lapply(X = 1:nbrBlocs,
       FUN = function(X){
         bloc <- BlockNames[X]
         heatmap_data(data = demo_3_Omics_not_scaled[[bloc]],
                      bloc_name = bloc,
                      factor = demo_3_Omics_not_scaled$Y[,1])})

## ----heatmap_scale, message = FALSE, class.source='fold-hide'-----------------
# Heat map for each data block
lapply(X = 1:nbrBlocs,
       FUN = function(X){
         bloc <- BlockNames[X]
         heatmap_data(data = demo_3_Omics[[bloc]],
                      bloc_name = bloc,
                      factor = demo_3_Omics$Y[,1])})

## ----heatmap_density, class.source='fold-hide'--------------------------------
# Heatmap with density for each data bloc
lapply(X = 1:nbrBlocs,
       FUN = function(X){
         bloc <- BlockNames[X]
         factor <- demo_3_Omics$Y[, 1]
         densityHeatmap(t(demo_3_Omics[[bloc]]),
                        ylab = bloc,
                        column_split  = factor,
                        column_title = "Y = %s")})

## ----rm_unscale_data, class.source='fold-hide'--------------------------------
# Remove unscaled data
rm(demo_3_Omics_not_scaled)

## ----define_cv_parameters-----------------------------------------------------
# Number of predictive component(s)
LVsPred <- 1

# Maximum number of orthogonal components
LVsOrtho <- 3

# Number of cross-validation folds
CVfolds <- nrow(demo_3_Omics[[BlockNames[[1]]]])
CVfolds

## ----run_consensusOPLSmodel---------------------------------------------------
copls.da <- ConsensusOPLS(data = demo_3_Omics[BlockNames],
                          Y = demo_3_Omics$Y,
                          maxPcomp = LVsPred,
                          maxOcomp  = LVsOrtho,
                          modelType = "da",
                          nperm = 1000,
                          cvType = "nfold",
                          nfold = 14,
                          nMC = 100,
                          cvFrac = 4/5,
                          kernelParams = list(type = "p", 
                                              params = c(order = 1)),
                          mc.cores = 1)

## ----outputs_model_summary, class.source='fold-hide'--------------------------
copls.da

## ----outputs_model_attributes, class.source='fold-hide'-----------------------
summary(attributes(copls.da))

## ----print_main_results_R2, class.source='fold-hide'--------------------------
position <- copls.da@nOcomp

paste0('R2: ', round(copls.da@R2Y[paste0("po", position)], 4))

## ----print_main_results_Q2, class.source='fold-hide'--------------------------
paste0('Q2: ', round(copls.da@Q2[paste0("po", position)], 4))

## ----print_main_results_DQ2, class.source='fold-hide'-------------------------
paste0('DQ2: ', round(copls.da@DQ2[paste0("po", position)], 4))

## ----extract_VIP, class.source='fold-hide'------------------------------------
# Compute the VIP
VIP <- copls.da@VIP

# Multiply VIP * sign(loadings for predictive component)
VIP_plot <- lapply(X = 1:nbrBlocs,
                   FUN = function(X){
                     sign_loadings <- sign(copls.da@loadings[[X]][, "p_1"])
                     result <- VIP[[X]][, "p"]*sign_loadings
                     return(sort(result, decreasing = TRUE))})
names(VIP_plot) <- BlockNames

## ----plot_VIP, class.source='fold-hide'---------------------------------------
# Metabo data
ggplot(data = data.frame(
  "variables" = factor(names(VIP_plot[[1]]),
                       levels=names(VIP_plot[[1]])[order(abs(VIP_plot[[1]]), 
                                                         decreasing=T)]), 
  "valeur" = VIP_plot[[1]]), 
  aes(x = variables, y = valeur)) +
  geom_bar(stat = "identity") +
  labs(title = paste0("Barplot of ", names(VIP_plot)[1])) +
  xlab("Predictive variables") +
  ylab("VIP x loading sign") +
  theme_graphs +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) 

# Microarray data
ggplot(data = data.frame(
  "variables" = factor(names(VIP_plot[[2]]),
                       levels=names(VIP_plot[[2]])[order(abs(VIP_plot[[2]]), 
                                                         decreasing=T)]), 
  "valeur" = VIP_plot[[2]]), 
  aes(x = variables, y = valeur)) +
  geom_bar(stat = "identity") +
  labs(title = paste0("Barplot of ", names(VIP_plot)[2])) +
  xlab("Predictive variables") +
  ylab("VIP x loading sign") +
  theme_graphs +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) 

# Proteo data
ggplot(data = data.frame(
  "variables" = factor(names(VIP_plot[[3]]),
                       levels=names(VIP_plot[[3]])[order(abs(VIP_plot[[3]]), 
                                                         decreasing=T)]), 
  "valeur" = VIP_plot[[3]]), 
  aes(x = variables, y = valeur)) +
  geom_bar(stat = "identity") +
  labs(title = paste0("Barplot of ", names(VIP_plot)[3])) +
  xlab("Predictive variables") +
  ylab("VIP x loading sign") +
  theme_graphs +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) 

## ----ggplot_score_data, class.source='fold-hide', warning=FALSE---------------
ggplot(data = data.frame("p_1" = copls.da@scores[, "p_1"],
                         "o_1" = copls.da@scores[, "o_1"],
                         "Labs" = as.matrix(unlist(demo_3_Omics$ObsNames[, 1]))),
       aes(x = p_1, y = o_1, label = Labs, 
           shape = Labs, colour = Labs)) +
  xlab("Predictive component") +
  ylab("Orthogonal component") +
  ggtitle("ConsensusOPLS Score plot")+
  geom_point(size = 2.5) + 
  geom_text_repel(size = 4, show.legend = FALSE) + 
  theme_graphs+
  scale_color_manual(values = c("#7F3C8D", "#11A579"))

## ----ggplot_data_pred_compo, class.source='fold-hide'-------------------------
ggplot(
  data = data.frame("Values" = copls.da@blockContribution[, "p_1"],
                    "Blocks" = as.factor(labels(demo_3_Omics[1:nbrBlocs]))),
  aes(x = Blocks, y = Values,
      fill = Blocks, labels = Values)) +
  geom_bar(stat = 'identity') + 
  geom_text(aes(label = round(Values, 2), y = Values), 
            vjust = 1.5, color = "black", fontface = "bold") +
  ggtitle("Block contributions to the predictive component")+
  xlab("Data blocks") +
  ylab("Weight") +
  theme_graphs +
  scale_color_manual(values = c("#1B9E77", "#D95F02", "#7570B3"))

## ----ggplot_data_1st_ortho_compo, class.source='fold-hide'--------------------
ggplot(
  data = 
    data.frame("Values" = copls.da@blockContribution[, "o_1"],
               "Blocks" = as.factor(labels(demo_3_Omics[1:nbrBlocs]))),
  aes(x = Blocks, y = Values,
      fill = Blocks, labels = Values)) +
  geom_bar(stat = 'identity') + 
  geom_text(aes(label = round(Values, 2), y = Values), 
            vjust = 1.5, color = "black", fontface = "bold") +
  ggtitle("Block contributions to the first orthogonal component") +
  xlab("Data blocks") +
  ylab("Weight") +
  theme_graphs +
  scale_color_manual(values = c("#1B9E77", "#D95F02", "#7570B3"))

## ----ggplot_data_pred_ortho, message = FALSE, class.source='fold-hide'--------
data_two_plots <- data.frame("Values" = copls.da@blockContribution[, "p_1"],
                             "Type" = "Pred",
                             "Blocks" = labels(demo_3_Omics[1:nbrBlocs]))
data_two_plots <- data.frame("Values" = c(data_two_plots$Values,
                                          copls.da@blockContribution[, "o_1"]),
                             "Type" = c(data_two_plots$Type,
                                        rep("Ortho", times = length(copls.da@blockContribution[, "o_1"]))),
                             "Blocks" = c(data_two_plots$Blocks,
                                          labels(demo_3_Omics[1:nbrBlocs])))

ggplot(data = data_two_plots,
       aes(x = factor(Type), 
           y = Values, 
           fill = factor(Type))) +
  geom_bar(stat = 'identity') + 
  ggtitle("Block contributions to each component")+
  geom_text(aes(label = round(Values, 2), y = Values), 
            vjust = 1.5, color = "black", fontface = "bold") +
  xlab("Data blocks") +
  ylab("Weight") +
  facet_wrap(. ~ Blocks)+
  theme_graphs+
  scale_fill_discrete(name = "Component")+
  scale_fill_manual(values = c("#7F3C8D", "#11A579"))

## ----plot_bloc_PredVSOrtho_bis, message = FALSE, class.source='fold-hide'-----
ggplot(data = data_two_plots,
       aes(x = Blocks, 
           y = Values, 
           fill = Blocks)) +
  geom_bar(stat = 'identity') +
  geom_text(aes(label = round(Values, 2), y = Values), 
            vjust = 1.5, color = "black", fontface = "bold") +
  ggtitle("Block contributions to each component") +
  xlab("Components") +
  ylab("Weight") +
  facet_wrap(. ~ factor(Type, levels = c("Pred", "Ortho"))) +
  theme_graphs +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        plot.title = element_text(hjust = 0.5, 
                                  margin = margin(t = 5, r = 0, b = 0, l = 100))) +
  scale_fill_manual(values = c("#1B9E77", "#D95F02", "#7570B3"))

## ----ggplot_data_pred_vs_ortho, message = FALSE, warning = FALSE, class.source='fold-hide'----
ggplot(data = data.frame("Pred" = copls.da@blockContribution[, "p_1"],
                         "Ortho" = copls.da@blockContribution[, "o_1"],
                         "Labels" = labels(demo_3_Omics[1:nbrBlocs])),
       aes(x = Pred, y = Ortho, label = Labels, 
           shape = Labels, colour = Labels)) +
  xlab("Predictive component") +
  ylab("Orthogonal component") +
  ggtitle("Block contributions predictive vs. orthogonal") +
  geom_point(size = 2.5) + 
  geom_text_repel(size = 4, show.legend = FALSE) + 
  theme_graphs +
  scale_color_manual(values = c("#1B9E77", "#D95F02", "#7570B3"))

## ----create_data_loadings-----------------------------------------------------
loadings <- copls.da@loadings
data_loads <- sapply(X = 1:nbrBlocs,
                     FUN = function(X){
                       data.frame("Pred" = 
                                    loadings[[X]][, grep(pattern = "p_",
                                                         x = colnames(loadings[[X]]),
                                                         fixed = TRUE)],
                                  "Ortho" = 
                                    loadings[[X]][, grep(pattern = "o_",
                                                         x = colnames(loadings[[X]]),
                                                         fixed = TRUE)],
                                  "Labels" = labels(demo_3_Omics[1:nbrBlocs])[[X]])
                     })
data_loads <- as.data.frame(data_loads)

## ----ggplot_data_loadings, class.source='fold-hide'---------------------------
ggplot() +
  geom_point(data = as.data.frame(data_loads$V1),
             aes(x = Pred, y = Ortho, colour = Labels), 
             size = 2.5, alpha = 0.5) + 
  geom_point(data = as.data.frame(data_loads$V2),
             aes(x = Pred, y = Ortho, colour = Labels),
             size = 2.5, alpha = 0.5) +
  geom_point(data = as.data.frame(data_loads$V3),
             aes(x = Pred, y = Ortho, colour = Labels),
             size = 2.5, alpha = 0.5) +
  xlab("Predictive component") +
  ylab("Orthogonal component") +
  ggtitle("Loadings plot on first orthogonal and predictive component")+
  theme_graphs+
  scale_color_manual(values = c("#1B9E77", "#D95F02", "#7570B3"))

## ----create_data_loadings_VIP-------------------------------------------------
loadings <- do.call(rbind.data.frame, copls.da@loadings)
loadings$block <- do.call(c, lapply(names(copls.da@loadings), function(x) 
  rep(x, nrow(copls.da@loadings[[x]]))))
loadings$variable <- gsub(paste(paste0(names(copls.da@loadings), '.'), 
                                collapse='|'), '', 
                          rownames(loadings))

VIP <- do.call(rbind.data.frame, copls.da@VIP)
VIP$block <- do.call(c, lapply(names(copls.da@VIP), function(x) 
  rep(x, nrow(copls.da@VIP[[x]]))))
VIP$variable <- gsub(paste(paste0(names(copls.da@VIP), '.'), 
                           collapse='|'), '', 
                     rownames(VIP))

loadings_VIP <- merge(x = loadings[, c("p_1", "variable")], 
                      y = VIP[, c("p", "variable")], 
                      by = "variable", all = TRUE)
colnames(loadings_VIP) <- c("variable", "loadings", "VIP")
loadings_VIP <- merge(x = loadings_VIP, 
                      y = loadings[, c("block", "variable")], 
                      by = "variable", all = TRUE)
loadings_VIP$label <- ifelse(loadings_VIP$VIP > 1, loadings_VIP$variable, NA)

## ----ggplot_data_loadings_VIP, class.source='fold-hide'-----------------------
ggplot(data = loadings_VIP,
       aes(x=loadings, y=VIP, col=block, label = label)) +
  geom_point(size = 2.5, alpha = 0.5) + 
  xlab("Predictive component") +
  ylab("Variable Importance in Projection") +
  ggtitle("VIP versus loadings on predictive components")+
  theme_graphs+
  scale_color_manual(values = c("#1B9E77", "#D95F02", "#7570B3"))

## ----run_permutations, warning=FALSE------------------------------------------
PermRes <- copls.da@permStats

## ----plot_R2_perm, warning=FALSE, class.source='fold-hide'--------------------
ggplot(data = data.frame("R2Yperm" = PermRes$R2Y),
       aes(x = R2Yperm)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30,
                 color="grey", fill="grey") +
  geom_density(color = "blue", linewidth = 0.5) +
  geom_vline(aes(xintercept=PermRes$R2Y[1]), 
             color="blue", linetype="dashed", size=1) +
  xlab("R2 values") +
  ylab("Frequency") +
  ggtitle("R2 Permutation test")+
  theme_graphs

## ----plot_Q2_perm, warning=FALSE, class.source='fold-hide'--------------------
ggplot(data = data.frame("Q2Yperm" = PermRes$Q2Y),
       aes(x = Q2Yperm)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30,
                 color="grey", fill="grey") +
  geom_density(color = "blue", size = 0.5) +
  geom_vline(aes(xintercept=PermRes$Q2Y[1]), 
             color="blue", linetype="dashed", size=1) +
  xlab("Q2 values") +
  ylab("Frequency") +
  ggtitle("Q2 Permutation test")+
  theme_graphs

## ----plot_DQ2_perm, warning=FALSE, class.source='fold-hide'-------------------
ggplot(data = data.frame("DQ2Yperm" = PermRes$DQ2Y),
       aes(x = DQ2Yperm)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30,
                 color="grey", fill="grey") +
  geom_density(color = "blue", size = 0.5) +
  geom_vline(aes(xintercept=PermRes$DQ2Y[1]), 
             color="blue", linetype="dashed", size=1) +
  xlab("DQ2 values") +
  ylab("Frequency") +
  ggtitle("DQ2 Permutation test")+
  theme_graphs

## ----prediction---------------------------------------------------------------
reprediction <- predict(copls.da, newdata = demo_3_Omics[BlockNames])

## ----prediction_output--------------------------------------------------------
reprediction$Y
reprediction$class

## ----reproducibility----------------------------------------------------------
sessionInfo()