## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  url1 <- "https://github.com/feiyoung/SpaCOAP/tree/master/vignettes_data/"
#  rna_object <- "seu_rna_over_Spleen.RDS?raw=true"
#  download.file(paste0(url1, rna_object),"seu_rna_over_Spleen.RDS",mode='wb')
#  protein_object <- "seu_adt_over_Spleen.RDS?raw=true"
#  download.file(paste0(url1,protein_object), 'seu_adt_over_Spleen.RDS', mode='wb')
#  ## download  annotation
#  download.file(paste0(url1, "cell_clusters_anno.rds?raw=true"), "cell_clusters_anno.rds", "wb")

## ----eval =FALSE--------------------------------------------------------------
#  seu_rna_over <- readRDS("./seu_rna_over_Spleen.RDS")
#  seu_adt_over <- readRDS("./seu_adt_over_Spleen.RDS")
#  load("./cell_clusters_anno.rds")

## ----eval =FALSE--------------------------------------------------------------
#  library(SpaCOAP) #

## ----eval =FALSE--------------------------------------------------------------
#  searchRadius <- function(pos, lower.med=8, upper.med=10, radius.upper= NULL){
#    if (!inherits(pos, "matrix"))
#      stop("method is only for  matrix object!")
#  
#  
#    ## Automatically determine the upper radius
#    n_spots <- nrow(pos)
#    idx <- sample(n_spots, min(100, n_spots))
#    dis <- dist(pos[idx,])
#    if(is.null(radius.upper)){
#      #radius.upper <- max(dis)
#      radius.upper <- sort(dis)[20] ## select the nearest 20 spots.
#    }
#    radius.lower <- min(dis[dis>0])
#    Adj_sp <- DR.SC:::getneighborhood_fast(pos, radius=radius.upper)
#    Med <- summary(Matrix::rowSums(Adj_sp))['Median']
#    if(Med < lower.med) stop("The radius.upper is too smaller that cannot find median neighbors greater than 4.")
#    start.radius <- 1
#    Med <- 0
#    message("Find the adjacency matrix by bisection method...")
#    maxIter <- 30
#    k <- 1
#    while(!(Med >= lower.med && Med <=upper.med)){ # ensure that each spot has about 4~6 neighborhoods in median.
#  
#      Adj_sp <- DR.SC:::getneighborhood_fast(pos, radius=start.radius)
#      Med <- summary(Matrix::rowSums(Adj_sp))['Median']
#      if(Med < lower.med){
#        radius.lower <- start.radius
#        start.radius <- (radius.lower + radius.upper)/2
#      }else if(Med >upper.med){
#        radius.upper <- start.radius
#        start.radius <- (radius.lower + radius.upper)/2
#      }
#      message("Current radius is ", round(start.radius, 2))
#      message("Median of neighborhoods is ", Med)
#      if(k > maxIter) {
#        message("Reach the maximum iteration but can not find a proper radius!")
#        break;
#      }
#      k <- k + 1
#    }
#  
#    return(start.radius)
#  }
#  
#  acc_fun <- function(y1, y2){
#    n1 <- length(unique(y1))
#    n2 <- length(unique(y2))
#    if(n1<n2){ ## ensure n1> n2
#      a <- y1
#      y1 <- y2
#      y2 <- a
#      n1 <- length(unique(y1))
#      n2 <- length(unique(y2))
#    }
#    cm <- as.matrix(table(Actual = y1, Predicted = y2))
#    rnames <-row.names(cm)
#    cnames <- colnames(cm)
#    union_names <- union(rnames, cnames)
#    n <- length(union_names)
#    cm_new <- matrix(0, n, n)
#    row.names(cm_new) <- colnames(cm_new) <- union_names
#    for(r in 1:n2){
#      cm_new[rnames,cnames[r]] <- cm[rnames,cnames[r]]
#    }
#  
#    sum(diag(cm_new)) / length(y1)
#  }
#  
#  kappa_fun <- function(y1, y2){
#    require(irr)
#    dat <- data.frame(y1, y2)
#    k_res <- kappa2(dat)
#    k_res$value
#  }

## ----eval =FALSE--------------------------------------------------------------
#  
#  X_count <- Matrix::t(seu_rna_over[["RNA"]][seu_rna_over[['RNA']]@var.features,])
#  X_count <-  as.matrix(X_count)
#  H <- t(as.matrix(seu_adt_over[["ADT"]]@data))
#  Z <- matrix(1, nrow(H), 1)
#  pos <- cbind(seu_rna_over$X0, seu_rna_over$X1)
#  
#  
#  radius_use <- searchRadius(pos, radius.upper = NULL)
#  set.seed(1)
#  n_spots <- nrow(pos)
#  idx <- sample(n_spots, min(100, n_spots))
#  dis <- dist(pos[idx,])
#  Adj_sp <-  SpaCOAP:::getneighbor_weightmat(pos, radius = radius_use, width=median(dis))

## ----eval =FALSE--------------------------------------------------------------
#  q_max <- 20
#  d <- ncol(H)
#  rank_max <- d
#  tic <- proc.time()
#  reslist_max <- SpaCOAP(X_count, Adj_sp,  H,  Z,
#                     rank_use = rank_max, q=q_max, epsELBO = 1e-8,  maxIter = 30)
#  toc <- proc.time()
#  time_spacoap_max <- toc[3] - tic[3]

## ----eval =FALSE--------------------------------------------------------------
#  threshold=c(1e-15, 1e-20)
#  thre1 <- threshold[1]
#  beta_svalues <- svd(reslist_max$bbeta)$d
#  beta_svalues <- beta_svalues[beta_svalues>thre1]
#  ratio1 <- beta_svalues[-length(beta_svalues)] / beta_svalues[-1]
#  ratio1[1:10]
#  ## Here, we set hr = 9 rather 1 to retain more information
#  
#  thre2 <- threshold[2]
#  B_svalues <- svd(reslist_max$B)$d
#  B_svalues <- B_svalues[B_svalues>thre2]
#  ratio_fac <- B_svalues[-length(B_svalues)] / B_svalues[-1]
#  ratio_fac[1:6]
#  # [1] 6.123909e+00 2.749414e+00 1.376498e+00 1.314487e+00 3.310699e+14
#  # Here, we choose q=5 since huge decrease of singular values happen in q=5.
#  hq <- 5

## ----eval =FALSE--------------------------------------------------------------
#  hr <- 9;hq <- 5
#  featureList <- list()
#  tic <- proc.time()
#  reslist <- SpaCOAP(X_count, Adj_sp, H=H, Z= Z,
#                     rank_use = hr, q=hq, epsELBO = 1e-8)
#  toc <- proc.time()
#  time_spacoap <- toc[3] - tic[3]
#  Matrix::rankMatrix(reslist$bbeta)
#  svd_x <- svd(reslist$bbeta, nu = hr, nv=hr)
#  H_spacoap <- H %*% svd_x$v
#  (R2_spacoap <- ProFAST::get_r2_mcfadden(embeds= cbind(H_spacoap, reslist$F), y=as.factor(cell_clusters)))
#  featureList[['SpaCOAP']] <- cbind(H_spacoap, reslist$F)
#  
#  

## ----eval =FALSE--------------------------------------------------------------
#  ##COAP
#  library(COAP)
#  tic <- proc.time()
#  res_coap <- RR_COAP(X_count, Z = cbind(Z, H), rank_use= 2+hr, q=hq,
#                      epsELBO = 1e-7, maxIter = 30)
#  toc <- proc.time()
#  time_coap <- toc[3] - tic[3]
#  save(res_coap, time_coap, file='reslist_time_coap.rds')
#  svd_x <- svd(res_coap$bbeta[,-c(1)], nu = hr, nv=hr)
#  H_coap <- H %*% svd_x$v
#  (R2_coap <- ProFAST::get_r2_mcfadden(embeds= cbind(res_coap$H, H_coap), y=as.factor(cell_clusters)))
#  featureList[['COAP']] <- cbind(res_coap$H, H_coap)

## ----eval = FALSE-------------------------------------------------------------
#  ## MRRR
#  mrrr_run <- function(Y, X, rank0, q=NULL, family=list(poisson()),
#                       familygroup=rep(1,ncol(Y)), epsilon = 1e-4, sv.tol = 1e-2,
#                       maxIter = 2000, trace=TRUE, truncflag=FALSE, trunc=500){
#    # epsilon = 1e-4; sv.tol = 1e-2; maxIter = 30; trace=TRUE
#    # Y <- X_count; X <- cbind(Z, H); rank0 = r + ncol(Z)
#  
#    require(rrpack)
#  
#    n <- nrow(Y); p <- ncol(Y)
#  
#    if(!is.null(q)){
#      rank0 <- rank0+q
#      X <- cbind(X, diag(n))
#    }
#    if(truncflag){
#      ## Trunction
#      Y[Y>trunc] <- trunc
#  
#    }
#  
#    svdX0d1 <- svd(X)$d[1]
#    init1 = list(kappaC0 = svdX0d1 * 5)
#    offset = NULL
#    control = list(epsilon = epsilon, sv.tol = sv.tol, maxit = maxIter,
#                   trace = trace, gammaC0 = 1.1, plot.cv = TRUE,
#                   conv.obj = TRUE)
#    fit.mrrr <- mrrr(Y=Y, X=X[,-1], family = family, familygroup = familygroup,
#                     penstr = list(penaltySVD = "rankCon", lambdaSVD = 1),
#                     control = control, init = init1, maxrank = rank0)
#  
#    return(fit.mrrr)
#  }
#  
#  tic <- proc.time()
#  res_mrrr <- mrrr_run(X_count, cbind(Z,H), rank0=hr+ncol(Z), q=hq)
#  str(res_mrrr)
#  toc <- proc.time()
#  time_mrrr <- toc[3] - tic[3]
#  hbbeta_mrrr <-t(res_mrrr$coef[1:ncol(cbind(Z,H)), ])
#  svd_x <- svd(hbbeta_mrrr[,-c(1)], nu = hr, nv=hr)
#  H_mrrr <- H %*% svd_x$v
#  Theta_hb <- (res_mrrr$coef[(ncol(cbind(Z,H))+1): (nrow(cbind(Z,H))+ncol(cbind(Z,H))), ])
#  svdTheta <- svd(Theta_hb, nu=hq, nv=hq)
#  F_mrrr <- svdTheta$u
#  (R2_mrrr <- ProFAST::get_r2_mcfadden(embeds= cbind(H_mrrr, F_mrrr), y=as.factor(cell_clusters)))
#  featureList[['MRRR']] <- cbind(H_mrrr, F_mrrr)

## ----eval =FALSE--------------------------------------------------------------
#  fast_run <- function(X_count, Adj_sp, q, verbose=TRUE, epsELBO=1e-8){
#    require(ProFAST)
#  
#    reslist <- FAST_run(XList = list(X_count),
#                        AdjList = list(Adj_sp), q = q, fit.model = 'poisson',
#                        verbose=verbose, epsLogLik=epsELBO)
#    reslist$hV <- reslist$hV[[1]]
#    return(reslist)
#  }
#  tic <- proc.time()
#  res_fast <- fast_run(X_count, Adj_sp, q=hq, verbose=TRUE, epsELBO=1e-8)
#  toc <- proc.time()
#  time_fast <- toc - tic
#  (R2_fast <- ProFAST::get_r2_mcfadden(embeds= res_fast$hV, y=as.factor(cell_clusters)))
#  featureList[['FAST']] <- res_fast$hV
#  

## ----eval =FALSE--------------------------------------------------------------
#  R2List <- list()
#  cell_label <- cell_clusters
#  for(im in 1: length(featureList)){
#    message("im = ", im)
#    R2List[[im]] <- ProFAST::get_r2_mcfadden(embeds= featureList[[im]], y=cell_label)
#  }
#  names(R2List) <- names(featureList)

## ----eval =FALSE--------------------------------------------------------------
#  R2Vec <- unlist(R2List)
#  names(R2Vec) <- names(R2List)
#  barplot(R2Vec, ylim=c(0, 0.8))

## ----eval =FALSE--------------------------------------------------------------
#  N <- 10
#  n <- length(cell_label)
#  methodNames <- c("SpaCOAP", "COAP",  "MRRR", "FAST")
#  n_methods <- length(methodNames)
#  metricList <- list(ACC = matrix(NA,N, n_methods), Kappa = matrix(NA,N, n_methods))
#  for(ii in 1: length(metricList)) colnames(metricList[[ii]]) <- methodNames
#  library(randomForest)
#  for(i in 1:N){
#    # i <- 1
#    message("i = ", i)
#    set.seed(i)
#    idx_train <- sort(sample(n, round(n*0.7)))
#    idx_test <- sort(setdiff(1:n, idx_train))
#    rf_spacoap <- randomForest(featureList[['SpaCOAP']][idx_train,], y=cell_label[idx_train])
#    hy_spacoap <- predict(rf_spacoap, newdata=featureList[['SpaCOAP']][idx_test,])
#    metricList$ACC[i,1] <- acc_fun(hy_spacoap, cell_label[idx_test])
#    metricList$Kappa[i,1] <- kappa_fun(hy_spacoap, cell_label[idx_test])
#  
#    rf_coap <- randomForest(featureList[['COAP']][idx_train,], y=cell_label[idx_train])
#    hy_coap <- predict(rf_coap, newdata=featureList[['COAP']][idx_test,])
#    metricList$ACC[i,2] <- acc_fun(hy_coap, cell_label[idx_test])
#    metricList$Kappa[i,2] <- kappa_fun(hy_coap, cell_label[idx_test])
#  
#    colnames(featureList[['MRRR']]) <- paste0("MRRR", 1:ncol(featureList[['MRRR']]))
#    rf_MRRR <- randomForest(featureList[['MRRR']][idx_train,], y=cell_label[idx_train])
#    hy_MRRR <- predict(rf_MRRR, newdata=featureList[['MRRR']][idx_test,])
#    metricList$ACC[i,3] <- acc_fun(hy_MRRR, cell_label[idx_test])
#    metricList$Kappa[i,3] <- kappa_fun(hy_MRRR, cell_label[idx_test])
#  
#    colnames(featureList[['FAST']]) <- paste0("FAST", 1:ncol(featureList[['FAST']]))
#    rf_FAST <- randomForest(featureList[['FAST']][idx_train,], y=cell_label[idx_train])
#    hy_FAST <- predict(rf_FAST, newdata=featureList[['FAST']][idx_test,])
#    metricList$ACC[i,4] <- acc_fun(hy_FAST, cell_label[idx_test])
#    metricList$Kappa[i,4] <- kappa_fun(hy_FAST, cell_label[idx_test])
#  }
#  
#  DT::datatable(t(round(sapply(metricList, colMeans, na.rm=T),2) ))
#  

## -----------------------------------------------------------------------------
sessionInfo()