## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval=FALSE--------------------------------------------------------------- # url1 <- "https://github.com/feiyoung/SpaCOAP/tree/master/vignettes_data/" # rna_object <- "seu_rna_over_Spleen.RDS?raw=true" # download.file(paste0(url1, rna_object),"seu_rna_over_Spleen.RDS",mode='wb') # protein_object <- "seu_adt_over_Spleen.RDS?raw=true" # download.file(paste0(url1,protein_object), 'seu_adt_over_Spleen.RDS', mode='wb') # ## download annotation # download.file(paste0(url1, "cell_clusters_anno.rds?raw=true"), "cell_clusters_anno.rds", "wb") ## ----eval =FALSE-------------------------------------------------------------- # seu_rna_over <- readRDS("./seu_rna_over_Spleen.RDS") # seu_adt_over <- readRDS("./seu_adt_over_Spleen.RDS") # load("./cell_clusters_anno.rds") ## ----eval =FALSE-------------------------------------------------------------- # library(SpaCOAP) # ## ----eval =FALSE-------------------------------------------------------------- # searchRadius <- function(pos, lower.med=8, upper.med=10, radius.upper= NULL){ # if (!inherits(pos, "matrix")) # stop("method is only for matrix object!") # # # ## Automatically determine the upper radius # n_spots <- nrow(pos) # idx <- sample(n_spots, min(100, n_spots)) # dis <- dist(pos[idx,]) # if(is.null(radius.upper)){ # #radius.upper <- max(dis) # radius.upper <- sort(dis)[20] ## select the nearest 20 spots. # } # radius.lower <- min(dis[dis>0]) # Adj_sp <- DR.SC:::getneighborhood_fast(pos, radius=radius.upper) # Med <- summary(Matrix::rowSums(Adj_sp))['Median'] # if(Med < lower.med) stop("The radius.upper is too smaller that cannot find median neighbors greater than 4.") # start.radius <- 1 # Med <- 0 # message("Find the adjacency matrix by bisection method...") # maxIter <- 30 # k <- 1 # while(!(Med >= lower.med && Med <=upper.med)){ # ensure that each spot has about 4~6 neighborhoods in median. # # Adj_sp <- DR.SC:::getneighborhood_fast(pos, radius=start.radius) # Med <- summary(Matrix::rowSums(Adj_sp))['Median'] # if(Med < lower.med){ # radius.lower <- start.radius # start.radius <- (radius.lower + radius.upper)/2 # }else if(Med >upper.med){ # radius.upper <- start.radius # start.radius <- (radius.lower + radius.upper)/2 # } # message("Current radius is ", round(start.radius, 2)) # message("Median of neighborhoods is ", Med) # if(k > maxIter) { # message("Reach the maximum iteration but can not find a proper radius!") # break; # } # k <- k + 1 # } # # return(start.radius) # } # # acc_fun <- function(y1, y2){ # n1 <- length(unique(y1)) # n2 <- length(unique(y2)) # if(n1<n2){ ## ensure n1> n2 # a <- y1 # y1 <- y2 # y2 <- a # n1 <- length(unique(y1)) # n2 <- length(unique(y2)) # } # cm <- as.matrix(table(Actual = y1, Predicted = y2)) # rnames <-row.names(cm) # cnames <- colnames(cm) # union_names <- union(rnames, cnames) # n <- length(union_names) # cm_new <- matrix(0, n, n) # row.names(cm_new) <- colnames(cm_new) <- union_names # for(r in 1:n2){ # cm_new[rnames,cnames[r]] <- cm[rnames,cnames[r]] # } # # sum(diag(cm_new)) / length(y1) # } # # kappa_fun <- function(y1, y2){ # require(irr) # dat <- data.frame(y1, y2) # k_res <- kappa2(dat) # k_res$value # } ## ----eval =FALSE-------------------------------------------------------------- # # X_count <- Matrix::t(seu_rna_over[["RNA"]][seu_rna_over[['RNA']]@var.features,]) # X_count <- as.matrix(X_count) # H <- t(as.matrix(seu_adt_over[["ADT"]]@data)) # Z <- matrix(1, nrow(H), 1) # pos <- cbind(seu_rna_over$X0, seu_rna_over$X1) # # # radius_use <- searchRadius(pos, radius.upper = NULL) # set.seed(1) # n_spots <- nrow(pos) # idx <- sample(n_spots, min(100, n_spots)) # dis <- dist(pos[idx,]) # Adj_sp <- SpaCOAP:::getneighbor_weightmat(pos, radius = radius_use, width=median(dis)) ## ----eval =FALSE-------------------------------------------------------------- # q_max <- 20 # d <- ncol(H) # rank_max <- d # tic <- proc.time() # reslist_max <- SpaCOAP(X_count, Adj_sp, H, Z, # rank_use = rank_max, q=q_max, epsELBO = 1e-8, maxIter = 30) # toc <- proc.time() # time_spacoap_max <- toc[3] - tic[3] ## ----eval =FALSE-------------------------------------------------------------- # threshold=c(1e-15, 1e-20) # thre1 <- threshold[1] # beta_svalues <- svd(reslist_max$bbeta)$d # beta_svalues <- beta_svalues[beta_svalues>thre1] # ratio1 <- beta_svalues[-length(beta_svalues)] / beta_svalues[-1] # ratio1[1:10] # ## Here, we set hr = 9 rather 1 to retain more information # # thre2 <- threshold[2] # B_svalues <- svd(reslist_max$B)$d # B_svalues <- B_svalues[B_svalues>thre2] # ratio_fac <- B_svalues[-length(B_svalues)] / B_svalues[-1] # ratio_fac[1:6] # # [1] 6.123909e+00 2.749414e+00 1.376498e+00 1.314487e+00 3.310699e+14 # # Here, we choose q=5 since huge decrease of singular values happen in q=5. # hq <- 5 ## ----eval =FALSE-------------------------------------------------------------- # hr <- 9;hq <- 5 # featureList <- list() # tic <- proc.time() # reslist <- SpaCOAP(X_count, Adj_sp, H=H, Z= Z, # rank_use = hr, q=hq, epsELBO = 1e-8) # toc <- proc.time() # time_spacoap <- toc[3] - tic[3] # Matrix::rankMatrix(reslist$bbeta) # svd_x <- svd(reslist$bbeta, nu = hr, nv=hr) # H_spacoap <- H %*% svd_x$v # (R2_spacoap <- ProFAST::get_r2_mcfadden(embeds= cbind(H_spacoap, reslist$F), y=as.factor(cell_clusters))) # featureList[['SpaCOAP']] <- cbind(H_spacoap, reslist$F) # # ## ----eval =FALSE-------------------------------------------------------------- # ##COAP # library(COAP) # tic <- proc.time() # res_coap <- RR_COAP(X_count, Z = cbind(Z, H), rank_use= 2+hr, q=hq, # epsELBO = 1e-7, maxIter = 30) # toc <- proc.time() # time_coap <- toc[3] - tic[3] # save(res_coap, time_coap, file='reslist_time_coap.rds') # svd_x <- svd(res_coap$bbeta[,-c(1)], nu = hr, nv=hr) # H_coap <- H %*% svd_x$v # (R2_coap <- ProFAST::get_r2_mcfadden(embeds= cbind(res_coap$H, H_coap), y=as.factor(cell_clusters))) # featureList[['COAP']] <- cbind(res_coap$H, H_coap) ## ----eval = FALSE------------------------------------------------------------- # ## MRRR # mrrr_run <- function(Y, X, rank0, q=NULL, family=list(poisson()), # familygroup=rep(1,ncol(Y)), epsilon = 1e-4, sv.tol = 1e-2, # maxIter = 2000, trace=TRUE, truncflag=FALSE, trunc=500){ # # epsilon = 1e-4; sv.tol = 1e-2; maxIter = 30; trace=TRUE # # Y <- X_count; X <- cbind(Z, H); rank0 = r + ncol(Z) # # require(rrpack) # # n <- nrow(Y); p <- ncol(Y) # # if(!is.null(q)){ # rank0 <- rank0+q # X <- cbind(X, diag(n)) # } # if(truncflag){ # ## Trunction # Y[Y>trunc] <- trunc # # } # # svdX0d1 <- svd(X)$d[1] # init1 = list(kappaC0 = svdX0d1 * 5) # offset = NULL # control = list(epsilon = epsilon, sv.tol = sv.tol, maxit = maxIter, # trace = trace, gammaC0 = 1.1, plot.cv = TRUE, # conv.obj = TRUE) # fit.mrrr <- mrrr(Y=Y, X=X[,-1], family = family, familygroup = familygroup, # penstr = list(penaltySVD = "rankCon", lambdaSVD = 1), # control = control, init = init1, maxrank = rank0) # # return(fit.mrrr) # } # # tic <- proc.time() # res_mrrr <- mrrr_run(X_count, cbind(Z,H), rank0=hr+ncol(Z), q=hq) # str(res_mrrr) # toc <- proc.time() # time_mrrr <- toc[3] - tic[3] # hbbeta_mrrr <-t(res_mrrr$coef[1:ncol(cbind(Z,H)), ]) # svd_x <- svd(hbbeta_mrrr[,-c(1)], nu = hr, nv=hr) # H_mrrr <- H %*% svd_x$v # Theta_hb <- (res_mrrr$coef[(ncol(cbind(Z,H))+1): (nrow(cbind(Z,H))+ncol(cbind(Z,H))), ]) # svdTheta <- svd(Theta_hb, nu=hq, nv=hq) # F_mrrr <- svdTheta$u # (R2_mrrr <- ProFAST::get_r2_mcfadden(embeds= cbind(H_mrrr, F_mrrr), y=as.factor(cell_clusters))) # featureList[['MRRR']] <- cbind(H_mrrr, F_mrrr) ## ----eval =FALSE-------------------------------------------------------------- # fast_run <- function(X_count, Adj_sp, q, verbose=TRUE, epsELBO=1e-8){ # require(ProFAST) # # reslist <- FAST_run(XList = list(X_count), # AdjList = list(Adj_sp), q = q, fit.model = 'poisson', # verbose=verbose, epsLogLik=epsELBO) # reslist$hV <- reslist$hV[[1]] # return(reslist) # } # tic <- proc.time() # res_fast <- fast_run(X_count, Adj_sp, q=hq, verbose=TRUE, epsELBO=1e-8) # toc <- proc.time() # time_fast <- toc - tic # (R2_fast <- ProFAST::get_r2_mcfadden(embeds= res_fast$hV, y=as.factor(cell_clusters))) # featureList[['FAST']] <- res_fast$hV # ## ----eval =FALSE-------------------------------------------------------------- # R2List <- list() # cell_label <- cell_clusters # for(im in 1: length(featureList)){ # message("im = ", im) # R2List[[im]] <- ProFAST::get_r2_mcfadden(embeds= featureList[[im]], y=cell_label) # } # names(R2List) <- names(featureList) ## ----eval =FALSE-------------------------------------------------------------- # R2Vec <- unlist(R2List) # names(R2Vec) <- names(R2List) # barplot(R2Vec, ylim=c(0, 0.8)) ## ----eval =FALSE-------------------------------------------------------------- # N <- 10 # n <- length(cell_label) # methodNames <- c("SpaCOAP", "COAP", "MRRR", "FAST") # n_methods <- length(methodNames) # metricList <- list(ACC = matrix(NA,N, n_methods), Kappa = matrix(NA,N, n_methods)) # for(ii in 1: length(metricList)) colnames(metricList[[ii]]) <- methodNames # library(randomForest) # for(i in 1:N){ # # i <- 1 # message("i = ", i) # set.seed(i) # idx_train <- sort(sample(n, round(n*0.7))) # idx_test <- sort(setdiff(1:n, idx_train)) # rf_spacoap <- randomForest(featureList[['SpaCOAP']][idx_train,], y=cell_label[idx_train]) # hy_spacoap <- predict(rf_spacoap, newdata=featureList[['SpaCOAP']][idx_test,]) # metricList$ACC[i,1] <- acc_fun(hy_spacoap, cell_label[idx_test]) # metricList$Kappa[i,1] <- kappa_fun(hy_spacoap, cell_label[idx_test]) # # rf_coap <- randomForest(featureList[['COAP']][idx_train,], y=cell_label[idx_train]) # hy_coap <- predict(rf_coap, newdata=featureList[['COAP']][idx_test,]) # metricList$ACC[i,2] <- acc_fun(hy_coap, cell_label[idx_test]) # metricList$Kappa[i,2] <- kappa_fun(hy_coap, cell_label[idx_test]) # # colnames(featureList[['MRRR']]) <- paste0("MRRR", 1:ncol(featureList[['MRRR']])) # rf_MRRR <- randomForest(featureList[['MRRR']][idx_train,], y=cell_label[idx_train]) # hy_MRRR <- predict(rf_MRRR, newdata=featureList[['MRRR']][idx_test,]) # metricList$ACC[i,3] <- acc_fun(hy_MRRR, cell_label[idx_test]) # metricList$Kappa[i,3] <- kappa_fun(hy_MRRR, cell_label[idx_test]) # # colnames(featureList[['FAST']]) <- paste0("FAST", 1:ncol(featureList[['FAST']])) # rf_FAST <- randomForest(featureList[['FAST']][idx_train,], y=cell_label[idx_train]) # hy_FAST <- predict(rf_FAST, newdata=featureList[['FAST']][idx_test,]) # metricList$ACC[i,4] <- acc_fun(hy_FAST, cell_label[idx_test]) # metricList$Kappa[i,4] <- kappa_fun(hy_FAST, cell_label[idx_test]) # } # # DT::datatable(t(round(sapply(metricList, colMeans, na.rm=T),2) )) # ## ----------------------------------------------------------------------------- sessionInfo()