--- title: "A simple workflow for PoweREST" author: "Lan Shui" date: "2024-06-27" # "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{PoweREST} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette highlights a simple example workflow for performing power analysis for ST data using the `PoweREST` R package. A detailed version can be found on PoweREST GitHub page. ## Load Packages Once installed, **`PoweREST`** can be simply loaded (along with the required packages) as follows: ```{r setup} #install.packages("devtools") #devtools::install_github("lanshui1998/PoweREST") #----or #install.packages("PoweREST") library(PoweREST) ``` ## Prepare data ```{r,eval=FALSE} #load ST data in R by Seurat: #here we load the pancreatic cancer data which is available on GitHub page three_areas <- readRDS("your path to/GSE233293_scMC.all.3areas.final") Idents(three_areas) #Levels: Peri Juxta Epi SeuratObject_splitlist<-Seurat::SplitObject(three_areas, split.by = "ident") #split the ST data into three areas for (i in 1:length(SeuratObject_splitlist)) { SeuratObject_splitlist[[i]][['Condition']]<-ifelse(SeuratObject_splitlist[[i]][['Type']]=='LG','LG','HR') } for (i in 1:length(SeuratObject_splitlist)) { Seurat::Idents(SeuratObject_splitlist[[i]])<-"Condition" } # Take Peri area for example for downstream analysis Peri<-SeuratObject_splitlist$Peri ``` ## Compute power values through Bootstrapping ```{r,eval=FALSE} result<-PoweREST(Peri,cond='Condition',replicates=5,spots_num=80,iteration=100) #---For test, try this first #PoweREST(Peri,cond='Condition',replicates=5,spots_num=80,iteration=2) #---To get faster, try this #devtools::install_github('immunogenomics/presto') ``` ### Instead of using default Wilcoxon test ```{r,eval=FALSE} # For example, use the Student's t-test result2<-PoweREST(Peri,cond='Condition',replicates=5,spots_num=80,iteration=100,test.use="t") ``` Users can also use PoweREST_gene and PoweREST_subset to perform the power estimation upon one gene or a subset of genes. ### PoweREST_gene ```{r,eval=FALSE} PoweREST_gene(Peri,cond='Condition',replicates=5,spots_num=80,gene_name='MUC1',pvalue=0.00001) ``` ### PoweREST_subset ```{r,eval=FALSE} PoweREST_subset(Peri,cond='Condition',replicates=5,spots_num=80,pvalue=0.05,logfc.threshold = 0.1,min.pct = 0.01) ``` ## Fit the power surface using smoothing splines ```{r,eval=FALSE} #Fit the power surface for sample size=5 in each arm b<-fit_powerest(result$power,result$avg_logFC,result$avg_PCT) ``` ### Visualize the power surface ```{r,eval=FALSE} pred <- pred_powerest(b,xlim= c(0,6),ylim=c(0,1)) vis_powerest(pred,theta=-30,phi=30,color='heat',ticktype = "detailed",xlim=c(0,6),nticks=5) ``` ### Create interactive visualization result ```{r,eval=FALSE} plotly_powerest(pred,fig_title='Power estimation result') ``` ## Fit local power surface with XGBoost ```{r,eval=FALSE} # Fit the local power surface of avg_log2FC_abs between 1 and 2 avg_log2FC_abs_1_2<-dplyr::filter(power,avg_log2FC_abs>1 & avg_log2FC_abs<2) # Fit the model bst<-fit_XGBoost(power$power,avg_log2FC=power$avg_log2FC_abs,avg_PCT=power$mean_pct,replicates=power$sample_size) # Make predictions pred<-pred_XGBoost(bst,n.grid=30,xlim=c(0,1.5),ylim=c(0,0.1),replicates=3) ``` ### Visualize the local power surface ```{r,eval=FALSE} #2D version vis_XGBoost(pred,view='2D',legend_name='Power',xlab='avg_log2FC_abs',ylab='mean_pct') #3D version vis_XGBoost(pred,view='3D',legend_name='Power',xlab='avg_log2FC_abs',ylab='mean_pct') ```