## ----quickstart, results = 'hide', message = FALSE, warning = FALSE-----------

library(tepr)

#########
# Pre-processing - takes ~ 21 seconds
#########

## Parameters
expprepath <- system.file("extdata", "exptab-preprocessing.csv", package="tepr")
windsize <- 200

## Read input tables
expdfpre <- read.csv(expprepath)

## Retrieving the bedgraph paths
bgpathvec <- sapply(expdfpre$path, function(x) system.file("extdata", x,
    package = "tepr"))

## Replace the path column of expdfpre with the previously retrieved paths
## and writing the new experiment file to the current folder
expdfpre$path <- bgpathvec
write.csv(expdfpre, file = "exptab-preprocessing.csv", row.names = FALSE, quote = FALSE)
expprepath <- "exptab-preprocessing.csv"

gencodepath <- system.file("extdata", "gencode-chr13.gtf", package = "tepr")
maptrackpath <- system.file("extdata", "k50.umap.chr13.hg38.0.8.bed",
    package = "tepr")
blacklistpath <- system.file("extdata", "hg38-blacklist-chr13.v2.bed",
    package = "tepr")
genomename <- "hg38"

## The lines below are optional. The chromosome info can be retrieved automatically
## Make chromosome info retrieval explicit for building the vignette
chromtabtest <- rtracklayer::SeqinfoForUCSCGenome(genomename)
allchromvec <- GenomeInfoDb::seqnames(chromtabtest)
chromtabtest <- chromtabtest[allchromvec[which(allchromvec == "chr13")], ]

finaltab <- preprocessing(expprepath, gencodepath, windsize, maptrackpath,
    blacklistpath, genomename, finaltabpath = tempdir(), finaltabname = "anno.tsv",
    chromtab = chromtabtest, showtime = FALSE, verbose = FALSE)


#########
# tepr analysis - takes ~ 1 seconds
#########

## Parameters (transpath limited to 6 transcripts)
exppath <-  system.file("extdata", "exptab.csv", package="tepr")
transpath <- system.file("extdata", "cugusi_6.tsv", package="tepr")
expthres <- 0.1

## Read input tables
expdf <- read.csv(exppath)
transdf <- read.delim(transpath, header = FALSE)

reslist <- tepr(expdf, transdf, expthres, showtime = FALSE, verbose = FALSE)

## ----prequick, echo = FALSE---------------------------------------------------
print(head(finaltab, 3))

## ----teprquick, echo = FALSE--------------------------------------------------
message("The table meandifference:\n")
print(head(reslist[[1]], 2))

message("\n\n The table universegroup:\n")
print(head(reslist[[2]], 2))

## ----echo=FALSE, fig.cap="structure"------------------------------------------
knitr::include_graphics(system.file("extdata", "structure.png", package = "tepr"))

## ----retrieveanno-------------------------------------------------------------
allannobed <- retrieveanno(expprepath, gencodepath, verbose = FALSE)
message("\n The result is:\n")
print(head(allannobed, 3))

## ----makewindows--------------------------------------------------------------
allwindowsbed <- makewindows(allannobed, windsize, verbose = FALSE)
message("\n The result is:\n")
print(head(allwindowsbed, 3))

## ----showfinaltab, echo = FALSE-----------------------------------------------
finaltabpath <- system.file("extdata", "finaltab-chr13.tsv", package = "tepr")
finaltab <- read.delim(finaltabpath, header = FALSE)
print(head(finaltab, 3))

## ----reading-anno-scores------------------------------------------------------
## Define manually the column names for display purpose
colnames(transdf) <- c("biotype", "chr", "coor1", "coor2", "transcript", "gene",
    "strand", "window", "id", "ctrl_rep1.plus", "ctrl_rep1.plus_score",
    "ctrl_rep1.minus", "ctrl_rep1.minus_score", "ctrl_rep2.plus",
    "ctrl_rep2.plus_score", "ctrl_rep2.minus", "ctrl_rep2.minus_score",
    "HS_rep1.plus", "HS_rep1.plus_score", "HS_rep1.minus",
    "HS_rep1.minus_score", "HS_rep2.plus", "HS_rep2.plus_score",
    "HS_rep2.minus", "HS_rep2.minus_score")

message("The table given by the preprocessing function is:\n")
print(head(transdf, 3))

message("\n The expdf table contains information about each replicate (here limited to one):\n")
head(expdf)

## ----checkexptab--------------------------------------------------------------
checkexptab(expdf)

## ----averageandfilterexprs----------------------------------------------------
resallexprs <- averageandfilterexprs(expdf, transdf, expthres, verbose = FALSE)

## ----exprsvec-----------------------------------------------------------------
print(resallexprs[[2]])

## ----countna------------------------------------------------------------------
rescountna <- countna(resallexprs, expdf, verbose = FALSE)
print(rescountna)

## ----ecdf---------------------------------------------------------------------
resecdflist <- genesECDF(resallexprs, expdf, verbose = FALSE)

## ----resecdf------------------------------------------------------------------
print(head(as.data.frame(resecdflist[[1]]), 3))

## ----nbwindows----------------------------------------------------------------
nbwindows <- resecdflist[[2]]
print(nbwindows)

## ----meandifference-----------------------------------------------------------
resmeandiff <- meandifference(resecdflist[[1]], expdf, nbwindows, verbose = FALSE)
print(head(resmeandiff, 3))

## ----splitmeans---------------------------------------------------------------
## Split the results by transcripts
bytranslistmean <- split(resmeandiff, factor(resmeandiff$transcript))

## ----allauc-------------------------------------------------------------------
## Calculate Area Under Curve (AUC) and Differences of AUC for Transcript Data
resauc <- allauc(bytranslistmean, expdf, nbwindows, verbose = FALSE)
print(head(resauc, 3))

## ----knee---------------------------------------------------------------------
resknee <- kneeid(bytranslistmean, expdf, verbose = FALSE)
print(resknee)

## ----attenuation--------------------------------------------------------------
resatt <- attenuation(resauc, resknee, rescountna, bytranslistmean, expdf,
    resmeandiff, verbose = FALSE)
print(head(resatt, 3))

## ----universegroup------------------------------------------------------------
res <- universegroup(resatt, expdf, verbose = FALSE)
print(head(res, 2))

## ----plotecdf, warning = FALSE------------------------------------------------
colvec <- c("#90AFBB", "#10AFBB", "#FF9A04", "#FC4E07")
plotecdf(resmeandiff, res, expdf, "EGFR", colvec, plot = TRUE, verbose = FALSE)

## ----plotaucgroups, warning = FALSE-------------------------------------------
plotauc(res, expdf, plottype = "groups", plot = TRUE)

## ----echo=FALSE, fig.cap="AUC group plot"-------------------------------------
knitr::include_graphics(system.file("extdata", "AUCcompare_group.png", package = "tepr"))

## ----plotaucpval, warning = FALSE---------------------------------------------
genevec <- c("EGFR", "DAP", "FLI1", "MARCHF6", "LINC01619")
plotauc(res, expdf, genevec, plot = TRUE)

## ----echo=FALSE, fig.cap="AUC pval plot"--------------------------------------
knitr::include_graphics(system.file("extdata", "AUCcompare_pval.png", package = "tepr"))

## ----metageneplot, warning = FALSE--------------------------------------------
plotmetagenes(res, resmeandiff, expdf, plottype = "attenuation", plot = TRUE)

## ----echo=FALSE, fig.cap="Attenuation meta"-----------------------------------
knitr::include_graphics(system.file("extdata", "metagene_attenuation.png", package = "tepr"))

## ----kneepercent, warning = FALSE---------------------------------------------
plothistoknee(res, plot = TRUE)

## ----echo=FALSE, fig.cap="histo percent"--------------------------------------
knitr::include_graphics(system.file("extdata", "histo_percent.png", package = "tepr"))

## ----kneekb, warning = FALSE--------------------------------------------------
plothistoknee(res, plottype = "kb", plot = TRUE)

## ----echo=FALSE, fig.cap="histo kb"-------------------------------------------
knitr::include_graphics(system.file("extdata", "histo_kb.png", package = "tepr"))

## ----singlecond---------------------------------------------------------------
## The experiment table is limited to one condition and one replicate
expdfonecond <- expdf[which(expdf$condition == "HS" & expdf$replicate == 1), ]

## The table obtained by preprocessing is limited to the condition 'HS' and replicate 1
transdfonecond <- transdf[, c(seq_len(9), 18, 19, 20, 21)]

## Computing the object 'res' with tepr on one condition
resonecond <- tepr(expdfonecond, transdfonecond, expthres, controlcondname = "HS", verbose = FALSE)

## ----ecdfcond, warning = FALSE------------------------------------------------
plotecdf(resonecond[[1]], resonecond[[2]], expdfonecond, genename = "EGFR",
    colvec = c("#90AFBB"), plot = TRUE, verbose = FALSE)

## ----histocond, warning = FALSE-----------------------------------------------
## Randomly marking 3 transcripts as attenuated as a mock example
idxatt <- sample(seq_len(6), 3)
resonecond[[2]][idxatt, "Group"] <- "Attenuated"
resonecond[[2]][idxatt, "Universe"] <- TRUE
plothistoknee(resonecond[[2]], kneename = "knee_AUC_HS", plottype = "percent", plot = TRUE, verbose = FALSE)