## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the rendered document: `collapse = TRUE` merges each
# chunk's source and output into one block, and `comment = '#>'` sets the
# prefix placed in front of every line of output.
knitr::opts_chunk$set(comment = '#>', collapse = TRUE)

## ----setup--------------------------------------------------------------------
library(OutSeekR);

## ----view-data----------------------------------------------------------------
# Compact structural summary of the example data; `list.len = 5` caps the
# number of list elements displayed per level.
str(object = outliers, list.len = 5);
# Rows 1-6 of columns 1-6.
outliers[seq_len(6), seq_len(6)];
# Rows 495-500 of columns 45-50.
outliers[seq(from = 495, to = 500), seq(from = 45, to = 50)];

## ----run-1--------------------------------------------------------------------
# Set random seed for reproducibility.
set.seed(371892);

# Set up parallel processing.  `future::plan()` returns the plan that was
# in effect before the call; keep it so that it can be reinstated once the
# run has finished.
previous.plan <- future::plan(future::multisession);

# First run: the example data with `num.null = 1e3`; every other argument
# of `detect.outliers()` is left at its default.
outlier.test.run.1 <- detect.outliers(
    data = outliers,
    num.null = 1e3
    );

# Show the top two levels of the returned object.
str(outlier.test.run.1, max.level = 2);

# Reinstate whichever plan was active before this chunk, rather than
# unconditionally switching to sequential processing (which would discard
# a plan configured earlier in the session).
future::plan(previous.plan);

## ----examine-p-values---------------------------------------------------------
# Leading entries of the `p.values` and `fdr` components of the first run.
utils::head(x = outlier.test.run.1$p.values);
utils::head(x = outlier.test.run.1$fdr);

## ----transcript-level-outlier-counts------------------------------------------
# Leading entries of the `num.outliers` component.
utils::head(x = outlier.test.run.1$num.outliers);

## ----table--------------------------------------------------------------------
# Tally how often each value of `num.outliers` occurs.
table(outlier.test.run.1$num.outliers);

## ----view-outlier-test-results-list-------------------------------------------
# Structure of the `outlier.test.results.list` component.
utils::str(object = outlier.test.run.1$outlier.test.results.list);

## ----collapse-rounds----------------------------------------------------------
# Stack the elements of `outlier.test.results.list` into one data frame.
# Each element is first tagged with its position in the list, stored in a
# new leading column named 'round'; the remaining columns keep their
# original order.
outlier.test.results.combined <- do.call(
    what = 'rbind',
    args = lapply(
        X = seq_along(outlier.test.run.1$outlier.test.results.list),
        FUN = function(k) {
            per.round <- outlier.test.run.1$outlier.test.results.list[[k]];
            # Record the column order before 'round' is appended.
            original.columns <- colnames(per.round);
            per.round$round <- k;
            per.round[, c('round', original.columns)];
            }
        )
    );
# Stacking the per-round data frames yields repeated row names, which R
# makes unique on its own.  The 'round' and 'transcript' columns already
# carry all the identifying information, so the row names are dropped.
rownames(outlier.test.results.combined) <- NULL;
utils::head(x = outlier.test.results.combined);

## ----distributions------------------------------------------------------------
# Leading entries of the `distributions` component, followed by a tally of
# how often each of its values occurs.
utils::head(x = outlier.test.run.1$distributions);
table(outlier.test.run.1$distributions);

## ----run-2--------------------------------------------------------------------
# Set up parallel processing.  `future::plan()` returns the plan that was
# in effect before the call; keep it so that it can be reinstated once the
# run has finished.
previous.plan <- future::plan(future::multisession);

# Second run: same data and `num.null` as the first run, but with explicit
# values for `initial.screen.method`, `p.value.threshold`, and
# `fdr.threshold`.
# NOTE(review): no seed is set in this chunk, so any random draws presumably
# continue from the RNG state left by earlier code -- confirm whether a
# dedicated `set.seed()` call is wanted here.
outlier.test.run.2 <- detect.outliers(
    data = outliers,
    num.null = 1e3,
    initial.screen.method = 'fdr',
    p.value.threshold = 0.25,
    fdr.threshold = 0.05
    );

# Reinstate whichever plan was active before this chunk, rather than
# unconditionally switching to sequential processing (which would discard
# a plan configured earlier in the session).
future::plan(previous.plan);

# Show the top two levels of the returned object.
str(outlier.test.run.2, max.level = 2);

# Examine p-value and FDR matrices.
head(outlier.test.run.2$p.values);
head(outlier.test.run.2$fdr);

# Check the distribution of number of outliers detected.
table(outlier.test.run.2$num.outliers);