---
title: 'NACHO Analysis'
subtitle: "A NAnostring quality Control dasHbOard"
author: "Mickaël Canouil, Ph.D., Gerard A. Bouland and Roderick C. Slieker, Ph.D."
email: "mickael.canouil@cnrs.fr"
date: '`r format(Sys.time(), "%B %d, %Y")`'
bibliography: nacho.bib
link-citations: true
output: 
  rmarkdown::html_vignette:
    number_sections: true
    toc: true
    toc_depth: 2
    fig_width: 6.3
    fig_height: 4.7
vignette: >
  %\VignetteIndexEntry{NACHO-analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  eval = TRUE,
  collapse = TRUE,
  # results = "asis",
  include = TRUE,
  echo = TRUE,
  warning = TRUE,
  message = TRUE,
  error = TRUE,
  # tidy = FALSE,
  # crop = TRUE,
  # autodep = TRUE,
  fig.align = "center",
  cache = FALSE
)
```

```{r logo, echo = FALSE, out.width = "150px"}
knitr::include_graphics(path = "nacho_hex.png")
```

# Installation

```{r, eval = FALSE}
# Install NACHO from CRAN:
install.packages("NACHO")

# Or the the development version from GitHub:
# install.packages("remotes")
remotes::install_github("mcanouil/NACHO")
```

# Overview 

```{r, echo = FALSE, results = "asis"}
cat(readLines(system.file("app", "www", "about-nacho.md", package = "NACHO"))[-c(1, 2)], sep = "\n")
```

```{r, echo = FALSE, results = "asis"}
print(citation("NACHO"), "html")
```

```{r, echo = FALSE, comment = ""}
print(citation("NACHO"), "bibtex")
```

# Analyse NanoString data

## Load packages

```{r}
library(NACHO)
library(GEOquery, quietly = TRUE, warn.conflicts = FALSE)
```

## Download `GSE70970` from GEO (or use your own data)

```{r}
data_directory <- file.path(tempdir(), "GSE70970", "Data")

# Download data
gse <- getGEO("GSE70970")
getGEOSuppFiles(GEO = "GSE70970", baseDir = tempdir())
# Unzip data
untar(
  tarfile = file.path(tempdir(), "GSE70970", "GSE70970_RAW.tar"),
  exdir = data_directory
)
# Get phenotypes and add IDs
targets <- pData(phenoData(gse[[1]]))
targets$IDFILE <- list.files(data_directory)
```

## Import RCC files

```{r}
GSE70970 <- load_rcc(data_directory, targets, id_colname = "IDFILE")
```

## Perform the analyses using `limma`

```{r}
library(limma)
```

### Get the phenotypes

```{r}
selected_pheno <- GSE70970[["nacho"]][
  j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)),
  .SDcols = c("IDFILE", "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1")
]
selected_pheno <- na.exclude(selected_pheno)
```

```{r, echo = FALSE}
head(selected_pheno)
```

### Get the normalised counts

```{r}
expr_counts <- GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = as.matrix(
    dcast(.SD, Name ~ IDFILE, value.var = "Count_Norm"),
    "Name"
  ),
  .SDcols = c("IDFILE", "Name", "Count_Norm")
]
```

```{r, echo = FALSE}
expr_counts[1:5, 1:5]
```

Alternatively, `"Accession"` number is also available.

```{r, eval = FALSE}
GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = as.matrix(
    dcast(.SD, Accession ~ IDFILE, value.var = "Count_Norm"),
    "Accession"
  ),
  .SDcols = c("IDFILE", "Accession", "Count_Norm")
]
```

### Select phenotypes and counts

1. Make sure count matrix and phenotypes have the same samples

```{r}
samples_kept <- intersect(selected_pheno[["IDFILE"]], colnames(expr_counts))
expr_counts <- expr_counts[, samples_kept]
selected_pheno <- selected_pheno[IDFILE %in% c(samples_kept)]
```

2. Build the numeric design matrix

```{r}
design <- model.matrix(~ `disease.event:ch1`, selected_pheno)
```

3. `limma`

```{r}
eBayes(lmFit(expr_counts, design))
```


## Perform the analyses using `lm` (or any other model)

```{r}
GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)),
  .SDcols = c(
    "IDFILE", "Name", "Accession", "Count", "Count_Norm",
    "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1"
  )
][
  Name %in% head(unique(Name), 10)
][
  j = as.data.table(
    coef(summary(lm(
      formula = Count_Norm ~ `disease.event:ch1`,
      data = na.exclude(.SD)
    ))),
    "term"
  ),
  by = c("Name", "Accession")
]
```