## ----SetUp, echo = FALSE, eval = TRUE, results = "hide"-----------------------
# R options & configuration:
set.seed(123)
suppressPackageStartupMessages(library("knitr"))
suppressPackageStartupMessages(library("LearnPCA"))
suppressPackageStartupMessages(library("xtable"))


# Stuff specifically for knitr:
opts_chunk$set(eval = TRUE, echo = TRUE, results = "show")

## ----echo = FALSE-------------------------------------------------------------
desc <- packageDescription("LearnPCA")

## ----echo = FALSE, results = "asis"-------------------------------------------
res <- knitr::knit_child("top_matter.md", quiet = TRUE)
cat(res, sep = '\n')

## -----------------------------------------------------------------------------
A <- matrix(1:20, nrow = 4)
dim(A)

## -----------------------------------------------------------------------------
Ac <- scale(A, center = TRUE, scale = FALSE)
covA <- (t(Ac) %*% Ac)/(nrow(Ac) -1)
dim(covA) # it's square
identical(covA, cov(A))

## -----------------------------------------------------------------------------
Acs <- scale(A, center = TRUE, scale = TRUE)
corA <- (t(Acs) %*% Acs)/(nrow(Acs) -1)
dim(corA) # square like covA
identical(corA, cor(A))

## -----------------------------------------------------------------------------
data(mtcars)
mt <- as.matrix(mtcars[seq(1, 20, 2), 1:5]) # select every other row to get 10 samples

## -----------------------------------------------------------------------------
pca <- prcomp(mt, center = TRUE, scale. = TRUE)
prin <- princomp(mt, cor = TRUE)

## -----------------------------------------------------------------------------
pca$x

## -----------------------------------------------------------------------------
prin$scores

## -----------------------------------------------------------------------------
all.equal(abs(prin$scores), abs(pca$x), check.attributes = FALSE)

## ----overlayScores, fig.asp = 1, fig.cap = "Comparison of scores computed by `prcomp` (black points) and `princomp` (pink points). Note that they are related by a 180 degreee rotation about an axis out of paper at the origin (plus a little wiggle due to algorithm differences).", echo = FALSE----
plot(pca$x[,1], pca$x[,2], type = "p", pch = 20, col = "black",
  xlim = c(-3, 3), ylim = c(-3, 3),
  xlab = "PC1", ylab = "PC2")
points(prin$scores[,1], prin$scores[,2], pch = 20, col = "pink")
abline(v = 0, h = 0, lty = 2)

## -----------------------------------------------------------------------------
pca$rotation

## -----------------------------------------------------------------------------
prin_loads <- matrix(prin$loadings, ncol = 5, byrow = FALSE)
prin_loads

## -----------------------------------------------------------------------------
all.equal(abs(pca$rotation), abs(prin_loads), check.attributes = FALSE)

## ----compareLoads, fig.cap = "A comparison of the signs of the first loadings from `prcomp` (black circles) and `princomp` (pink circles). Where the circles coincide, the value of the loading from each method is the same. ", echo = FALSE, fig.height = 3, fig.width = 5----
plot(x = 1:25, type = "n", axes = FALSE,
  xlab = "loadings", ylab = "sign", ylim = c(-1.1, 1.1))
points(sign(c(pca$rotation)))
points(sign(c(prin_loads)), col = "pink", cex = 1.5)
line.pos <- c(5.5, 10.5, 15.5, 20.5)
abline(v = line.pos, col = "gray")
axis(side = 2, labels = c("-1", "1"), at = c(-1, 1), lwd = 0, lwd.ticks = 0)
lab.pos <- c(line.pos, 25.5)
axis(side = 1, labels = c("PC1", "PC2", "PC3", "PC4", "PC5"), at = lab.pos - 2.5, lwd = 0, lwd.ticks = 0)

## ----varExp-------------------------------------------------------------------
summary(pca)
summary(prin)

## -----------------------------------------------------------------------------
MTpca <- PCAtoXhat(pca) # not checking attributes as the internal labeling varies between functions
all.equal(MTpca, mt, check.attributes = FALSE)

## -----------------------------------------------------------------------------
MTprin <- PCAtoXhat(prin)
all.equal(MTprin, mt, check.attributes = FALSE)

## -----------------------------------------------------------------------------
mt_cen_scl <- scale(mt, center = TRUE, scale = TRUE)

## -----------------------------------------------------------------------------
sing <- svd(mt_cen_scl)

## -----------------------------------------------------------------------------
svd_scores <- mt_cen_scl %*% sing$v
all.equal(svd_scores, pca$x, check.attributes = FALSE)

## -----------------------------------------------------------------------------
all.equal(sing$v, pca$rotation, check.attributes = FALSE)

## -----------------------------------------------------------------------------
mt_cen_scl_cor <- cor(mt_cen_scl)
eig <- eigen(mt_cen_scl_cor)

## -----------------------------------------------------------------------------
eig_scores <- mt_cen_scl %*% eig$vectors
all.equal(abs(eig_scores), abs(prin$scores), check.attributes = FALSE)

## -----------------------------------------------------------------------------
all.equal(abs(eig$vectors), abs(prin_loads), check.attributes = FALSE)

## ----echo = FALSE-------------------------------------------------------------
# c1 gives the row names
c1 = c("algorithm", "input dimensions", "user pre-processing", "internal pre-processing", "call", "scores", "loadings", "pct variance explained")
c2 = c("uses svd", "$n \\times p$", "center; scale", "center; scale", "$pca \\leftarrow prcomp(X)$", "$pca\\$x$", "$pca\\$rotation$", "$\\cfrac{100*pca\\$sdev^2}{sum(pca\\$sdev^2)}$")
c3 = c("svd", "$n \\times p$", "center; scale", "none", "$sing \\leftarrow svd(X)$", "$X \\times sing\\$v$", "$sing\\$v$", "$\\cfrac{100*sing\\$d^2}{sum(sing\\$d^2)}$")
c4 <- c("uses eigen", "$n \\times p$", "don't do it!", "center; cov or cor", "$prin \\leftarrow princomp(X)$", "$prin\\$scores$", "$prin\\$loadings$", "$\\cfrac{100*prin\\$sdev^2}{sum(prin\\$sdev^2)}$")
c5 <- c("eigen", "$n \\times n$", "center; scale; cov or cor", "none", "$eig \\leftarrow eigen(X)$", "$X \\times eig\\$vectors$", "$eig\\$vectors$", "$\\cfrac{100*eig\\$values}{sum(eig\\$values)}$")

DF <- data.frame(c1, c2, c3, c4, c5)
colnames(DF) <- c("", "prcomp", "svd", "princomp", "eigen")

## ----SumTable, echo = FALSE, results = "asis"---------------------------------
if (!is_latex_output()) {
  kable(DF, row.names = FALSE, caption = "A comparison of R functions for PCA.  Input data must be centered in all cases.  Scaling is optional.")
}

if (is_latex_output()) {
  bold <- function(x) {paste('{\\textbf{',x,'}}', sep ='')}
  tbl <- xtable(DF)
  caption(tbl) <- "A comparison of R functions for PCA.  Input data must be centered in all cases.  Scaling is optional."
  label(tbl) <- "tab:SumTable"
  print(tbl, include.rownames = FALSE,
    hline.after = 0:8,
    sanitize.text.function = function(x) {x},
    sanitize.colnames.function = bold,
    comment = FALSE)
}

## ----echo = FALSE, results = "asis"-------------------------------------------
res <- knitr::knit_child("refer_to_works_consulted.md", quiet = TRUE)
cat(res, sep = '\n')