## -----------------------------------------------------------------------------
library(MKmisc)

## -----------------------------------------------------------------------------
x <- rnorm(100)
IQrange(x)
IQR(x)

## -----------------------------------------------------------------------------
sIQR(x)
sd(x)

## -----------------------------------------------------------------------------
meanAD(x)

## -----------------------------------------------------------------------------
fiveNS(x)

## -----------------------------------------------------------------------------
## 5% outliers
out <- rbinom(100, prob = 0.05, size = 1)
sum(out)
x <- (1-out)*rnorm(100, mean = 10, sd = 2) + out*25
CV(x)
medCV(x)
iqrCV(x)

## -----------------------------------------------------------------------------
SNR(x)
medSNR(x)
iqrSNR(x)

## ---- fig.width=7, fig.height=7-----------------------------------------------
x <- rt(10, df = 3)
par(mfrow = c(1,2))
qboxplot(x, main = "1st and 3rd quartile")
boxplot(x, main = "Lower and upper hinge")

## -----------------------------------------------------------------------------
## Example from Wikipedia
risks(p0 = 0.4, p1 = 0.1)
risks(p0 = 0.4, p1 = 0.5)

## -----------------------------------------------------------------------------
or2rr(or = 1.5, p0 = 0.4)
or2rr(or = 1/6, p1 = 0.1)

## -----------------------------------------------------------------------------
curve(log, from = -3, to = 5)
curve(glog, from = -3, to = 5, add = TRUE, col = "orange")
legend("topleft", fill = c("black", "orange"), legend = c("log", "glog"))

## -----------------------------------------------------------------------------
curve(log10(x), from = -3, to = 5)
curve(glog10(x), from = -3, to = 5, add = TRUE, col = "orange")
legend("topleft", fill = c("black", "orange"), legend = c("log10", "glog10"))

## -----------------------------------------------------------------------------
inv.glog(glog(10))
inv.glog(glog(10, base = 3), base = 3)
inv.glog10(glog10(10))
inv.glog2(glog2(10))

## ---- fig.width=7, fig.height=7-----------------------------------------------
res <- simCorVars(n = 500, r = 0.8)
cor(res$Var1, res$Var2)

## ---- fig.width=7, fig.height=7-----------------------------------------------
thyroid(TSH = 1.5, fT3 = 2.5, fT4 = 14, TSHref = c(0.2, 3.0),
        fT3ref = c(1.7, 4.2), fT4ref = c(7.6, 15.0))

## -----------------------------------------------------------------------------
library(ggplot2)
data(mpg)
p1 <- ggplot(mpg, aes(displ, hwy)) + geom_point()
p1
p1 + scale_x_log10()
p1 + scale_x_glog10()
p1 + scale_y_log10()
p1 + scale_y_glog10()

## -----------------------------------------------------------------------------
x <- matrix(rnorm(1000, mean = 10), nrow = 10)
g1 <- rep("control", 10)
y1 <- matrix(rnorm(500, mean = 11.25), nrow = 10)
y2 <- matrix(rnorm(500, mean = 9.75), nrow = 10)
g2 <- rep("treatment", 10)
group <- factor(c(g1, g2))
Data <- rbind(x, cbind(y1, y2))
pvals <- apply(Data, 2, function(x, group) t.test(x ~ group)$p.value,
               group = group)
## compute log-fold change
logfc <- function(x, group){
  res <- tapply(x, group, mean)
  log2(res[1]/res[2])
}
lfcs <- apply(Data, 2, logfc, group = group)
ps <- data.frame(pvals = pvals, logfc = lfcs)
ggplot(ps, aes(x = logfc, y = pvals)) + geom_point() +
  geom_hline(yintercept = 0.05) + scale_y_neglog10() +
  geom_vline(xintercept = c(-0.1, 0.1)) +
  xlab("log-fold change") + ylab("-log10(p value)") +
  ggtitle("A Volcano Plot")

## ---- fig.width=7, fig.height=7-----------------------------------------------
library(ggplot2)
## some random data
test <- data.frame(x = rnorm(10), y = rnorm(10), z = rnorm(10))
test.long <- melt.long(test)
test.long
ggplot(test.long, aes(x = variable, y = value)) +
  geom_boxplot(aes(fill = variable))
## introducing an additional grouping variable
group <- factor(rep(c("a","b"), each = 5))
test.long.gr <- melt.long(test, select = 1:2, group = group)
test.long.gr
ggplot(test.long.gr, aes(x = variable, y = value, fill = group)) +
  geom_boxplot()

## -----------------------------------------------------------------------------
## default: "wilson"
binomCI(x = 12, n = 50)
## Clopper-Pearson interval
binomCI(x = 12, n = 50, method = "clopper-pearson")
## identical to
binom.test(x = 12, n = 50)$conf.int

## -----------------------------------------------------------------------------
x <- rnorm(50, mean = 2, sd = 3)
## mean and SD unknown
normCI(x)
## SD known
normCI(x, sd = 3)
## mean known
normCI(x, mean = 2)

## -----------------------------------------------------------------------------
x <- rnorm(20)
y <- rnorm(20, sd = 2)
## paired
normDiffCI(x, y, paired = TRUE)
## compare
normCI(x-y)
## unpaired
y <- rnorm(10, mean = 1, sd = 2)
## classical
normDiffCI(x, y, method = "classical")
## Welch (default as in case of function t.test)
normDiffCI(x, y, method = "welch")
## Hsu
normDiffCI(x, y, method = "hsu")

## -----------------------------------------------------------------------------
M <- 100
CIhsu <- CIwelch <- CIclass <- matrix(NA, nrow = M, ncol = 2)
for(i in 1:M){
  x <- rnorm(10)
  y <- rnorm(30, sd = 0.1)
  CIclass[i,] <- normDiffCI(x, y, method = "classical")$conf.int
  CIwelch[i,] <- normDiffCI(x, y, method = "welch")$conf.int
  CIhsu[i,] <- normDiffCI(x, y, method = "hsu")$conf.int
}
## coverage probabilities
## classical
sum(CIclass[,1] < 0 & 0 < CIclass[,2])/M
## Welch
sum(CIwelch[,1] < 0 & 0 < CIwelch[,2])/M
## Hsu
sum(CIhsu[,1] < 0 & 0 < CIhsu[,2])/M

## -----------------------------------------------------------------------------
x <- rnorm(100, mean = 10, sd = 2) # CV = 0.2
## default: "miller"
cvCI(x)
## Gulhar et al. (2012)
cvCI(x, method = "gulhar")
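## A plain base-R sanity check (a sketch, not part of MKmisc): the classical
## coefficient of variation is sd/mean, so the point estimate underlying cvCI()
## should be close to the theoretical value 2/10 = 0.2 used to generate x.
sd(x)/mean(x)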
## -----------------------------------------------------------------------------
x <- rexp(100, rate = 0.5)
## exact
quantileCI(x = x, prob = 0.95)
## asymptotic
quantileCI(x = x, prob = 0.95, method = "asymptotic")

## -----------------------------------------------------------------------------
## exact
medianCI(x = x)
## asymptotic
medianCI(x = x, method = "asymptotic")

## -----------------------------------------------------------------------------
medianCI(x = x, minLength = TRUE)

## -----------------------------------------------------------------------------
## exact
madCI(x = x)
## asymptotic
madCI(x = x, method = "asymptotic")
## unstandardized
madCI(x = x, constant = 1)

## -----------------------------------------------------------------------------
## Example from Wikipedia
rrCI(a = 15, b = 135, c = 100, d = 150)
rrCI(a = 75, b = 75, c = 100, d = 150)

## -----------------------------------------------------------------------------
## identical results as power.t.test, since sd = sd1 = sd2 = 1
power.welch.t.test(n = 20, delta = 1)
power.welch.t.test(power = .90, delta = 1)
power.welch.t.test(power = .90, delta = 1, alternative = "one.sided")
## sd1 = 0.5, sd2 = 1
power.welch.t.test(delta = 1, sd1 = 0.5, sd2 = 1, power = 0.9)

## -----------------------------------------------------------------------------
## slightly more conservative than Welch t-test
power.hsu.t.test(n = 20, delta = 1)
power.hsu.t.test(power = .90, delta = 1)
power.hsu.t.test(power = .90, delta = 1, alternative = "one.sided")
## sd1 = 0.5, sd2 = 1
power.welch.t.test(delta = 0.5, sd1 = 0.5, sd2 = 1, power = 0.9)
power.hsu.t.test(delta = 0.5, sd1 = 0.5, sd2 = 1, power = 0.9)

## -----------------------------------------------------------------------------
## examples from Table III in Zhu and Lakkis (2014)
power.nb.test(mu0 = 5.0, RR = 2.0, theta = 1/0.5, duration = 1, power = 0.8,
              approach = 1)
power.nb.test(mu0 = 5.0, RR = 2.0, theta = 1/0.5, duration = 1, power = 0.8,
              approach = 2)
power.nb.test(mu0 = 5.0, RR = 2.0, theta = 1/0.5, duration = 1, power = 0.8,
              approach = 3)

## -----------------------------------------------------------------------------
## One-sample test
X <- matrix(rnorm(10*20, mean = 1), nrow = 10, ncol = 20)
mod.t.test(X)
## Two-sample test
set.seed(123)
X <- rbind(matrix(rnorm(5*20), nrow = 5, ncol = 20),
           matrix(rnorm(5*20, mean = 1), nrow = 5, ncol = 20))
g2 <- factor(c(rep("group 1", 10), rep("group 2", 10)))
mod.t.test(X, group = g2)
## Paired two-sample test
mod.t.test(X, group = g2, paired = TRUE)

## -----------------------------------------------------------------------------
set.seed(123)
X <- rbind(matrix(rnorm(5*20), nrow = 5, ncol = 20),
           matrix(rnorm(5*20, mean = 1), nrow = 5, ncol = 20))
gr <- factor(c(rep("A1", 5), rep("B2", 5), rep("C3", 5), rep("D4", 5)))
mod.oneway.test(X, gr)

## -----------------------------------------------------------------------------
pairwise.mod.t.test(X, gr)

## -----------------------------------------------------------------------------
t.test(1:10, y = c(7:20))      # P = .00001855
t.test(1:10, y = c(7:20, 200)) # P = .1245 -- NOT significant anymore
hsu.t.test(1:10, y = c(7:20))
hsu.t.test(1:10, y = c(7:20, 200))
## Traditional interface
with(sleep, t.test(extra[group == 1], extra[group == 2]))
with(sleep, hsu.t.test(extra[group == 1], extra[group == 2]))
## Formula interface
t.test(extra ~ group, data = sleep)
hsu.t.test(extra ~ group, data = sleep)
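## A small, hedged check (not from the package documentation): Hsu's
## approximation is usually described as using min(n1, n2) - 1 degrees of
## freedom, which is why it tends to be more conservative than Welch's test.
## Assuming hsu.t.test() returns a standard "htest" object, its reported df
## can be compared against that value.
hsu.t.test(1:10, y = 7:20)$parameter  ## expected: min(10, 14) - 1 = 9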
## -----------------------------------------------------------------------------
## Generate some data
set.seed(123)
x <- rnorm(25, mean = 1)
x[sample(1:25, 5)] <- NA
y <- rnorm(20, mean = -1)
y[sample(1:20, 4)] <- NA
pair <- c(rnorm(25, mean = 1), rnorm(20, mean = -1))
g <- factor(c(rep("yes", 25), rep("no", 20)))
D <- data.frame(ID = 1:45, variable = c(x, y), pair = pair, group = g)
## Use Amelia to impute missing values
library(Amelia)
res <- amelia(D, m = 10, p2s = 0, idvars = "ID", noms = "group")
## Per protocol analysis (Welch two-sample t-test)
t.test(variable ~ group, data = D)
## Intention to treat analysis (Multiple Imputation Welch two-sample t-test)
mi.t.test(res$imputations, x = "variable", y = "group")
## Per protocol analysis (Two-sample t-test)
t.test(variable ~ group, data = D, var.equal = TRUE)
## Intention to treat analysis (Multiple Imputation two-sample t-test)
mi.t.test(res$imputations, x = "variable", y = "group", var.equal = TRUE)
## Specifying alternatives
mi.t.test(res$imputations, x = "variable", y = "group", alternative = "less")
mi.t.test(res$imputations, x = "variable", y = "group", alternative = "greater")
## One sample test
t.test(D$variable[D$group == "yes"])
mi.t.test(res$imputations, x = "variable", subset = D$group == "yes")
mi.t.test(res$imputations, x = "variable", mu = -1, subset = D$group == "yes",
          alternative = "less")
mi.t.test(res$imputations, x = "variable", mu = -1, subset = D$group == "yes",
          alternative = "greater")
## paired test
t.test(D$variable, D$pair, paired = TRUE)
mi.t.test(res$imputations, x = "variable", y = "pair", paired = TRUE)

## -----------------------------------------------------------------------------
SD1 <- c(0.149, 0.022, 0.036, 0.085, 0.125, NA, 0.139, 0.124, 0.038)
SD2 <- c(NA, 0.039, 0.038, 0.087, 0.125, NA, 0.135, 0.126, 0.038)
SDchange <- c(NA, NA, NA, 0.026, 0.058, NA, NA, NA, NA)
imputeSD(SD1, SD2, SDchange)

## -----------------------------------------------------------------------------
x <- c(runif(50, max = 0.6), runif(50, min = 0.4))
g <- c(rep(0, 50), rep(1, 50))
AUC(x, group = g)

## -----------------------------------------------------------------------------
g <- c(rep(1, 50), rep(0, 50))
AUC(x, group = g)
## no switching
AUC(x, group = g, switchAUC = FALSE)

## -----------------------------------------------------------------------------
g <- c(rep(0, 50), rep(1, 50))
AUC.test(pred1 = x, lab1 = g)

## -----------------------------------------------------------------------------
x2 <- c(runif(50, max = 0.7), runif(50, min = 0.3))
g2 <- c(rep(0, 50), rep(1, 50))
AUC.test(pred1 = x, lab1 = g, pred2 = x2, lab2 = g2)

## -----------------------------------------------------------------------------
x3 <- c(x, x2)
g3 <- c(g, c(rep(2, 50), rep(3, 50)))
pairwise.auc(x = x3, g = g3)

## -----------------------------------------------------------------------------
x <- rnorm(100) ## assumed as log-data
g <- factor(sample(1:4, 100, replace = TRUE))
levels(g) <- c("a", "b", "c", "d")
## modified FC
pairwise.fc(x, g)
## "true" FC
pairwise.fc(x, g, mod.fc = FALSE)
## without any transformation
pairwise.logfc(x, g)

## -----------------------------------------------------------------------------
pairwise.logfc(x, g, ave = median)

## -----------------------------------------------------------------------------
pairwise.wilcox.test(airquality$Ozone, airquality$Month,
                     p.adjust.method = "none")
## To avoid the warnings
library(exactRankTests)
pairwise.fun(airquality$Ozone, airquality$Month,
             fun = function(x, y) wilcox.exact(x, y)$p.value)
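## pairwise.fun() accepts any function of two samples that returns a p value.
## As an additional sketch (not taken from the vignette), the same pairwise
## scheme can be combined with Welch's t test from base R:
pairwise.fun(airquality$Ozone, airquality$Month,
             fun = function(x, y) t.test(x, y)$p.value)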
## -----------------------------------------------------------------------------
## Example: HIV test
## 1. ELISA screening test (4th generation)
predValues(sens = 0.999, spec = 0.998, prev = 0.001)
## 2. Western-Blot confirmation test
predValues(sens = 0.998, spec = 0.999996, prev = 1/3)

## -----------------------------------------------------------------------------
## example from dataset infert
fit <- glm(case ~ spontaneous+induced, data = infert, family = binomial())
pred <- predict(fit, type = "response")
## with group numbers
perfMeasures(pred, truth = infert$case, namePos = 1)
perfScores(pred, truth = infert$case, namePos = 1)
## with group names
my.case <- factor(infert$case, labels = c("control", "case"))
perfMeasures(pred, truth = my.case, namePos = "case")
perfScores(pred, truth = my.case, namePos = "case")
## using weights
perfMeasures(pred, truth = infert$case, namePos = 1, weight = 0.3)
perfScores(pred, truth = infert$case, namePos = 1, weight = 0.3)

## -----------------------------------------------------------------------------
## example from dataset infert
fit <- glm(case ~ spontaneous+induced, data = infert, family = binomial())
pred <- predict(fit, type = "response")
optCutoff(pred, truth = infert$case, namePos = 1)

## -----------------------------------------------------------------------------
## Hosmer-Lemeshow goodness of fit tests for C and H statistic
HLgof.test(fit = pred, obs = infert$case)
## le Cessie-van Houwelingen-Copas-Hosmer global goodness of fit test
HLgof.test(fit = pred, obs = infert$case,
           X = model.matrix(case ~ spontaneous+induced, data = infert))

## -----------------------------------------------------------------------------
## see n2 on page 1202 of Chu and Cole (2007)
power.diagnostic.test(sens = 0.99, delta = 0.14, power = 0.95) # 40
power.diagnostic.test(sens = 0.99, delta = 0.13, power = 0.95) # 43
power.diagnostic.test(sens = 0.99, delta = 0.12, power = 0.95) # 47

## -----------------------------------------------------------------------------
## see Table 2 of Dobbin et al. (2008)
g <- 0.1
fc <- 1.6
ssize.pcc(gamma = g, stdFC = fc, nrFeatures = 22000)

## -----------------------------------------------------------------------------
M <- matrix(rnorm(100), ncol = 5)
FL <- matrix(rpois(100, lambda = 10), ncol = 5) # only for this example
repMeans(x = M, flags = FL, use.flags = "max", ndups = 5, spacing = 4)

## -----------------------------------------------------------------------------
af <- oneWayAnova(c(rep(1,5),rep(2,5)))
## p value
af(rnorm(10))
x <- matrix(rnorm(12*10), nrow = 10)
## 2-way ANOVA with interaction
af1 <- twoWayAnova(c(rep(1,6),rep(2,6)), rep(c(rep(1,3), rep(2,3)), 2))
## p values
apply(x, 1, af1)
## 2-way ANOVA without interaction
af2 <- twoWayAnova(c(rep(1,6),rep(2,6)), rep(c(rep(1,3), rep(2,3)), 2),
                   interaction = FALSE)
## p values
apply(x, 1, af2)

## -----------------------------------------------------------------------------
M <- matrix(rcauchy(1000), nrow = 5)
## Pearson
corDist(M)
## Spearman
corDist(M, method = "spearman")
## Minimum Covariance Determinant
corDist(M, method = "mcd")

## -----------------------------------------------------------------------------
madMatrix(t(M))

## ---- fig.width=8, fig.height=7-----------------------------------------------
M <- matrix(rnorm(1000), ncol = 20)
colnames(M) <- paste("Sample", 1:20)
M.cor <- cor(M)
corPlot(M.cor, minCor = min(M.cor), labels = colnames(M))

## ---- fig.width=8, fig.height=7-----------------------------------------------
## random data
x <- matrix(rnorm(1000), ncol = 10)
## outliers
x[1:20,5] <- x[1:20,5] + 10
madPlot(x, new = TRUE, maxMAD = 2.5, labels = TRUE,
        title = "MAD: Outlier visible")
## in contrast
corPlot(x, new = TRUE, minCor = -0.5, labels = TRUE,
        title = "Correlation: Outlier masked")

## ---- fig.width=7, fig.height=9-----------------------------------------------
## generate some random data
data.plot <- matrix(rnorm(100*50, sd = 1), ncol = 50)
colnames(data.plot) <- paste("patient", 1:50)
rownames(data.plot) <- paste("gene", 1:100)
data.plot[1:70, 1:30] <- data.plot[1:70, 1:30] + 3
data.plot[71:100, 31:50] <- data.plot[71:100, 31:50] - 1.4
data.plot[1:70, 31:50] <- rnorm(1400, sd = 1.2)
data.plot[71:100, 1:30] <- rnorm(900, sd = 1.2)
nrcol <- 128
## Load required packages
library(gplots)
library(RColorBrewer)
myCol <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(nrcol))
heatmap.2(data.plot, col = myCol, trace = "none", tracecol = "black",
          main = "standard colors")
farbe <- heatmapCol(data = data.plot, col = myCol,
                    lim = min(abs(range(data.plot)))-1)
heatmap.2(data.plot, col = farbe, trace = "none", tracecol = "black",
          main = "heatmapCol colors")

## -----------------------------------------------------------------------------
x <- "GACGGATTATG"
y <- "GATCGGAATAG"
## Hamming distance
stringDist(x, y, method = "hamming")
## Levenshtein distance
d <- stringDist(x, y)
d

## -----------------------------------------------------------------------------
attr(d, "ScoringMatrix")
attr(d, "TraceBackMatrix")

## -----------------------------------------------------------------------------
## optimal global alignment score
d <- stringSim(x, y)
d
attr(d, "ScoringMatrix")
attr(d, "TraceBackMatrix")
## optimal local alignment score
d <- stringSim(x, y, global = FALSE)
d
attr(d, "ScoringMatrix")
attr(d, "TraceBackMatrix")

## -----------------------------------------------------------------------------
x <- "GACGGATTATG"
y <- "GATCGGAATAG"
## Levenshtein distance
d <- stringDist(x, y)
## optimal global alignment
traceBack(d)
## Optimal global alignment score
d <- stringSim(x, y)
## optimal global alignment
traceBack(d)
## Optimal local alignment score
d <- stringSim(x, y, global = FALSE)
## optimal local alignment
traceBack(d, global = FALSE)

## -----------------------------------------------------------------------------
sessionInfo()