## ----setup, echo = FALSE, include = FALSE------------------------------------- require(knitr) ## require(rmarkdown) set.seed(1) knitr::opts_chunk$set(cache = FALSE, autodep = FALSE) ## if(FALSE) { ## # compile ## #rmarkdown::render(file.path("vignettes", "MADPop-quicktut.Rmd")) ## rmarkdown::render("MADPop-quicktut.Rmd") ## } ## # view it by opening MADPop/vignettes/MADPop-tutorial.html ## ----madpop------------------------------------------------------------------- require(MADPop) ## ----preproc, echo = FALSE---------------------------------------------------- nObs <- nrow(fish215) nPop <- nlevels(fish215$Lake) nAlleles <- length(table(c(as.matrix(fish215[,-1])))) - 1 ## ----fish215------------------------------------------------------------------ head(fish215[sample(nObs),]) ## ----setup2, ref.label = "table2", echo = FALSE, results = "hide"------------- popId <- c("Dickey", "Simcoe") # lakes to compare Xsuff <- UM.suff(fish215) # summary statistics for dataset ctab <- Xsuff$tab[popId,] # contingency table ctab <- ctab[,colSums(ctab) > 0] # remove alleles with no counts #ctab rbind(ctab, Total = colSums(ctab)) ## ----table2------------------------------------------------------------------- popId <- c("Dickey", "Simcoe") # lakes to compare Xsuff <- UM.suff(fish215) # summary statistics for dataset ctab <- Xsuff$tab[popId,] # contingency table ctab <- ctab[,colSums(ctab) > 0] # remove alleles with no counts #ctab rbind(ctab, Total = colSums(ctab)) ## ----gtype-------------------------------------------------------------------- gtype <- colnames(ctab)[1] gtype <- as.numeric(strsplit(gtype, "[.]")[[1]]) gtype names(gtype) <- paste0("A", gtype) sapply(gtype, function(ii) Xsuff$A[ii]) ## ----pvasy-------------------------------------------------------------------- # observed values of the test statistics chi2.obs <- chi2.stat(ctab) # Pearson's chi^2 LRT.obs <- LRT.stat(ctab) # LR test statistic T.obs <- c(chi2 = chi2.obs, LRT = LRT.obs) # p-value with asymptotic calculation C <- ncol(ctab) pv.asy <- pchisq(q = T.obs, df = C-1, lower.tail = FALSE) signif(pv.asy, 2) ## ----pvboot, cache = FALSE---------------------------------------------------- N1 <- sum(ctab[1,]) # size of first sample N2 <- sum(ctab[2,]) # size of second sample rho.hat <- colSums(ctab)/(N1+N2) # common probability vector # bootstrap distribution of the test statistics # set verbose = TRUE for progress output system.time({ T.boot <- UM.eqtest(N1 = N1, N2 = N2, p0 = rho.hat, nreps = 1e4, verbose = FALSE) }) # bootstrap p-value pv.boot <- rowMeans(t(T.boot) >= T.obs) signif(pv.boot, 2) ## ----table3------------------------------------------------------------------- itab1 <- colSums(ctab) == 1 # single count genotypes cbind(ctab[,itab1], Other = rowSums(ctab[,!itab1]), Total = rowSums(ctab)) ## ----setup4, echo = FALSE----------------------------------------------------- c1 <- sum(itab1) # number of single-count columns n1 <- sum(ctab[,itab1]) # number of single counts ## ----popId0------------------------------------------------------------------- popId # equal lakes under H0 eqId0 <- paste0(popId, collapse = ".") # merged lake name popId0 <- as.character(fish215$Lake) # all lake names popId0[popId0 %in% popId] <- eqId0 table(popId0, dnn = NULL) # merged lake counts ## ----stan, cache = FALSE------------------------------------------------------ X0 <- cbind(Id = popId0, fish215[-1]) # allele data with merged lakes nsamples <- 1e4 fit0 <- hUM.post(nsamples = nsamples, X = X0, rhoId = eqId0, # output rho only for merged lakes chains = 1, # next two arguments are passed to rstan warmup = min(1e4, floor(nsamples/10)), full.stan.out = FALSE) ## ----setup3, ref.label = "bxp", echo = FALSE, fig.keep = "none"--------------- rho.post <- fit0$rho[,1,] # p(rho12 | Y) # sort genotype counts by decreasing order rho.count <- colSums(Xsuff$tab[popId,]) rho.ord <- order(colMeans(rho.post), decreasing = TRUE) # plot nbxp <- 50 # number of genotypes for which to make boxplots clrs <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") rho.ct <- rho.count[rho.ord[1:nbxp]] rho.lgd <- unique(sort(rho.ct)) rho.box <- rho.post[,rho.ord[1:nbxp]] par(mar = c(5,4.5,.5,.5)+.1, cex.axis = .7) boxplot(x = rho.box, las = 2, col = clrs[rho.ct+1], pch = 16, cex = .2) title(xlab = "Genotype", line = 4) title(ylab = expression(p(bold(rho)[(12)]*" | "*bold(Y)))) legend("topright", legend = rev(rho.lgd), fill = rev(clrs[rho.lgd+1]), title = "Counts", ncol = 2, cex = .8) ## ----bxp, fig.width = 7, fig.height = 3.5------------------------------------- rho.post <- fit0$rho[,1,] # p(rho12 | Y) # sort genotype counts by decreasing order rho.count <- colSums(Xsuff$tab[popId,]) rho.ord <- order(colMeans(rho.post), decreasing = TRUE) # plot nbxp <- 50 # number of genotypes for which to make boxplots clrs <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") rho.ct <- rho.count[rho.ord[1:nbxp]] rho.lgd <- unique(sort(rho.ct)) rho.box <- rho.post[,rho.ord[1:nbxp]] par(mar = c(5,4.5,.5,.5)+.1, cex.axis = .7) boxplot(x = rho.box, las = 2, col = clrs[rho.ct+1], pch = 16, cex = .2) title(xlab = "Genotype", line = 4) title(ylab = expression(p(bold(rho)[(12)]*" | "*bold(Y)))) legend("topright", legend = rev(rho.lgd), fill = rev(clrs[rho.lgd+1]), title = "Counts", ncol = 2, cex = .8) ## ----pvpost, cache = FALSE---------------------------------------------------- system.time({ T.post <- UM.eqtest(N1 = N1, N2 = N2, p0 = rho.post, nreps = 1e4, verbose = FALSE) }) # posterior p-value pv.post <- rowMeans(t(T.post) >= T.obs) ## ----pvtable, echo = FALSE---------------------------------------------------- pv.tab <- cbind(asy = pv.asy, boot = pv.boot, post = pv.post) pv.tab <- signif(pv.tab*100, 2) colnames(pv.tab) <- c("Asymptotic", "Bootstrap", "Posterior") rownames(pv.tab) <- c("$\\mathcal X$", "$\\Lambda$") kable(pv.tab, digits = 2, caption = "p-value ($\\times100\\%$)")