## ---- echo = FALSE, message=FALSE--------------------------------------------- library(knitr) opts_chunk$set(collapse = TRUE, comment = "#>") library("PhenotypeSimulator") ## ----------------------------------------------------------------------------- # Set parameters genVar <- 0.4 noiseVar <- 1- genVar shared <- 0.6 independent <- 1 - shared # simulate simple bi-allelic genotypes and estimate kinship genotypes <- simulateGenotypes(N = 100, NrSNP = 10000, frequencies = c(0.05, 0.1, 0.3, 0.4), verbose = FALSE) genotypes_sd <- standardiseGenotypes(genotypes$genotypes) kinship <- getKinship(N=100, X=genotypes_sd, verbose = FALSE) # simulate phenotype components genBg <- geneticBgEffects(N = 100, kinship = kinship, P = 15) noiseBg <- noiseBgEffects(N = 100, P = 15) # rescale phenotype components genBg_shared_scaled <- rescaleVariance(genBg$shared, shared * genVar) genBg_independent_scaled <- rescaleVariance(genBg$independent, independent * genVar) noiseBg_shared_scaled <- rescaleVariance(noiseBg$shared, shared * noiseVar) noiseBg_independent_scaled <- rescaleVariance(noiseBg$independent, independent *noiseVar) # Total variance proportion shave to add up yo 1 total <- independent * noiseVar + independent * genVar + shared * noiseVar + shared * genVar total == 1 # combine components into final phenotype Y <- scale(genBg_shared_scaled$component + noiseBg_shared_scaled$component + genBg_independent_scaled$component + noiseBg_independent_scaled$component) # transform phenotype non-linearly Y_nl <- transformNonlinear(Y, alpha=0.7, method="exp") ## ----------------------------------------------------------------------------- # simulate phenotype with population structure and observational noise effects # only # genetic variance genVar <- 0.4 # random genetic variance: h2b phenotype <- runSimulation(N = 100, P = 15, tNrSNP = 10000, SNPfrequencies = c(0.05, 0.1,0.3,0.4), genVar = 0.4, h2bg = 1, phi = 1, verbose = TRUE, nonlinear="exp", proportionNonlinear = 0.7) ## ----------------------------------------------------------------------------- # read genotypes from external file # use one of the sample genotype file provided in the # extdata/genotypes/subfolders (e.g.extdata/genotypes/hapgen ) genotypefile <- system.file("extdata/genotypes/hapgen", "genotypes_hapgen.controls.gen", package = "PhenotypeSimulator") # remove the .gen ending (oxgen specific endings .gen and .sample are added # automatically ) genotypefile <- gsub("\\.gen","", genotypefile) genotypes <- readStandardGenotypes(N=100, filename = genotypefile, format="oxgen", delimiter = ",", verbose=TRUE) genotypes_sd <- standardiseGenotypes(genotypes$genotypes) # kinship estimate based on standardised SNPs kinship <- getKinship(N=100, X=genotypes_sd, verbose = FALSE) # simulate 30 genetic variant effects (from non-standardised SNP genotypes) causalSNPs <- getCausalSNPs(N=100, genotypes = genotypes$genotypes, NrCausalSNPs = 30, verbose = FALSE) genFixed <- geneticFixedEffects(N = 100, P = 15, X_causal = causalSNPs) # simulate infinitesimal genetic effects genBg <- geneticBgEffects(N=100, kinship = kinship, P = 15) # simulate 4 different confounder effects: # * 1 binomial covariate effect shared across all traits # * 2 categorical (3 categories) independent covariate effects # * 1 categorical (4 categories) independent covariate effect # * 2 normally distributed independent and shared covariate effects noiseFixed <- noiseFixedEffects(N = 100, P = 15, NrFixedEffects = 4, NrConfounders = c(1, 2, 1, 2), pIndependentConfounders = c(0, 1, 1, 0.5), distConfounders = c("bin", "cat_norm", "cat_unif", "norm"), probConfounders = 0.2, catConfounders = c(3, 4)) # simulate correlated effects with max correlation of 0.8 correlatedBg <- correlatedBgEffects(N = 100, P = 15, pcorr = 0.8) # simulate observational noise effects noiseBg <- noiseBgEffects(N = 100, P = 15) # total SNP effect on phenotype: 0.01 genVar <- 0.6 noiseVar <- 1 - genVar totalSNPeffect <- 0.01 h2s <- totalSNPeffect/genVar phi <- 0.6 rho <- 0.1 delta <- 0.3 shared <- 0.8 independent <- 1 - shared # rescale phenotype components genFixed_shared_scaled <- rescaleVariance(genFixed$shared, shared * h2s *genVar) genFixed_independent_scaled <- rescaleVariance(genFixed$independent, independent * h2s *genVar) genBg_shared_scaled <- rescaleVariance(genBg$shared, shared * (1-h2s) *genVar) genBg_independent_scaled <- rescaleVariance(genBg$independent, independent * (1-h2s) * genVar) noiseBg_shared_scaled <- rescaleVariance(noiseBg$shared, shared * phi* noiseVar) noiseBg_independent_scaled <- rescaleVariance(noiseBg$independent, independent * phi* noiseVar) correlatedBg_scaled <- rescaleVariance(correlatedBg$correlatedBg, shared * rho * noiseVar) noiseFixed_shared_scaled <- rescaleVariance(noiseFixed$shared, shared * delta * noiseVar) noiseFixed_independent_scaled <- rescaleVariance(noiseFixed$independent, independent * delta * noiseVar) # Total variance proportions have to add up yo 1 total <- shared * h2s *genVar + independent * h2s * genVar + shared * (1-h2s) * genVar + independent * (1-h2s) * genVar + shared * phi* noiseVar + independent * phi* noiseVar + rho * noiseVar + shared * delta * noiseVar + independent * delta * noiseVar total == 1 # combine components into final phenotype Y <- scale(genBg_shared_scaled$component + noiseBg_shared_scaled$component + genBg_independent_scaled$component + noiseBg_independent_scaled$component + genFixed_shared_scaled$component + noiseFixed_shared_scaled$component + genFixed_independent_scaled$component + noiseFixed_independent_scaled$component + correlatedBg_scaled$component) ## ---- tidy=TRUE, tidy.opts = list(width.cutoff = 60)-------------------------- # simulate phenotype with the same five phenotype components and settings as # above; display progress via verbose=TRUE phenotype <- runSimulation(N = 100, P = 15, genotypefile=genotypefile, format ="oxgen", cNrSNP=30, genVar = genVar, h2s = h2s, phi = 0.6, delta = 0.3, distBetaGenetic = "unif", mBetaGenetic = 0.5, sdBetaGenetic = 1, NrFixedEffects = 4, NrConfounders = c(1, 2, 1, 2), pIndependentConfounders = c(0, 1, 1, 0.5), distConfounders = c("bin", "cat_norm", "cat_unif", "norm"), probConfounders = 0.2, catConfounders = c(3, 4), pcorr = 0.8, verbose = TRUE ) ## ---- echo=FALSE-------------------------------------------------------------- # show proportion of variance of the different phenotype components in the # final phenotype varGenFixed <- t(phenotype$varComponents [grepl("var_genFix", names(phenotype$varComponents))]) varGenBg <- t(phenotype$varComponents [grepl("var_genBg", names(phenotype$varComponents))]) varNoiseFixed <- t(phenotype$varComponents [grepl("var_noiseFixed", names(phenotype$varComponents))]) varNoiseBg <- t(phenotype$varComponents [grepl("var_noiseBg", names(phenotype$varComponents))]) varNoiseCorr <- t(phenotype$varComponents [grepl("var_noiseCor", names(phenotype$varComponents))]) totalPropVariance <- as.matrix(t(data.frame(varGenFixed, varGenBg, varNoiseFixed, varNoiseBg, varNoiseCorr=c(varNoiseCorr, 0)))) totalPropVariance <- cbind(totalPropVariance, rowSums(totalPropVariance)) totalPropVariance <- rbind(totalPropVariance, sumProportion=colSums(totalPropVariance)) colnames(totalPropVariance) <- c("shared effect", "independent effect", "total effect") knitr::kable(totalPropVariance, caption="Proportion of variance explained by the different phenotype components") ## ---- fig.show='hold', fig.height=3.4, fig.width=3.4-------------------------- ### show 'image' of phenotype and correlation between phenotypic traits image(t(phenotype$phenoComponentsFinal$Y), main="Phenotype: [samples x traits]", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(cor(phenotype$phenoComponentsFinal$Y), main="Correlation of traits [traits x traits]", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Traits", line = 1) mtext(side = 2, text = "Traits", line = 1) ## ---- eval = FALSE------------------------------------------------------------ # out <- savePheno(phenotype, directory="/tmp", # outstring="test_simulation", # format=c("csv", "plink"), verbose=FALSE) ## ----------------------------------------------------------------------------- ## a) Draw cuasal SNPs from a simulated genotype matrix # simulate 10,000 bi-allelic SNP genotypes for 100 samples with randomly drawn # allele frequencies of 0.05, 0.1, 0.3 and 0.4. genotypes <- simulateGenotypes(N = 100, NrSNP = 10000, frequencies = c(0.05, 0.1, 0.3,0.4), verbose = FALSE) # draw 10 causal SNPs from the genotype matrix (use non-standardised allele # codes i.e. (0,1,2)) causalSNPs <- getCausalSNPs(N=100, NrCausalSNPs = 10, genotypes = genotypes$genotypes) ## ----------------------------------------------------------------------------- ## b) Draw SNPs from external genotype files: # read genotypes from external file # use one of the sample genotype file provided in the # extdata/genotypes/subfolders (e.g.extdata/genotypes/hapgen ) genotypefile <- system.file("extdata/genotypes/hapgen", "genotypes_hapgen.controls.gen", package = "PhenotypeSimulator") # remove the .gen ending (oxbgen specific endings .gen and .sample are added # automatically ) genotypefile <- gsub("\\.gen","", genotypefile) genotypes <- readStandardGenotypes(N = 100, filename = genotypefile, format="oxgen", delimiter = ",", verbose=TRUE) causalSNPsFromFile <- getCausalSNPs(N = 100, NrCausalSNPs = 10, genotypes=genotypes$genotypes) ## ----------------------------------------------------------------------------- ## c) draw 10 causal SNPs from external genotype files: sample 10 SNPs from ## chromosome 22 # use sample genotype file provided in the extdata/genotypes folder genotypeFile <- system.file("extdata/genotypes/", "genotypes_chr22.csv", package = "PhenotypeSimulator") genoFilePrefix <- gsub("chr.*", "", genotypeFile) genoFileSuffix <- ".csv" causalSNPsSampledFromFile <- getCausalSNPs(N = 10, NrCausalSNPs = 10, chr = 22, genoFilePrefix = genoFilePrefix, genoFileSuffix = genoFileSuffix, delimiter = ",", verbose=TRUE) ## ---- fig.show='hold'--------------------------------------------------------- # create genetic variant effects with 20% of SNPs having a specific effect, # affecting 40% of all simulated traits fixedGenetic <- geneticFixedEffects(X_causal = causalSNPsFromFile, N = 100, P = 10, pIndependentGenetic = 0.2, pTraitIndependentGenetic = 0.4) ## ---- fig.show='hold', echo=FALSE, fig.height=3.4, fig.width=3.4-------------- image(fixedGenetic$shared, main="Shared genetic variant effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(fixedGenetic$independent, main="Independent genetic variant effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) ## ---- fig.show='hold'--------------------------------------------------------- ## a) Estimate kinship from simulated genotypes genotypes <- simulateGenotypes(N = 100, NrSNP = 10000, frequencies = c(0.05, 0.1, 0.3,0.4), verbose = FALSE) genotypes_sd <- standardiseGenotypes(genotypes$genotypes) kinship <- getKinship(N=100, X=genotypes_sd, verbose = FALSE) ## b) Read kinship from external kinship file kinshipFile <- system.file("extdata/kinship/", "kinship.csv", package = "PhenotypeSimulator") kinshipFromFile <- getKinship(N=50, kinshipfile = kinshipFile, verbose = FALSE) genBg <- geneticBgEffects(N=100, kinship = kinship, P = 15) ## ---- fig.show='hold', echo=FALSE, fig.height=3.4, fig.width=3.4-------------- image(genBg$shared, main="Shared infinitesimal genetic effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(genBg$independent, main="Independent infinitesimal genetic effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) ## ---- fig.show='hold'--------------------------------------------------------- # create 1 non-genetic covariate effect affecting 30% of all simulated traits. The effect # follows a uniform distribution between 30 and 40 (resembling for instance age # in a study cohort). fixedNoiseUniform <- noiseFixedEffects(N = 100, P = 10, NrConfounders = 1, pIndependentConfounders = 1, pTraitIndependentConfounders = 0.3, distConfounders = "unif", mConfounders = 35, sdConfounders = 5) # create 2 non-genetic covariate effects with 1 specific confounder affecting # 20% of all simulated traits. The effects follow a normal distribution fixedNoiseNormal <- noiseFixedEffects(N = 100, P = 10, NrConfounders = 2, pIndependentConfounders = 0.5, pTraitIndependentConfounders = 0.2, distConfounders = "norm", mConfounders = 0, sdConfounders = 1) # create 1 non-genetic covariate effects affecting all simulated traits. The # effect follows a binomial distribution with probability 0.5 (resembling for # instance sex in a study cohort). fixedNoiseBinomial <- noiseFixedEffects(N = 100, P = 10, NrConfounders = 1, pIndependentConfounders = 0, distConfounders = "bin", probConfounders = 0.5) ## ---- fig.show='hold', echo=FALSE, fig.height=3.5, fig.width=4---------------- image(fixedNoiseUniform$independent, main="Independent non-genetic covariate effects\n(uniform confounder dist)", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(fixedNoiseNormal$shared, main="Shared non-genetic covariate effects\n(normal confounder dist)", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(fixedNoiseNormal$independent, main="Independent non-genetic covariate effects\n(normal confounder dist)", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(fixedNoiseBinomial$shared, main="Shared non-genetic covariate effects\n(binomial confounder dist)", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) ## ---- fig.show='hold', fig.height=3.4, fig.width=3.4-------------------------- # simulate correlated noise effect for 10 traits with top-level # correlation of 0.8 correlatedNoise <- correlatedBgEffects(N = 100, P = 10, pcorr = 0.8 ) # correlation structure of the traits: strong the closer to the diagonal, # little correlation at the furthest distance to the diagonal furthestDistCorr <- 0.4^(10-1) pairs(correlatedNoise$correlatedBg, pch = ".", main=paste("Correlation at furthest distance to diagonal:\n", furthestDistCorr), cex.main=0.8) image(correlatedNoise$correlatedBg, main="Correlated effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) ## ----fig.show='hold'---------------------------------------------------------- # simulate a noise random effect for 10 traits noiseBg <- noiseBgEffects(N = 100, P = 10, mean = 0, sd = 1) ## ---- fig.show='hold', echo=FALSE, fig.height=3.4, fig.width=3.4-------------- image(noiseBg$shared, main="Shared observational noise effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1) image(noiseBg$independent, main="Independent observational noise effects", axes=FALSE, cex.main=0.8) mtext(side = 1, text = "Samples", line = 1) mtext(side = 2, text = "Traits", line = 1)