## ---- echo = FALSE------------------------------------------------------- knitr::opts_chunk$set( eval = FALSE ) ## ---- eval = TRUE, echo = FALSE------------------------------------------ example_bin_counts <- structure(c(308, 243, 0, 624, 448, 0, 247, 163, 303, 263, 189, 637, 249, 174, 266, 216, 270, 262, 287, 257, 274, 213, 234, 244, 157, 269, 245, 240, 262, 142, 217, 209, 148, 225, 264, 229, 239, 231, 142, 243, 99, 225, 251, 241, 205, 324, 88, 255, 146, 285, 258, 251, 241, 355, 264, 259, 236, 259, 242, 216, 244, 383, 230, 261), .Dim = c(8L, 8L), .Dimnames = list(c(" 1", "2", "3", "4", "5", "6", "7", "8"), c("1", "2", "3", "4", "5", "6", "7", "8" ))) ## ---- eval = TRUE, echo = FALSE------------------------------------------ example_regression_stats <- structure(c("-0.171248675025602", "0.00615122686158584", "Practical_CV", "0.583158688871931", "11 2 5 12", "1.00050469190991", "0.00335713985926308", "-0.799303637284865", "0.00599131304613039", "Practical_CV", "0.503865450973465", "10 19 17 16", "1.00011356353248", "0.00394053718934269" ), .Dim = c(7L, 2L), .Dimnames = list(c("Z_score_sample", "CV", "cv_types", "P_value_shapiro", "Predictor_chromosomes", "Mean_test_set", "CV_train_set"), c("Prediction_set_1", "Prediction_set_2"))) ## ---- eval = TRUE, echo = FALSE------------------------------------------ example_bin_counts_sex <- structure(c(0, 0, 160, 0, 6, 0, 266, 0, 127, 0, 85, 0, 182, 0, 235, 0), .Dim = c(2L, 8L), .Dimnames = list(c("X", "Y"), c("1", "2", "3", "4", "5", "6", "7", "8"))) ## ---- eval = TRUE, echo = FALSE------------------------------------------ example_match_report <- structure(c(6.76557504762928e-07, 2.15862500903867e-06, 3.10100948006486e-06, 3.29198583437393e-06, 3.58809416090935e-06, 3.83885934221371e-06, 4.43239735020721e-06, 7.8916102881966e-06, 9.73718696931918e-06, 1.12605046722957e-05), .Dim = c(10L, 1L), .Dimnames = list(c("Sample 6", "Sample 8", "Sample 1", "Sample 10", "Sample 9", "Sample 3", "Sample 5", "Sample 2", "Sample 7", "Sample 4"), "Sum_of_squares")) ## ---- eval=TRUE, echo = FALSE, results = "asis"-------------------------- pander::pandoc.table(example_bin_counts, caption = "Table 1: Example data bin counts. The rows represent chromosomes and the columns bins", include.rownames = T) ## ---- eval=TRUE, echo = FALSE, results = "asis"-------------------------- pander::pandoc.table(example_bin_counts_sex, caption = "Table 2: Example data bin counts sex chromosomes. The rows represent chromosomes and the columns bins", include.rownames = T) ## ------------------------------------------------------------------------ # bin_bam_sample(bam_filepath, do_sort = F, separate_strands = F, custom_name = NULL) ## ------------------------------------------------------------------------ # gc_correct(nipt_object, method, include_XY = F, span = 0.75) ## ------------------------------------------------------------------------ # #Correct NIPTSample object using LOESS method # loess_corrected_sample <- gc_correct(nipt_object = sample_of_interest, method = "LOESS", # include_XY = F, span = 0.75) # #Correct NIPTControlGroup object using bin method # gc_bin_corrected_control_group <- gc_correct(nipt_object = control_group, method = "bin", # include_XY = T) # ## ------------------------------------------------------------------------ # matchcontrolgroup(nipt_sample, nipt_control_group, mode, n_of_samples, # include_chromosomes = NULL, exclude_chromosomes = NULL) ## ---- eval=TRUE, echo = FALSE, results = "asis"-------------------------- pander::pandoc.table(example_match_report , caption = "Table 3: Example match_control_group mode 'subset'", include.rownames = T) ## ------------------------------------------------------------------------ # #Run matchcontrolgroup in mode "report" # scores_control_group <- matchcontrolgroup(nipt_sample = gc_LOESS_corrected_sample, # nipt_control_group = gc_LOESS_corrected_control_group, # mode = "report", include_chromosomes = c(13,18)) # #Run matchcontrolgroup in mode "subset" and select 50 best matching samples # subset_loess_corrected_control_group <- matchcontrolgroup(nipt_sample = gc_LOESS_corrected_sample, # nipt_control_group = loess_corrected_control_group, # mode = "subset", n_of_samples = 50) # ## ------------------------------------------------------------------------ # chi_correct(nipt_sample, nipt_control_group, chi_cutoff = 3.5, include_XY = F) ## ------------------------------------------------------------------------ # #Apply chi-squared based variation reduction method # chi_corrected_data <- chicorrect(nipt_sample = gc_LOESS_corrected_sample, # nipt_control_group = subset_loess_corrected_control_group) # #Extract sample and control group # loess_chi_corrected_sample <- chi_corrected_data$sample # subset_loess_chi_corrected_control_group <- chi_corrected_data$control_group ## ------------------------------------------------------------------------ # calculate_z_score(nipt_sample, nipt_control_group, chromo_focus) ## ------------------------------------------------------------------------ # #Calculate Z score for chromosome 13 # z_score_result_13 <- calculate_z_score(nipt_sample = loess_chi_corrected_sample, # nipt_control_group = subset_loess_chi_corrected_control_group, # chromo_focus = 13) ## ------------------------------------------------------------------------ # perform_regression(nipt_sample, nipt_control_group, chromo_focus, # n_models = 4, n_predictors = 4, exclude_chromosomes = NULL, # include_chromosomes = NULL, use_test_train_set = T, # size_of_train_set = 0.6, overdispersion_rate = 1.15, # force_practical_vc = F) ## ---- eval=TRUE, echo = FALSE, results = "asis"-------------------------- pander::pandoc.table(example_regression_stats, caption = "Table 2: Example regression results and statistics", include.rownames = T) ## ------------------------------------------------------------------------ # prepare_ncv(nipt_control_group, chr_focus, max_elements, # exclude_chromosomes = NULL, include_chromosomes = NULL, # use_test_train_set = T, size_of_train_set = 0.6) ## ------------------------------------------------------------------------ # calculate_ncv_score(nipt_sample, ncv_template) ## ------------------------------------------------------------------------ # as_control_group(nipt_samples) ## ------------------------------------------------------------------------ # add_samples_controlgroup(nipt_control_group, samples_to_add) ## ------------------------------------------------------------------------ # remove_duplicates_controlgroup(nipt_control_group) ## ------------------------------------------------------------------------ # remove_sample_controlgroup(samplename, nipt_control_group) ## ------------------------------------------------------------------------ # diagnose_control_group(nipt_control_group) ## ------------------------------------------------------------------------ # # list_NIPT_samples <- lapply(X = bam_filepaths, bin_bam_sample, do_sort = FALSE, # separate_strands = T, custom_name = NULL) # ## ------------------------------------------------------------------------ # control_group <- as_control_group(nipt_samples = lapply(X = bam_filepaths, bin_bam_sample, # do_sort = F, separate_strands = T)) ## ------------------------------------------------------------------------ # control_group <- as_control_group(nipt_samples = mapply(FUN = bin_bam_sample, bam_filepaths, # custom_name = names, SIMPLIFY = FALSE)) ## ------------------------------------------------------------------------ # library(NIPTeR) # #Retrieve filenames # bam_filepaths <- list.files(path = "/Path/to/bamfiles/", pattern = ".bam", full.names = T) # #Load files and convert to control group # control_group <- as_control_group(nipt_samples = lapply(X = bam_filepaths, bin_bam_sample, # do_sort = F, separate_strands = FALSE)) # #Save control group for later # saveRDS(object = control_group, file = "/Path/to/directory/control_group.rds") ## ------------------------------------------------------------------------ # library(NIPTeR) # #Gather all bam filepaths in a vector. Corresponds to 1a in figure # bam_filepaths <- list.files(path = "/Path/to/bamfiles/", pattern = ".bam", full.names = T) # #Load all bam files using lapply and feed the results to function as_control_group, # #converting the NIPTSamples to a NIPTControlGroup object. Corresponds to 1b in figure # control_group <- as_control_group(nipt_samples = lapply(X = bam_filepaths, bin_bam_sample, # do_sort = F, separate_strands = F)) # #apply a gc LOESS correction to all samples. Since this can take up to 30 seconds # #sample, doing this once for a control group and store the results can be rewarding # #in terms of analysis time. Corresponds to 2a in figure # gc_control_group <- gc_correct(nipt_object = control_group, method = "LOESS") # #Retrieve control group diagnostics. Corresponds with 3a in figure # control_group_diagnostics <- diagnose_control_group(nipt_control_group = control_group) # #Retrieve samplenames with an abberant Z score for any chromosome and remove these samples # #from the control group. Corresponds with 3b in figure # abberant_sample_names <- unique(control_group_diagnostics$abberant_scores$Sample_name) # for (i in 1:length(abberant_sample_names)){ # control_group <- remove_sample_controlgroup(samplename = abberant_sample_names[i], # nipt_control_group = control_group) # } # #Save the gc_corrected control groups to disk. Corresponds to 4a in figure # saveRDS(object = control_group, file = "/path/to/controlgroup.rds") ## ------------------------------------------------------------------------ # library(NIPTeR) # #Load sample. Corresponds with 1a in figure # sample_of_interest <- bin_bam_sample(bam_filepath = "/Path/to/bamfile.bam", separate_strands = T) # #Load control group. Corresponds with 1b in figure # control_group <- readRDS("/Path/to/control_group.rds") # #Perform a chi-square based variation reduction on new trimmed control group and sample and # #extract data. Corresponds with 2a in figure # chi_data <- chi_correct(nipt_sample = sample_of_interest, nipt_control_group = control_group) # sample_of_interest <- chi_data$sample # control_group <- chi_data$control_group # #Perform regression for chromosome 21 with default settings, so: # #Create 4 models with 4 predictors each # #All chromosomes are potential predictors except the potential trisomic chromosomes 13, 18 and 21 # #Use a test and train set where the size of the train set is 60% of the control group # #Assume at least 15% overdispersion in the data # #Dont force practical CV, so if the CV is below 1.15 * standard error of the mean use this as VC # #Corresponds with 3a in figure # regression_score_21 <- perform_regression(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 21) # # # ### Gather relevant data from the objects on the workspace ####### ## ------------------------------------------------------------------------ # library(NIPTeR) # #Load sample. Corresponds with 1a in figure # sample_of_interest <- bin_bam_sample(bam_filepath = "/Path/to/bamfile.bam") # #Load control group. Corresponds with 1b in figure # control_group <- readRDS("/Path/to/control_group.rds") # #Peform a GC correction type bin for the sample and the controlgroup # #Corresponds with 2a in figure # sample_of_interest <- gc_correct(nipt_object = sample_of_interest, method = "bin") # control_group <- gc_correct(nipt_object = control_group, method = "bin") # #Trim control group by only selecting 80% of best matching control samples # #Corresponds with 2b in figure # control_group <- match_control_group(nipt_sample = sample_of_interest, nipt_control_group = control_group, # mode = "subset", n_of_samples = round(length(control_group$samples) *.8, # digits = 0)) # #Perform a chi-square based variation reduction on new trimmed control group and sample and # #extract data. Corresponds with 2c in figure # chi_data <- chi_correct(nipt_sample = sample_of_interest, nipt_control_group = control_group) # sample_of_interest <- chi_data$sample # control_group <- chi_data$control_group # #Retrieve control group diagnostics. Corresponds with 3a in figure # control_group_diagnostics <- diagnose_control_group(nipt_control_group = control_group) # #Retrieve samplenames with an abberant Z score for any chromosome and remove these samples # #from the control group. Corresponds with 3b in figure # abberant_sample_names <- unique(control_group_diagnostics$abberant_scores$Sample_name) # for (i in 1:length(abberant_sample_names)){ # control_group <- remove_sample_controlgroup(samplename = abberant_sample_names[i], # nipt_control_group = control_group) # } # #Calculate Z score from chromosomes 13, 18 and 21. Corresponds with 4a in figure # z_score_13 <- calculate_z_score(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 13) # z_score_18 <- calculate_z_score(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 18) # z_score_21 <- calculate_z_score(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 21) # #Perform regression for all potential trisomic chromosomes with default settings, so: # #Create 4 models for every potential trisomic chromosome with 4 predictors each # #All chromosomes are potential predictors except the potential trisomic chromosomes 13, 18 and 21 # #Use a test and train set where the size of the train set is 60% of the control group # #Assume at least 15% overdispersion in the data # #Dont force practical CV, so if the CV is below 1.15 * standard error of the mean use this as VC # #Corresponds with 4c in figure # regression_score_13 <- perform_regression(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 13) # regression_score_18 <- perform_regression(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 18) # regression_score_21 <- perform_regression(nipt_sample = sample_of_interest, # nipt_control_group = control_group, chromo_focus = 21) # #Get NCVTemplates for all potential trisomic chromosomes with max 9 denominators and default settings, so: # #All autosomals chromosomes are potential predictors, except the potential trisomic chromosomes 13, 18 and 21 # #Use a test and train set where the size of the train set is 60% of the control group # #Corresponds with 4c in figure # new_ncv_template_13 <- prepare_ncv(nipt_control_group = control_group, chr_focus = 13, max_elements = 9) # new_ncv_template_18 <- prepare_ncv(nipt_control_group = control_group, chr_focus = 18, max_elements = 9) # new_ncv_template_21 <- prepare_ncv(nipt_control_group = control_group, chr_focus = 21, max_elements = 9) # #Use the NCVTemplates to get NCV scores for the sample of interest # #Corresponds with 4d in figure # ncv_score_13 <- calculate_ncv_score(nipt_sample = sample_of_interest, ncv_template = new_ncv_template_13) # ncv_score_18 <- calculate_ncv_score(nipt_sample = sample_of_interest, ncv_template = new_ncv_template_18) # ncv_score_21 <- calculate_ncv_score(nipt_sample = sample_of_interest, ncv_template = new_ncv_template_21) # # # ### Gather relevant data from the objects on the workspace ####### # # #