## ----include = FALSE----------------------------------------------------------
test_protti <- identical(Sys.getenv("TEST_PROTTI"), "true")
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, eval = test_protti, message = FALSE, warning = FALSE--------------
#  library(protti)
#  library(magrittr)
#  library(dplyr)

## ----eval=FALSE---------------------------------------------------------------
#  # To read in your own data you can use read_protti()
#  your_data <- read_protti(filename = "mydata/data.csv")

## ----load_data, eval = test_protti--------------------------------------------
#  data("rapamycin_10uM")

## ----median_normalisation_filtering, eval = test_protti, message = FALSE, warning = FALSE----
#  data_normalised <- rapamycin_10uM %>%
#    filter(eg_is_decoy == FALSE) %>%
#    mutate(intensity_log2 = log2(fg_quantity)) %>%
#    normalise(
#      sample = r_file_name,
#      intensity_log2 = intensity_log2,
#      method = "median"
#    )
#  
#  data_filtered <- data_normalised %>%
#    filter_cv(
#      grouping = eg_precursor_id,
#      condition = r_condition,
#      log2_intensity = intensity_log2,
#      cv_limit = 0.25,
#      min_conditions = 1
#    )

## ----data_preparation_prot_pep, eval = test_protti, message = FALSE, warning = FALSE----
#  data_filtered_proteotypic <- data_filtered %>%
#    filter(pep_is_proteotypic == TRUE)

## ----fetching_database_information, eval = test_protti, message = FALSE, warning = FALSE----
#  uniprot_ids <- unique(data_filtered_proteotypic$pg_protein_accessions)
#  
#  uniprot <-
#    fetch_uniprot(
#      uniprot_ids = uniprot_ids,
#      columns = c(
#        "protein_name",
#        "gene_names",
#        "go_f",
#        "xref_string",
#        "cc_interaction",
#        "ft_act_site",
#        "ft_binding",
#        "xref_pdb",
#        "length",
#        "sequence"
#      )
#    ) %>%
#    rename(pg_protein_accessions = accession)
#  
#  data_filtered_uniprot <- data_filtered_proteotypic %>%
#    left_join(
#      y = uniprot,
#      by = "pg_protein_accessions"
#    ) %>%
#    find_peptide(
#      protein_sequence = sequence,
#      peptide_sequence = pep_stripped_sequence
#    ) %>%
#    assign_peptide_type(
#      aa_before = aa_before,
#      last_aa = last_aa,
#      aa_after = aa_after
#    ) %>%
#    calculate_sequence_coverage(
#      protein_sequence = sequence,
#      peptides = pep_stripped_sequence
#    )

## ----coverage_plot, eval = test_protti, fig.align= "center", fig.width = 6, fig.height = 5----
#  qc_sequence_coverage(
#    data = data_filtered_uniprot,
#    protein_identifier = pg_protein_accessions,
#    coverage = coverage
#  )

## ----t_test, eval = test_protti, message = FALSE, warning = FALSE-------------
#  diff_abundance_data <- data_filtered_uniprot %>%
#    assign_missingness(
#      sample = r_file_name,
#      condition = r_condition,
#      grouping = eg_precursor_id,
#      intensity = normalised_intensity_log2,
#      ref_condition = "control",
#      completeness_MAR = 0.7,
#      completeness_MNAR = 0.25,
#      retain_columns = c(
#        pg_protein_accessions,
#        go_f,
#        xref_string,
#        start,
#        end,
#        length,
#        coverage
#      )
#    ) %>%
#    calculate_diff_abundance(
#      sample = r_file_name,
#      condition = r_condition,
#      grouping = eg_precursor_id,
#      intensity_log2 = normalised_intensity_log2,
#      missingness = missingness,
#      comparison = comparison,
#      method = "moderated_t-test",
#      retain_columns = c(
#        pg_protein_accessions,
#        go_f,
#        xref_string,
#        start,
#        end,
#        length,
#        coverage
#      )
#    )

## ----pval_distribution, eval = test_protti, message = FALSE, warning = FALSE, fig.align= "center", fig.width = 6, fig.height = 5----
#  pval_distribution_plot(
#    data = diff_abundance_data,
#    grouping = eg_precursor_id,
#    pval = pval
#  )

## ----volcano_plot, eval = test_protti, fig.align= "center", fig.width = 6, fig.height = 5, message = FALSE, warning = FALSE----
#  volcano_plot(
#    data = diff_abundance_data,
#    grouping = eg_precursor_id,
#    log2FC = diff,
#    significance = pval,
#    method = "target",
#    target_column = pg_protein_accessions,
#    target = "P62942",
#    x_axis_label = "log2(fold change) Rapamycin treated vs. untreated",
#    significance_cutoff = c(0.05, "adj_pval")
#  )
#  
#  # The significance_cutoff argument can also just be used for a
#  # regular cutoff line by just providing the cutoff value, e.g.
#  # signficiance_cutoff = 0.05

## ----barcode_plot, eval = test_protti, fig.align = "center", fig.width = 6, message = FALSE, warning = FALSE----
#  FKBP12 <- diff_abundance_data %>%
#    filter(pg_protein_accessions == "P62942")
#  
#  barcode_plot(
#    data = FKBP12,
#    start_position = start,
#    end_position = end,
#    protein_length = length,
#    coverage = coverage,
#    colouring = diff,
#    cutoffs = c(diff = 1, adj_pval = 0.05),
#    protein_id = pg_protein_accessions
#  )

## ----woods_plot, eval = test_protti, fig.align = "center", fig.width = 6, message = FALSE, warning = FALSE----
#  FKBP12 <- FKBP12 %>%
#    mutate(significant = ifelse(adj_pval < 0.01, TRUE, FALSE))
#  
#  woods_plot(
#    data = FKBP12,
#    fold_change = diff,
#    start_position = start,
#    end_position = end,
#    protein_length = length,
#    coverage = coverage,
#    colouring = adj_pval,
#    protein_id = pg_protein_accessions,
#    facet = FALSE,
#    fold_change_cutoff = 1,
#    highlight = significant
#  )

## ----protile_plot, eval = test_protti, fig.align = "center", fig.width = 20, fig.height = 6, message = FALSE, warning = FALSE----
#  FKBP12_intensity <- data_filtered_uniprot %>%
#    filter(pg_protein_accessions == "P62942")
#  
#  peptide_profile_plot(
#    data = FKBP12_intensity,
#    sample = r_file_name,
#    peptide = eg_precursor_id,
#    intensity_log2 = normalised_intensity_log2,
#    grouping = pg_protein_accessions,
#    targets = "P62942",
#    protein_abundance_plot = FALSE
#  )

## ----additional_functions, eval=FALSE-----------------------------------------
#  diff_abundance_significant <- diff_abundance_data %>%
#    # mark significant peptides
#    mutate(is_significant = ifelse((adj_pval < 0.01 & abs(diff) > 1), TRUE, FALSE)) %>%
#    # mark true positive hits
#    mutate(binds_treatment = pg_protein_accessions == "P62942")
#  
#  ### GO enrichment using "molecular function" annotation from UniProt
#  
#  calculate_go_enrichment(
#    data = diff_abundance_significant,
#    protein_id = pg_protein_accessions,
#    is_significant = is_significant,
#    go_annotations_uniprot = go_f
#  )
#  
#  ### Network analysis
#  
#  network_input <- diff_abundance_significant %>%
#    filter(is_significant == TRUE)
#  
#  analyse_functional_network(
#    data = network_input,
#    protein_id = pg_protein_accessions,
#    string_id = xref_string,
#    binds_treatment = binds_treatment,
#    organism_id = 9606
#  )
#  
#  ### KEGG pathway enrichment
#  
#  # First you need to load KEGG pathway annotations from the KEGG database
#  # for your specific organism of interest. In this case HeLa cells were
#  # used, therefore the organism of interest is homo sapiens (hsa)
#  
#  kegg <- fetch_kegg(species = "hsa")
#  
#  # Next we need to annotate our data with KEGG pathway IDs and perform enrichment analysis
#  
#  diff_abundance_significant %>%
#    # columns containing proteins IDs are named differently
#    left_join(kegg, by = c("pg_protein_accessions" = "uniprot_id")) %>%
#    calculate_kegg_enrichment(
#      protein_id = pg_protein_accessions,
#      is_significant = is_significant,
#      pathway_id = pathway_id,
#      pathway_name = pathway_name
#    )
#  
#  ### Treatment enrichment analysis
#  
#  calculate_treatment_enrichment(diff_abundance_significant,
#    protein_id = pg_protein_accessions,
#    is_significant = is_significant,
#    binds_treatment = binds_treatment,
#    treatment_name = "Rapamycin"
#  )