## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(ulrb)
library(dplyr)
library(tidyr)
library(vegan)
library(ggplot2)
library(purrr)
#
set.seed(123)

## -----------------------------------------------------------------------------
# Load raw OTU table from N-ICE
data("nice_raw", package = "ulrb")
data("nice_env", package = "ulrb")

# Change name of first column
nice_clean <- rename(nice_raw, Taxonomy = "X.SampleID")

# Select 16S rRNA amplicon sequencing samples
selected_samples <- c("ERR2044662", "ERR2044663", "ERR2044664",
                      "ERR2044665", "ERR2044666", "ERR2044667",
                      "ERR2044668", "ERR2044669", "ERR2044670")

# Add a column with phylogenetic units ID (OTU in this case)
nice_clean <- mutate(nice_clean, OTU = paste0("OTU_", row_number()))

# Select relevant collumns
nice_clean <- select(nice_clean, selected_samples, OTU, Taxonomy)

# Separate Taxonomy column into each taxonomic level
nice_clean <- separate(nice_clean, Taxonomy,
                       c("Domain","Kingdom","Phylum",
                         "Class","Order","Family","Genus","Species"),
                       sep=";")

# Remove Kingdom column, because it is not used for prokaryotes
nice_clean <- select(nice_clean, -Kingdom)

# Remove eukaryotes
nice_clean <- filter(nice_clean, Domain != "sk__Eukaryota")

# Remove unclassified OTUs at phylum level
nice_clean <- filter(nice_clean, !is.na(Phylum))

# Simplify name
nice <- nice_clean

# Check if everything looks normal
head(nice)

# Change table to tidy format
# You can automatically do this with an ulrb function
nice_tidy <- prepare_tidy_data(nice, ## data to tidy 
                  sample_names = contains("ERR"), ## vector with ID samples
                  samples_in = "cols") ## samples can be in columns (cols) or rows. 


## ----fig.width = 6, fig.height = 4.5------------------------------------------
## First, check how many reads each sample got
nice_tidy %>% 
  group_by(Sample) %>% ## because data is in tidy format
  summarise(TotalReads = sum(Abundance)) %>% 
  ggplot(aes(Sample, TotalReads)) + 
  geom_hline(yintercept = 40000) + 
  geom_col() + 
  theme_bw() + 
  coord_flip()

## -----------------------------------------------------------------------------
nice_rarefid <- 
  nice_tidy %>% 
  group_by(Sample) %>%
  nest() %>% 
  mutate(Rarefied_reads = map(.x = data, 
                              ~as.data.frame(
                                t(
                                  rrarefy(
                                    .x$Abundance, 
                                    sample = 40000))))) %>% 
  unnest(c(data,Rarefied_reads))%>% 
  rename(Rarefied_abundance = "V1")  

## -----------------------------------------------------------------------------
nice_classified <- define_rb(nice_tidy, simplified = TRUE)
head(nice_classified)

## -----------------------------------------------------------------------------
nice_eco <- nice_classified %>% left_join(nice_env, by = c("Sample" = "ENA_ID"))

## -----------------------------------------------------------------------------
# Calculate diversity
nice_diversity <- nice_eco %>% 
  group_by(Sample, Classification, Month, Depth, Water.mass) %>% 
  summarise(SpeciesRichness = specnumber(Abundance),
            ShannonIndex = diversity(Abundance, index = "shannon"),
            SimpsonIndex = diversity(Abundance, index = "simpson")) %>% 
  ungroup()

## -----------------------------------------------------------------------------
## Re-organize table to have all diversity indices in a single col.
nice_diversity_tidy <- nice_diversity %>% 
  pivot_longer(cols = c("SpeciesRichness", "ShannonIndex", "SimpsonIndex"),
               names_to = "Index",
               values_to = "Diversity") %>% 
# correct order
  mutate(Month = factor(Month, levels = c("March", "April", "June"))) %>% 
# edit diversity index names
  mutate(Index = case_when(Index == "ShannonIndex" ~ "Shannon Index",
                           Index == "SimpsonIndex" ~ "Simpson Index",
                           TRUE ~ "Number of OTUs")) %>%
  # order diversity index names
  mutate(Index = factor(Index, c("Number of OTUs", "Shannon Index","Simpson Index")))

## ----fig.width = 6, fig.height = 4.5------------------------------------------
qualitative_colors <- 
  c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

## Plots
nice_diversity_tidy %>%
  ggplot(aes(Month, Diversity, col = Classification)) + 
  geom_point() + 
  facet_wrap(~Index, scales = "free_y")+
  scale_color_manual(values = qualitative_colors[c(5,3,6)]) + 
  theme_bw() + 
  theme(strip.background = element_blank(),
        strip.text = element_text(size = 12),
        legend.position = "top") + 
  labs(col = "Classification: ",
       title = "Alpha diversity across month")

## ----fig.width = 6, fig.height = 4.5------------------------------------------
nice_diversity_tidy %>% 
  ggplot(aes(Water.mass, Diversity, col = Classification)) + 
  geom_point() + 
  facet_wrap(~Index, scales = "free_y")+
  scale_color_manual(values = qualitative_colors[c(5,3,6)]) + 
  theme_bw() + 
  theme(strip.background = element_blank(),
        strip.text = element_text(size = 12),
        legend.position = "top") %>%
  labs(col = "Classification: ",
       title = "Alpha diversity across water mass")

## -----------------------------------------------------------------------------
rare_biosphere <- 
  nice_eco %>%  
  filter(Classification == "Rare") %>% 
  select(Sample, Abundance, OTU, Depth, Month, Water.mass) %>% 
  mutate(Sample_unique = paste(Sample, Month))

rb_env <- rare_biosphere %>% 
  ungroup() %>% 
  select(Sample_unique, Depth, Water.mass, Month) %>% 
  distinct()

rb_sp_prep <- rare_biosphere %>% 
  ungroup() %>% 
  select(Sample_unique, Abundance, OTU)

rb_sp_prep %>% head() # sanity check

rb_sp <- 
  rb_sp_prep %>% 
  tidyr::pivot_wider(names_from = "OTU",
                     values_from = "Abundance",
                     values_fn = list(count= list)) %>% 
  print() %>% 
  unchop(everything())

## -----------------------------------------------------------------------------
rb_sp[is.na(rb_sp)] <- 0

# Prepare aesthetics
rb_env <- rb_env %>% 
  mutate(col_month = case_when(Month == "March" ~ qualitative_colors[1],
                               Month == "April" ~ qualitative_colors[2],
                               TRUE ~ qualitative_colors[3])) %>% 
  mutate()

## ----fig.height = 8, fig.width= 8---------------------------------------------
cca_plot_rare_biosphere <- 
  cca(rb_sp[,-1], display = "sites", scale = TRUE) %>% 
  plot(display = "sites", type = "p", main = "Rare biosphere")
#
cca_plot_rare_biosphere
points(cca_plot_rare_biosphere,
       bg = rb_env$col_month,
       pch = 21, 
       col = "grey", 
       cex = 2)
#
with(rb_env,
     ordispider(cca_plot_rare_biosphere,
                Month, lty = "dashed", label = TRUE))