## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(MethEvolSIM)

## -----------------------------------------------------------------------------
# Define the tree:
tree <- "((a:1, b:1):2, c:2);"
# Define the genomic structure. Here 1 island ("U") containing 100 CpGs 
# surrounded by 2 non-island structures ("M") each containing 10 CpGs
infoStr <- data.frame(n = c(10, 100, 10),
                      globalState = c("M", "U", "M"))
# Simulate 1 replicate of data at the tree tips
output <- simulate_evolData(infoStr = infoStr, tree = tree)

## -----------------------------------------------------------------------------
# In replicate 1, tip 2: name of the tip of the tree
output$data[[1]][[2]]$name
# In replicate 1, tip 2: sequence of methylation states of left non-island structure
output$data[[1]][[2]]$seq[[1]]
# In replicate 1, tip 2: sequence of methylation states of island structure
output$data[[1]][[2]]$seq[[2]]
# In replicate 1, tip 2: sequence of methylation states of right non-island structure
output$data[[1]][[2]]$seq[[3]]

## -----------------------------------------------------------------------------
# Example 3 structures of length 100 each:
# one non-island containing 100 CpG sites,
# one island containing 100 CpG sites,
# and one non-island containing 100 CpG sites
infoStr <- data.frame(n = c(100, 100, 100),
                        globalState = c("M", "U", "M"))
infoStr

## -----------------------------------------------------------------------------
# Example for 2 islands: first of length 1, second of length 15
infoStr <- data.frame(n = c(1, 15),
                      globalState = c("U", "U"))
infoStr

## -----------------------------------------------------------------------------
# Example 3 structures of length 100 with customized initial methylation state
# equilibrium frequencies
infoStr <- data.frame(n = c(100, 100, 100),
                      globalState = c("M", "U", "M"),
                      u_eqFreq = c(0.1, 0.8, 0.1),
                      p_eqFreq = c(0.1, 0.1, 0.1),
                      m_eqFreq = c(0.8, 0.1, 0.8))
infoStr

## -----------------------------------------------------------------------------
default_paramValues <- get_parameterValues()
default_paramValues

## -----------------------------------------------------------------------------
# Example: modification of parameter iota to value 0.2
default_paramValues$iota <- 0.2
default_paramValues

## -----------------------------------------------------------------------------
# Example with customized initial methylation frequencies and customized parameter values
custom_infoStr <- data.frame(n = c(100, 100, 100),
                             globalState = c("M", "U", "M"),
                             u_eqFreq = c(0.1, 0.8, 0.1),
                             p_eqFreq = c(0.1, 0.1, 0.1),
                             m_eqFreq = c(0.8, 0.1, 0.8))
custom_params <- get_parameterValues()
custom_params$mu <- 0.005
initial_customD <- simulate_initialData(infoStr = custom_infoStr, params = custom_params)
# Returns customized parameters and simulated data
initial_customD$params
initial_customD$data

## -----------------------------------------------------------------------------
# Example with sampled initial methylation frequencies and default parameter values
custom_infoStr <- data.frame(n = c(100, 100, 100),
                             globalState = c("M", "U", "M"))
initialD <- simulate_initialData(infoStr = custom_infoStr)
# Returns default parameters
initialD$params
initialD$data

## -----------------------------------------------------------------------------
combiStr_object <- initialD$data
get_parameterValues(rootData = combiStr_object)

## ----eval=FALSE---------------------------------------------------------------
#  # E.g. access fist structure (non-island)
#  combiStr_object$get_singleStr(1)

## -----------------------------------------------------------------------------
# E.g. get the methylation equilibrium frequencies of the first singleStructureGenerator
combiStr_object$get_singleStr(1)$get_eqFreqs()

## -----------------------------------------------------------------------------
# E.g. get the sequence of methylation states of the second singleStructureGenerator
# Encoded as: 1 for unmethylated, 2 for partially-methylated, 3 for methylated
combiStr_object$get_singleStr(2)$get_seq()

## -----------------------------------------------------------------------------
# Example with customized initial methylation frequencies, customized parameter values 
# and default dt (0.01)
tree <- "((a:1, b:1):2, c:2, (d:3.7, (e:4, f:1):3):5);"
custom_infoStr <- data.frame(n = c(100, 100, 100),
                             globalState = c("M", "U", "M"),
                             u_eqFreq = c(0.1, 0.8, 0.1),
                             p_eqFreq = c(0.1, 0.1, 0.1),
                             m_eqFreq = c(0.8, 0.1, 0.8))
custom_params <- get_parameterValues()
custom_params$mu <- 0.005
evolD <- simulate_evolData(infoStr = custom_infoStr, tree = tree, params = custom_params, n_rep = 3, only_tip = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  # Returns customized parameters, tree used, time step length for SSE process used (dt)
#  # and simulated data
#  evolD$data
#  evolD$dt
#  evolD$tree
#  evolD$params

## -----------------------------------------------------------------------------
# The simulated data is returned as a list. Each element of the list corresponds to a
# simulation replicate:
length(evolD$data)
rep1 <- evolD$data[[1]]
rep2 <- evolD$data[[2]]
rep3 <- evolD$data[[3]]

## -----------------------------------------------------------------------------
# When only_tip is TRUE, each replicate contains for each tip: its name and the 
# sequence of methylation states. E.g.
rep1_tip1 <- rep1[[1]]
rep1_tip1$name
# Seq is a list with the methylation states of each simulated structure.
# In this example, 3 structures: non-island, island and non-island ...
length(rep1_tip1$seq)
# Each with 100 CpGs
length(rep1_tip1$seq[[1]])

## -----------------------------------------------------------------------------
tree <- "((a:1, b:1):2, c:2, (d:3.7, (e:4, f:1):3):5);"
custom_infoStr <- data.frame(n = c(100, 100, 100),
                             globalState = c("M", "U", "M"),
                             u_eqFreq = c(0.1, 0.8, 0.1),
                             p_eqFreq = c(0.1, 0.1, 0.1),
                             m_eqFreq = c(0.8, 0.1, 0.8))
custom_params <- get_parameterValues()
custom_params$mu <- 0.005
evolD <- simulate_evolData(infoStr = custom_infoStr, tree = tree, params = custom_params, n_rep = 3, only_tip = FALSE)

## -----------------------------------------------------------------------------
# When only_tip is FALSE. $data output contains the simulated data and the 
# information on the relationship between tree branches.
names(evolD$data)

## -----------------------------------------------------------------------------
# The information of the tree branches can be accessed using $branchInTree. 
treeStr <- evolD$data$branchInTree
# It is a list in which each element index represents the index of the branch
root <- treeStr[[1]] # First branch (or tree root)
b2 <- treeStr[[2]] # Second branch 
# ...
# Each branch contains information of the offspring and parent indexes.
root$parent_index # The tree root does not have parent branches
root$offspring_index # The tree root has 3 daughter branches: 2, 5 and 6. 
b2$parent_index # Branch 2 therefore has as parent branch the root.
b2$offspring_index # Branch 2 also has 2 daughter branches: 3 and 4

## -----------------------------------------------------------------------------
# The simulated data can be accessed using $sim_data.
sim_data <- evolD$data$sim_data
# As before, each list element corresponds to a simulation replicate. 
# E.g. replicate 3
rep3 <- sim_data[[3]]
# In each replicate, each element of the list corresponds to a tree branch
# the indexes correspond to the information in $branchInTree
root <- rep3[[1]]
b2 <- rep3[[2]]
b3 <- rep3[[3]]

## -----------------------------------------------------------------------------
# Each tree branch contains:
# - the branch name (NULL for tree root and inner nodes and 
# tip name for the tree tips):
root$name
b2$name
b3$name

## -----------------------------------------------------------------------------
# - the information on IWE events that happened in that branch. 
root$IWE # The root always has NULL because its branch length is 0
# The rest of the branches have FALSE when no IWE happened or a list containing
# $islands corresponds to the island index(es) to which the event(s) happened 
# $times corresponds to the branch time in which the event happened.
b2$IWE
b3$IWE

## -----------------------------------------------------------------------------
# - A list with each element representing the sequence of methylation states of 
# each simulated island and non-island
# Encoded as 0 for unmethylated, 0.5 for partially methylated and 1 for methylated
root$seq[[1]] # First sequence (non-island)
root$seq[[2]] # Second sequence (island)

## -----------------------------------------------------------------------------
# - A list with each element representing the methylation frequencies for the states: unmethylated, partially-methylated and methylated
root$eqFreqs[[1]][1] # First structure of root (non-island) equilibrium frequency for unmethylated state
root$eqFreqs[[1]][2] # First structure of root (non-island) equilibrium frequency for partially-methylated state
root$eqFreqs[[1]][3] # First structure of root (non-island) equilibrium frequency for methylated state

## -----------------------------------------------------------------------------
# Example with customized initial methylation frequencies, customized parameter values and default dt (0.01)
tree <- "((a:1, b:1):2, c:2, (d:3.7, (e:4, f:1):3):5);"
custom_infoStr <- data.frame(n = c(100, 100, 100),
                             globalState = c("M", "U", "M"),
                             u_eqFreq = c(0.1, 0.8, 0.1),
                             p_eqFreq = c(0.1, 0.1, 0.1),
                             m_eqFreq = c(0.8, 0.1, 0.8))
custom_params <- get_parameterValues()
custom_params$mu <- 0.005
initialD <- simulate_initialData(infoStr = custom_infoStr, params = custom_params)$data
evolD <- simulate_evolData(rootData =initialD, tree = tree)