## ----echo = FALSE, message = FALSE, warning = FALSE---------------------------
library(PatientLevelPrediction)

## ----echo = TRUE, eval=FALSE--------------------------------------------------
# createRandomSampleSettings <- function(n = 10000,
#                                        sampleSeed = sample(10000, 1)) {
#   # add input checks
#   checkIsClass(n, c("numeric", "integer"))
#   checkHigher(n, 0)
#   checkIsClass(sampleSeed, c("numeric", "integer"))
# 
#   # create list of inputs to implement function
#   sampleSettings <- list(
#     n = n,
#     sampleSeed = sampleSeed
#   )
# 
#   # specify the function that will implement the sampling
#   attr(sampleSettings, "fun") <- "implementRandomSampleSettings"
# 
#   # make sure the object returned is of class "sampleSettings"
#   class(sampleSettings) <- "sampleSettings"
#   return(sampleSettings)
# }

## ----tidy=FALSE,eval=FALSE----------------------------------------------------
# implementRandomSampleSettings <- function(trainData, sampleSettings) {
#   n <- sampleSettings$n
#   sampleSeed <- sampleSettings$sampleSeed
# 
#   if (n > nrow(trainData$labels)) {
#     stop("Sample n bigger than training population")
#   }
# 
#   # set the seed for the randomization
#   set.seed(sampleSeed)
# 
#   # now implement the code to do your desired sampling
# 
#   sampleRowIds <- sample(trainData$labels$rowId, n)
# 
#   sampleTrainData <- list()
# 
#   sampleTrainData$labels <- trainData$labels %>%
#     dplyr::filter(.data$rowId %in% sampleRowIds) %>%
#     dplyr::collect()
# 
#   sampleTrainData$folds <- trainData$folds %>%
#     dplyr::filter(.data$rowId %in% sampleRowIds) %>%
#     dplyr::collect()
# 
#   sampleTrainData$covariateData <- Andromeda::andromeda()
#   sampleTrainData$covariateData$covariateRef <- trainData$covariateData$covariateRef
#   sampleTrainData$covariateData$covariates <- trainData$covariateData$covariates %>% dplyr::filter(.data$rowId %in% sampleRowIds)
# 
#   # update metaData$populationSize
#   metaData <- attr(trainData$covariateData, "metaData")
#   metaData$populationSize <- n
#   attr(sampleTrainData$covariateData, "metaData") <- metaData
# 
#   # make the cocvariateData the correct class
#   class(sampleTrainData$covariateData) <- "CovariateData"
# 
#   # return the updated trainData
#   return(sampleTrainData)
# }

## ----tidy=TRUE,eval=TRUE------------------------------------------------------
citation("PatientLevelPrediction")