---
title: "Splitting cohorts"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{a07_split_cohorts}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Default chunk options for every chunk in this vignette: code is evaluated,
# source and output are collapsed into one block, output lines are prefixed
# with "#>", and messages and warnings are not shown.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = TRUE,
  message = FALSE,
  warning = FALSE
)

library(CohortConstructor)
library(CohortCharacteristics)
library(ggplot2)
library(CDMConnector)
library(dplyr, warn.conflicts = FALSE)
library(PatientProfiles)

# Default EUNOMIA_DATA_FOLDER to a session-temporary directory when the user
# has not set it.
if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
  Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))
}

# Download the example data only when the folder does not exist yet.
# `recursive = TRUE` lets a user-supplied nested path be created in one call.
# downloadEunomiaData() is called without arguments; presumably it uses
# EUNOMIA_DATA_FOLDER as its destination (confirm against CDMConnector docs).
if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))) {
  dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER"), recursive = TRUE)
  downloadEunomiaData()
}
```

For this example we'll use the Eunomia synthetic data from the CDMConnector package.

```{r}
# Connect to the Eunomia example database through DuckDB.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())

# Create the cdm reference: CDM tables are read from the "main" schema, and
# write_schema gives the schema and table prefix for tables we write.
cdm <- cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = c(prefix = "my_study_", schema = "main")
)
```

Let's start by creating two drug cohorts, one for users of diclofenac and another for users of acetaminophen.

```{r}
# Build the "medications" cohort table from two concept sets, one per drug.
cdm$medications <- conceptCohort(
  cdm = cdm,
  conceptSet = list(diclofenac = 1124300, acetaminophen = 1127433),
  name = "medications"
)

# Show the counts for the cohorts just created.
cohortCount(cdm$medications)
```

We can stratify cohorts based on specified columns using the function `stratifyCohorts()`. In this example, let's stratify the medications cohort by age and sex.

```{r}
# Add age-group and sex columns to the medications cohort, then stratify by
# sex, by age group, and by both together. Each age-group label states the
# inclusive bounds of its range, so the middle group is "18 to 64" and does
# not overlap "65 and Over".
cdm$stratified <- cdm$medications |>
  addAge(
    ageGroup = list(
      "Child" = c(0, 17),
      "18 to 64" = c(18, 64),
      "65 and Over" = c(65, Inf)
    )
  ) |>
  addSex(name = "stratified") |>
  stratifyCohorts(
    strata = list("sex", "age_group", c("sex", "age_group")),
    name = "stratified"
  )

# One settings row per resulting cohort.
settings(cdm$stratified)
```

The age and sex columns are added using the functions `addAge()` and `addSex()` from the package `PatientProfiles`. The 'stratified' table includes 22 cohorts: each of the two medication cohorts is split by sex, by age group, and by every combination of sex and age group.

We can also split cohorts by calendar year, keeping only the years we specify, using the function `yearCohorts()`.

```{r, include=FALSE, warning=FALSE}
# Close the connection opened earlier in the vignette before replacing it, so
# the previous DuckDB instance is not left open once `con` is overwritten.
DBI::dbDisconnect(con, shutdown = TRUE)

# Start again from a fresh connection to the Eunomia example database.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
cdm <- cdm_from_con(
  con = con,
  cdm_schema = "main",
  write_schema = c(prefix = "my_study_", schema = "main")
)

# Recreate the "medications" cohort table used by the next example.
cdm$medications <- conceptCohort(
  cdm = cdm,
  conceptSet = list("diclofenac" = 1124300, "acetaminophen" = 1127433),
  name = "medications"
)
```

```{r}
# Split the medications cohorts by year for 2005 to 2010 and store the result
# in the "years" table.
cdm$years <- yearCohorts(
  cdm$medications,
  years = 2005:2010,
  name = "years"
)

settings(cdm$years)
```
The 'years' table includes 12 cohorts, one for each combination of drug (diclofenac or acetaminophen) and year (2005 to 2010).