## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options for this document: source and output of a chunk
# are collapsed into a single block, and printed output is prefixed with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## ----cooc_data----------------------------------------------------------------
  library(nlpembeds)
  # Toy EHR table in long format: one row per (Patient, Month, Parent_Code)
  # combination, with an associated Count. Later chunks reuse this object.
  df_ehr <- data.frame(
    Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
    Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
    Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2', 'C3', 'C4'),
    Count = 1:9
  )
  df_ehr

## ----cooc_call----------------------------------------------------------------
  # Build the co-occurrence object from the complete toy table, leaving every
  # optional argument of build_df_cooc() at its default, then print it.
  spm_cooc <- build_df_cooc(df_ehr)
  spm_cooc

## ----cooc_month1--------------------------------------------------------------
  # Co-occurrence restricted to the rows of patient 1 in month 1.
  # min_code_freq = 0 is passed explicitly for this call.
  cooc_1 <- build_df_cooc(
    df_ehr[df_ehr$Patient == 1 & df_ehr$Month == 1, ],
    min_code_freq = 0
  )
  cooc_1

## ----cooc_month2--------------------------------------------------------------
  # Co-occurrence restricted to the rows of patient 1 in month 3.
  # NOTE(review): unlike the month-1 call, min_code_freq is left at its
  # default here -- confirm this asymmetry is intentional.
  cooc_2 <- build_df_cooc(df_ehr[df_ehr$Patient == 1 & df_ehr$Month == 3, ])
  cooc_2

## ----cooc_sum-----------------------------------------------------------------
  # Add the two per-month results together and print the sum.
  cooc_1 + cooc_2

## ----pmi----------------------------------------------------------------------
   m_pmi = get_pmi(spm_cooc)
   m_pmi

## ----svd----------------------------------------------------------------------
  m_svd = get_svd(m_pmi, embedding_dim = 2)
  m_svd

## ----sql_data-----------------------------------------------------------------
  library(RSQLite)

  # Create a throw-away SQLite database file and store the toy table in it
  # under the name 'df_monthly'.
  test_db_path <- tempfile()
  test_db <- dbConnect(SQLite(), dbname = test_db_path)
  dbWriteTable(conn = test_db, name = 'df_monthly', value = df_ehr,
               overwrite = TRUE)

  ###
  # optional, done automatically by sql_cooc if table 'df_uniq_codes' not found
  # and parameter autoindex set to TRUE
  dbExecute(conn = test_db,
            statement = "CREATE INDEX patient_idx ON df_monthly (Patient)")

  # One-column data frame holding the distinct Parent_Code values, written
  # to its own table 'df_uniq_codes'.
  df_uniq_codes <- unique(df_ehr['Parent_Code'])
  dbWriteTable(conn = test_db, name = 'df_uniq_codes', value = df_uniq_codes,
               overwrite = TRUE)
  ###

  # Release the connection; the database file stays on disk at test_db_path.
  dbDisconnect(conn = test_db)

## ----sql_cooc-----------------------------------------------------------------
  # Run sql_cooc() with the database written above as input and a fresh
  # temporary file as the output location.
  output_db_path <- tempfile()
  sql_cooc(
    input_path = test_db_path,
    output_path = output_db_path
  )

## ----read_sql-----------------------------------------------------------------
  # Open the output database, load the whole 'df_monthly' table into memory
  # as a data frame, and close the connection before printing the result.
  test_db <- dbConnect(SQLite(), dbname = output_db_path)
  spm_cooc <- dbGetQuery(conn = test_db,
                         statement = 'select * from df_monthly;')
  dbDisconnect(conn = test_db)

  spm_cooc

## ----sql_pmi------------------------------------------------------------------
  # Feed the table read back from SQLite straight into get_pmi() and print
  # the result.
  m_pmi <- get_pmi(spm_cooc)
  m_pmi

## ----read_sql_cooc------------------------------------------------------------
  # Convert the table read from SQLite with build_spm_cooc_sym() (presumably
  # a symmetric sparse co-occurrence matrix -- confirm in the package docs),
  # overwriting spm_cooc, then turn it into a base matrix for printing.
  spm_cooc <- build_spm_cooc_sym(spm_cooc)
  m_cooc <- as.matrix(spm_cooc)
  m_cooc

## ----dicts_data---------------------------------------------------------------
  # Recode two of the toy codes in place: 'C1' becomes 'C0000545' and 'C2'
  # becomes 'C0000578'; all other codes are left unchanged.
  # Plain base-R assignment is used instead of magrittr's `%<>%` so this
  # chunk does not depend on magrittr being attached (the visible script
  # only attaches nlpembeds and RSQLite).
  df_ehr$Parent_Code <- ifelse(df_ehr$Parent_Code == 'C1', 'C0000545',
                               df_ehr$Parent_Code)
  df_ehr$Parent_Code <- ifelse(df_ehr$Parent_Code == 'C2', 'C0000578',
                               df_ehr$Parent_Code)

  df_ehr

## ----dicts_data_write---------------------------------------------------------
  # Write the recoded table to a new temporary SQLite file (test_db_path is
  # re-pointed to that file), again under the table name 'df_monthly'.
  test_db_path <- tempfile()
  test_db <- dbConnect(SQLite(), dbname = test_db_path)
  dbWriteTable(conn = test_db, name = 'df_monthly', value = df_ehr)
  dbDisconnect(conn = test_db)

## ----dicts_cooc---------------------------------------------------------------

  # Full paths of every file in the 'dictionaries' directory shipped with
  # the installed nlpembeds package.
  codes_dict_fpaths <- list.files(
    system.file('dictionaries', package = 'nlpembeds'),
    full.names = TRUE
  )

  # Re-run sql_cooc() on the recoded database, supplying the dictionary files
  # and an exclusion pattern, with automatic indexing enabled. The output
  # path from the earlier run is reused, hence overwrite_output = TRUE.
  sql_cooc(
    input_path = test_db_path,
    output_path = output_db_path,
    exclude_dict_pattern = 'C[0-9]',
    codes_dict_fpaths = codes_dict_fpaths,
    autoindex = TRUE,
    overwrite_output = TRUE
  )

## ----dicts_cooc_read----------------------------------------------------------
  # Read the regenerated 'df_monthly' table back from the output database,
  # closing the connection before the result is printed.
  test_db <- dbConnect(SQLite(), dbname = output_db_path)
  spm_cooc <- dbGetQuery(conn = test_db,
                         statement = 'select * from df_monthly;')
  dbDisconnect(conn = test_db)

  spm_cooc

## ----oom_sql_pmi--------------------------------------------------------------
# m_pmi = get_pmi(spm_cooc)
# m_pmi

## ----oom_read_sql_cooc--------------------------------------------------------
# spm_cooc = build_spm_cooc_sym(spm_cooc)                                       
# m_cooc = as.matrix(spm_cooc)                                       
# m_cooc

## -----------------------------------------------------------------------------
  # remotes::install_git('https://github.com/jwood000/RcppAlgos@v2.4.0.git')