## ----init, include = FALSE----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----setup--------------------------------------------------------------------
# library(tidyverse, quietly = TRUE)
# library(cowplot)
# library(RColorBrewer)
# library(ctxR)
# library(flextable)
# library(officer)
# library(patchwork)
# 
# devtools::load_all() #load invivopkfit
# 
# #get today's date
# today <- format(Sys.Date(), "%d%B%Y")

## ----eval = FALSE-------------------------------------------------------------
# Sys.setenv(OD_DIR = "path/to/inputs/",
#            FIG_DIR = "path/to/outputs/")

## ----eval = FALSE-------------------------------------------------------------
# ctxR::register_ctx_api_key(key = "<YOUR-API-KEY>")

## -----------------------------------------------------------------------------
# read_date_pk <- "25November2024"

## -----------------------------------------------------------------------------
# read_date_results <- "27November2024"

## ----pk_obj_fromSQL, eval=FALSE-----------------------------------------------
# 
# ### Minimal PK Object ###----
# # Minimum pk object, add options later
# # Note that these mappings are now a default mappings, just being verbose here.
# # If there are any standard deviations that are zero or NA, but N_Subjects > 1,
# # Set the n_subjects to 6 or the current value if lower than 6.
# # (In Showa the mode is 6, but this condition is present in CVT_Legacy as well)
# minimal_pk <- pk(data = cvt %>%
#                    mutate(n_subjects_normalized = if_else(
#                      n_subjects_normalized > 1 & is.na(invivPK_conc_sd),
#                      min(n_subjects_normalized, 6), n_subjects_normalized)),
#                  mapping = ggplot2::aes(
#                    Chemical = analyte_dtxsid,
#                    Chemical_Name = analyte_name_original,
#                    DTXSID = analyte_dtxsid,
#                    CASRN = analyte_casrn,
#                    Species = species,
#                    Reference = fk_extraction_document_id,
#                    Media = conc_medium_normalized,
#                    Route = administration_route_normalized,
#                    Dose = invivPK_dose_level,
#                    Dose.Units = "mg/kg",
#                    Subject_ID = fk_subject_id,
#                    Series_ID = fk_series_id,
#                    Study_ID = fk_study_id,
#                    ConcTime_ID = conc_time_id,
#                    N_Subjects = n_subjects_normalized,
#                    Weight = weight_kg,
#                    Weight.Units = "kg",
#                    Time = time_hr,
#                    Time.Units = "hours",
#                    Value = invivPK_conc,
#                    Value.Units = "mg/L",
#                    Value_SD = invivPK_conc_sd,
#                    LOQ = invivPK_loq
#                  ))

## ----outlining_all_options, eval=FALSE----------------------------------------
# # Types of Choices:
# ## Dose Normalized
# ## Log10 transform
# ## Scale time
# # List of options
# fitopts <- expand.grid(error_model = c("pooled",
#                                        "hierarchical"),
#                        dose_norm = c(TRUE, FALSE),
#                        log10_trans = c(TRUE, FALSE),
#                        time_scale = c(
#                          TRUE,
#                          FALSE),
#                        stringsAsFactors = FALSE)

## ----fitting_choices, eval=FALSE----------------------------------------------
# fit_data <- function(this_error_model,
#                      this_dose_norm,
#                      this_log10_trans,
#                      this_time_scale,
#                      n_cores = 12,
#                      file_dir = Sys.getenv("OD_DIR")){
#   retval <- -9
#    dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0("mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
# 
#   cat("Fitting: ", file_str, "\n")
# 
#   tryCatch(
#     expr = {
#       this_pk <-   minimal_pk +
#         facet_data(vars(Chemical, Species)) +
#         settings_preprocess(keep_data_original = FALSE,
#                             suppress.messages = TRUE) +
#         scale_conc(dose_norm = this_dose_norm, log10_trans = this_log10_trans) +
#         settings_optimx(method = c(
#           "L-BFGS-B",
#           "bobyqa"
#           ))
# 
#       if(this_error_model %in% "pooled"){
#         this_pk <- this_pk +
#           stat_error_model(error_group = vars(Chemical, Species))
#       } else if(this_error_model %in% c("hierarchical", "joint")){
#         this_pk <- this_pk +
#           stat_error_model(error_group = vars(Chemical, Species, Reference))
#       } else{
#         stop("this_error_model must be either 'pooled' or 'hierarchical'/'joint'")
#       }
# 
#       if(this_time_scale %in% TRUE){
#         this_pk <- this_pk +
#           scale_time(new_units = "auto")
#       }
# 
#       #do the fit
#       this_time <- system.time(this_fit <- do_fit(this_pk, n_cores = n_cores))
# 
# 
# 
#       #save the result
#       saveRDS(this_fit,
#               file = paste0(file_dir,
#                      file_str))
# 
#       cli::cli_alert_success(text = "Fitting success!")
#       print(this_time)
# 
#       retval <- this_time[[3]]
#     },
#     error = function(e){
#       cli::cli_alert_danger(text = paste("Analysis",
#                                          file_str,
#                                          "failed with error",
#                                          e))
#       retval <- -1
#     }
#   )
# 
#   return(retval)
# }

## ----running_all_options, eval=FALSE------------------------------------------
# 
# system.time(tmp_out <- mapply(fit_data,
#                               this_error_model = fitopts$error_model,
#                               this_dose_norm = fitopts$dose_norm,
#                               this_log10_trans = fitopts$log10_trans,
#                               this_time_scale = fitopts$time_scale,
#                               n_cores = 14))
# 
# mean(tmp_out)/60 # Average runtime (in minutes) per fitting option
# median(tmp_out)/60
# sum(tmp_out) # Looks like there's 10 extra seconds of overhead for non-fitting methods

## -----------------------------------------------------------------------------
# my_pk <- readRDS(paste0(Sys.getenv("OD_DIR"),
#                         read_date_pk,
#                         "mypk_fit_110p.Rds"))

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Reference) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Species) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species, Reference) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species, Reference, Route, Media) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species, Reference, Route, Media, Dose) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   group_by(Chemical, Species) %>%
#   summarise(blood_and_plasma = all(c("blood",
#                   "plasma") %in% Media)) %>%
#   ungroup() %>%
#   summarise(n_both = sum(blood_and_plasma))

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   group_by(Chemical, Species) %>%
#   summarise(t_mid = mean(range(Time)),
#     t_mid_log10 = midpt_log10(Time),
#             new_time_units = auto_units(y = Time,
#                        from = "hours",
#                        target = 10)) %>%
#   ungroup() %>%
#   count(new_time_units)

## -----------------------------------------------------------------------------
# get_data_info(my_pk)$dose_norm_check %>%
#   dplyr::summarise(AUC_flag = sum(!is.na(data_flag_AUC)),
#             Cmax_flag = sum(!is.na(data_flag_Cmax)),
#             total_multi_dose = sum(n_dose_groups>1),
#             total_expts = n())

## -----------------------------------------------------------------------------
# #compute duration and concentration range across experiments (Chemical, Species, Reference, Route, Media, Dose)
# data_range <- my_pk$data %>%
#   filter(Detect, !exclude) %>%
#   group_by(Chemical, Species, Reference, Media, Route, Dose, Dose.Units) %>%
#   summarize(maxT = max(Time)/24,
#          concRange = log10(max(Conc)/min(Conc))) %>%
#   ungroup()
# 
# #Panel A: histogram of day of final timepoint per experiment
# sf_2A <- data_range %>%
#   ggplot(aes(x = maxT)) +
#   geom_histogram(fill = "grey5",
#                  bins = 40,
#                  color = "grey5") +
#   scale_x_log10(labels = scales::number,
#                 breaks = c(0.1, 1, 7, 30, 365)) +
#   annotation_logticks(sides = "b") +
#   labs(x = "Day of final Timepoint",
#        y = "Count") +
#   theme(aspect.ratio = 1,
#         text = element_text(size = 10),
#         panel.border = element_rect(color = "black", fill = NA, size = 1),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         axis.ticks = element_blank(),
#         axis.line = element_blank())
# 
# #panel B: histogram of fold concentration range
# sf_2B <- data_range %>%
#   ggplot(aes(x = concRange)) +
#   geom_histogram(fill = "grey5",
#                  bins = 40,
#                  color = "grey5") +
#   scale_y_continuous(expand = c(0, 0),
#                      limits = c(0, 50),
#                      breaks = c(0, 25, 50)) +
#   labs(x = "log10(Concentration Range)",
#        y = "Count") +
#   annotation_logticks(sides = "b") +
#   theme(aspect.ratio = 1,
#         panel.border = element_rect(color = "black", fill = NA, size = 1),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         strip.background = element_rect(fill = "white"),
#         strip.text = element_text(face = "bold"),
#         axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         axis.text = element_text(size = 10),
#         axis.title = element_text(size = 11))
# 
# sf_2AB <- plot_grid(sf_2A, sf_2B,
#                     labels = c("A", "B"),
#                     nrow = 1)
# 
# sf_2AB
# 
# #save PDF version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               "Manu_Files/",
#               today,
#               "_Supp_Fig2_ConcTimeRanges",
#               ".pdf"),
#        height = 3.2,
#        width = 6,
#        bg = "white",
#        units = "in")
# 
# #save PNG version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               "Manu_Files/",
#               today,
#               "_Supp_Fig2_ConcTimeRanges",
#               ".png"),
#        height = 3.2,
#        width = 6,
#        bg = "white",
#        units = "in",
#        dpi = 300)
# 
# rm(sf_2A, sf_2B, sf_2AB)
# 

## -----------------------------------------------------------------------------
# data_cvt <- get_data(my_pk)
# data_counts <- data_cvt %>%
# dplyr::filter(Detect) %>%
# dplyr::count(Chemical, Species, Reference, Route, Media, Dose, Time, N_Subjects,
# name = "Count")
# data_counts <- dplyr::left_join(data_counts, data_cvt,
# by = c("Chemical", "Species", "Reference",
# "Route", "Media", "Dose", "Time",
# "N_Subjects")) %>%
# dplyr::mutate(data_descr = dplyr::case_when(
# (Count > 1 & N_Subjects == 1) ~ "Individual Data, Multiple Observations",
# (Count == 1 & N_Subjects == 1) ~ "Individual Data, Single Observation",
# (Count == 1 & N_Subjects > 1 & Conc_SD > 0) ~ "Grouped Data w SD",
# (N_Subjects > 1 & Conc_SD == 0) ~ "Grouped Data no SD",
# .default = NA_character_
# ))
# 
# indiv_data <- subset(data_counts,
#                      subset = (data_descr %in% "Individual Data, Multiple Observations"))
# 
# indiv_data <- indiv_data %>%
# dplyr::group_by(Chemical, Species, Reference, Route, Media, Dose, Time) %>%
# dplyr::mutate(replicate_mean = mean(Conc, na.rm = TRUE)) %>%
#   ungroup()
# 
# indiv_data_bygroup <- indiv_data %>%
#   dplyr::group_by(Chemical, Species) %>%
#   dplyr::summarise(AFE = 10^mean(log10(Conc/replicate_mean)),
#                                                                                                AAFE = 10^mean(abs(log10(Conc/replicate_mean)))) %>%
#   ungroup()
# 
# indiv_data_bygroup %>% count(AFE < 0.5)
# indiv_data_bygroup %>% count(AFE > 2)

## -----------------------------------------------------------------------------
# my_tf <- my_pk %>% twofold_test()

## -----------------------------------------------------------------------------
# pl3A <- my_tf$individual_data %>%
#   # Plotting
#   ggplot(aes(x = foldConc)) +
#   geom_histogram(binwidth = 0.2,
#                  fill = "grey5",
#                  color = "grey5",
#                  linewidth = 0.4) +
#   coord_cartesian(xlim =  c(0, 3.1),
#                   ylim = c(0, 1200)) +
#   scale_y_continuous(expand = c(0, 0),
#                      breaks = c(0, 250, 500, 750, 1000),
#                      labels = c("0", "2.5", "5", "7.5", "10")) +
#   expand_limits(y = 0) +
#   geom_vline(xintercept = c(0.5, 2),
#              color = "red3",
#              linetype = "dashed",
#              linewidth = 1) +
#   theme_bw() +
#   labs(x = "Mean-normalized concentration",
#                 y = "Observations (hundreds))") +
#   theme(text = element_text(size = 14),
#         panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
#         panel.background = element_blank(),
#         panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4),
#         axis.title = element_text(face = "bold"),
#         axis.line = element_blank(),
#         axis.text = element_text(size = 14),
#         plot.background = element_blank(),
#         axis.ticks.y = element_blank())
# 
# pl3A

## -----------------------------------------------------------------------------
# #get time of peak concentration for use in calculated ADME-normalized time
# my_nca <- nca(my_pk) %>%
#   dplyr::filter(param_name %in% "tmax")
# 
# pl3B <- my_tf$individual_data %>%
#   inner_join(my_nca %>%
#               dplyr::select(Chemical, Species,
#                             Route, Media,
#                             tmax = param_value) %>%
#               filter(!is.na(tmax))) %>%
#   group_by(Chemical,
#            Species,
#            Reference,
#            Route,
#            Media,
#            Dose) %>%
#   mutate(
#          normTime = ifelse(
#            Route == "iv",
#            1 + Time/max(Time),
#            ifelse(
#              Time > tmax,
#              ((Time - tmax)/max(Time)) + 1,
#              0.5 + Time/(2*tmax)
#            )
#          )) %>%
#   ggplot(aes(
#     x = normTime,
#     y = foldConc
#   )) +
#   scale_x_continuous(breaks = c(1,2),
#                      labels = c("tmax", "end")) +
#   geom_point(alpha = 0.1, size = 0.7) +
#   geom_hline(yintercept = c(0.5, 2),
#              color = "red3",
#              linewidth = 0.8,
#              linetype = "dashed") +
#   facet_grid(cols = vars(Route),
#              scales = "free_x") +
#   labs(x = "ADME-Normalized Time",
#        y = "Mean-Normalized\nConcentration") +
#   theme(text = element_text(size = 14),
#         panel.border = element_rect(color = "black", fill = NA, size = 1),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         strip.background = element_rect(fill = "white"),
#         strip.text = element_text(face = "bold"),
#         axis.line = element_blank(),
#         axis.text = element_text(size = 14),
#         axis.title = element_text(face = "bold"),
#         panel.spacing = unit(0.125, units = "in"),
#         plot.background = element_blank(),
#         legend.position = "none")
# 
# pl3B

## -----------------------------------------------------------------------------
# pl3 <- cowplot::plot_grid(pl3A,
#                           pl3B,
#                           ncol = 2,
#                           rel_widths = c(1,2),
#                           labels = c("A", "B"))
# 
# #save PDF version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#                 "_Fig3.pdf"),
#       pl3,
#           bg = "white",
#       width = 8,
#       height = 4,
#       units = "in")
# 
# #Save PNG version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#                 "_Fig3.png"),
#       pl3,
#           bg = "white",
#       width = 8,
#       height = 4,
#       units = "in",
#       dpi = 300)
# 
# #remove unneeded objects
# rm(pl3A, pl3B, pl3)

## -----------------------------------------------------------------------------
# plsupp3 <- my_tf$individual_data %>%
#   inner_join(my_nca %>%
#               dplyr::select(Chemical, Species,
#                             Route, Media,
#                             tmax = param_value) %>%
#               filter(!is.na(tmax))) %>%
#   group_by(Chemical,
#            Species,
#            Reference,
#            Route,
#            Media,
#            Dose) %>%
#   mutate(
#          normTime = ifelse(
#            Route == "iv",
#            1 + Time/max(Time),
#            ifelse(
#              Time > tmax,
#              ((Time - tmax)/max(Time)) + 1,
#              0.5 + Time/(2*tmax)
#            )
#          )) %>%
#   ggplot(aes(
#     x = normTime
#   )) +
#   scale_x_continuous(breaks = c(1,2),
#                      labels = c("tmax", "end")) +
#   geom_histogram() +
#   facet_grid(cols = vars(Route),
#              scales = "free_x") +
#   labs(x = "ADME-Normalized Time",
#        "# Observations") +
#   theme(text = element_text(size = 14),
#         panel.border = element_rect(color = "black", fill = NA, size = 1),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         strip.background = element_rect(fill = "white"),
#         strip.text = element_text(face = "bold"),
#         #axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         #axis.text.x = element_blank(),
#         axis.text = element_text(size = 14),
#         axis.title = element_text(face = "bold"),
#         panel.spacing = unit(0.125, units = "in"),
#         plot.background = element_blank(),
#         legend.position = "none")
# 
# #save PDF version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#                 "_Supp_Fig3",
#                 ".png"),
#       plsupp3,
#           bg = "white",
#       width = 8,
#       height = 4,
#       units = "in",
#       dpi = 300)
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#                 "_Supp_Fig3",
#                 ".pdf"),
#       plsupp3,
#           bg = "white",
#       width = 8,
#       height = 4,
#       units = "in")

## ----Winmodelfn---------------------------------------------------------------
# # Evaluation
# # Winning Model Tally
# winmodel_comparison <- function(this_error_model,
#                              this_dose_norm,
#                              this_log10_trans,
#                              this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   current_rds <- readRDS(file = file_str)
# 
#   # Wide winning model
#   suppressMessages({
#     winning_model <- get_winning_model(obj = current_rds)
# 
#     wide_winning_model <- winning_model %>%
#       dplyr::group_by(method, model) %>%
#       dplyr::count() %>%
#       tidyr::pivot_wider(names_from = model,
#                          values_from = n)
#   })
#   return(wide_winning_model)
# }

## -----------------------------------------------------------------------------
# AUC_plot <- function(
#     this_error_model,
#     this_dose_norm,
#     this_log10_trans,
#     this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Evaluation for: ", pk_name))
#   current_rds <- readRDS(file = file_str)
#   # Cmax and AUC_inf RMSEs
#   #note that eval_tkstats already filters winning models
#   #use finite_only = FALSE so that we can count the excluded cases
#   suppressMessages(RMSLE_eval <- eval_tkstats(obj = current_rds,
#                                              dose_norm = FALSE,
#                                              finite_only = FALSE,
#                                              suppress.messages = TRUE) %>%
#                                   mutate(Options = pk_name) %>%
#                                   dplyr::select(
#                                     Options,
#                                     Chemical,
#                                     Species,
#                                     Route,
#                                     Media,
#                                     Reference,
#                                     method,
#                                     model,
#                                     starts_with("Cmax"),
#                                     starts_with("AUC_")
#                                   ))
# 
#  these_opts <- paste0(dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic)
# 
#   #plot AUC infinity for tkstats vs. AUC infinity for NCA
#    #show identity line and 20x above and below lines
#  AUC_plot <- RMSLE_eval %>%
#     dplyr::filter(is.finite(AUC_infinity.nca) & is.finite(AUC_infinity.tkstats)) %>%
#     ggplot() +
#     geom_point(aes(x = AUC_infinity.nca,
#                    y = AUC_infinity.tkstats,
#                    color = model)) +
#     scale_x_log10() + scale_y_log10() +
#     annotation_logticks() +
#     geom_abline(aes(intercept = 0, slope =1)) +
#     geom_abline(aes(intercept = log10(20), slope = 1),
#                 linetype = 2) +
#     geom_abline(aes(intercept = log10(1/20), slope = 1),
#                 linetype = 2) +
#       facet_grid(rows = vars(method)) +
#       ggtitle(paste(these_opts,
#                     "AUC_inf pred vs. AUC_inf NCA")) +
#       scale_color_manual(values = c("model_1comp" = "#0398FC",
#                                  "model_2comp" = "#D68E09",
#                                "model_flat" = "black"),
#                     name = "Winning model") +
#       coord_equal()
# 
#  Cmax_plot <- RMSLE_eval %>%
#     dplyr::filter(is.finite(Cmax.nca) & is.finite(Cmax.tkstats)) %>%
#     ggplot() +
#     geom_point(aes(x = Cmax.nca,
#                    y = Cmax.tkstats,
#                    color = model)) +
#     scale_x_log10() + scale_y_log10() +
#     annotation_logticks() +
#     geom_abline(aes(intercept = 0, slope =1)) +
#     geom_abline(aes(intercept = log10(20), slope = 1),
#                 linetype = 2) +
#     geom_abline(aes(intercept = log10(1/20), slope = 1),
#                 linetype = 2) +
#       facet_grid(rows = vars(method)) +
#       ggtitle(paste(these_opts,
#                     "Cmax pred vs. Cmax NCA")) +
#       scale_color_manual(values = c("model_1comp" = "#0398FC",
#                                  "model_2comp" = "#D68E09",
#                                "model_flat" = "black"),
#                     name = "Winning model") +
#       coord_equal()
# 
#   return(
#     list(AUC_plot = AUC_plot,
#     Cmax_plot = Cmax_plot
#     )
#   )
# }

## -----------------------------------------------------------------------------
# all_AUC_plots <- fitopts %>%
#               dplyr::rowwise() %>%
#   dplyr::summarise(this_plot = list(AUC_plot(this_error_model = error_model,
#                                  this_dose_norm = dose_norm,
#                                  this_log10_trans = log10_trans,
#                                  this_time_scale = time_scale)
#                                  )
#                    )
# #save the plots
# pdf(file = paste0(
#   Sys.getenv("FIG_DIR"),
#   today,
#   "_AUCinf_nca_vs_tk_plots.pdf"),
#   onefile = TRUE,
#   height = 5,
#   width = 7)
# 
# plotlist <- all_AUC_plots %>%
#   pull(this_plot)
# 
# for (x in seq_along(plotlist)) {
#   print(plotlist[[x]][["AUC_plot"]])
# }
# dev.off()
# 
# pdf(file = paste0(
#   Sys.getenv("FIG_DIR"),
#   today,
#   "_Cmax_nca_vs_tk_plots.pdf"),
#   onefile = TRUE,
#   height = 5,
#   width = 7)
# 
# plotlist <- all_AUC_plots %>%
#   pull(this_plot)
# 
# for (x in seq_along(plotlist)) {
#   print(plotlist[[x]][["Cmax_plot"]])
# }
# dev.off()
# 
# rm(plotlist, all_AUC_plots)

## ----RMSLE_Cmax_AUC_fun-------------------------------------------------------
# 
# # RMSLE for Cmax and AUC
# rmsles_Cmax_AUC <- function(
#     this_error_model,
#     this_dose_norm,
#     this_log10_trans,
#     this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Evaluation for: ", pk_name))
#   current_rds <- readRDS(file = file_str)
#   # Cmax and AUC_inf RMSEs
#   #note that eval_tkstats already filters winning models
#   #use finite_only = FALSE so that we can count the excluded cases
#   suppressMessages(RMSLE_eval <- eval_tkstats(obj = current_rds,
#                                              dose_norm = FALSE,
#                                              finite_only = FALSE,
#                                              suppress.messages = TRUE) %>%
#                                   mutate(Options = pk_name) %>%
#                                   dplyr::select(
#                                     Options,
#                                     Chemical,
#                                     Species,
#                                     Route,
#                                     Media,
#                                     Reference,
#                                     method,
#                                     model,
#                                     starts_with("Cmax"),
#                                     starts_with("AUC_")
#                                   ))
# 
#   #Couunt bad observations:
#   #zero, NA, infinite
#   RMSLE_badobs <- RMSLE_eval %>%
#     group_by(Options, method) %>%
#     summarise(total_expts = n(),
#               count_AUCinf_NCA_0 = sum(AUC_infinity.nca %in% 0),
#               count_AUCinf_pred_0 = sum(AUC_infinity.tkstats %in% 0),
#               count_AUCinf_NCA_isNA = sum(is.na(AUC_infinity.nca)),
#               count_AUCinf_pred_isNA = sum(is.na(AUC_infinity.tkstats)),
#               count_AUCinf_NCA_isInf = sum(is.infinite(AUC_infinity.nca)),
#               count_AUCinf_pred_isInf = sum(is.infinite(AUC_infinity.tkstats)),
#              count_Cmax_NCA_0 = sum(Cmax.nca %in% 0),
#               count_Cmax_pred_0 = sum(Cmax.tkstats %in% 0),
#               count_Cmax_NCA_isNA = sum(is.na(Cmax.nca)),
#               count_Cmax_pred_isNA = sum(is.na(Cmax.tkstats)),
#               count_Cmax_NCA_isInf = sum(is.infinite(Cmax.nca)),
#               count_Cmax_pred_isInf = sum(is.infinite(Cmax.tkstats))
#     ) %>%
#     ungroup()
# 
#   #count cases where AUC or Cmax was negative,
#   #or where AUC_inf was greater than 1e7
#   RMSLE_neg <- RMSLE_eval %>%
#     dplyr::filter(is.finite(AUC_infinity.nca),
#                             is.finite(AUC_infinity.tkstats),
#                             is.finite(Cmax.nca),
#                             is.finite(Cmax.tkstats)) %>%
#     group_by(Options, method) %>%
#       summarise(count_AUCinf_NCA_lt0 = sum(AUC_infinity.nca < 0),
#               count_AUCinf_pred_lt0 = sum(AUC_infinity.tkstats < 0),
#               count_Cmax_NCA_lt0 = sum(AUC_infinity.nca < 0),
#               count_Cmax_pred_lt0 = sum(AUC_infinity.tkstats < 0),
#               count_AUCinf_NCA_gt_1e7 = sum(AUC_infinity.nca > 1e7),
#                count_AUCinf_pred_gt_1e7 = sum(AUC_infinity.tkstats > 1e7),
#        )
# 
#   RMSLE_badobs <- RMSLE_badobs %>%
#     left_join(RMSLE_neg, by = c("Options",
#                                     "method")) %>%
#         rowwise() %>%
#     dplyr::mutate(total_excl = sum(
#       c_across(
#       starts_with("count")
#       )
#       )
#     )
# 
#   #compute RMSLEs after excluding the bad observations
#   RMSLE_eval <- RMSLE_eval %>%
#     dplyr::filter(is.finite(AUC_infinity.nca),
#            is.finite(AUC_infinity.tkstats)) %>%
#     dplyr::filter(
#       AUC_infinity.nca > 0,
#       AUC_infinity.tkstats > 0,
#       AUC_infinity.nca < 1e7,
#       AUC_infinity.tkstats < 1e7) %>%
#     rowwise() %>%
#     mutate(
#       SLE_Cmax = (log10(Cmax.tkstats) - log10(Cmax.nca)) ^ 2,
#       SLE_AUC_inf = (log10(AUC_infinity.tkstats) - log10(AUC_infinity.nca)) ^ 2
#     ) %>%
#     dplyr::filter(!is.na(SLE_Cmax), !is.na(SLE_AUC_inf)) %>%
#     group_by(Options, method) %>%
#     summarise(
#       n_expts = n(),
#       RMSLE_Cmax = sqrt(mean(SLE_Cmax, na.rm = FALSE)) %>% signif(digits = 4),
#       RMSLE_AUC = sqrt(mean(SLE_AUC_inf, na.rm = FALSE)) %>% signif(digits = 4)
#     ) %>%
#     ungroup()
# 
#   #merge in counts of bad obs
# 
#   RMSLE_eval <- RMSLE_eval %>%
#     dplyr::left_join(RMSLE_badobs,
#               by = c("Options", "method"))
# 
#   return(RMSLE_eval)
# }

## ----gof_tbl_fun--------------------------------------------------------------
# # Goodness-of-fit table with R-squared, RMSLE and AIC
# get_gof <- function(this_error_model,
#                              this_dose_norm,
#                              this_log10_trans,
#                              this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   current_rds <- readRDS(file = file_str)
#   winning_model <- suppressMessages(get_winning_model(obj = current_rds))
# 
#   suppressMessages({
#     this_rsq <- rsq.pk(current_rds,
#                     use_scale_conc = FALSE,
#                     rsq_group = ggplot2::vars(Chemical, Species),
#                     sub_pLOQ = TRUE) %>%
#       semi_join(winning_model)
# 
#     this_AIC <- AIC(current_rds) %>%
#       semi_join(winning_model)
# 
#     this_rmsle <- rmse.pk(current_rds,
#                       rmse_group = vars(Chemical, Species),
#                       use_scale_conc = list(dose_norm = FALSE,
#                                             log10_trans = TRUE),
#                       sub_pLOQ = TRUE) %>%
#       semi_join(winning_model)
# 
#   })
# 
#   # Since they are all joined by winmodel
#   # all three must have same NROW
#   message(
#     paste0("For ", pk_name, "... outputs below must have same number of rows")
#   )
# 
#   message(paste0("rsq: ", NROW(this_rsq)))
#   message(paste0("aic: ", NROW(this_AIC)))
#   message(paste0("rmsle: ", NROW(this_rmsle)))
# 
#   gof_df <- this_rsq %>%
#     inner_join(this_AIC, by = join_by(Chemical, Species, method, model)) %>%
#     inner_join(this_rmsle, by = join_by(Chemical, Species, method, model)) %>%
#     mutate(Options = pk_name, .before = Chemical)
# 
#   return(gof_df)
# }

## ----allpreds_fun-------------------------------------------------------------
# 
# # All predictions
# get_all_preds <- function(this_error_model,
#                              this_dose_norm,
#                              this_log10_trans,
#                              this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
# 
#   current_rds <- readRDS(file = file_str)
# 
#   # Wide winning model
#   suppressMessages({
#     winning_model <- get_winning_model(obj = current_rds) %>%
#       select(-c(near_flat, preds_below_loq))
#     this_preds <- predict(current_rds,
#                           use_scale_conc = FALSE) %>%
#       semi_join(winning_model) %>%
#       mutate(Options = pk_name, .before = Chemical)
#   })
#   return(this_preds)
# }

## ----factor2fun---------------------------------------------------------------
# # Factor of two model error
# tf_tests <- function(this_error_model,
#                              this_dose_norm,
#                              this_log10_trans,
#                              this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#   current_rds <- readRDS(file = file_str)
#   print(paste0("Now analyzing: ", pk_name))
# 
#   out <- twofold_test(current_rds,
#                       sub_pLOQ = TRUE,
#                       suppress_messages = TRUE)
# 
#   out <- out$model_error_summary %>%
#     mutate(Options = pk_name)
# 
#   return(out)
# }
# 

## ----do_winmodel--------------------------------------------------------------
# 
# #winning model
# system.time(wm_comp <- fitopts %>%
#               dplyr::rowwise() %>%
#               dplyr::mutate(winmodel_comp = list(
#                 winmodel_comparison(this_error_model = error_model,
#                                  this_dose_norm = dose_norm,
#                                  this_log10_trans = log10_trans,
#                                  this_time_scale = time_scale))) %>%
#               tidyr::unnest(winmodel_comp))

## ----do_RMSLE_Cmax_AUC--------------------------------------------------------
# 
# #RMSLE for Cmax and AUC
# system.time(cmax_auc_rmsle_tbl <- fitopts %>%
#               dplyr::rowwise() %>%
#               dplyr::mutate(nca_comp = list(
#                 rmsles_Cmax_AUC(this_error_model = error_model,
#                                  this_dose_norm = dose_norm,
#                                  this_log10_trans = log10_trans,
#                                  this_time_scale = time_scale))) %>%
#               tidyr::unnest(nca_comp))

## ----do_gof_tbl---------------------------------------------------------------
# 
# #GOF table
# system.time(gof_tbl <- fitopts %>%
#               dplyr::rowwise() %>%
#               dplyr::mutate(gof_win = list(
#                 get_gof(this_error_model = error_model,
#                                  this_dose_norm = dose_norm,
#                                  this_log10_trans = log10_trans,
#                                  this_time_scale = time_scale))) %>%
#               tidyr::unnest(gof_win))

## ----do_factor_two------------------------------------------------------------
# 
# #factor of two
# system.time(all_tf_tests <- fitopts %>%
#               dplyr::rowwise() %>%
#               dplyr::mutate(TF = list(
#                 tf_tests(error_model,
#                          this_dose_norm = dose_norm,
#                          this_log10_trans = log10_trans,
#                          this_time_scale = time_scale))) %>%
#               tidyr::unnest(TF))

## ----do_all_preds-------------------------------------------------------------
# #all predictions
# system.time(all_preds <- fitopts %>%
#               dplyr::rowwise() %>%
#               dplyr::mutate(these_preds = list(
#                 get_all_preds(this_error_model = error_model,
#                                  this_dose_norm = dose_norm,
#                                  this_log10_trans = log10_trans,
#                                  this_time_scale = time_scale))) %>%
#               tidyr::unnest(these_preds))

## ----write_preds--------------------------------------------------------------
# 
# #Save preds to a file as well
# saveRDS(all_preds, paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "_all_preds.Rds"))
# 
# rm(all_preds)

## -----------------------------------------------------------------------------
# (win_mdl_count_by_opts <- gof_tbl %>%
#   group_by(Options, method, model) %>%
#   count() %>%
#   pivot_wider(names_from = model,
#               values_from = n) )

## -----------------------------------------------------------------------------
# gof_tbl %>%
#   group_by(Options, method) %>%
#   count(RMSLE < 1) %>%
#   filter(`RMSLE < 1`) %>%
#   arrange(desc(n)) %>%
#   print(n = 8)
# # bobyqa-optimized options 010h, 110h, 110p

## -----------------------------------------------------------------------------
# gof_tbl %>%
#   group_by(Options, method) %>%
#   count(Rsq > 0.75) %>%
#   filter(`Rsq > 0.75`) %>%
#   arrange(desc(n)) %>%
#   print(n = 8)

## -----------------------------------------------------------------------------
# rmsle_rsq_rank <- gof_tbl %>%
#   group_by(Options, method) %>%
#   count(RMSLE < 1 & Rsq > 0.75) %>%
#   filter(`RMSLE < 1 & Rsq > 0.75`) %>%
#   arrange(desc(n)) %>%
#   dplyr::rename(`# Chemical-Species data groups` = n)
# 
# rmsle_rsq_rank %>% print(n=8)

## -----------------------------------------------------------------------------
# cmax_auc_rmsle_rank <- cmax_auc_rmsle_tbl %>%
#   arrange(RMSLE_Cmax, RMSLE_AUC) %>%
#   select(Options, method, RMSLE_Cmax, RMSLE_AUC)
# 
# cmax_auc_rmsle_rank %>%
#   print(n = 8)

## ----write_eval_results-------------------------------------------------------
# 
# # Write the results to file
# writexl::write_xlsx(x = list(
#   Fit_Counts = wm_comp,
#   AUC_and_Cmax_RMSLE_summary = cmax_auc_rmsle_tbl,
#   Prediction_Evaluations = gof_tbl,
#   Twofold_Tests = all_tf_tests,
#    RMSLE_Rsq_rank = rmsle_rsq_rank,
#                          Cmax_AUC_RMSLE_rank = cmax_auc_rmsle_rank),
#   path = paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#                 today,
#                 "_Supp_Table2_eval_results",
#                 ".xlsx"))
# 
# rm(wm_comp, cmax_auc_rmsle_tbl, gof_tbl, all_tf_tests)
# 
# gc()

## ----plot_rsq_rmsle-----------------------------------------------------------
# gof_tbl <- readxl::read_excel(path = paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#                 today,
#                 "_Supp_Table2_eval_results",
#                 ".xlsx"),
#                 sheet = "Prediction_Evaluations")
# 
#  gof_tbl <- gof_tbl %>%
#   dplyr::mutate(opts = substr(Options, 1, 3),
#                 model_text = dplyr::case_when(model %in% "model_1comp" ~ "1comp",
#                                               model %in% "model_2comp" ~ "2comp",
#                                               model %in% "model_flat" ~ "flat",
#                                               .default = NA_character_))
# 
# fig_rsq_rmsle <- ggplot(gof_tbl) +
#   geom_point(aes(x = RMSLE,
#                  y = Rsq,
#                  color = model_text)) +
#   facet_grid(rows = vars(opts),
#              cols = vars(interaction(error_model, method)
#                          )
#              ) +
#  scale_color_manual(values = c("1comp" = "#0398FC",
#                                  "2comp" = "#D68E09",
#                                "flat" = "black"),
#                     name = "Winning model") +
#   theme_bw()
# 
# fig_rsq_rmsle
# 

## ----plot_rsq_rmsle_zoom------------------------------------------------------
# fig_rsq_rmsle_zoom <- ggplot(gof_tbl) +
#   geom_point(aes(x = RMSLE,
#                  y = Rsq,
#                  color = model_text)) +
#   facet_grid(rows = vars(opts),
#              cols = vars(interaction(error_model, method)
#                          )
#              ) +
#  scale_color_manual(values = c("1comp" = "#0398FC",
#                                  "2comp" = "#D68E09",
#                                "flat" = "black"),
#                     name = "Winning model") +
#   coord_cartesian(xlim = c(0,1)) +
#   theme_bw()
# 
# fig_rsq_rmsle_zoom

## -----------------------------------------------------------------------------
# #PDF versions
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#               "_Supp_Fig4",
#               ".pdf"),
#        fig_rsq_rmsle,
#        width = 11,
#        height = 8.5)
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#               "_Supp_Fig5",
#               ".pdf"),
#        fig_rsq_rmsle_zoom,
#        width = 11,
#        height = 8.5)
# 
# #PNG versions
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#               "_Supp_Fig4",
#               ".png"),
#        fig_rsq_rmsle,
#        width = 11,
#        height = 8.5)
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#               "_Supp_Fig5",
#               ".png"),
#        fig_rsq_rmsle_zoom,
#        width = 11,
#        height = 8.5)
# 
# rm(fig_rsq_rmsle_zoom, fig_rsq_rmsle, gof_tbl)

## ----plot_cmax_auc_rmsle------------------------------------------------------
# #read in table
# cmax_auc_rmsle_tbl <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
#                                                    "Manu_Files/",
#                                                   today,
#                                                   "_Supp_Table2_eval_results.xlsx"),
#                                      sheet = "AUC_and_Cmax_RMSLE_summary")
# #make plot
# cmax_auc_rmsle_tbl <- cmax_auc_rmsle_tbl %>%
#   dplyr::mutate(opts = substr(Options, 1, 3))
# 
# cmax_auc_rmsle_fig <- ggplot(cmax_auc_rmsle_tbl) +
#   geom_point(aes(x = RMSLE_AUC,
#                  y = RMSLE_Cmax,
#                  color = opts),
#              size = 4) +
#   facet_grid(rows = vars(error_model),
#              cols = vars(method)) +
#   scale_color_brewer(palette = "Paired",
#                      name = "Fit options") +
#   theme_bw()
# 
# cmax_auc_rmsle_fig
# 
# #save plot
# #PDF version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#               "_Supp_Fig6",
#                 ".pdf"),
#       cmax_auc_rmsle_fig,
#       width = 7,
#       height = 5)
# 
# #PNG version
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                 "Manu_Files/",
#               today,
#               "_Supp_Fig6",
#                 ".png"),
#       cmax_auc_rmsle_fig,
#       width = 7,
#       height = 5,
#       units = "in",
#       dpi = 300)
# 
# rm(cmax_auc_rmsle_fig, cmax_auc_rmsle_tbl)

## -----------------------------------------------------------------------------
# all_preds <- readRDS(paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "_all_preds.Rds"))
# 
# #select winning fit opts: 110p bobyqa
# 
# all_preds_prep <- all_preds %>%
#   filter(Options %in% c("110p"),
#          method %in% "bobyqa") %>%
#   mutate(Conc_est_sub = dplyr::if_else(Conc_est < pLOQ,
#          pLOQ,
#          Conc_est),
#          Conc_est_belowLOQ = dplyr::if_else(Conc_est < pLOQ,
#          TRUE,
#          FALSE),
#          ID = as.factor(paste(Chemical, Chemical_Name, Species)),
#          .keep = "unused") %>%
#   select(ID, Options,
#          dose_norm, log10_trans, time_scale,
#          Route, Media, Dose,
#          Conc, Conc_est_sub, Conc_est_belowLOQ,
#          Time, Reference)
# 
# split_preds <- split(all_preds_prep, all_preds_prep$ID)
# 
# sp_plots <- lapply(names(split_preds), function(i) {
#   ggplot(data = split_preds[[i]],
#          mapping = aes(x = Conc,
#                        y = Conc_est_sub,
#                        color = as.factor(Dose),
#                        shape = Reference)) +
#     xlab("Observed conc, mg/L") +
#     ylab("Predicted conc, mg/L") +
#     geom_point(size = 2) +
#     geom_abline(slope = 1) +
#     facet_grid(Route ~ Media,
#                scales = "free_y") +
#     scale_y_log10() +
#     scale_x_log10() +
#     annotation_logticks(sides = "bl") +
#     scale_color_viridis_d(name = "Dose, mg/kg") +
#     theme_bw() +
#     labs(title = i) +
#     theme(legend.position = "bottom",
#           plot.title = element_text(hjust = 0.5, face = "bold"))
# }
# )
# 
# # Very BIG FILE
# pdf(file = paste0(
#   Sys.getenv("FIG_DIR"),
#   "Manu_Files/",
#   today,
#    "_log10_predvsobs_plots_110pbobyqa_all.pdf"),
#   onefile = TRUE, height = 8, width = 8)
# for (x in seq_along(sp_plots)) {
#   print(sp_plots[[x]])
# }
# dev.off()
# 
# rm(sp_plots, split_preds, all_preds_prep, all_preds)

## -----------------------------------------------------------------------------
# my_pk <- readRDS(paste0(Sys.getenv("OD_DIR"),
#                         read_date_pk,
#                         "mypk_fit_110p.Rds"))

## ----data_aggregation---------------------------------------------------------
# # after fitting pk object for all cvt objects or reading the fitted pk object in `setup`
# winmodel <- get_winning_model(my_pk, method = "bobyqa")
# my_preds <- semi_join(predict(my_pk, use_scale_conc = FALSE, method = "bobyqa"), winmodel)
# my_residuals <- residuals(my_pk, use_scale_conc = FALSE, method = "bobyqa") %>%
#   semi_join(winmodel)
# my_tkstats <- eval_tkstats(my_pk, method = "bobyqa") %>% semi_join(winmodel)
# my_nca <- get_nca(my_pk)
# all_my_data <- get_data(my_pk)
# 
# #merge together long form coefs and long form coef SDs
# 
# my_tf <- twofold_test(my_pk, method = "bobyqa")
# 
# NROW(all_my_data) > NROW(my_preds) # THis must be true... or else try distinct(my_preds)
# 
# my_coef_sd <- coef_sd(my_pk, method = "bobyqa") %>% #this will take a few minutes to run
#   semi_join(winmodel) #keep only coefs & SDs for winning model
# 
# # Writing file to xlsx
# writexl::write_xlsx(x = list(predictions = my_preds,
#                              tkstats = my_tkstats,
#                              coefs_sds = my_coef_sd),
#                     path = paste0(Sys.getenv("FIG_DIR"),
#                                   "Manu_Files/",
#                                   today,
#                                   "_Supp_Table3",
#                                   ".xlsx"))

## -----------------------------------------------------------------------------
# 
# fe_df <- fold_error(my_pk,
#                      sub_pLOQ = TRUE,
#                      method = "bobyqa") %>% semi_join(winmodel)
# 
# r2_df <- rsq(my_pk,
#              use_scale_conc = FALSE,
#              sub_pLOQ = TRUE,
#              method = "bobyqa") %>%
#   semi_join(winmodel)
# 
# myAIC <- AIC(my_pk, method = "bobyqa") %>%
#   semi_join(winmodel)
# 
# myRMSLE <- rmse(my_pk,
#                use_scale_conc = list(dose_norm = FALSE,
#                                      log10_trans = TRUE),
#                sub_pLOQ = TRUE,
#                method = "bobyqa") %>%
#   semi_join(winmodel)
# 
# myRMSE <- rmse(my_pk,
#                rmse_group = ggplot2::vars(Chemical,
#                                           Species,
#                                           Route,
#                                           Media,
#                                           Dose),
#                sub_pLOQ = TRUE,
#                method = "bobyqa") %>%
#   semi_join(winmodel)

## -----------------------------------------------------------------------------
# my_tf$model_error_all %>%
#   filter(model %in% c("model_1comp", "model_2comp")) %>%
#   mutate(Route.model = factor(paste(Route, model),
#                               levels = c("iv model_1comp",
#                                          "iv model_2comp",
#                                          "oral model_1comp",
#                                          "oral model_2comp"))) %>%
#   ggplot(aes(x = Fold_Error,
#              fill = Route.model)) +
#   geom_histogram(color = NA,
#                  position = position_stack(),
#                  bins = 50) +
#   scale_x_log10(labels = scales::label_math(format = log10)) +
#   annotation_logticks(sides = "b") +
#     scale_fill_brewer(palette = "Paired",
#                       name = "Route/winning model") +
#   expand_limits(y = 0) +
#   geom_vline(xintercept = c(0.5, 2), color = "black", linetype = "dashed") +
#   labs(x = "Predicted/Observed",
#        y = "Number of Observations") +
#   theme_classic() +
#   theme(aspect.ratio = 1,
#         panel.border = element_rect(color = "black", fill = NA),
#         panel.grid.major = element_line(color = "grey95", linewidth = 0.4)) +
#   coord_cartesian(xlim = c(10^(-1.5), 100))
# 
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                "Manu_Files/",
#                                   today,
#               "_Supp_Fig7_PredictedObserved_compartmentModels.png"),
#        height = 5, width = 7)
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#                "Manu_Files/",
#                                   today,
#               "_Supp_Fig7_PredictedObserved_compartmentModels.pdf"),
#        height = 5, width = 7)
# 

## ----fig4-modelPerform-v-dataVar----------------------------------------------
# my_tf$indiv_data_test_fold_errors %>%
#   select(model, starts_with("percent_")) %>%
#   glimpse()
# 
# pl_4A <- my_tf$indiv_data_fold_errors %>%
#   # For Plotting
#   ggplot(aes(
#     y = Fold_Error,
#     x = foldConc
#   )) +
#   geom_bin2d(bins = 40, color = NA) +
#   geom_hline(yintercept = c(0.5, 2), linetype = "dashed") +
#   geom_vline(xintercept = c(0.5, 2), linetype = "dashed") +
#   scale_x_log10(labels = scales::label_math(format = log10),
#                 limits = c(0.001, 20)) +
#   scale_y_log10(labels = scales::label_math(format = log10),
#                 limits = c(0.0001, 10000)) +
#   annotation_logticks(sides = "bl") +
#   scale_fill_viridis_c(option = "cividis",
#                        limits = c(1, 100),
#                        oob = scales::oob_squish) +
#   labs(y = "Model Error",
#        x = "Data Variability",
#        fill = "Count") +
#   theme(aspect.ratio = 1,
#         panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         strip.background = element_rect(fill = "white"),
#         strip.text = element_text(face = "bold"),
#         axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         axis.text = element_text(size = 14),
#         axis.title = element_text(size = 14))
# 
# pl_4A
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               today,
#               "_Fig4A_ModelPerformance_vDataVariability.png"),
#        height = 5,
#        width = 7,
#        device = "png",
#        dpi = 300,
#        units = "in")

## -----------------------------------------------------------------------------
# 
# my_table <- my_tf$indiv_data_test_fold_errors %>%
#   ungroup() %>%
#   filter(model %in% "All") %>%
#   select(starts_with("percent_")) %>% t() %>%
#   round(digits = 2) %>%
#   as.data.frame() %>%
#   rownames_to_column(var = "percentages") %>%
#   mutate(percentages = case_when(
#     percentages == "percent_both_within" ~ "Both within a factor of 2",
#     percentages == "percent_both_outside" ~ "Both outside a factor of 2",
#     percentages == "percent_model_outside" ~ "Model too variable",
#     percentages == "percent_data_outside" ~ "Data too variable"
#   ))
# 
# names(my_table) <- c(" ", "Overall (%)")
# 
# flextable(my_table) %>%
#   border_inner() %>%
#   fontsize(part = "all", size = 11) %>%
#   bold(part = "all", j = 2) %>%
#   autofit()
# 
# table_plot <- gen_grob(flextable(my_table) %>%
#                          border_inner() %>%
#                          fontsize(part = "all", size = 10) %>%
#                          bold(part = "all", j = 2) %>%
#                          autofit(),
#                        autowidths = TRUE,
#                        fit = "width")
# 
# 
# dual_plot <- plot_grid(pl_4A,
#                        table_plot,
#                        ncol = 1,
#                        align = "hv",
#           rel_heights = c(1, 0.5),
#           rel_widths = c(1, 0.5)
#           )
# 
# dual_plot
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               "Manu_Files/",
#               today,
#               "_Fig4.png"),
#        plot = dual_plot,
#        height = 4.5,
#        width = 6,
#        device = "png",
#        bg = "white",
#        dpi = 300,
#        units = "in")
# 

## ----fig5-goodness-of-fit-----------------------------------------------------
# combined_gof_df <- my_tf$model_error_all %>%
#   group_by(Chemical, Species, model, method) %>%
#   summarize(within_2fold = sum(between(Fold_Error, 0.5, 2))/n()) %>%
#   left_join(r2_df) %>%
#   left_join(myAIC) %>%
#   left_join(myRMSLE) %>%
#   semi_join(winmodel)
# 
# 
# mypl <- ggplot(data = combined_gof_df,
#                aes(
#                  x = within_2fold,
#                  y = Rsq,
#                  color = model,
#                )) +
#   geom_point() +
#   geom_abline(slope = 1, color = "black", linetype = "longdash") +
#   scale_color_manual(values = c("#0398FC", "#D68E09", "black")) +
#     #coord_cartesian(xlim = c(0,1), ylim = c(0,1)) +
#   labs(x = "Fraction of predictions within 2-fold",
#        y = "R-squared value") +
#   theme(#aspect.ratio = 1,
#         panel.border = element_rect(color = "black", fill = NA, size = 1),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         legend.position = "none",
#         legend.title = element_blank(),
#         legend.key = element_blank()
#   )
# 
# panelA_plot <- ggExtra::ggMarginal(mypl,
#                                    groupFill = TRUE,
#                     type = "histogram",
#                     xparams = list(binwidth = 0.05),
#                     yparams = list(binwidth = 0.05))
# panelA_plot
# 
# panelB_plot <- ggplot(data = myRMSE,
#                aes(
#                  x = RMSE,
#                  fill = model
#                )) +
#   scale_x_log10() +
#   annotation_logticks(sides = "b") +
#   geom_histogram(bins = 50,
#                  position = position_stack(),
#                  color = NA) +
#   labs(y = "Count") +
#   scale_fill_manual(values = c("#0398FC", "#D68E09", "grey10")) +
#    scale_color_manual(values = c("#0398FC", "#D68E09", "grey10"), guide = "none") +
#   theme(text = element_text(size = 10),
#         #aspect.ratio = 1,
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         strip.background = element_blank(),
#     panel.spacing.y = unit(0.125, units = "in"),
#         legend.title = element_blank(),
#     legend.position = "bottom")
# panelB_plot
# 
# plAB <- patchwork::wrap_elements(panelA_plot) + panelB_plot +
#   plot_annotation(tag_levels = "A") + plot_layout(guides = 'collect', widths = c(1,0.9)) & theme(legend.position = "bottom") & guides(color = "none")
# 
# #save PNG version
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Fig5.png"),
#        plAB,
#        height = 4,
#        width = 6.5,
#        bg = "white",
#        units = "in")
# 
# #save PDF version
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Fig5.pdf"),
#        plAB,
#        height = 4,
#        width = 6.5,
#        bg = "white")

## ----goodness-of-fit_exampleFits----------------------------------------------
# pl <- plot(my_pk,
#            method = "bobyqa",
#            best_fit = TRUE,
#            use_scale_conc = list(dose_norm = TRUE,
#                                  log10_trans = FALSE),
#            drop_nonDetect = FALSE)
# 
# cvt %>% filter(curation_set_tag == "CVT_Showa") %>%
#   pull(analyte_dtxsid) -> Showa_chems
# 
# 
# my_rsq <- rsq(my_pk, method = "bobyqa")
# my_rsq %>% semi_join(winmodel) %>% filter(Chemical %in% c("DTXSID2020139",
#                             "DTXSID4023917",
#                             "DTXSID3061635",
#                             "DTXSID8030760"))
# 
# #High rsq: DTXSID2020139 and DTXSID4023917
# #Low rsq: DTXSID3061635 and DTXSID8030760
# 
# fe_df %>% filter(Chemical %in% c("DTXSID2020139",
#                                   "DTXSID4023917",
#                                   "DTXSID3061635",
#                                   "DTXSID8030760"), Species %in% "rat") %>% group_by(Chemical, Species) %>% summarise(frac_2fold = sum(Fold_Error < 2 & Fold_Error > 0.5)/n())
# 
# # # A tibble: 4 × 3
# # # Groups:   Chemical [4]
# #   Chemical      Species frac_2fold
# #   <chr>         <chr>        <dbl>
# # 1 DTXSID2020139 rat          0.271
# # 2 DTXSID3061635 rat          0.125
# # 3 DTXSID4023917 rat          0.953
# # 4 DTXSID8030760 rat          0.778
# 
# #High frac 2fold: DTXSID4023917 and DTXSID8030760
# #Low frac 2fold: DTXSID2020139 and DTXSID3061635
# 
# pl_sub <- pl %>%
#   filter(Chemical %in% c("DTXSID2020139",
#                          "DTXSID4023917",
#                          "DTXSID3061635",
#                          "DTXSID8030760"),
#          Species %in% "rat")
# 
# ex_fits <- pl_sub %>%
#   pull(final_plot)
# ex_fits <- set_names(ex_fits,
#           pl_sub$Chemical)
# 
# 
# 
# cowplot::plot_grid(plotlist = list(
#   #Low frac 2fold and high Rsq
#   ex_fits[["DTXSID2020139"]] +
#                                      scale_color_manual(values = c("#a6bddb",
#                                                                    "#74a9cf",
#                                                                    "#41bbc4",
#                                                                    "#2b8cbe",
#                                                                    "#045a8d")) +
#                                      theme(legend.position = "none") +
#                                      xlim(0,30) +
#     ggtitle("A: DTXSID2020139 rat"),
#   #high frac 2fold and high Rsq
#                                    ex_fits[["DTXSID4023917"]] +
#                                      scale_color_manual(values = c("black",
#                                                                    "grey40")) +
#                                      theme(legend.position = "none") +
#     ggtitle("B: DTXSID4023917 rat"),
# 
#   #low frac 2fold and low rsq
#                                     ex_fits[["DTXSID3061635"]] +
#                                      scale_color_manual(values = c("#006837")) +
#                                      theme(legend.position = "none") +
#     ggtitle("C: DTXSID3061635 rat"),
# 
#   #high frac2fold and low rsq
#                                    ex_fits[["DTXSID8030760"]] +
#                                      scale_color_manual(values = "magenta3") +
#                                      theme(legend.position = "none") +
#     ggtitle("D: DTXSID8030760 rat")
#                                   ),
#                    scale = 0.85)
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Fig7.png"),
#        height = 12,
#        width = 12)
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Fig7.pdf"),
#        height = 12,
#        width = 12)
# 

## -----------------------------------------------------------------------------
# flat_chems <- winmodel %>%
#   filter(model %in% "model_flat") %>%
#   distinct(Chemical, Species)
# print(flat_chems)
# 
# pl <- plot(my_pk,
# method = "bobyqa",
# best_fit = FALSE,
# use_scale_conc = list(dose_norm = TRUE,
# log10_trans = FALSE),
# drop_nonDetect = FALSE)
# 
# flat_fits <- pl %>%
#   inner_join(flat_chems) %>%
#                               pull(final_plot)
# 
# cowplot::plot_grid(plotlist = flat_fits)
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Supp_Fig8.pdf"),
#        height = 8,
#        width = 16,
#        units = "in")
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Supp_Fig8.png"),
#        bg = "white",
#        height =8,
#        width = 16,
#        dpi = 300,
#        units = "in")

## ----Lombardo_Comparison------------------------------------------------------
# cvt_DTXSIDs <- unique(my_pk$data$Chemical)
# # First things first, let's load the data from Lombardo et al.
# lombardo <- readxl::read_xlsx(
#   paste0(Sys.getenv("OD_DIR"),
#          "Lombardo2018-Supplemental_82966_revised_corrected.xlsx"),
#   skip = 8)
# 
# #use ctxR to search the Dashboard for DTXSIDs corresponding to these CASRNs
# lombardo_dtxsid <- ctxR::chemical_equal_batch(
#   word_list = unique(lombardo$`CAS #`)
# )
# 
# #merge in the DTXSIDs
# lombardo <- lombardo %>%
#    dplyr::left_join(lombardo_dtxsid$valid %>%
#                dplyr::select(searchValue, dtxsid, preferredName),
#             by = c("CAS #" = "searchValue"))
# 
# #any left unidentified: search by name
# lombardo_missing <- lombardo %>%
#   dplyr:: filter(is.na(dtxsid))
# 
# missing_names <- lombardo_missing %>%
#    dplyr::pull(Name)
# 
# lombardo_dtxsid_name <- ctxR::chemical_equal_batch(
#   word_list = unique(missing_names)
# )
# 
# lombardo_missing <- lombardo_missing %>%
#    dplyr::left_join(lombardo_dtxsid_name$valid %>%
#                dplyr::select(searchValue, dtxsid, preferredName),
#             by = c("Name" = "searchValue") )
# 
# lombardo <-  dplyr::bind_rows(lombardo %>% filter(!is.na(dtxsid)),
#                       lombardo_missing)
# 
# lombard_abbr <- lombardo %>%
#   dplyr::select(dtxsid,
#                 preferredName,
#                 Vss = `human VDss (L/kg)`,
#                 CLtot = `human CL (mL/min/kg)`,
#                 halflife = `terminal  t1/2 (h)` ) %>%
#   dplyr::mutate(Species = 'human',
#                 source = "Lombardo et al.",
#                 CLtot = CLtot*60/1000 # Lombardo reports this as mL/min/kg, need L/h/kg
#   ) %>%
#    dplyr::arrange(halflife)
# 
# my_pk_vals <- my_tkstats %>%
#   dplyr::select(dtxsid = Chemical,
#                 Species,
#                 model,
#                 halflife = halflife.tkstats,
#                 Vss = Vss.tkstats,
#                 CLtot = CLtot.tkstats) %>%
#   dplyr::distinct() %>%
#   dplyr::mutate(source = "invivoPKfit") %>%
#   dplyr::filter(model != "model_flat",
#                 !is.na(halflife))
# 
# lombardo_inner <- my_pk_vals %>%
#    dplyr::inner_join(lombard_abbr, by = "dtxsid")
# #79 chemicals in common
# 
# lombardo_comparison <-  dplyr::bind_rows(lombard_abbr %>%
#                                     dplyr::filter(
#                                      dtxsid %in%
#                                        lombardo_inner$dtxsid
#                                    ),
#                                  my_pk_vals %>%
#                                     dplyr::filter(
#                                      dtxsid %in%
#                                        lombardo_inner$dtxsid
#                                    )
# )
# 
# #merge in chemical names
# 
# #get preferred names by DTXSID
# all_details <- ctxR::get_chemical_details_batch(
#   unique(lombardo_comparison$dtxsid)
# )
# 
# lombardo_comparison <- lombardo_comparison %>%
#   select(-preferredName) %>%
#   left_join(all_details %>% select(dtxsid, preferredName))
# 
# #create a factor variable for chemical names that sorts chemicals from highest to lowest Lombardo halflife
# 
# chemnames_levels <- lombardo_comparison %>%
#    dplyr::filter(source %in% "Lombardo et al.") %>%
#    dplyr::arrange(halflife) %>%
#    dplyr::pull(preferredName)
# 
# 
# lombardo_comparison <- lombardo_comparison %>%
#    dplyr::mutate(preferredName = factor(preferredName,
#                                 levels = chemnames_levels))
# 
# #get invivoPKfit species for plot-splitting purposes
# ivpk_species <- lombardo_comparison %>%
#   dplyr::filter(source %in% "invivoPKfit") %>%
#   dplyr::distinct(dtxsid, Species) %>%
#   dplyr::rename(invivopkfit_species = Species)
# 
# lombardo_comparison <- lombardo_comparison %>%
#   dplyr::left_join(ivpk_species,
#                    by = "dtxsid")
# 
# #reshape longer and plot
# lombardo_comparison %>%
#    tidyr::pivot_longer(cols = c("Vss", "CLtot", "halflife")) %>%
#    dplyr::mutate(name = factor(name,
#                                levels = c("halflife", "Vss", "CLtot")
#                                )
#                  ) %>%
#   ggplot(mapping = aes(
#     x = value,
#     y = preferredName
#   )) +
#   geom_point(aes(shape = source,
#                  color = source),
#              #position = position_dodge(0.7),
#              size = 5/.pt, stroke = 2.5/.pt) +
#   facet_grid(cols = vars(name),
#              rows = vars(invivopkfit_species),
#              scales = "free",
#              space =  "free_y",
#              drop = TRUE) +
#   scale_x_log10( labels = scales::label_math(format = log10)) +
#   #annotation_logticks(sides = "b") +
#   scale_shape_manual(values = c("Lombardo et al." = 3,
#                                 "invivoPKfit" = 22)) +
#   scale_color_manual(values = c("#d95f02", "#1b9e77")) +
#   guides(x = "axis_logticks") +
#   theme(panel.border = element_rect(color = "black",
#                                     fill = NA,
#                                     linewidth = 1.5),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          panel.spacing.y = unit(0, "line"),
#         strip.background = element_rect(fill = "white"),
#         strip.text.x = element_text(face = "bold", size = 12),
#        strip.text.y = element_blank(),
#         text = element_text(size = 10),
#         axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         axis.text.y = element_text(face = "bold", size = 6),
#         axis.text.x = element_text(size = 10),
#         axis.title.y = element_blank(),
#         axis.title.x = element_blank(),
#         legend.position = "bottom",
#         legend.key = element_blank(),
#         legend.title = element_text(face = "bold"),
#         legend.text = element_text())
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Fig6.png"),
#        height = 11,
#        width = 10,
#        units = "in",
#        device = "png",
#        dpi = 300)
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Fig6.pdf"),
#        height = 11,
#        width = 10,
#        units = "in")
# 

## ----musther-read-------------------------------------------------------------
# species_cols <- c(paste("Mouse",
#        c("Dosing Formulation",
#                     "Strain",
#                     "Sex",
#                     "F% Mean",
#                     "F% Range"),
#       sep = "."),
# 
# paste("Rat",
#        c("Dosing Formulation",
#                     "Strain",
#                     "Sex",
#                     "F% Mean",
#          "WSD",
#                     "F% Range"),
#       sep = "."),
# 
# paste("Dog",
#        c("Dosing Formulation",
#                     "Strain",
#                     "Sex",
#                     "F% Mean",
#          "WSD",
#                     "F% Range"),
#       sep = "."),
# 
# paste("Non-Human Primate",
#        c("Dosing Formulation",
#                     "Strain",
#                     "Sex",
#                     "F% Mean",
#          "WSD",
#                     "F% Range"),
#       sep = "."),
# 
# paste("Human",
#        c("Dosing Formulation",
#                     "Sex",
#                     "F% Mean",
#          "WSD",
#                     "F% Range"),
#       sep = ".")
# )
# 
# 
# 
# musther2014 <- readxl::read_excel(
#   path = paste0(Sys.getenv("OD_DIR"),
#                 "SupTableMusther2014.xlsx"),
#   sheet = "Sheet1",
#   skip = 1,
#   col_names = c( "Compound",
#                  "CAS",
#                  "MW", "Ion Class",
#                species_cols,
#                 "References.mouse",
#                 "References.rat",
#                 "References.dog",
#                 "References.NHP",
#                 "References.human"
#                 ),
#   col_types = "text"
#   )
# 
# #pivot longer
# musther2014_long <- musther2014 %>%
#   select(c(Compound,
#            CAS,
#          contains("F% Mean"))) %>%
#   pivot_longer(cols = !c(Compound, CAS),
#                names_to = c("Species", "stat"),
#                names_sep = "\\.") %>%
#   mutate(value = gsub(x=value,
#                       pattern = "<",
#                       replacement = ""),
#          value_num = as.numeric(value))
# 
# 

## ----musther-ccd--------------------------------------------------------------
# musther2014_dtxsid <- ctxR::chemical_equal_batch(word_list = unique(musther2014_long$CAS))
# 
# musther2014_long <- musther2014_long %>%
#   left_join(musther2014_dtxsid$valid %>% select(searchValue, dtxsid, preferredName),
#             by = c("CAS" = "searchValue"))

## ----musther-getfgutabs-------------------------------------------------------
# fgutabs <- coef(my_pk, method = "bobyqa") %>%
#   distinct() %>%
#    semi_join(winmodel) %>% #keep only winning models
#   rowwise() %>%
#   mutate(Fgutabs = coefs_vector["Fgutabs"]) %>%
#   filter(!is.na(Fgutabs))

## -----------------------------------------------------------------------------
# length(intersect(musther2014_long$dtxsid,
#                  get_data(my_pk)$Chemical))

## ----musther-join-------------------------------------------------------------
# fgutabs_pk_musther <- fgutabs %>% inner_join(musther2014_long,
#                                              by = c("Chemical" = "dtxsid"))

## ----musther-plot-------------------------------------------------------------
# #capitalize invivopkfit species to harmonize with Musther
# fgutabs_pk_musther <- fgutabs_pk_musther %>%
#   dplyr::mutate(Species.x =  stringr::str_to_sentence(Species.x))
# 
# ggplot(fgutabs_pk_musther) +
#   geom_point(aes(y = preferredName,
#                  x = Fgutabs,
#                  shape = Species.x,
#                  color = "invivopkfit"),
#              size =4,
#              stroke = 2) +
#   geom_point(aes(y = preferredName,
#                  x = value_num/100,
#                  shape = Species.y,
#                  color = "Musther et al. 2014"),
#              size = 4,
#              stroke = 2) +
#   scale_shape_manual(values = c("Rat" = 21,
#                                 "Mouse" = 22,
#                                 "Dog" = 23,
#                                 "Non-Human Primate" = 24,
#                                 "Human" = 3),
#                      breaks = c("Rat", "Mouse", "Dog", "Non-Human Primate", "Human")) +
# 
#   theme_bw() +
#   theme(axis.title.y = element_blank(),
#         legend.title = element_blank(),
#         axis.title.x = element_text(size = 12),
#         axis.text = element_text(size = 12),
#         legend.text = element_text(size = 12)
#         )
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                          "Manu_Files/",
#                          today,
#                          "_Supp_Fig9.pdf"),
#        height = 5,
#        width = 8)
# 
# ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           "Manu_Files/",
#                          today,
#                          "_Supp_Fig9.png"),
#        height = 5,
#        width = 8,
#        device = "png", dpi = 300)

## ----do_fit-benchmarking------------------------------------------------------
# # Randomize the order of chemical species
# chem_spec <- cvt %>%
#   dplyr::distinct(analyte_dtxsid, species) %>%
#   dplyr::slice_sample(prop = 1)
# 
# test_group_size <- seq(25, 105, by = 10)
# fit_options <- expand.grid(error_model = c("pooled",
#                                            "hierarchical"),
#                            dose_norm = FALSE,
#                            log10_trans = TRUE,
#                            time_scale = c(
#                              TRUE,
#                              FALSE),
#                            stringsAsFactors = FALSE)
# 
# fit_options <- tidyr::expand_grid(fit_options, test_group_size)
# 
# fit_options <- fit_options %>% rowwise() %>%
#   mutate(this_data = list({
#     tmp <- chem_spec %>% slice_head(n = test_group_size) %>%
#       left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
#       pk() +
#         scale_time(new_units = ifelse(!time_scale,
#                                       "identity",
#                                       "auto")) +
#         scale_conc(dose_norm = dose_norm, log10_trans = log10_trans) +
#         settings_preprocess(suppress.messages = TRUE)
#     do_prefit(tmp)
#   }
#   ),
#   data_size = chem_spec %>% slice_head(n = test_group_size) %>%
#     left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
#     NROW())
# 
# # Organization of the benchmarking
# # For each fit_option, return the four values of system.time() that we want
# df_out <- fit_options %>%
#   rowwise() %>%
#   mutate(tim_1core = system.time(do_fit(this_data))[["elapsed"]],
#          tim_4core = system.time(do_fit(this_data, n_cores = 4))[["elapsed"]],
#          tim_7core = system.time(do_fit(this_data, n_cores = 7))[["elapsed"]],
#          tim_12core = system.time(do_fit(this_data, n_cores = 12))[["elapsed"]]
#          )
# 
# df_out <- df_out %>% select(-this_data)
# gc()
# 
# full_long <- df_out %>%
#   pivot_longer(cols = c(tim_1core:tim_12core),
#                         names_to = "N_cores",
#                         values_to = "Time_s") %>%
#   group_by(N_cores, test_group_size, data_size) %>%
#   summarize(mean_time = mean(Time_s, na.rm = TRUE)/60,
#             max_time = max(Time_s, na.rm = TRUE)/60,
#             min_time = min(Time_s, na.rm = TRUE)/60
#             ) %>%
#   mutate(N_cores = as.numeric(str_extract(N_cores, "[:digit:]+")))
# 
# df_out %>% clipr::write_clip()
# ggplot(full_long,
#        aes(x = test_group_size,
#            y = mean_time,
#            color = as.factor(N_cores))) +
#   geom_point() +
#   geom_linerange(aes(ymin = min_time,
#                 ymax = max_time)) +
#   geom_line(aes(group = N_cores)) +
#   labs(x = "Number of Data Groups",
#        y = "Runtime (minutes)",
#        title = "Number of Processing Cores",
#        subtitle = "(with min/max runtime range)") +
#   facet_grid(cols = vars(N_cores)) +
#   scale_color_manual(values = c("black", "#40c679", "#31a354", "#006837")) +
#   coord_fixed(ratio = 8) +
#   theme(panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#         legend.position = "none",
#         plot.title = element_text(hjust = 0.5),
#         plot.subtitle = element_text(hjust = 0.5),
#         axis.ticks = element_blank(),
#         axis.line = element_blank(),
#         strip.background = element_blank())
# 
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               "Manu_Files/",
#               today,
#               "_Supp_Fig10.png"),
#        width = 5,
#        height = 2.5,
#        units = "in")
# 

## ----fit_summary_bydatagrp----------------------------------------------------
# fit_summary_bydatagrp <- function(this_error_model,
#                              this_dose_norm,
#                              this_log10_trans,
#                              this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   my_pk <- readRDS(file = file_str)
# 
#     pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Summary by data group for: ", pk_name))
# 
# myAFE <- AFE(my_pk,
#               use_scale_conc = FALSE,
#              sub_pLOQ = TRUE)
# 
# myAAFE <- AAFE(my_pk,
#               use_scale_conc = FALSE,
#              sub_pLOQ = TRUE)
# 
# my_r2 <- rsq(my_pk,
#              use_scale_conc = FALSE,
#              sub_pLOQ = TRUE)
# 
# myAIC <- AIC(my_pk)
# 
# myRMSLE <- rmse(my_pk,
#                use_scale_conc = list(dose_norm = FALSE,
#                                      log10_trans = TRUE),
#                sub_pLOQ = TRUE)
# 
# 
# myRMSE <- rmse(my_pk,
#                use_scale_conc = FALSE,
#                sub_pLOQ = TRUE)
# 
# winmodel <- get_winning_model(my_pk) %>%
#   select(Chemical, Species, method, model) %>%
#   dplyr::mutate(is_winning_model = TRUE)
# 
# 
#  #note convergence codes for each model
#   pf <- my_pk$fit %>%
#     dplyr::distinct(Chemical, Species, model, method, convcode)
# 
# 
#   convcode_labels <- data.frame(convcode =c(0,
#                                                1,
#                                                20,
#                                                21,
#                                                10,
#                                                51,
#                                                52,
#                                                9999,
#                                                -9999),
#                                 convcode_label = c('Successful convergence (see ?optimx::optimx)',
#                                                    'The iteration limit maxit had been reached (see ?optimx::optimx)',
#                                 'the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))',
#   'an intermediate set of parameters is inadmissible (see ?optimx::optimx)',
#   'degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)',
#   'a warning from the "L-BFGS-B" method (see ?optimx::optimx)',
#   'an error from the "L-BFGS-B" method (see ?optimx::optimx)',
#   'error in optimx::optimx()',
#   'model status was "abort"')
#   )
# 
#   pf <- pf %>%
#     dplyr::left_join(convcode_labels)
# 
# #join absolutely everything together
# summ_by_datagroup <- pf %>%
#   left_join(winmodel) %>%
#   left_join(myAIC) %>%
#   left_join(myAFE) %>%
#   left_join(myAAFE) %>%
#   left_join(my_r2) %>%
#   left_join(myRMSLE) %>%
#   left_join(myRMSE) %>%
# mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE,
#                                     is.na(is_winning_model) ~ FALSE,
#                                     .default = FALSE)) %>%
#   arrange(Chemical, Species, method, model)
# 
# return(summ_by_datagroup)
# }

## -----------------------------------------------------------------------------
# fit_summary_byexpt <- function(this_error_model,
#                              this_dose_norm,
#                              this_log10_trans,
#                              this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_",
#                      dose_indic,
#                      log10_indic,
#                      time_indic,
#                      errmodel_indic,
#                      ".Rds")
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   my_pk <- readRDS(file = file_str)
# 
#       pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Summary by experiment for: ", pk_name))
# 
# my_tkstats <- eval_tkstats(my_pk,
#                            model = NULL)
# 
# winmodel <- get_winning_model(my_pk) %>%
#   select(Chemical, Species, method, model) %>%
#   dplyr::mutate(is_winning_model = TRUE)
# 
# pf <- my_pk$fit %>%
#     dplyr::distinct(Chemical, Species, model, method, convcode)
# 
# 
#   convcode_labels <- data.frame(convcode =c(0,
#                                                1,
#                                                20,
#                                                21,
#                                                10,
#                                                51,
#                                                52,
#                                                9999,
#                                                -9999),
#                                 convcode_label = c('Successful convergence (see ?optimx::optimx)',
#                                                    'The iteration limit maxit had been reached (see ?optimx::optimx)',
#                                 'the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))',
#   'an intermediate set of parameters is inadmissible (see ?optimx::optimx)',
#   'degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)',
#   'a warning from the "L-BFGS-B" method (see ?optimx::optimx)',
#   'an error from the "L-BFGS-B" method (see ?optimx::optimx)',
#   'error in optimx::optimx()',
#   'model status was "abort"')
#   )
# 
#   pf <- pf %>%
#     dplyr::left_join(convcode_labels)
# 
# 
# summ_by_expt <- pf %>%
#   left_join(winmodel) %>%
#   left_join(my_tkstats) %>%
# mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE,
#                                     is.na(is_winning_model) ~ FALSE,
#                                     .default = FALSE))  %>%
#   arrange(Chemical, Species, method, model)
# 
# return(summ_by_expt)
# }
# 

## -----------------------------------------------------------------------------
# summary_datagrp_all <- fitopts %>%
#               dplyr::rowwise() %>%
#   dplyr::mutate(summ = list(
#     fit_summary_bydatagrp(this_error_model = error_model,
#                                              this_dose_norm = dose_norm,
#                                              this_log10_trans = log10_trans,
#                                              this_time_scale = time_scale)
#     )
#     ) %>%
#   tidyr::unnest(cols = c(summ))
# 
# summary_expt_all <- fitopts %>%
#               dplyr::rowwise() %>%
#   dplyr::mutate(summ_expt = list(
#     fit_summary_byexpt(this_error_model = error_model,
#                                              this_dose_norm = dose_norm,
#                                              this_log10_trans = log10_trans,
#                                              this_time_scale = time_scale)
#     )
#     ) %>%
#   tidyr::unnest(cols = c(summ_expt))
# 
# writexl::write_xlsx(list("Summary by data grp" = summary_datagrp_all,
#                          "Summary by expt" = summary_expt_all),
#                     paste0(Sys.getenv("FIG_DIR"),
#               "Manu_Files/",
#               today,
#               "_Supp_Table4.xlsx"))

## -----------------------------------------------------------------------------
# #set up pk analysis for single-study
# pk_study <- minimal_pk +
#   facet_data(
#     facets = vars(Chemical, Species, Reference)
#   ) +
#   stat_error_model(
#     error_group = vars(Chemical, Species, Reference)
#   ) +
#   settings_preprocess(keep_data_original = FALSE,
#                       suppress.messages = TRUE) +
#   scale_conc(dose_norm = TRUE,
#              log10_trans = TRUE) +
#   settings_optimx(method = "bobyqa") +
#   stat_model(model = c("model_flat",
#                        "model_1comp",
#                        "model_2comp"))
# 
# #do pk analysis
# system.time(
#   pk_study <- do_fit(pk_study,
#                      n_cores = 11)
# )

## -----------------------------------------------------------------------------
# saveRDS(pk_study,
#         paste0(Sys.getenv("OD_DIR"),
#                today,
#                "studylevel_pk_fit_110p.Rds"))

## -----------------------------------------------------------------------------
# tkstats_study <- eval_tkstats(pk_study,
#                               finite_only = FALSE,
#                               dose_norm = TRUE,
#                               model = "winning") %>%
#   dplyr::select(Chemical,
#                 Species,
#                 Reference,
#                 method,
#                 model,
#                 halflife.tkstats,
#                 Vss.tkstats,
#                 `Vss/Fgutabs.tkstats`) %>%
#   dplyr::rename(halflife = halflife.tkstats,
#                 Vss = Vss.tkstats,
#                 `Vss/Fgutabs` = `Vss/Fgutabs.tkstats`) %>%
#   dplyr::distinct()
# 
# #Also: get coefficients and try to pull Fgutabs (if it was optimized)
# coef_study <- coef_sd(pk_study,
#                       method = "bobyqa",
#                        include_type = "optimize") %>%
#   dplyr::filter(param_name %in% "Fgutabs") %>%  dplyr::select(Chemical,
#                 Species,
#                 Reference,
#                 method,
#                 model,
#                 param_name,
#                 param_value) %>%
#   dplyr::distinct()
# 
# #keep only winning model coefs
# win_study <- get_winning_model(pk_study) %>%
#   dplyr::select(-near_flat, -preds_below_loq)
# 
# coef_study <- coef_study %>%
#   dplyr::semi_join(win_study)
# 
# fgutabs_study <- coef_study %>%
#   select(-param_name) %>%
#   rename(Fgutabs = param_value)
# 
# tkstats_study2 <- full_join(tkstats_study,
#                             fgutabs_study)

## -----------------------------------------------------------------------------
#   file_str <- paste0(Sys.getenv("OD_DIR"),
#                      read_date_pk,
#                      "mypk_fit_110p.Rds")
#   pk_pooled <- readRDS(file = file_str)
# 
#  tkstats_pooled <- eval_tkstats(pk_pooled,
#                               finite_only = FALSE,
#                               dose_norm = TRUE,
#                               method = "bobyqa",
#                               model = "winning") %>%
#     dplyr::select(Chemical,
#                 Species,
#                 Reference,
#                 method,
#                 model,
#                 halflife.tkstats,
#                 Vss.tkstats,
#                 `Vss/Fgutabs.tkstats`) %>%
#   dplyr::rename(halflife = halflife.tkstats,
#                 Vss = Vss.tkstats,
#                 `Vss/Fgutabs` = `Vss/Fgutabs.tkstats`) %>%
#    dplyr::group_by(Chemical,
#                    Species,
#                    method,
#                    model,
#                    halflife,
#                    Vss,
#                    `Vss/Fgutabs`) %>%
#    dplyr::summarise(n_ref = n_distinct(Reference)) %>%
#    dplyr::ungroup()
# 
#  #Also: get coefficients and try to pull Fgutabs
# coef_pooled <- coef_sd(pk_pooled,
#                        method = "bobyqa",
#                        include_type = "optimize") %>%
#   dplyr::filter(param_name %in% "Fgutabs") %>%  dplyr::select(Chemical,
#                 Species,
#                 method,
#                 model,
#                 param_name,
#                 param_value) %>%
#   dplyr::distinct()
# 
# #keep only winning model coefs
# win_pooled <- get_winning_model(pk_pooled,
#                                 method = "bobyqa") %>%
#   dplyr::select(-near_flat, -preds_below_loq)
# 
# coef_pooled <- coef_pooled %>%
#   dplyr::semi_join(win_pooled)
# 
# fgutabs_pooled <- coef_pooled %>%
#   select(-param_name) %>%
#   rename(Fgutabs = param_value)
# 
# tkstats_pooled2 <- full_join(tkstats_pooled,
#                             fgutabs_pooled)

## -----------------------------------------------------------------------------
# tkstats_study3 <- tkstats_study2 %>%
#   dplyr::rename(model_study = model) %>%
#   pivot_longer(cols = c(halflife,
#                         Vss,
#                         `Vss/Fgutabs`,
#                         Fgutabs),
#                names_to = "name",
#                values_to = "value_study")
# 
# tkstats_pooled3 <- tkstats_pooled2 %>%
#   dplyr::rename(model_pooled = model,
#                 n_ref_pooled = n_ref) %>%
#   pivot_longer(cols = c(halflife,
#                         Vss,
#                         `Vss/Fgutabs`,
#                         Fgutabs),
#                names_to = "name",
#                values_to = "value_pooled")
# 
# tkstats_all <- full_join(tkstats_study3,
#                          tkstats_pooled3)

## -----------------------------------------------------------------------------
# tkstats_all <- tkstats_all %>%
#   filter(!(model_pooled %in% "model_flat") &
#            !(model_study %in% "model_flat"))

## -----------------------------------------------------------------------------
# ggplot(tkstats_all %>%
#          filter( !is.na(value_study) &
#                   !is.na(value_pooled))) +
#   geom_point(aes(x = value_pooled,
#                  y = value_study,
#                  color = factor(n_ref_pooled))) +
#   facet_wrap(facets = vars(name),
#              scales = "free") +
#   scale_x_log10(guide = "axis_logticks") +
#   scale_y_log10(guide = "axis_logticks")

## -----------------------------------------------------------------------------
# ggplot(tkstats_all %>%
#          filter(n_ref_pooled > 1 &
#                   !is.na(value_study) &
#                   !is.na(value_pooled))) +
#   geom_point(aes(x = value_pooled,
#                  y = value_study),
#              color = "gray50",
#              shape = 1) +
#   geom_line(aes(x = value_pooled,
#                  y = value_study,
#                  group = interaction(Chemical, Species)),
#              color = "gray50") +
#   facet_wrap(facets = vars(name),
#              scales = "free") +
#   geom_abline(intercept = 0, slope = 1,
#               color = "black") +
#   geom_smooth(aes(x = value_pooled,
#                  y = value_study),
#               method = "lm",
#               se = FALSE,
#               color = "black",
#               linetype = 2) +
#   scale_x_log10(guide = "axis_logticks") +
#   scale_y_log10(guide = "axis_logticks") +
#   theme_bw() +
#   theme(strip.background = element_blank())

## -----------------------------------------------------------------------------
# rsq_df <- tkstats_all %>%
#          filter(n_ref_pooled > 1 &
#                   !is.na(value_study) &
#                   !is.na(value_pooled)) %>%
#   mutate(log10_value_pooled = log10(value_pooled),
#          log10_value_study = log10(value_study)) %>%
#   group_by(name) %>%
#   summarise(n = n(),
#             n_grp = n_distinct(Chemical, Species),
#     rsq_log10 = summary(
#       lm(
#         log10_value_study ~ log10_value_pooled,
#         na.action = "na.omit"
#         )
#       )[[
#     "r.squared"
#   ]],
#   rsq = summary(lm(value_study ~ value_pooled))[[
#     "r.squared"
#   ]]) %>%
#   ungroup() %>%
#   mutate(label = sprintf("R^2 == %.3f",
#                          rsq_log10))
# 
# rsq_df %>% select(-label) %>% print()

## -----------------------------------------------------------------------------
# ggplot(tkstats_all %>%
#          filter(n_ref_pooled > 1 &
#                   !is.na(value_study) &
#                   !is.na(value_pooled))) +
#   geom_point(aes(x = value_pooled,
#                  y = value_study),
#              color = "gray50",
#              shape = 1) +
#   geom_line(aes(x = value_pooled,
#                  y = value_study,
#                  group = interaction(Chemical, Species)),
#              color = "gray50") +
#   geom_text(data = rsq_df,
#             aes(x = 0,
#                 y = Inf,
#                 label = label),
#             parse = TRUE,
#             hjust = -0.3,
#             vjust = 1.8) +
#   facet_wrap(facets = vars(name),
#              scales = "free") +
#   geom_abline(intercept = 0, slope = 1,
#               color = "black") +
#   geom_smooth(aes(x = value_pooled,
#                  y = value_study),
#               method = "lm",
#               se = FALSE,
#               color = "black",
#               linetype = 2) +
#   scale_x_log10(guide = "axis_logticks") +
#   scale_y_log10(guide = "axis_logticks") +
#   xlab("Pooled value") +
#   ylab("Single-study value") +
#   theme_bw() +
#   theme(strip.background = element_blank(),
#         strip.text = element_text(size = 12))
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               "Manu_Files/",
#               today,
#               "_Fig8.png"),
#        height = 5,
#        width = 7)
# 
# ggsave(paste0(Sys.getenv("FIG_DIR"),
#               today,
#               "Manu_Files/",
#               "_Fig8.pdf"),
#        height = 5,
#        width = 7)

## -----------------------------------------------------------------------------
# tkstats_all_wide <- tkstats_all %>%
#   pivot_wider(id_cols = c(Chemical, Species,
#                           Reference, method,
#                           model_study,
#                           model_pooled,
#                           n_ref_pooled),
#               names_from = name,
#               values_from = c(value_study, value_pooled))
# 
# tkstats_all_wide %>% filter(n_ref_pooled > 1) %>%
#   writexl::write_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
#                                     "Manu_Files/",
#                                     today,
#                                     "Supp_Table5.xlsx"))

## -----------------------------------------------------------------------------
# plot_pooled <- plot(pk_pooled,
#                     method = "bobyqa",
#                     use_scale_conc = TRUE,
#                     best_fit = TRUE)
# plot_pooled %>%
#   filter(Chemical %in% "DTXSID2020139" & Species %in% "rat") %>%
#   pull(final_plot[[1]])
# 

## -----------------------------------------------------------------------------
# plot_pooled <- plot(pk_pooled,
#                     method = "bobyqa",
#                     use_scale_conc = TRUE,
#                     print_out = TRUE)
# 
# ggsave(
#    filename = paste0(Sys.getenv("FIG_DIR"),
#                      "Manu_Files/",
#                      today,
#                      "_allplots.pdf"),
#    plot = gridExtra::marrangeGrob(plot_pooled, nrow=1, ncol=1),
#    width = 15, height = 9
# )
# 

## ----information--------------------------------------------------------------
# sessionInfo()