## ----init, include = FALSE---------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) ## ----setup-------------------------------------------------------------------- # library(tidyverse, quietly = TRUE) # library(cowplot) # library(RColorBrewer) # library(ctxR) # library(flextable) # library(officer) # library(patchwork) # # devtools::load_all() #load invivopkfit # # #get today's date # today <- format(Sys.Date(), "%d%B%Y") ## ----eval = FALSE------------------------------------------------------------- # Sys.setenv(OD_DIR = "path/to/inputs/", # FIG_DIR = "path/to/outputs/") ## ----eval = FALSE------------------------------------------------------------- # ctxR::register_ctx_api_key(key = "<YOUR-API-KEY>") ## ----------------------------------------------------------------------------- # read_date_pk <- "25November2024" ## ----------------------------------------------------------------------------- # read_date_results <- "27November2024" ## ----pk_obj_fromSQL, eval=FALSE----------------------------------------------- # # ### Minimal PK Object ###---- # # Minimum pk object, add options later # # Note that these mappings are now a default mappings, just being verbose here. # # If there are any standard deviations that are zero or NA, but N_Subjects > 1, # # Set the n_subjects to 6 or the current value if lower than 6. # # (In Showa the mode is 6, but this condition is present in CVT_Legacy as well) # minimal_pk <- pk(data = cvt %>% # mutate(n_subjects_normalized = if_else( # n_subjects_normalized > 1 & is.na(invivPK_conc_sd), # min(n_subjects_normalized, 6), n_subjects_normalized)), # mapping = ggplot2::aes( # Chemical = analyte_dtxsid, # Chemical_Name = analyte_name_original, # DTXSID = analyte_dtxsid, # CASRN = analyte_casrn, # Species = species, # Reference = fk_extraction_document_id, # Media = conc_medium_normalized, # Route = administration_route_normalized, # Dose = invivPK_dose_level, # Dose.Units = "mg/kg", # Subject_ID = fk_subject_id, # Series_ID = fk_series_id, # Study_ID = fk_study_id, # ConcTime_ID = conc_time_id, # N_Subjects = n_subjects_normalized, # Weight = weight_kg, # Weight.Units = "kg", # Time = time_hr, # Time.Units = "hours", # Value = invivPK_conc, # Value.Units = "mg/L", # Value_SD = invivPK_conc_sd, # LOQ = invivPK_loq # )) ## ----outlining_all_options, eval=FALSE---------------------------------------- # # Types of Choices: # ## Dose Normalized # ## Log10 transform # ## Scale time # # List of options # fitopts <- expand.grid(error_model = c("pooled", # "hierarchical"), # dose_norm = c(TRUE, FALSE), # log10_trans = c(TRUE, FALSE), # time_scale = c( # TRUE, # FALSE), # stringsAsFactors = FALSE) ## ----fitting_choices, eval=FALSE---------------------------------------------- # fit_data <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale, # n_cores = 12, # file_dir = Sys.getenv("OD_DIR")){ # retval <- -9 # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0("mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # # cat("Fitting: ", file_str, "\n") # # tryCatch( # expr = { # this_pk <- minimal_pk + # facet_data(vars(Chemical, Species)) + # settings_preprocess(keep_data_original = FALSE, # suppress.messages = TRUE) + # scale_conc(dose_norm = this_dose_norm, log10_trans = this_log10_trans) + # settings_optimx(method = c( # "L-BFGS-B", # "bobyqa" # )) # # if(this_error_model %in% "pooled"){ # this_pk <- this_pk + # stat_error_model(error_group = vars(Chemical, Species)) # } else if(this_error_model %in% c("hierarchical", "joint")){ # this_pk <- this_pk + # stat_error_model(error_group = vars(Chemical, Species, Reference)) # } else{ # stop("this_error_model must be either 'pooled' or 'hierarchical'/'joint'") # } # # if(this_time_scale %in% TRUE){ # this_pk <- this_pk + # scale_time(new_units = "auto") # } # # #do the fit # this_time <- system.time(this_fit <- do_fit(this_pk, n_cores = n_cores)) # # # # #save the result # saveRDS(this_fit, # file = paste0(file_dir, # file_str)) # # cli::cli_alert_success(text = "Fitting success!") # print(this_time) # # retval <- this_time[[3]] # }, # error = function(e){ # cli::cli_alert_danger(text = paste("Analysis", # file_str, # "failed with error", # e)) # retval <- -1 # } # ) # # return(retval) # } ## ----running_all_options, eval=FALSE------------------------------------------ # # system.time(tmp_out <- mapply(fit_data, # this_error_model = fitopts$error_model, # this_dose_norm = fitopts$dose_norm, # this_log10_trans = fitopts$log10_trans, # this_time_scale = fitopts$time_scale, # n_cores = 14)) # # mean(tmp_out)/60 # Average runtime (in minutes) per fitting option # median(tmp_out)/60 # sum(tmp_out) # Looks like there's 10 extra seconds of overhead for non-fitting methods ## ----------------------------------------------------------------------------- # my_pk <- readRDS(paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_110p.Rds")) ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Reference) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Species) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species, Reference) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species, Reference, Route, Media) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species, Reference, Route, Media, Dose) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # group_by(Chemical, Species) %>% # summarise(blood_and_plasma = all(c("blood", # "plasma") %in% Media)) %>% # ungroup() %>% # summarise(n_both = sum(blood_and_plasma)) ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # group_by(Chemical, Species) %>% # summarise(t_mid = mean(range(Time)), # t_mid_log10 = midpt_log10(Time), # new_time_units = auto_units(y = Time, # from = "hours", # target = 10)) %>% # ungroup() %>% # count(new_time_units) ## ----------------------------------------------------------------------------- # get_data_info(my_pk)$dose_norm_check %>% # dplyr::summarise(AUC_flag = sum(!is.na(data_flag_AUC)), # Cmax_flag = sum(!is.na(data_flag_Cmax)), # total_multi_dose = sum(n_dose_groups>1), # total_expts = n()) ## ----------------------------------------------------------------------------- # #compute duration and concentration range across experiments (Chemical, Species, Reference, Route, Media, Dose) # data_range <- my_pk$data %>% # filter(Detect, !exclude) %>% # group_by(Chemical, Species, Reference, Media, Route, Dose, Dose.Units) %>% # summarize(maxT = max(Time)/24, # concRange = log10(max(Conc)/min(Conc))) %>% # ungroup() # # #Panel A: histogram of day of final timepoint per experiment # sf_2A <- data_range %>% # ggplot(aes(x = maxT)) + # geom_histogram(fill = "grey5", # bins = 40, # color = "grey5") + # scale_x_log10(labels = scales::number, # breaks = c(0.1, 1, 7, 30, 365)) + # annotation_logticks(sides = "b") + # labs(x = "Day of final Timepoint", # y = "Count") + # theme(aspect.ratio = 1, # text = element_text(size = 10), # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank()) # # #panel B: histogram of fold concentration range # sf_2B <- data_range %>% # ggplot(aes(x = concRange)) + # geom_histogram(fill = "grey5", # bins = 40, # color = "grey5") + # scale_y_continuous(expand = c(0, 0), # limits = c(0, 50), # breaks = c(0, 25, 50)) + # labs(x = "log10(Concentration Range)", # y = "Count") + # annotation_logticks(sides = "b") + # theme(aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # axis.ticks = element_blank(), # axis.line = element_blank(), # axis.text = element_text(size = 10), # axis.title = element_text(size = 11)) # # sf_2AB <- plot_grid(sf_2A, sf_2B, # labels = c("A", "B"), # nrow = 1) # # sf_2AB # # #save PDF version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig2_ConcTimeRanges", # ".pdf"), # height = 3.2, # width = 6, # bg = "white", # units = "in") # # #save PNG version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig2_ConcTimeRanges", # ".png"), # height = 3.2, # width = 6, # bg = "white", # units = "in", # dpi = 300) # # rm(sf_2A, sf_2B, sf_2AB) # ## ----------------------------------------------------------------------------- # data_cvt <- get_data(my_pk) # data_counts <- data_cvt %>% # dplyr::filter(Detect) %>% # dplyr::count(Chemical, Species, Reference, Route, Media, Dose, Time, N_Subjects, # name = "Count") # data_counts <- dplyr::left_join(data_counts, data_cvt, # by = c("Chemical", "Species", "Reference", # "Route", "Media", "Dose", "Time", # "N_Subjects")) %>% # dplyr::mutate(data_descr = dplyr::case_when( # (Count > 1 & N_Subjects == 1) ~ "Individual Data, Multiple Observations", # (Count == 1 & N_Subjects == 1) ~ "Individual Data, Single Observation", # (Count == 1 & N_Subjects > 1 & Conc_SD > 0) ~ "Grouped Data w SD", # (N_Subjects > 1 & Conc_SD == 0) ~ "Grouped Data no SD", # .default = NA_character_ # )) # # indiv_data <- subset(data_counts, # subset = (data_descr %in% "Individual Data, Multiple Observations")) # # indiv_data <- indiv_data %>% # dplyr::group_by(Chemical, Species, Reference, Route, Media, Dose, Time) %>% # dplyr::mutate(replicate_mean = mean(Conc, na.rm = TRUE)) %>% # ungroup() # # indiv_data_bygroup <- indiv_data %>% # dplyr::group_by(Chemical, Species) %>% # dplyr::summarise(AFE = 10^mean(log10(Conc/replicate_mean)), # AAFE = 10^mean(abs(log10(Conc/replicate_mean)))) %>% # ungroup() # # indiv_data_bygroup %>% count(AFE < 0.5) # indiv_data_bygroup %>% count(AFE > 2) ## ----------------------------------------------------------------------------- # my_tf <- my_pk %>% twofold_test() ## ----------------------------------------------------------------------------- # pl3A <- my_tf$individual_data %>% # # Plotting # ggplot(aes(x = foldConc)) + # geom_histogram(binwidth = 0.2, # fill = "grey5", # color = "grey5", # linewidth = 0.4) + # coord_cartesian(xlim = c(0, 3.1), # ylim = c(0, 1200)) + # scale_y_continuous(expand = c(0, 0), # breaks = c(0, 250, 500, 750, 1000), # labels = c("0", "2.5", "5", "7.5", "10")) + # expand_limits(y = 0) + # geom_vline(xintercept = c(0.5, 2), # color = "red3", # linetype = "dashed", # linewidth = 1) + # theme_bw() + # labs(x = "Mean-normalized concentration", # y = "Observations (hundreds))") + # theme(text = element_text(size = 14), # panel.border = element_rect(color = "black", linewidth = 1, fill = NA), # panel.background = element_blank(), # panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4), # axis.title = element_text(face = "bold"), # axis.line = element_blank(), # axis.text = element_text(size = 14), # plot.background = element_blank(), # axis.ticks.y = element_blank()) # # pl3A ## ----------------------------------------------------------------------------- # #get time of peak concentration for use in calculated ADME-normalized time # my_nca <- nca(my_pk) %>% # dplyr::filter(param_name %in% "tmax") # # pl3B <- my_tf$individual_data %>% # inner_join(my_nca %>% # dplyr::select(Chemical, Species, # Route, Media, # tmax = param_value) %>% # filter(!is.na(tmax))) %>% # group_by(Chemical, # Species, # Reference, # Route, # Media, # Dose) %>% # mutate( # normTime = ifelse( # Route == "iv", # 1 + Time/max(Time), # ifelse( # Time > tmax, # ((Time - tmax)/max(Time)) + 1, # 0.5 + Time/(2*tmax) # ) # )) %>% # ggplot(aes( # x = normTime, # y = foldConc # )) + # scale_x_continuous(breaks = c(1,2), # labels = c("tmax", "end")) + # geom_point(alpha = 0.1, size = 0.7) + # geom_hline(yintercept = c(0.5, 2), # color = "red3", # linewidth = 0.8, # linetype = "dashed") + # facet_grid(cols = vars(Route), # scales = "free_x") + # labs(x = "ADME-Normalized Time", # y = "Mean-Normalized\nConcentration") + # theme(text = element_text(size = 14), # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # axis.line = element_blank(), # axis.text = element_text(size = 14), # axis.title = element_text(face = "bold"), # panel.spacing = unit(0.125, units = "in"), # plot.background = element_blank(), # legend.position = "none") # # pl3B ## ----------------------------------------------------------------------------- # pl3 <- cowplot::plot_grid(pl3A, # pl3B, # ncol = 2, # rel_widths = c(1,2), # labels = c("A", "B")) # # #save PDF version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig3.pdf"), # pl3, # bg = "white", # width = 8, # height = 4, # units = "in") # # #Save PNG version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig3.png"), # pl3, # bg = "white", # width = 8, # height = 4, # units = "in", # dpi = 300) # # #remove unneeded objects # rm(pl3A, pl3B, pl3) ## ----------------------------------------------------------------------------- # plsupp3 <- my_tf$individual_data %>% # inner_join(my_nca %>% # dplyr::select(Chemical, Species, # Route, Media, # tmax = param_value) %>% # filter(!is.na(tmax))) %>% # group_by(Chemical, # Species, # Reference, # Route, # Media, # Dose) %>% # mutate( # normTime = ifelse( # Route == "iv", # 1 + Time/max(Time), # ifelse( # Time > tmax, # ((Time - tmax)/max(Time)) + 1, # 0.5 + Time/(2*tmax) # ) # )) %>% # ggplot(aes( # x = normTime # )) + # scale_x_continuous(breaks = c(1,2), # labels = c("tmax", "end")) + # geom_histogram() + # facet_grid(cols = vars(Route), # scales = "free_x") + # labs(x = "ADME-Normalized Time", # "# Observations") + # theme(text = element_text(size = 14), # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # #axis.ticks = element_blank(), # axis.line = element_blank(), # #axis.text.x = element_blank(), # axis.text = element_text(size = 14), # axis.title = element_text(face = "bold"), # panel.spacing = unit(0.125, units = "in"), # plot.background = element_blank(), # legend.position = "none") # # #save PDF version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig3", # ".png"), # plsupp3, # bg = "white", # width = 8, # height = 4, # units = "in", # dpi = 300) # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig3", # ".pdf"), # plsupp3, # bg = "white", # width = 8, # height = 4, # units = "in") ## ----Winmodelfn--------------------------------------------------------------- # # Evaluation # # Winning Model Tally # winmodel_comparison <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # current_rds <- readRDS(file = file_str) # # # Wide winning model # suppressMessages({ # winning_model <- get_winning_model(obj = current_rds) # # wide_winning_model <- winning_model %>% # dplyr::group_by(method, model) %>% # dplyr::count() %>% # tidyr::pivot_wider(names_from = model, # values_from = n) # }) # return(wide_winning_model) # } ## ----------------------------------------------------------------------------- # AUC_plot <- function( # this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Evaluation for: ", pk_name)) # current_rds <- readRDS(file = file_str) # # Cmax and AUC_inf RMSEs # #note that eval_tkstats already filters winning models # #use finite_only = FALSE so that we can count the excluded cases # suppressMessages(RMSLE_eval <- eval_tkstats(obj = current_rds, # dose_norm = FALSE, # finite_only = FALSE, # suppress.messages = TRUE) %>% # mutate(Options = pk_name) %>% # dplyr::select( # Options, # Chemical, # Species, # Route, # Media, # Reference, # method, # model, # starts_with("Cmax"), # starts_with("AUC_") # )) # # these_opts <- paste0(dose_indic, # log10_indic, # time_indic, # errmodel_indic) # # #plot AUC infinity for tkstats vs. AUC infinity for NCA # #show identity line and 20x above and below lines # AUC_plot <- RMSLE_eval %>% # dplyr::filter(is.finite(AUC_infinity.nca) & is.finite(AUC_infinity.tkstats)) %>% # ggplot() + # geom_point(aes(x = AUC_infinity.nca, # y = AUC_infinity.tkstats, # color = model)) + # scale_x_log10() + scale_y_log10() + # annotation_logticks() + # geom_abline(aes(intercept = 0, slope =1)) + # geom_abline(aes(intercept = log10(20), slope = 1), # linetype = 2) + # geom_abline(aes(intercept = log10(1/20), slope = 1), # linetype = 2) + # facet_grid(rows = vars(method)) + # ggtitle(paste(these_opts, # "AUC_inf pred vs. AUC_inf NCA")) + # scale_color_manual(values = c("model_1comp" = "#0398FC", # "model_2comp" = "#D68E09", # "model_flat" = "black"), # name = "Winning model") + # coord_equal() # # Cmax_plot <- RMSLE_eval %>% # dplyr::filter(is.finite(Cmax.nca) & is.finite(Cmax.tkstats)) %>% # ggplot() + # geom_point(aes(x = Cmax.nca, # y = Cmax.tkstats, # color = model)) + # scale_x_log10() + scale_y_log10() + # annotation_logticks() + # geom_abline(aes(intercept = 0, slope =1)) + # geom_abline(aes(intercept = log10(20), slope = 1), # linetype = 2) + # geom_abline(aes(intercept = log10(1/20), slope = 1), # linetype = 2) + # facet_grid(rows = vars(method)) + # ggtitle(paste(these_opts, # "Cmax pred vs. Cmax NCA")) + # scale_color_manual(values = c("model_1comp" = "#0398FC", # "model_2comp" = "#D68E09", # "model_flat" = "black"), # name = "Winning model") + # coord_equal() # # return( # list(AUC_plot = AUC_plot, # Cmax_plot = Cmax_plot # ) # ) # } ## ----------------------------------------------------------------------------- # all_AUC_plots <- fitopts %>% # dplyr::rowwise() %>% # dplyr::summarise(this_plot = list(AUC_plot(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale) # ) # ) # #save the plots # pdf(file = paste0( # Sys.getenv("FIG_DIR"), # today, # "_AUCinf_nca_vs_tk_plots.pdf"), # onefile = TRUE, # height = 5, # width = 7) # # plotlist <- all_AUC_plots %>% # pull(this_plot) # # for (x in seq_along(plotlist)) { # print(plotlist[[x]][["AUC_plot"]]) # } # dev.off() # # pdf(file = paste0( # Sys.getenv("FIG_DIR"), # today, # "_Cmax_nca_vs_tk_plots.pdf"), # onefile = TRUE, # height = 5, # width = 7) # # plotlist <- all_AUC_plots %>% # pull(this_plot) # # for (x in seq_along(plotlist)) { # print(plotlist[[x]][["Cmax_plot"]]) # } # dev.off() # # rm(plotlist, all_AUC_plots) ## ----RMSLE_Cmax_AUC_fun------------------------------------------------------- # # # RMSLE for Cmax and AUC # rmsles_Cmax_AUC <- function( # this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Evaluation for: ", pk_name)) # current_rds <- readRDS(file = file_str) # # Cmax and AUC_inf RMSEs # #note that eval_tkstats already filters winning models # #use finite_only = FALSE so that we can count the excluded cases # suppressMessages(RMSLE_eval <- eval_tkstats(obj = current_rds, # dose_norm = FALSE, # finite_only = FALSE, # suppress.messages = TRUE) %>% # mutate(Options = pk_name) %>% # dplyr::select( # Options, # Chemical, # Species, # Route, # Media, # Reference, # method, # model, # starts_with("Cmax"), # starts_with("AUC_") # )) # # #Couunt bad observations: # #zero, NA, infinite # RMSLE_badobs <- RMSLE_eval %>% # group_by(Options, method) %>% # summarise(total_expts = n(), # count_AUCinf_NCA_0 = sum(AUC_infinity.nca %in% 0), # count_AUCinf_pred_0 = sum(AUC_infinity.tkstats %in% 0), # count_AUCinf_NCA_isNA = sum(is.na(AUC_infinity.nca)), # count_AUCinf_pred_isNA = sum(is.na(AUC_infinity.tkstats)), # count_AUCinf_NCA_isInf = sum(is.infinite(AUC_infinity.nca)), # count_AUCinf_pred_isInf = sum(is.infinite(AUC_infinity.tkstats)), # count_Cmax_NCA_0 = sum(Cmax.nca %in% 0), # count_Cmax_pred_0 = sum(Cmax.tkstats %in% 0), # count_Cmax_NCA_isNA = sum(is.na(Cmax.nca)), # count_Cmax_pred_isNA = sum(is.na(Cmax.tkstats)), # count_Cmax_NCA_isInf = sum(is.infinite(Cmax.nca)), # count_Cmax_pred_isInf = sum(is.infinite(Cmax.tkstats)) # ) %>% # ungroup() # # #count cases where AUC or Cmax was negative, # #or where AUC_inf was greater than 1e7 # RMSLE_neg <- RMSLE_eval %>% # dplyr::filter(is.finite(AUC_infinity.nca), # is.finite(AUC_infinity.tkstats), # is.finite(Cmax.nca), # is.finite(Cmax.tkstats)) %>% # group_by(Options, method) %>% # summarise(count_AUCinf_NCA_lt0 = sum(AUC_infinity.nca < 0), # count_AUCinf_pred_lt0 = sum(AUC_infinity.tkstats < 0), # count_Cmax_NCA_lt0 = sum(AUC_infinity.nca < 0), # count_Cmax_pred_lt0 = sum(AUC_infinity.tkstats < 0), # count_AUCinf_NCA_gt_1e7 = sum(AUC_infinity.nca > 1e7), # count_AUCinf_pred_gt_1e7 = sum(AUC_infinity.tkstats > 1e7), # ) # # RMSLE_badobs <- RMSLE_badobs %>% # left_join(RMSLE_neg, by = c("Options", # "method")) %>% # rowwise() %>% # dplyr::mutate(total_excl = sum( # c_across( # starts_with("count") # ) # ) # ) # # #compute RMSLEs after excluding the bad observations # RMSLE_eval <- RMSLE_eval %>% # dplyr::filter(is.finite(AUC_infinity.nca), # is.finite(AUC_infinity.tkstats)) %>% # dplyr::filter( # AUC_infinity.nca > 0, # AUC_infinity.tkstats > 0, # AUC_infinity.nca < 1e7, # AUC_infinity.tkstats < 1e7) %>% # rowwise() %>% # mutate( # SLE_Cmax = (log10(Cmax.tkstats) - log10(Cmax.nca)) ^ 2, # SLE_AUC_inf = (log10(AUC_infinity.tkstats) - log10(AUC_infinity.nca)) ^ 2 # ) %>% # dplyr::filter(!is.na(SLE_Cmax), !is.na(SLE_AUC_inf)) %>% # group_by(Options, method) %>% # summarise( # n_expts = n(), # RMSLE_Cmax = sqrt(mean(SLE_Cmax, na.rm = FALSE)) %>% signif(digits = 4), # RMSLE_AUC = sqrt(mean(SLE_AUC_inf, na.rm = FALSE)) %>% signif(digits = 4) # ) %>% # ungroup() # # #merge in counts of bad obs # # RMSLE_eval <- RMSLE_eval %>% # dplyr::left_join(RMSLE_badobs, # by = c("Options", "method")) # # return(RMSLE_eval) # } ## ----gof_tbl_fun-------------------------------------------------------------- # # Goodness-of-fit table with R-squared, RMSLE and AIC # get_gof <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # current_rds <- readRDS(file = file_str) # winning_model <- suppressMessages(get_winning_model(obj = current_rds)) # # suppressMessages({ # this_rsq <- rsq.pk(current_rds, # use_scale_conc = FALSE, # rsq_group = ggplot2::vars(Chemical, Species), # sub_pLOQ = TRUE) %>% # semi_join(winning_model) # # this_AIC <- AIC(current_rds) %>% # semi_join(winning_model) # # this_rmsle <- rmse.pk(current_rds, # rmse_group = vars(Chemical, Species), # use_scale_conc = list(dose_norm = FALSE, # log10_trans = TRUE), # sub_pLOQ = TRUE) %>% # semi_join(winning_model) # # }) # # # Since they are all joined by winmodel # # all three must have same NROW # message( # paste0("For ", pk_name, "... outputs below must have same number of rows") # ) # # message(paste0("rsq: ", NROW(this_rsq))) # message(paste0("aic: ", NROW(this_AIC))) # message(paste0("rmsle: ", NROW(this_rmsle))) # # gof_df <- this_rsq %>% # inner_join(this_AIC, by = join_by(Chemical, Species, method, model)) %>% # inner_join(this_rmsle, by = join_by(Chemical, Species, method, model)) %>% # mutate(Options = pk_name, .before = Chemical) # # return(gof_df) # } ## ----allpreds_fun------------------------------------------------------------- # # # All predictions # get_all_preds <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # # current_rds <- readRDS(file = file_str) # # # Wide winning model # suppressMessages({ # winning_model <- get_winning_model(obj = current_rds) %>% # select(-c(near_flat, preds_below_loq)) # this_preds <- predict(current_rds, # use_scale_conc = FALSE) %>% # semi_join(winning_model) %>% # mutate(Options = pk_name, .before = Chemical) # }) # return(this_preds) # } ## ----factor2fun--------------------------------------------------------------- # # Factor of two model error # tf_tests <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # current_rds <- readRDS(file = file_str) # print(paste0("Now analyzing: ", pk_name)) # # out <- twofold_test(current_rds, # sub_pLOQ = TRUE, # suppress_messages = TRUE) # # out <- out$model_error_summary %>% # mutate(Options = pk_name) # # return(out) # } # ## ----do_winmodel-------------------------------------------------------------- # # #winning model # system.time(wm_comp <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(winmodel_comp = list( # winmodel_comparison(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale))) %>% # tidyr::unnest(winmodel_comp)) ## ----do_RMSLE_Cmax_AUC-------------------------------------------------------- # # #RMSLE for Cmax and AUC # system.time(cmax_auc_rmsle_tbl <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(nca_comp = list( # rmsles_Cmax_AUC(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale))) %>% # tidyr::unnest(nca_comp)) ## ----do_gof_tbl--------------------------------------------------------------- # # #GOF table # system.time(gof_tbl <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(gof_win = list( # get_gof(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale))) %>% # tidyr::unnest(gof_win)) ## ----do_factor_two------------------------------------------------------------ # # #factor of two # system.time(all_tf_tests <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(TF = list( # tf_tests(error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale))) %>% # tidyr::unnest(TF)) ## ----do_all_preds------------------------------------------------------------- # #all predictions # system.time(all_preds <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(these_preds = list( # get_all_preds(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale))) %>% # tidyr::unnest(these_preds)) ## ----write_preds-------------------------------------------------------------- # # #Save preds to a file as well # saveRDS(all_preds, paste0(Sys.getenv("FIG_DIR"), # today, # "_all_preds.Rds")) # # rm(all_preds) ## ----------------------------------------------------------------------------- # (win_mdl_count_by_opts <- gof_tbl %>% # group_by(Options, method, model) %>% # count() %>% # pivot_wider(names_from = model, # values_from = n) ) ## ----------------------------------------------------------------------------- # gof_tbl %>% # group_by(Options, method) %>% # count(RMSLE < 1) %>% # filter(`RMSLE < 1`) %>% # arrange(desc(n)) %>% # print(n = 8) # # bobyqa-optimized options 010h, 110h, 110p ## ----------------------------------------------------------------------------- # gof_tbl %>% # group_by(Options, method) %>% # count(Rsq > 0.75) %>% # filter(`Rsq > 0.75`) %>% # arrange(desc(n)) %>% # print(n = 8) ## ----------------------------------------------------------------------------- # rmsle_rsq_rank <- gof_tbl %>% # group_by(Options, method) %>% # count(RMSLE < 1 & Rsq > 0.75) %>% # filter(`RMSLE < 1 & Rsq > 0.75`) %>% # arrange(desc(n)) %>% # dplyr::rename(`# Chemical-Species data groups` = n) # # rmsle_rsq_rank %>% print(n=8) ## ----------------------------------------------------------------------------- # cmax_auc_rmsle_rank <- cmax_auc_rmsle_tbl %>% # arrange(RMSLE_Cmax, RMSLE_AUC) %>% # select(Options, method, RMSLE_Cmax, RMSLE_AUC) # # cmax_auc_rmsle_rank %>% # print(n = 8) ## ----write_eval_results------------------------------------------------------- # # # Write the results to file # writexl::write_xlsx(x = list( # Fit_Counts = wm_comp, # AUC_and_Cmax_RMSLE_summary = cmax_auc_rmsle_tbl, # Prediction_Evaluations = gof_tbl, # Twofold_Tests = all_tf_tests, # RMSLE_Rsq_rank = rmsle_rsq_rank, # Cmax_AUC_RMSLE_rank = cmax_auc_rmsle_rank), # path = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table2_eval_results", # ".xlsx")) # # rm(wm_comp, cmax_auc_rmsle_tbl, gof_tbl, all_tf_tests) # # gc() ## ----plot_rsq_rmsle----------------------------------------------------------- # gof_tbl <- readxl::read_excel(path = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table2_eval_results", # ".xlsx"), # sheet = "Prediction_Evaluations") # # gof_tbl <- gof_tbl %>% # dplyr::mutate(opts = substr(Options, 1, 3), # model_text = dplyr::case_when(model %in% "model_1comp" ~ "1comp", # model %in% "model_2comp" ~ "2comp", # model %in% "model_flat" ~ "flat", # .default = NA_character_)) # # fig_rsq_rmsle <- ggplot(gof_tbl) + # geom_point(aes(x = RMSLE, # y = Rsq, # color = model_text)) + # facet_grid(rows = vars(opts), # cols = vars(interaction(error_model, method) # ) # ) + # scale_color_manual(values = c("1comp" = "#0398FC", # "2comp" = "#D68E09", # "flat" = "black"), # name = "Winning model") + # theme_bw() # # fig_rsq_rmsle # ## ----plot_rsq_rmsle_zoom------------------------------------------------------ # fig_rsq_rmsle_zoom <- ggplot(gof_tbl) + # geom_point(aes(x = RMSLE, # y = Rsq, # color = model_text)) + # facet_grid(rows = vars(opts), # cols = vars(interaction(error_model, method) # ) # ) + # scale_color_manual(values = c("1comp" = "#0398FC", # "2comp" = "#D68E09", # "flat" = "black"), # name = "Winning model") + # coord_cartesian(xlim = c(0,1)) + # theme_bw() # # fig_rsq_rmsle_zoom ## ----------------------------------------------------------------------------- # #PDF versions # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig4", # ".pdf"), # fig_rsq_rmsle, # width = 11, # height = 8.5) # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig5", # ".pdf"), # fig_rsq_rmsle_zoom, # width = 11, # height = 8.5) # # #PNG versions # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig4", # ".png"), # fig_rsq_rmsle, # width = 11, # height = 8.5) # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig5", # ".png"), # fig_rsq_rmsle_zoom, # width = 11, # height = 8.5) # # rm(fig_rsq_rmsle_zoom, fig_rsq_rmsle, gof_tbl) ## ----plot_cmax_auc_rmsle------------------------------------------------------ # #read in table # cmax_auc_rmsle_tbl <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table2_eval_results.xlsx"), # sheet = "AUC_and_Cmax_RMSLE_summary") # #make plot # cmax_auc_rmsle_tbl <- cmax_auc_rmsle_tbl %>% # dplyr::mutate(opts = substr(Options, 1, 3)) # # cmax_auc_rmsle_fig <- ggplot(cmax_auc_rmsle_tbl) + # geom_point(aes(x = RMSLE_AUC, # y = RMSLE_Cmax, # color = opts), # size = 4) + # facet_grid(rows = vars(error_model), # cols = vars(method)) + # scale_color_brewer(palette = "Paired", # name = "Fit options") + # theme_bw() # # cmax_auc_rmsle_fig # # #save plot # #PDF version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig6", # ".pdf"), # cmax_auc_rmsle_fig, # width = 7, # height = 5) # # #PNG version # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig6", # ".png"), # cmax_auc_rmsle_fig, # width = 7, # height = 5, # units = "in", # dpi = 300) # # rm(cmax_auc_rmsle_fig, cmax_auc_rmsle_tbl) ## ----------------------------------------------------------------------------- # all_preds <- readRDS(paste0(Sys.getenv("FIG_DIR"), # today, # "_all_preds.Rds")) # # #select winning fit opts: 110p bobyqa # # all_preds_prep <- all_preds %>% # filter(Options %in% c("110p"), # method %in% "bobyqa") %>% # mutate(Conc_est_sub = dplyr::if_else(Conc_est < pLOQ, # pLOQ, # Conc_est), # Conc_est_belowLOQ = dplyr::if_else(Conc_est < pLOQ, # TRUE, # FALSE), # ID = as.factor(paste(Chemical, Chemical_Name, Species)), # .keep = "unused") %>% # select(ID, Options, # dose_norm, log10_trans, time_scale, # Route, Media, Dose, # Conc, Conc_est_sub, Conc_est_belowLOQ, # Time, Reference) # # split_preds <- split(all_preds_prep, all_preds_prep$ID) # # sp_plots <- lapply(names(split_preds), function(i) { # ggplot(data = split_preds[[i]], # mapping = aes(x = Conc, # y = Conc_est_sub, # color = as.factor(Dose), # shape = Reference)) + # xlab("Observed conc, mg/L") + # ylab("Predicted conc, mg/L") + # geom_point(size = 2) + # geom_abline(slope = 1) + # facet_grid(Route ~ Media, # scales = "free_y") + # scale_y_log10() + # scale_x_log10() + # annotation_logticks(sides = "bl") + # scale_color_viridis_d(name = "Dose, mg/kg") + # theme_bw() + # labs(title = i) + # theme(legend.position = "bottom", # plot.title = element_text(hjust = 0.5, face = "bold")) # } # ) # # # Very BIG FILE # pdf(file = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_log10_predvsobs_plots_110pbobyqa_all.pdf"), # onefile = TRUE, height = 8, width = 8) # for (x in seq_along(sp_plots)) { # print(sp_plots[[x]]) # } # dev.off() # # rm(sp_plots, split_preds, all_preds_prep, all_preds) ## ----------------------------------------------------------------------------- # my_pk <- readRDS(paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_110p.Rds")) ## ----data_aggregation--------------------------------------------------------- # # after fitting pk object for all cvt objects or reading the fitted pk object in `setup` # winmodel <- get_winning_model(my_pk, method = "bobyqa") # my_preds <- semi_join(predict(my_pk, use_scale_conc = FALSE, method = "bobyqa"), winmodel) # my_residuals <- residuals(my_pk, use_scale_conc = FALSE, method = "bobyqa") %>% # semi_join(winmodel) # my_tkstats <- eval_tkstats(my_pk, method = "bobyqa") %>% semi_join(winmodel) # my_nca <- get_nca(my_pk) # all_my_data <- get_data(my_pk) # # #merge together long form coefs and long form coef SDs # # my_tf <- twofold_test(my_pk, method = "bobyqa") # # NROW(all_my_data) > NROW(my_preds) # THis must be true... or else try distinct(my_preds) # # my_coef_sd <- coef_sd(my_pk, method = "bobyqa") %>% #this will take a few minutes to run # semi_join(winmodel) #keep only coefs & SDs for winning model # # # Writing file to xlsx # writexl::write_xlsx(x = list(predictions = my_preds, # tkstats = my_tkstats, # coefs_sds = my_coef_sd), # path = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table3", # ".xlsx")) ## ----------------------------------------------------------------------------- # # fe_df <- fold_error(my_pk, # sub_pLOQ = TRUE, # method = "bobyqa") %>% semi_join(winmodel) # # r2_df <- rsq(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE, # method = "bobyqa") %>% # semi_join(winmodel) # # myAIC <- AIC(my_pk, method = "bobyqa") %>% # semi_join(winmodel) # # myRMSLE <- rmse(my_pk, # use_scale_conc = list(dose_norm = FALSE, # log10_trans = TRUE), # sub_pLOQ = TRUE, # method = "bobyqa") %>% # semi_join(winmodel) # # myRMSE <- rmse(my_pk, # rmse_group = ggplot2::vars(Chemical, # Species, # Route, # Media, # Dose), # sub_pLOQ = TRUE, # method = "bobyqa") %>% # semi_join(winmodel) ## ----------------------------------------------------------------------------- # my_tf$model_error_all %>% # filter(model %in% c("model_1comp", "model_2comp")) %>% # mutate(Route.model = factor(paste(Route, model), # levels = c("iv model_1comp", # "iv model_2comp", # "oral model_1comp", # "oral model_2comp"))) %>% # ggplot(aes(x = Fold_Error, # fill = Route.model)) + # geom_histogram(color = NA, # position = position_stack(), # bins = 50) + # scale_x_log10(labels = scales::label_math(format = log10)) + # annotation_logticks(sides = "b") + # scale_fill_brewer(palette = "Paired", # name = "Route/winning model") + # expand_limits(y = 0) + # geom_vline(xintercept = c(0.5, 2), color = "black", linetype = "dashed") + # labs(x = "Predicted/Observed", # y = "Number of Observations") + # theme_classic() + # theme(aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA), # panel.grid.major = element_line(color = "grey95", linewidth = 0.4)) + # coord_cartesian(xlim = c(10^(-1.5), 100)) # # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig7_PredictedObserved_compartmentModels.png"), # height = 5, width = 7) # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig7_PredictedObserved_compartmentModels.pdf"), # height = 5, width = 7) # ## ----fig4-modelPerform-v-dataVar---------------------------------------------- # my_tf$indiv_data_test_fold_errors %>% # select(model, starts_with("percent_")) %>% # glimpse() # # pl_4A <- my_tf$indiv_data_fold_errors %>% # # For Plotting # ggplot(aes( # y = Fold_Error, # x = foldConc # )) + # geom_bin2d(bins = 40, color = NA) + # geom_hline(yintercept = c(0.5, 2), linetype = "dashed") + # geom_vline(xintercept = c(0.5, 2), linetype = "dashed") + # scale_x_log10(labels = scales::label_math(format = log10), # limits = c(0.001, 20)) + # scale_y_log10(labels = scales::label_math(format = log10), # limits = c(0.0001, 10000)) + # annotation_logticks(sides = "bl") + # scale_fill_viridis_c(option = "cividis", # limits = c(1, 100), # oob = scales::oob_squish) + # labs(y = "Model Error", # x = "Data Variability", # fill = "Count") + # theme(aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1.5), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # axis.ticks = element_blank(), # axis.line = element_blank(), # axis.text = element_text(size = 14), # axis.title = element_text(size = 14)) # # pl_4A # ggsave(paste0(Sys.getenv("FIG_DIR"), # today, # "_Fig4A_ModelPerformance_vDataVariability.png"), # height = 5, # width = 7, # device = "png", # dpi = 300, # units = "in") ## ----------------------------------------------------------------------------- # # my_table <- my_tf$indiv_data_test_fold_errors %>% # ungroup() %>% # filter(model %in% "All") %>% # select(starts_with("percent_")) %>% t() %>% # round(digits = 2) %>% # as.data.frame() %>% # rownames_to_column(var = "percentages") %>% # mutate(percentages = case_when( # percentages == "percent_both_within" ~ "Both within a factor of 2", # percentages == "percent_both_outside" ~ "Both outside a factor of 2", # percentages == "percent_model_outside" ~ "Model too variable", # percentages == "percent_data_outside" ~ "Data too variable" # )) # # names(my_table) <- c(" ", "Overall (%)") # # flextable(my_table) %>% # border_inner() %>% # fontsize(part = "all", size = 11) %>% # bold(part = "all", j = 2) %>% # autofit() # # table_plot <- gen_grob(flextable(my_table) %>% # border_inner() %>% # fontsize(part = "all", size = 10) %>% # bold(part = "all", j = 2) %>% # autofit(), # autowidths = TRUE, # fit = "width") # # # dual_plot <- plot_grid(pl_4A, # table_plot, # ncol = 1, # align = "hv", # rel_heights = c(1, 0.5), # rel_widths = c(1, 0.5) # ) # # dual_plot # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig4.png"), # plot = dual_plot, # height = 4.5, # width = 6, # device = "png", # bg = "white", # dpi = 300, # units = "in") # ## ----fig5-goodness-of-fit----------------------------------------------------- # combined_gof_df <- my_tf$model_error_all %>% # group_by(Chemical, Species, model, method) %>% # summarize(within_2fold = sum(between(Fold_Error, 0.5, 2))/n()) %>% # left_join(r2_df) %>% # left_join(myAIC) %>% # left_join(myRMSLE) %>% # semi_join(winmodel) # # # mypl <- ggplot(data = combined_gof_df, # aes( # x = within_2fold, # y = Rsq, # color = model, # )) + # geom_point() + # geom_abline(slope = 1, color = "black", linetype = "longdash") + # scale_color_manual(values = c("#0398FC", "#D68E09", "black")) + # #coord_cartesian(xlim = c(0,1), ylim = c(0,1)) + # labs(x = "Fraction of predictions within 2-fold", # y = "R-squared value") + # theme(#aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank(), # legend.position = "none", # legend.title = element_blank(), # legend.key = element_blank() # ) # # panelA_plot <- ggExtra::ggMarginal(mypl, # groupFill = TRUE, # type = "histogram", # xparams = list(binwidth = 0.05), # yparams = list(binwidth = 0.05)) # panelA_plot # # panelB_plot <- ggplot(data = myRMSE, # aes( # x = RMSE, # fill = model # )) + # scale_x_log10() + # annotation_logticks(sides = "b") + # geom_histogram(bins = 50, # position = position_stack(), # color = NA) + # labs(y = "Count") + # scale_fill_manual(values = c("#0398FC", "#D68E09", "grey10")) + # scale_color_manual(values = c("#0398FC", "#D68E09", "grey10"), guide = "none") + # theme(text = element_text(size = 10), # #aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank(), # strip.background = element_blank(), # panel.spacing.y = unit(0.125, units = "in"), # legend.title = element_blank(), # legend.position = "bottom") # panelB_plot # # plAB <- patchwork::wrap_elements(panelA_plot) + panelB_plot + # plot_annotation(tag_levels = "A") + plot_layout(guides = 'collect', widths = c(1,0.9)) & theme(legend.position = "bottom") & guides(color = "none") # # #save PNG version # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig5.png"), # plAB, # height = 4, # width = 6.5, # bg = "white", # units = "in") # # #save PDF version # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig5.pdf"), # plAB, # height = 4, # width = 6.5, # bg = "white") ## ----goodness-of-fit_exampleFits---------------------------------------------- # pl <- plot(my_pk, # method = "bobyqa", # best_fit = TRUE, # use_scale_conc = list(dose_norm = TRUE, # log10_trans = FALSE), # drop_nonDetect = FALSE) # # cvt %>% filter(curation_set_tag == "CVT_Showa") %>% # pull(analyte_dtxsid) -> Showa_chems # # # my_rsq <- rsq(my_pk, method = "bobyqa") # my_rsq %>% semi_join(winmodel) %>% filter(Chemical %in% c("DTXSID2020139", # "DTXSID4023917", # "DTXSID3061635", # "DTXSID8030760")) # # #High rsq: DTXSID2020139 and DTXSID4023917 # #Low rsq: DTXSID3061635 and DTXSID8030760 # # fe_df %>% filter(Chemical %in% c("DTXSID2020139", # "DTXSID4023917", # "DTXSID3061635", # "DTXSID8030760"), Species %in% "rat") %>% group_by(Chemical, Species) %>% summarise(frac_2fold = sum(Fold_Error < 2 & Fold_Error > 0.5)/n()) # # # # A tibble: 4 × 3 # # # Groups: Chemical [4] # # Chemical Species frac_2fold # # <chr> <chr> <dbl> # # 1 DTXSID2020139 rat 0.271 # # 2 DTXSID3061635 rat 0.125 # # 3 DTXSID4023917 rat 0.953 # # 4 DTXSID8030760 rat 0.778 # # #High frac 2fold: DTXSID4023917 and DTXSID8030760 # #Low frac 2fold: DTXSID2020139 and DTXSID3061635 # # pl_sub <- pl %>% # filter(Chemical %in% c("DTXSID2020139", # "DTXSID4023917", # "DTXSID3061635", # "DTXSID8030760"), # Species %in% "rat") # # ex_fits <- pl_sub %>% # pull(final_plot) # ex_fits <- set_names(ex_fits, # pl_sub$Chemical) # # # # cowplot::plot_grid(plotlist = list( # #Low frac 2fold and high Rsq # ex_fits[["DTXSID2020139"]] + # scale_color_manual(values = c("#a6bddb", # "#74a9cf", # "#41bbc4", # "#2b8cbe", # "#045a8d")) + # theme(legend.position = "none") + # xlim(0,30) + # ggtitle("A: DTXSID2020139 rat"), # #high frac 2fold and high Rsq # ex_fits[["DTXSID4023917"]] + # scale_color_manual(values = c("black", # "grey40")) + # theme(legend.position = "none") + # ggtitle("B: DTXSID4023917 rat"), # # #low frac 2fold and low rsq # ex_fits[["DTXSID3061635"]] + # scale_color_manual(values = c("#006837")) + # theme(legend.position = "none") + # ggtitle("C: DTXSID3061635 rat"), # # #high frac2fold and low rsq # ex_fits[["DTXSID8030760"]] + # scale_color_manual(values = "magenta3") + # theme(legend.position = "none") + # ggtitle("D: DTXSID8030760 rat") # ), # scale = 0.85) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig7.png"), # height = 12, # width = 12) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig7.pdf"), # height = 12, # width = 12) # ## ----------------------------------------------------------------------------- # flat_chems <- winmodel %>% # filter(model %in% "model_flat") %>% # distinct(Chemical, Species) # print(flat_chems) # # pl <- plot(my_pk, # method = "bobyqa", # best_fit = FALSE, # use_scale_conc = list(dose_norm = TRUE, # log10_trans = FALSE), # drop_nonDetect = FALSE) # # flat_fits <- pl %>% # inner_join(flat_chems) %>% # pull(final_plot) # # cowplot::plot_grid(plotlist = flat_fits) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig8.pdf"), # height = 8, # width = 16, # units = "in") # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig8.png"), # bg = "white", # height =8, # width = 16, # dpi = 300, # units = "in") ## ----Lombardo_Comparison------------------------------------------------------ # cvt_DTXSIDs <- unique(my_pk$data$Chemical) # # First things first, let's load the data from Lombardo et al. # lombardo <- readxl::read_xlsx( # paste0(Sys.getenv("OD_DIR"), # "Lombardo2018-Supplemental_82966_revised_corrected.xlsx"), # skip = 8) # # #use ctxR to search the Dashboard for DTXSIDs corresponding to these CASRNs # lombardo_dtxsid <- ctxR::chemical_equal_batch( # word_list = unique(lombardo$`CAS #`) # ) # # #merge in the DTXSIDs # lombardo <- lombardo %>% # dplyr::left_join(lombardo_dtxsid$valid %>% # dplyr::select(searchValue, dtxsid, preferredName), # by = c("CAS #" = "searchValue")) # # #any left unidentified: search by name # lombardo_missing <- lombardo %>% # dplyr:: filter(is.na(dtxsid)) # # missing_names <- lombardo_missing %>% # dplyr::pull(Name) # # lombardo_dtxsid_name <- ctxR::chemical_equal_batch( # word_list = unique(missing_names) # ) # # lombardo_missing <- lombardo_missing %>% # dplyr::left_join(lombardo_dtxsid_name$valid %>% # dplyr::select(searchValue, dtxsid, preferredName), # by = c("Name" = "searchValue") ) # # lombardo <- dplyr::bind_rows(lombardo %>% filter(!is.na(dtxsid)), # lombardo_missing) # # lombard_abbr <- lombardo %>% # dplyr::select(dtxsid, # preferredName, # Vss = `human VDss (L/kg)`, # CLtot = `human CL (mL/min/kg)`, # halflife = `terminal t1/2 (h)` ) %>% # dplyr::mutate(Species = 'human', # source = "Lombardo et al.", # CLtot = CLtot*60/1000 # Lombardo reports this as mL/min/kg, need L/h/kg # ) %>% # dplyr::arrange(halflife) # # my_pk_vals <- my_tkstats %>% # dplyr::select(dtxsid = Chemical, # Species, # model, # halflife = halflife.tkstats, # Vss = Vss.tkstats, # CLtot = CLtot.tkstats) %>% # dplyr::distinct() %>% # dplyr::mutate(source = "invivoPKfit") %>% # dplyr::filter(model != "model_flat", # !is.na(halflife)) # # lombardo_inner <- my_pk_vals %>% # dplyr::inner_join(lombard_abbr, by = "dtxsid") # #79 chemicals in common # # lombardo_comparison <- dplyr::bind_rows(lombard_abbr %>% # dplyr::filter( # dtxsid %in% # lombardo_inner$dtxsid # ), # my_pk_vals %>% # dplyr::filter( # dtxsid %in% # lombardo_inner$dtxsid # ) # ) # # #merge in chemical names # # #get preferred names by DTXSID # all_details <- ctxR::get_chemical_details_batch( # unique(lombardo_comparison$dtxsid) # ) # # lombardo_comparison <- lombardo_comparison %>% # select(-preferredName) %>% # left_join(all_details %>% select(dtxsid, preferredName)) # # #create a factor variable for chemical names that sorts chemicals from highest to lowest Lombardo halflife # # chemnames_levels <- lombardo_comparison %>% # dplyr::filter(source %in% "Lombardo et al.") %>% # dplyr::arrange(halflife) %>% # dplyr::pull(preferredName) # # # lombardo_comparison <- lombardo_comparison %>% # dplyr::mutate(preferredName = factor(preferredName, # levels = chemnames_levels)) # # #get invivoPKfit species for plot-splitting purposes # ivpk_species <- lombardo_comparison %>% # dplyr::filter(source %in% "invivoPKfit") %>% # dplyr::distinct(dtxsid, Species) %>% # dplyr::rename(invivopkfit_species = Species) # # lombardo_comparison <- lombardo_comparison %>% # dplyr::left_join(ivpk_species, # by = "dtxsid") # # #reshape longer and plot # lombardo_comparison %>% # tidyr::pivot_longer(cols = c("Vss", "CLtot", "halflife")) %>% # dplyr::mutate(name = factor(name, # levels = c("halflife", "Vss", "CLtot") # ) # ) %>% # ggplot(mapping = aes( # x = value, # y = preferredName # )) + # geom_point(aes(shape = source, # color = source), # #position = position_dodge(0.7), # size = 5/.pt, stroke = 2.5/.pt) + # facet_grid(cols = vars(name), # rows = vars(invivopkfit_species), # scales = "free", # space = "free_y", # drop = TRUE) + # scale_x_log10( labels = scales::label_math(format = log10)) + # #annotation_logticks(sides = "b") + # scale_shape_manual(values = c("Lombardo et al." = 3, # "invivoPKfit" = 22)) + # scale_color_manual(values = c("#d95f02", "#1b9e77")) + # guides(x = "axis_logticks") + # theme(panel.border = element_rect(color = "black", # fill = NA, # linewidth = 1.5), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # panel.spacing.y = unit(0, "line"), # strip.background = element_rect(fill = "white"), # strip.text.x = element_text(face = "bold", size = 12), # strip.text.y = element_blank(), # text = element_text(size = 10), # axis.ticks = element_blank(), # axis.line = element_blank(), # axis.text.y = element_text(face = "bold", size = 6), # axis.text.x = element_text(size = 10), # axis.title.y = element_blank(), # axis.title.x = element_blank(), # legend.position = "bottom", # legend.key = element_blank(), # legend.title = element_text(face = "bold"), # legend.text = element_text()) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig6.png"), # height = 11, # width = 10, # units = "in", # device = "png", # dpi = 300) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig6.pdf"), # height = 11, # width = 10, # units = "in") # ## ----musther-read------------------------------------------------------------- # species_cols <- c(paste("Mouse", # c("Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "F% Range"), # sep = "."), # # paste("Rat", # c("Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "WSD", # "F% Range"), # sep = "."), # # paste("Dog", # c("Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "WSD", # "F% Range"), # sep = "."), # # paste("Non-Human Primate", # c("Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "WSD", # "F% Range"), # sep = "."), # # paste("Human", # c("Dosing Formulation", # "Sex", # "F% Mean", # "WSD", # "F% Range"), # sep = ".") # ) # # # # musther2014 <- readxl::read_excel( # path = paste0(Sys.getenv("OD_DIR"), # "SupTableMusther2014.xlsx"), # sheet = "Sheet1", # skip = 1, # col_names = c( "Compound", # "CAS", # "MW", "Ion Class", # species_cols, # "References.mouse", # "References.rat", # "References.dog", # "References.NHP", # "References.human" # ), # col_types = "text" # ) # # #pivot longer # musther2014_long <- musther2014 %>% # select(c(Compound, # CAS, # contains("F% Mean"))) %>% # pivot_longer(cols = !c(Compound, CAS), # names_to = c("Species", "stat"), # names_sep = "\\.") %>% # mutate(value = gsub(x=value, # pattern = "<", # replacement = ""), # value_num = as.numeric(value)) # # ## ----musther-ccd-------------------------------------------------------------- # musther2014_dtxsid <- ctxR::chemical_equal_batch(word_list = unique(musther2014_long$CAS)) # # musther2014_long <- musther2014_long %>% # left_join(musther2014_dtxsid$valid %>% select(searchValue, dtxsid, preferredName), # by = c("CAS" = "searchValue")) ## ----musther-getfgutabs------------------------------------------------------- # fgutabs <- coef(my_pk, method = "bobyqa") %>% # distinct() %>% # semi_join(winmodel) %>% #keep only winning models # rowwise() %>% # mutate(Fgutabs = coefs_vector["Fgutabs"]) %>% # filter(!is.na(Fgutabs)) ## ----------------------------------------------------------------------------- # length(intersect(musther2014_long$dtxsid, # get_data(my_pk)$Chemical)) ## ----musther-join------------------------------------------------------------- # fgutabs_pk_musther <- fgutabs %>% inner_join(musther2014_long, # by = c("Chemical" = "dtxsid")) ## ----musther-plot------------------------------------------------------------- # #capitalize invivopkfit species to harmonize with Musther # fgutabs_pk_musther <- fgutabs_pk_musther %>% # dplyr::mutate(Species.x = stringr::str_to_sentence(Species.x)) # # ggplot(fgutabs_pk_musther) + # geom_point(aes(y = preferredName, # x = Fgutabs, # shape = Species.x, # color = "invivopkfit"), # size =4, # stroke = 2) + # geom_point(aes(y = preferredName, # x = value_num/100, # shape = Species.y, # color = "Musther et al. 2014"), # size = 4, # stroke = 2) + # scale_shape_manual(values = c("Rat" = 21, # "Mouse" = 22, # "Dog" = 23, # "Non-Human Primate" = 24, # "Human" = 3), # breaks = c("Rat", "Mouse", "Dog", "Non-Human Primate", "Human")) + # # theme_bw() + # theme(axis.title.y = element_blank(), # legend.title = element_blank(), # axis.title.x = element_text(size = 12), # axis.text = element_text(size = 12), # legend.text = element_text(size = 12) # ) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig9.pdf"), # height = 5, # width = 8) # # ggsave(filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig9.png"), # height = 5, # width = 8, # device = "png", dpi = 300) ## ----do_fit-benchmarking------------------------------------------------------ # # Randomize the order of chemical species # chem_spec <- cvt %>% # dplyr::distinct(analyte_dtxsid, species) %>% # dplyr::slice_sample(prop = 1) # # test_group_size <- seq(25, 105, by = 10) # fit_options <- expand.grid(error_model = c("pooled", # "hierarchical"), # dose_norm = FALSE, # log10_trans = TRUE, # time_scale = c( # TRUE, # FALSE), # stringsAsFactors = FALSE) # # fit_options <- tidyr::expand_grid(fit_options, test_group_size) # # fit_options <- fit_options %>% rowwise() %>% # mutate(this_data = list({ # tmp <- chem_spec %>% slice_head(n = test_group_size) %>% # left_join(cvt, by = join_by(analyte_dtxsid, species)) %>% # pk() + # scale_time(new_units = ifelse(!time_scale, # "identity", # "auto")) + # scale_conc(dose_norm = dose_norm, log10_trans = log10_trans) + # settings_preprocess(suppress.messages = TRUE) # do_prefit(tmp) # } # ), # data_size = chem_spec %>% slice_head(n = test_group_size) %>% # left_join(cvt, by = join_by(analyte_dtxsid, species)) %>% # NROW()) # # # Organization of the benchmarking # # For each fit_option, return the four values of system.time() that we want # df_out <- fit_options %>% # rowwise() %>% # mutate(tim_1core = system.time(do_fit(this_data))[["elapsed"]], # tim_4core = system.time(do_fit(this_data, n_cores = 4))[["elapsed"]], # tim_7core = system.time(do_fit(this_data, n_cores = 7))[["elapsed"]], # tim_12core = system.time(do_fit(this_data, n_cores = 12))[["elapsed"]] # ) # # df_out <- df_out %>% select(-this_data) # gc() # # full_long <- df_out %>% # pivot_longer(cols = c(tim_1core:tim_12core), # names_to = "N_cores", # values_to = "Time_s") %>% # group_by(N_cores, test_group_size, data_size) %>% # summarize(mean_time = mean(Time_s, na.rm = TRUE)/60, # max_time = max(Time_s, na.rm = TRUE)/60, # min_time = min(Time_s, na.rm = TRUE)/60 # ) %>% # mutate(N_cores = as.numeric(str_extract(N_cores, "[:digit:]+"))) # # df_out %>% clipr::write_clip() # ggplot(full_long, # aes(x = test_group_size, # y = mean_time, # color = as.factor(N_cores))) + # geom_point() + # geom_linerange(aes(ymin = min_time, # ymax = max_time)) + # geom_line(aes(group = N_cores)) + # labs(x = "Number of Data Groups", # y = "Runtime (minutes)", # title = "Number of Processing Cores", # subtitle = "(with min/max runtime range)") + # facet_grid(cols = vars(N_cores)) + # scale_color_manual(values = c("black", "#40c679", "#31a354", "#006837")) + # coord_fixed(ratio = 8) + # theme(panel.border = element_rect(color = "black", fill = NA, size = 1.5), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # legend.position = "none", # plot.title = element_text(hjust = 0.5), # plot.subtitle = element_text(hjust = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank(), # strip.background = element_blank()) # # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig10.png"), # width = 5, # height = 2.5, # units = "in") # ## ----fit_summary_bydatagrp---------------------------------------------------- # fit_summary_bydatagrp <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # my_pk <- readRDS(file = file_str) # # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Summary by data group for: ", pk_name)) # # myAFE <- AFE(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE) # # myAAFE <- AAFE(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE) # # my_r2 <- rsq(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE) # # myAIC <- AIC(my_pk) # # myRMSLE <- rmse(my_pk, # use_scale_conc = list(dose_norm = FALSE, # log10_trans = TRUE), # sub_pLOQ = TRUE) # # # myRMSE <- rmse(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE) # # winmodel <- get_winning_model(my_pk) %>% # select(Chemical, Species, method, model) %>% # dplyr::mutate(is_winning_model = TRUE) # # # #note convergence codes for each model # pf <- my_pk$fit %>% # dplyr::distinct(Chemical, Species, model, method, convcode) # # # convcode_labels <- data.frame(convcode =c(0, # 1, # 20, # 21, # 10, # 51, # 52, # 9999, # -9999), # convcode_label = c('Successful convergence (see ?optimx::optimx)', # 'The iteration limit maxit had been reached (see ?optimx::optimx)', # 'the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))', # 'an intermediate set of parameters is inadmissible (see ?optimx::optimx)', # 'degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)', # 'a warning from the "L-BFGS-B" method (see ?optimx::optimx)', # 'an error from the "L-BFGS-B" method (see ?optimx::optimx)', # 'error in optimx::optimx()', # 'model status was "abort"') # ) # # pf <- pf %>% # dplyr::left_join(convcode_labels) # # #join absolutely everything together # summ_by_datagroup <- pf %>% # left_join(winmodel) %>% # left_join(myAIC) %>% # left_join(myAFE) %>% # left_join(myAAFE) %>% # left_join(my_r2) %>% # left_join(myRMSLE) %>% # left_join(myRMSE) %>% # mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE, # is.na(is_winning_model) ~ FALSE, # .default = FALSE)) %>% # arrange(Chemical, Species, method, model) # # return(summ_by_datagroup) # } ## ----------------------------------------------------------------------------- # fit_summary_byexpt <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds") # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # my_pk <- readRDS(file = file_str) # # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Summary by experiment for: ", pk_name)) # # my_tkstats <- eval_tkstats(my_pk, # model = NULL) # # winmodel <- get_winning_model(my_pk) %>% # select(Chemical, Species, method, model) %>% # dplyr::mutate(is_winning_model = TRUE) # # pf <- my_pk$fit %>% # dplyr::distinct(Chemical, Species, model, method, convcode) # # # convcode_labels <- data.frame(convcode =c(0, # 1, # 20, # 21, # 10, # 51, # 52, # 9999, # -9999), # convcode_label = c('Successful convergence (see ?optimx::optimx)', # 'The iteration limit maxit had been reached (see ?optimx::optimx)', # 'the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))', # 'an intermediate set of parameters is inadmissible (see ?optimx::optimx)', # 'degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)', # 'a warning from the "L-BFGS-B" method (see ?optimx::optimx)', # 'an error from the "L-BFGS-B" method (see ?optimx::optimx)', # 'error in optimx::optimx()', # 'model status was "abort"') # ) # # pf <- pf %>% # dplyr::left_join(convcode_labels) # # # summ_by_expt <- pf %>% # left_join(winmodel) %>% # left_join(my_tkstats) %>% # mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE, # is.na(is_winning_model) ~ FALSE, # .default = FALSE)) %>% # arrange(Chemical, Species, method, model) # # return(summ_by_expt) # } # ## ----------------------------------------------------------------------------- # summary_datagrp_all <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(summ = list( # fit_summary_bydatagrp(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale) # ) # ) %>% # tidyr::unnest(cols = c(summ)) # # summary_expt_all <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(summ_expt = list( # fit_summary_byexpt(this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale) # ) # ) %>% # tidyr::unnest(cols = c(summ_expt)) # # writexl::write_xlsx(list("Summary by data grp" = summary_datagrp_all, # "Summary by expt" = summary_expt_all), # paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table4.xlsx")) ## ----------------------------------------------------------------------------- # #set up pk analysis for single-study # pk_study <- minimal_pk + # facet_data( # facets = vars(Chemical, Species, Reference) # ) + # stat_error_model( # error_group = vars(Chemical, Species, Reference) # ) + # settings_preprocess(keep_data_original = FALSE, # suppress.messages = TRUE) + # scale_conc(dose_norm = TRUE, # log10_trans = TRUE) + # settings_optimx(method = "bobyqa") + # stat_model(model = c("model_flat", # "model_1comp", # "model_2comp")) # # #do pk analysis # system.time( # pk_study <- do_fit(pk_study, # n_cores = 11) # ) ## ----------------------------------------------------------------------------- # saveRDS(pk_study, # paste0(Sys.getenv("OD_DIR"), # today, # "studylevel_pk_fit_110p.Rds")) ## ----------------------------------------------------------------------------- # tkstats_study <- eval_tkstats(pk_study, # finite_only = FALSE, # dose_norm = TRUE, # model = "winning") %>% # dplyr::select(Chemical, # Species, # Reference, # method, # model, # halflife.tkstats, # Vss.tkstats, # `Vss/Fgutabs.tkstats`) %>% # dplyr::rename(halflife = halflife.tkstats, # Vss = Vss.tkstats, # `Vss/Fgutabs` = `Vss/Fgutabs.tkstats`) %>% # dplyr::distinct() # # #Also: get coefficients and try to pull Fgutabs (if it was optimized) # coef_study <- coef_sd(pk_study, # method = "bobyqa", # include_type = "optimize") %>% # dplyr::filter(param_name %in% "Fgutabs") %>% dplyr::select(Chemical, # Species, # Reference, # method, # model, # param_name, # param_value) %>% # dplyr::distinct() # # #keep only winning model coefs # win_study <- get_winning_model(pk_study) %>% # dplyr::select(-near_flat, -preds_below_loq) # # coef_study <- coef_study %>% # dplyr::semi_join(win_study) # # fgutabs_study <- coef_study %>% # select(-param_name) %>% # rename(Fgutabs = param_value) # # tkstats_study2 <- full_join(tkstats_study, # fgutabs_study) ## ----------------------------------------------------------------------------- # file_str <- paste0(Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_110p.Rds") # pk_pooled <- readRDS(file = file_str) # # tkstats_pooled <- eval_tkstats(pk_pooled, # finite_only = FALSE, # dose_norm = TRUE, # method = "bobyqa", # model = "winning") %>% # dplyr::select(Chemical, # Species, # Reference, # method, # model, # halflife.tkstats, # Vss.tkstats, # `Vss/Fgutabs.tkstats`) %>% # dplyr::rename(halflife = halflife.tkstats, # Vss = Vss.tkstats, # `Vss/Fgutabs` = `Vss/Fgutabs.tkstats`) %>% # dplyr::group_by(Chemical, # Species, # method, # model, # halflife, # Vss, # `Vss/Fgutabs`) %>% # dplyr::summarise(n_ref = n_distinct(Reference)) %>% # dplyr::ungroup() # # #Also: get coefficients and try to pull Fgutabs # coef_pooled <- coef_sd(pk_pooled, # method = "bobyqa", # include_type = "optimize") %>% # dplyr::filter(param_name %in% "Fgutabs") %>% dplyr::select(Chemical, # Species, # method, # model, # param_name, # param_value) %>% # dplyr::distinct() # # #keep only winning model coefs # win_pooled <- get_winning_model(pk_pooled, # method = "bobyqa") %>% # dplyr::select(-near_flat, -preds_below_loq) # # coef_pooled <- coef_pooled %>% # dplyr::semi_join(win_pooled) # # fgutabs_pooled <- coef_pooled %>% # select(-param_name) %>% # rename(Fgutabs = param_value) # # tkstats_pooled2 <- full_join(tkstats_pooled, # fgutabs_pooled) ## ----------------------------------------------------------------------------- # tkstats_study3 <- tkstats_study2 %>% # dplyr::rename(model_study = model) %>% # pivot_longer(cols = c(halflife, # Vss, # `Vss/Fgutabs`, # Fgutabs), # names_to = "name", # values_to = "value_study") # # tkstats_pooled3 <- tkstats_pooled2 %>% # dplyr::rename(model_pooled = model, # n_ref_pooled = n_ref) %>% # pivot_longer(cols = c(halflife, # Vss, # `Vss/Fgutabs`, # Fgutabs), # names_to = "name", # values_to = "value_pooled") # # tkstats_all <- full_join(tkstats_study3, # tkstats_pooled3) ## ----------------------------------------------------------------------------- # tkstats_all <- tkstats_all %>% # filter(!(model_pooled %in% "model_flat") & # !(model_study %in% "model_flat")) ## ----------------------------------------------------------------------------- # ggplot(tkstats_all %>% # filter( !is.na(value_study) & # !is.na(value_pooled))) + # geom_point(aes(x = value_pooled, # y = value_study, # color = factor(n_ref_pooled))) + # facet_wrap(facets = vars(name), # scales = "free") + # scale_x_log10(guide = "axis_logticks") + # scale_y_log10(guide = "axis_logticks") ## ----------------------------------------------------------------------------- # ggplot(tkstats_all %>% # filter(n_ref_pooled > 1 & # !is.na(value_study) & # !is.na(value_pooled))) + # geom_point(aes(x = value_pooled, # y = value_study), # color = "gray50", # shape = 1) + # geom_line(aes(x = value_pooled, # y = value_study, # group = interaction(Chemical, Species)), # color = "gray50") + # facet_wrap(facets = vars(name), # scales = "free") + # geom_abline(intercept = 0, slope = 1, # color = "black") + # geom_smooth(aes(x = value_pooled, # y = value_study), # method = "lm", # se = FALSE, # color = "black", # linetype = 2) + # scale_x_log10(guide = "axis_logticks") + # scale_y_log10(guide = "axis_logticks") + # theme_bw() + # theme(strip.background = element_blank()) ## ----------------------------------------------------------------------------- # rsq_df <- tkstats_all %>% # filter(n_ref_pooled > 1 & # !is.na(value_study) & # !is.na(value_pooled)) %>% # mutate(log10_value_pooled = log10(value_pooled), # log10_value_study = log10(value_study)) %>% # group_by(name) %>% # summarise(n = n(), # n_grp = n_distinct(Chemical, Species), # rsq_log10 = summary( # lm( # log10_value_study ~ log10_value_pooled, # na.action = "na.omit" # ) # )[[ # "r.squared" # ]], # rsq = summary(lm(value_study ~ value_pooled))[[ # "r.squared" # ]]) %>% # ungroup() %>% # mutate(label = sprintf("R^2 == %.3f", # rsq_log10)) # # rsq_df %>% select(-label) %>% print() ## ----------------------------------------------------------------------------- # ggplot(tkstats_all %>% # filter(n_ref_pooled > 1 & # !is.na(value_study) & # !is.na(value_pooled))) + # geom_point(aes(x = value_pooled, # y = value_study), # color = "gray50", # shape = 1) + # geom_line(aes(x = value_pooled, # y = value_study, # group = interaction(Chemical, Species)), # color = "gray50") + # geom_text(data = rsq_df, # aes(x = 0, # y = Inf, # label = label), # parse = TRUE, # hjust = -0.3, # vjust = 1.8) + # facet_wrap(facets = vars(name), # scales = "free") + # geom_abline(intercept = 0, slope = 1, # color = "black") + # geom_smooth(aes(x = value_pooled, # y = value_study), # method = "lm", # se = FALSE, # color = "black", # linetype = 2) + # scale_x_log10(guide = "axis_logticks") + # scale_y_log10(guide = "axis_logticks") + # xlab("Pooled value") + # ylab("Single-study value") + # theme_bw() + # theme(strip.background = element_blank(), # strip.text = element_text(size = 12)) # # ggsave(paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig8.png"), # height = 5, # width = 7) # # ggsave(paste0(Sys.getenv("FIG_DIR"), # today, # "Manu_Files/", # "_Fig8.pdf"), # height = 5, # width = 7) ## ----------------------------------------------------------------------------- # tkstats_all_wide <- tkstats_all %>% # pivot_wider(id_cols = c(Chemical, Species, # Reference, method, # model_study, # model_pooled, # n_ref_pooled), # names_from = name, # values_from = c(value_study, value_pooled)) # # tkstats_all_wide %>% filter(n_ref_pooled > 1) %>% # writexl::write_xlsx(path = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "Supp_Table5.xlsx")) ## ----------------------------------------------------------------------------- # plot_pooled <- plot(pk_pooled, # method = "bobyqa", # use_scale_conc = TRUE, # best_fit = TRUE) # plot_pooled %>% # filter(Chemical %in% "DTXSID2020139" & Species %in% "rat") %>% # pull(final_plot[[1]]) # ## ----------------------------------------------------------------------------- # plot_pooled <- plot(pk_pooled, # method = "bobyqa", # use_scale_conc = TRUE, # print_out = TRUE) # # ggsave( # filename = paste0(Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_allplots.pdf"), # plot = gridExtra::marrangeGrob(plot_pooled, nrow=1, ncol=1), # width = 15, height = 9 # ) # ## ----information-------------------------------------------------------------- # sessionInfo()