Please send questions to mlinak01@gmail.com
from “Development and evaluation of a high throughput inhalation model for organic chemicals”
Matthew W. Linakis, Risa R. Sayre, Robert G. Pearce, Mark A. Sfeir, Nisha S. Sipes, Heather A. Pangburn, Jeffery M. Gearhart, and John F. Wambaugh
Journal of Exposure Science & Environmental Epidemiology volume 30, pages 866–877 (2020)
https://doi.org/10.1038/s41370-020-0238-y
Currently it is difficult to prospectively estimate human toxicokinetics (particularly for novel chemicals) in a high-throughput manner. The R software package httk has been developed, in part, to address this deficiency, and the aim of this investigation was to develop a generalized inhalation model for httk. The structure of the inhalation model was developed from two previously published physiologically-based models from Jongeneelen et al. (2011) and Clewell et al. (2001) while calculated physicochemical data was obtained from EPA’s CompTox Chemicals Dashboard. In total, 142 exposure scenarios across 41 volatile organic chemicals were modeled and compared to published data. The slope of the regression line of best fit between log-transformed simulated and observed combined measured plasma and blood concentrations was 0.59 with an r2= 0.54 and a Root Mean Square Error (RMSE) of direct comparison between the log-transformed simulated and observed values of 0.87. Approximately 3.6% (n = 73) of the data points analyzed were > 2 orders of magnitude different than expected. The volatile organic chemicals examined in this investigation represent small, generally lipophilic molecules. Ultimately this paper details a generalized inhalation component that integrates with the httk physiologically-based toxicokinetic model to provide high-throughput estimates of inhalation chemical exposures.
This vignette was created with httk v2.0.0. Although we attempt to maintain backward compatibility, if you encounter issues with the latest release of httk and cannot easily address the changes, historical versions of httk are available from: https://cran.r-project.org/src/contrib/Archive/httk/
R package knitr generates html and PDF documents from this RMarkdown file, Each bit of code that follows is known as a “chunk”. We start by telling knitr how we want our chunks to look.
::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4) knitr
It is a bad idea to let variables and other information from previous R sessions float around, so we first remove everything in the R memory.
rm(list=ls())
If you are using the RMarkdown version of this vignette (extension, .RMD) you will be able to see that several chunks of code in this vignette have the statement “eval = execute.vignette”. The next chunk of code, by default, sets execute.vignette = FALSE. This means that the code is included (and necessary) but was not run when the vignette was built. We do this because some steps require extensive computing time and the checks on CRAN limit how long we can spend building the package. If you want this vignette to work, you must run all code, either by cutting and pasting it into R. Or, if viewing the .RMD file, you can either change execute.vignette to TRUE or press “play” (the green arrow) on each chunk in RStudio.
# Set whether or not the following chunks will be executed (run):
<- FALSE execute.vignette
We use the command ‘library()’ to load various R packages for our analysis. If you get the message “Error in library(X) : there is no package called ‘X’” then you will need to install that package:
From the R command prompt:
install.packages(“X”)
Or, if using RStudio, look for ‘Install Packages’ under ‘Tools’ tab.
::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
knitrlibrary(httk)
library(ggplot2)
library(gridExtra)
library(cowplot)
library(ggrepel)
library(dplyr)
library(stringr)
library(forcats)
library(smatr)
The table httk::concentration_data_Linakis2020 contains CvTdb data (Sayre, et al. 2020) for inhalation exposure. The units of all measurements have been converted to uM concentrations.
<- metabolism_data_Linakis2020
met_data <- concentration_data_Linakis2020
conc_data #conc_data <- subset(concentration_data_Linakis2020, !(SAMPLING_MATRIX %in%
# c("EEB","MEB","EB")))
#conc_data <- concentration_data_Linakis2020
# subset(concentration_data_Linakis2020, !(SOURCE_CVT %in% c(
# "11453305")))
"DOSE_U"] <- ifelse(conc_data[,"DOSE_U"] == "ppm",
conc_data[,yes = "ppmv",
"DOSE_U"])
conc_data[,"ORIG_CONC_U"] <- ifelse(conc_data[,"ORIG_CONC_U"] == "ppm",
conc_data[,yes = "ppmv",
"ORIG_CONC_U"])
conc_data[,# Not sure what to do with percent:
<- subset(conc_data,toupper(ORIG_CONC_U) != "PERCENT")
conc_data # Rename this column:
colnames(conc_data)[colnames(conc_data)=="ORIG_CONC_U"] <- "CONC_U"
$ORIGINAL_CONC_U <- conc_data$CONC_U
conc_data$ORIGINAL_CONC <- conc_data$CONCENTRATION
conc_data# Maybe Linakis et al translated all concentrations to uM?
$CONC_U <- "uM" conc_data
Sets the units based on the sampling matrix (gas/liquid): BL : blood EEB : end-exhaled breath MEB : mixed exhaled breath VBL : venous blood ABL : arterial blood EB : unspecified exhaled breath sample (assumed to be EEB) PL: plasma +W with work/exercise
Normalize units for gaseous samples to ppmv:
<- c("EB","MEB","EEB","EB (+W)")
gas.media <- unique(subset(conc_data,
gas.units %in% gas.media)$CONC_U)
SAMPLING_MATRIX <- "ppmv"
target.unit for (this.unit in gas.units)
if (this.unit != target.unit)
{<- unique(subset(conc_data,
these.chems %in% gas.media &
SAMPLING_MATRIX ==this.unit)$DTXSID)
CONC_Ufor (this.chem in these.chems)
{<- convert_units(
this.factor input.units=this.unit,
output.units=target.unit,
dtxsid=this.chem, state="gas")
print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
# Scale the observation
$DTXSID==this.chem &
conc_data[conc_data$SAMPLING_MATRIX %in% gas.media &
conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
conc_data* conc_data[
this.factor $DTXSID==this.chem &
conc_data$SAMPLING_MATRIX %in% gas.media &
conc_data$CONC_U==this.unit,"CONCENTRATION"]
conc_data# Change the reported unit
$DTXSID==this.chem &
conc_data[conc_data$SAMPLING_MATRIX %in% gas.media &
conc_data$CONC_U==this.unit,"CONC_U"] <-
conc_data
target.unit
} }
Normalize the units for tissue samples to uM:
<- c("VBL","BL","ABL","PL","BL (+W)")
tissue.media <- unique(subset(conc_data,
tissue.units %in% tissue.media)$CONC_U)
SAMPLING_MATRIX <- "uM"
target.unit for (this.unit in tissue.units)
if (this.unit != target.unit)
{<- unique(subset(conc_data,
these.chems %in% tissue.media &
SAMPLING_MATRIX ==this.unit)$DTXSID)
CONC_Ufor (this.chem in these.chems)
{<- try(convert_units(
this.factor input.units=this.unit,
output.units=target.unit,
dtxsid=this.chem))
print(paste("Use",this.factor,"to convert",this.unit,"to",target.unit))
# Scale the observation
$DTXSID==this.chem &
conc_data[conc_data$SAMPLING_MATRIX %in% tissue.media &
conc_data$CONC_U==this.unit,"CONCENTRATION"] <-
conc_data* conc_data[
this.factor $DTXSID==this.chem &
conc_data$SAMPLING_MATRIX %in% tissue.media &
conc_data$CONC_U==this.unit,"CONCENTRATION"]
conc_data# Change the reported unit
$DTXSID==this.chem &
conc_data[conc_data$SAMPLING_MATRIX %in% tissue.media &
conc_data$CONC_U==this.unit,"CONC_U"] <-
conc_data
target.unit
} }
Identify chemicals currently in our metabolism data that we don’t have good concentration/time data for and remove them from our training dataset
# Small molecule chemicals
summary(met_data$AVERAGE_MASS)
# Generally more lipophilic chemicals
summary(met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED)
# Unsurprisingly then, the chemicals are generally less water-soluble
summary(met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED)
# ~60% of samples in humans
table(conc_data$CONC_SPECIES)/nrow(conc_data)*100
# ~72% of samples are from blood
table(conc_data$SAMPLING_MATRIX)/nrow(conc_data)*100
# Create a dataframe with 1 row for each unique external exposure scenario
<- conc_data[with(conc_data,
unique_scenarios order(PREFERRED_NAME,
CONC_SPECIES,
SAMPLING_MATRIX,as.numeric(as.character(DOSE)),EXP_LENGTH,-TIME)),] %>%
distinct(DTXSID,DOSE,DOSE_U,EXP_LENGTH,CONC_SPECIES,SAMPLING_MATRIX, .keep_all = TRUE)
Create a list of dataframes of observed and predicted concentrations for each unique external exposure scenario (corresponding to 142 studies)
# Store the output of each simulation:
<- list()
simlist # Store the Cvt data relevant to each simulation
<- list()
obslist # Conduct one simulation for each unique combination of chemical, species, dose:
for (i in 1:nrow(unique_scenarios))
if (unique_scenarios$CASRN[i] %in% get_cheminfo(model="gas_pbtk",
suppress.messages = TRUE))
{# Identify relevant Cvt data:
<- subset(conc_data,conc_data$DTXSID == unique_scenarios$DTXSID[i] &
relconc $DOSE == unique_scenarios$DOSE[i] &
conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] &
conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] &
conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i])
conc_data<- relconc
obslist[[i]] #
#
#
#
#
# THE FOLLOWING CODE RUNS solve_gas_pbtk FOR EACH SCENARIO
# (UNIQUE COMBINATION OF CHEMICAL, SPECIES, DOSE, ETC.)
#
#
#
#
#
<- try(suppressWarnings(as.data.frame(solve_gas_pbtk(
solver.out chem.cas = unique_scenarios$CASRN[i],
days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]),
# Make sure we get predicted conc's at the observed times:
times=unique(c(0,signif(obslist[[i]]$TIME,4))), # days
tsteps = 500,
exp.conc = as.numeric(unique_scenarios$DOSE[i]), # SED (06-22-2021) think this is ppmv for all scenarios
input.units = unique_scenarios$DOSE_U[i], # specify the units for exp.conc (ppmv)
exp.duration = unique_scenarios$EXP_LENGTH[i], # days
period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]), # days
species = as.character(unique_scenarios$CONC_SPECIES[i]),
monitor.vars = c(
"Cven",
"Clung",
"Cart",
"Cplasma",
"Calvppmv",
"Cendexhppmv",
"Cmixexhppmv",
"Calv",
"Cendexh",
"Cmixexh",
"Cmuc",
"AUC"),
vmax.km = FALSE,
suppress.messages = TRUE))))
#
#
#
#
#
#
#
#
#
#
if (class(solver.out) %in% "try-error")
<- data.frame(time=NA,Conc=NA)
solver.out
print(solver.out)
# Get the blood:plasma ratio:
<- suppressWarnings(available_rblood2plasma(
this.Rb2p chem.cas=unique_scenarios$CASRN[i],
species=as.character(unique_scenarios$CONC_SPECIES[i])))
$Rb2p <- this.Rb2p
solver.out
# The column simconc holds the appropriate prediction for the sampling matrix
# BL : blood
# EEB : end-exhaled breath
# MEB : mixed exhaled breath
# VBL : venous blood
# ABL : arterial blood
# EB : unspecified exhaled breath sample (assumed to be EEB)
# PL: plasma
# +W with work/exercise
#
# The model outputs are provided in the following units:
# uM: Cven, Clung, Cart, Cplasma, Calv, Cendexh, Cmixexh, Cmuc
# ppmv: Calvppmv, Cendexhppmv, Cmixexhppmv
# uM*days: AUC
if (unique_scenarios$SAMPLING_MATRIX[i] == "VBL" |
$SAMPLING_MATRIX[i] == "BL" |
unique_scenarios$SAMPLING_MATRIX[i] == "BL (+W)")
unique_scenarios
{$simconc <- solver.out$Cven*this.Rb2p
solver.out$unit <- "uM"
solver.outelse if (unique_scenarios$SAMPLING_MATRIX[i] == "ABL") {
} $simconc <- solver.out$Cart*this.Rb2p
solver.out$unit <- "uM"
solver.outelse if (unique_scenarios$SAMPLING_MATRIX[i] == "EB" |
} $SAMPLING_MATRIX[i] == "EEB" |
unique_scenarios$SAMPLING_MATRIX[i] == "EB (+W)")
unique_scenarios
{$simconc <- solver.out$Cendexh # uM
solver.out$unit <- "uM"
solver.outelse if (unique_scenarios$SAMPLING_MATRIX[i] == "MEB") {
} $simconc <- solver.out$Cmixexh # uM
solver.out$unit <- "uM"
solver.outelse if (unique_scenarios$SAMPLING_MATRIX[i] == "PL") {
} $simconc <- solver.out$Cplasma
solver.out$unit <- "uM"
solver.outelse {
} $simconc <- NA
solver.out$unit <- NA
solver.out
}<- solver.out
simlist[[i]] }
Create a predicted vs. observed plot for each study:
<- list()
cvtlist for(i in 1:nrow(unique_scenarios))
{<- simlist[[i]]
plot.data <- obslist[[i]]
relconc
if (!is.null(plot.data))
{#Right now this is only calculating real concentrations according to mg/L in blood
<- ggplot(data=plot.data, aes(time*24, simconc)) +
cvtlist[[i]] geom_line() +
xlab("Time (h)") +
ylab(paste0("Simulated ",
$SAMPLING_MATRIX[i],
unique_scenarios"\nConcentration (" ,
$unit, ")")) +
solver.outggtitle(paste0(
$PREFERRED_NAME[i],
unique_scenarios" (",
$CONC_SPECIES[i],
unique_scenarios", ",
round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
$DOSE_U[i],
unique_scenarios" for ",
round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
"h in ",
$SAMPLING_MATRIX[i], ")")) +
unique_scenariosgeom_point(data = relconc, aes(TIME*24,CONCENTRATION)) +
theme(plot.title = element_text(face = 'bold', size = 20),
axis.title.x = element_text(face = 'bold', size = 20),
axis.text.x = element_text(size=16),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 16),
legend.title = element_text(face = 'bold', size = 16),
legend.text = element_text(face = 'bold',size = 14))+
theme_bw()
} }
Create a list to hold the combined observations and predictions for each scenario:
# Creation of simulated vs. observed concentration dataset
$RSQD <- 0
unique_scenarios$RMSE <- 0
unique_scenarios$AIC <- 0
unique_scenarios<- list()
simobslist <- list() obvpredlist
Merge the simulations and observations on the basis of simulation time:
for(i in 1:length(simlist))
{<- as.data.frame(obslist[[i]])
obsdata <- as.data.frame(simlist[[i]])
simdata # skips over anything for which there was no observed data or
# insufficient information to run simulation:
if (!is.null(simdata) &
!is.null(obsdata) &
dim(simdata)[1]>1)
{ # Make sure we are looking at consistent time points:
<- simdata[simdata$time %in% signif(obsdata$TIME,4),]
simobscomb <- subset(obsdata,signif(TIME,4) %in% simobscomb$time)
obsdata # Merge with obsdata
colnames(obsdata)[colnames(obsdata) ==
"TIME"] <-
"obstime"
# Round to match sim time:
$time <- signif(obsdata$obstime,4)
obsdatacolnames(obsdata)[colnames(obsdata) ==
"CONCENTRATION"] <-
"obsconc"
colnames(obsdata)[colnames(obsdata) ==
"PREFERRED_NAME"] <-
"chem"
colnames(obsdata)[colnames(obsdata) ==
"DOSE"] <-
"dose"
colnames(obsdata)[colnames(obsdata) ==
"EXP_LENGTH"] <-
"explen"
colnames(obsdata)[colnames(obsdata) ==
"CONC_SPECIES"] <-
"species"
colnames(obsdata)[colnames(obsdata) ==
"SAMPLING_MATRIX"] <-
"matrix"
colnames(obsdata)[colnames(obsdata) ==
"AVERAGE_MASS"] <-
"mw"
colnames(obsdata)[colnames(obsdata) ==
"CONC_U"] <-
"CONC_U"
<- suppressWarnings(merge(obsdata[,c(
simobscomb "time",
"obstime",
"obsconc",
"chem",
"dose",
"explen",
"species",
"matrix",
"mw",
"CONC_U",
"ORIGINAL_CONC_U"
by="time", all.x=TRUE))
)], simobscomb,
# Merge with met_data
<- subset(met_data,
this.met_data == simobscomb[1,"chem"] &
PREFERRED_NAME == simobscomb[1,"species"])
SPECIES colnames(this.met_data)[colnames(this.met_data)=="CHEM_CLASS"] <-
"chemclass"
colnames(this.met_data)[colnames(this.met_data) ==
"OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"] <-
"logp"
colnames(this.met_data)[colnames(this.met_data) ==
"WATER_SOLUBILITY_MOL.L_OPERA_PRED"] <-
"sol"
colnames(this.met_data)[colnames(this.met_data) ==
"HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED"] <-
"henry"
colnames(this.met_data)[colnames(this.met_data) ==
"VMAX"] <-
"vmax"
colnames(this.met_data)[colnames(this.met_data) ==
"KM"] <-
"km"
<- suppressWarnings(cbind(simobscomb,this.met_data[c(
simobscomb "chemclass",
"logp",
"sol",
"henry",
"vmax",
"km")]))
<- simobscomb
simobslist[[i]]
} }
Identify which quartile each observation occurred in with respect to the latest (maximum) observed time
for(i in 1:length(simobslist))
if (!is.null(simobslist[[i]]))
{<- simobslist[[i]]
simobscomb for (j in 1:nrow(simobscomb))
{ <- max(simobscomb$time,na.rm=TRUE)
max.time if (is.na(max.time)) simobscomb$tquart <- NA
else if (max.time == 0) simobscomb$tquart <- "1"
else if (!is.na(simobscomb$time[j]))
{$tquart[j] <- as.character(1 +
simobscombfloor(simobscomb$time[j]/max.time/0.25))
$tquart[simobscomb$tquart=="5"] <-
simobscomb"4"
else simobscomb$tquart[j] >- NA
}
}<- simobscomb
simobslist[[i]] }
Calculate the area under the curve (AUC)
for(i in 1:length(simobslist))
if (!is.null(simobslist[[i]]))
{<- simobslist[[i]]
simobscomb # Calculat the AUC with the trapezoidal rule:
if (nrow(simobscomb)>1)
{for (k in 2:max(nrow(simobscomb)-1,2,na.rm=TRUE))
{$obsAUCtrap[1] <- 0
simobscomb$simAUCtrap[1] <- 0
simobscombif (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) &
nrow(simobscomb) >=2)
{$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] +
simobscomb0.5*(simobscomb$time[k] - simobscomb$time[k-1]) *
$obsconc[k] + simobscomb$obsconc[k-1])
(simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] +
simobscomb0.5*(simobscomb$time[k]-simobscomb$time[k-1]) *
$simconc[k] + simobscomb$simconc[k-1])
(simobscombelse {
} $obsAUCtrap <- 0
simobscomb$simAUCtrap <- 0
simobscomb
}
}else {
} $obsAUCtrap <- 0
simobscomb$simAUCtrap <- 0
simobscomb
}$AUCobs <- max(simobscomb$obsAUCtrap)
simobscomb$AUCsim <- max(simobscomb$simAUCtrap)
simobscomb$calcAUC <- max(simobscomb$AUC)
simobscombif (min(simobscomb$time) <= simobscomb$explen[1]*1.03)
{$Cmaxobs <- max(simobscomb$obsconc)
simobscomb$Cmaxsim <- max(simobscomb$simconc)
simobscombelse {
} $Cmaxobs <- 0
simobscomb$Cmaxsim <- 0
simobscomb
}<- simobscomb
simobslist[[i]] }
Calculate performance statistics
for(i in 1:length(simobslist))
if (!is.null(simobslist[[i]]))
{<- simobslist[[i]]
simobscomb $RSQD[i] <- 1 - (
unique_scenariossum((simobscomb$obsconc - simobscomb$simconc)^2) /
sum((simobscomb$obsconc-mean(simobscomb$obsconc))^2)
)$RMSE[i] <-
unique_scenariossqrt(mean((simobscomb$simconc - simobscomb$obsconc)^2))
$AIC[i] <-
unique_scenariosnrow(simobscomb)*(
log(2*pi) + 1 +
log((sum((simobscomb$obsconc-simobscomb$simconc)^2) /
nrow(simobscomb)))
+ ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
) <- simobscomb
simobslist[[i]] }
Combine individual studies into single table
<- list()
obsvpredlist for(i in 1:length(simobslist))
if (!is.null(simobslist[[i]]))
{<- simobslist[[i]]
simobscomb <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) +
obsvpredlist[[i]] geom_point() +
geom_abline() +
xlab("Simulated Concentrations (uM)") +
ylab("Observed Concentrations (uM)") +
ggtitle(paste0(
$PREFERRED_NAME[i],
unique_scenarios" (",
$CONC_SPECIES[i],
unique_scenarios", ",
round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
$DOSE_U[i],
unique_scenarios" for ",
round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
"h in ",
$SAMPLING_MATRIX[i], ")")) +
unique_scenariostheme_bw() +
theme(plot.title = element_text(face = 'bold', size = 20),
axis.title.x = element_text(face = 'bold', size = 20),
axis.text.x = element_text(size=16),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 16),
legend.title = element_text(face = 'bold', size = 16),
legend.text = element_text(face = 'bold',size = 14))
}
# Create pdfs of observed vs. predicted concentration plots
dir.create("Linakis2020")
pdf("Linakis2020/obvpredplots.pdf", width = 10, height = 10)
for (i in 1:length(obsvpredlist)) {
print(obsvpredlist[[i]])
}dev.off()
for (i in 1:length(cvtlist))
if (!is.null(cvtlist[[i]]))
{<- cvtlist[[i]] +
cvtlist[[i]] geom_text(
x = Inf,
y = Inf,
hjust = 1.3,
vjust = 1.3,
# size = 6,
label = paste0(
"RMSE: ",
round(unique_scenarios$RMSE[i],digits = 2),
"\nAIC: ",
round(unique_scenarios$AIC[i],digits = 2)))# +
# theme(
# plot.title = element_text(face = 'bold', size = 15),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))
}
# Create pdfs of simulated concentration-time plots against observed c-t values
pdf("Linakis2020/simdataplots.pdf")
for (i in 1:length(cvtlist)) {
print(cvtlist[[i]])
}dev.off()
Combine obs. vs. pred. into single table:
<- do.call("rbind",simobslist)
simobsfull <- subset(simobsfull, simobsfull$species == "Rat")
simobsfullrat <- subset(simobsfull, simobsfull$species == "Human")
simobsfullhum <- subset(unique_scenarios,!is.na(unique_scenarios$RSQD)) unique_scenarios
The observations in simobsfull are in both uM and ppmv – standardize to uM
<- unique(subset(simobsfull,unit=="ppmv")$chem)
these.chems for (this.chem in these.chems)
{# Use HTTK unit conversion:
<- convert_units(
this.factor input.units="ppmv", output.units="um", chem.name=this.chem, state="gas")
# Scale the observation
$chem==this.chem &
simobsfull[simobsfull$unit=="ppmv","obsconc"] <-
simobsfull* simobsfull[
this.factor $chem==this.chem &
simobsfull$unit=="ppmv","obsconc"]
simobsfull# Scale the prediction
$chem==this.chem &
simobsfull[simobsfull$unit=="ppmv","simconc"] <-
simobsfull* simobsfull[
this.factor $chem==this.chem &
simobsfull$unit=="ppmv","simconc"]
simobsfull# Change the reported unit
$chem==this.chem &
simobsfull[simobsfull$unit=="ppmv","unit"] <-
simobsfull"uM"
}
Other analytics including linear regression on overall concentration vs. time observed vs. predicted
table(unique_scenarios$CONC_SPECIES)
nrow(simobsfull) - nrow(simobsfull[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0,])
simobsfull<- (nrow(simobsfull) -
pmiss nrow(simobsfull[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0,])) /
simobsfullnrow(simobsfull) * 100
<- (simobsfull[
missdata is.na(simobsfull$simconc) |
$simconc <= 0 |
simobsfull$obsconc <= 0,])
simobsfull<- simobsfull[simobsfull$obstime == 0,]
t0df <- lm(
lmall #log transforms:
log10(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0]) ~
simobsfull#log transforms:
log10(simobsfull$simconc[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0]))
simobsfull#Linear binned 1
<- lm(
lmsub1 $obsconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc < 0.1] ~
simobsfull$simconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc < 0.1])
simobsfull#Linear binned 2
<- lm(
lmsub2 $obsconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc >= 0.1 &
simobsfull$obsconc < 10] ~
simobsfull$simconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc >= 0.1 &
simobsfull$obsconc < 10])
simobsfull#Linear binned 3
<- lm(
lmsub3 $obsconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc >= 10] ~
simobsfull$simconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc >= 10])
simobsfull<- lm(
lmrat log10(simobsfullrat$obsconc[
!is.na(simobsfullrat$simconc) &
$simconc > 0 &
simobsfullrat$obsconc > 0]) ~
simobsfullratlog10(simobsfullrat$simconc[
!is.na(simobsfullrat$simconc) &
$simconc > 0 &
simobsfullrat$obsconc > 0]))
simobsfullratunique(simobsfullrat$chem)
<- lm(
lmhum log10(simobsfullhum$obsconc[
!is.na(simobsfullhum$simconc) &
$simconc > 0 &
simobsfullhum$obsconc > 0]) ~
simobsfullhumlog10(simobsfullhum$simconc[
!is.na(simobsfullhum$simconc) &
$simconc > 0 &
simobsfullhum$obsconc > 0]))
simobsfullhumunique(simobsfullhum$chem)
<- summary(lmall)$coef[2,1]
concregslope <- summary(lmall)$r.squared
concregr2 <- sqrt(mean(lmall$residuals^2))
concregrmse <- sqrt(mean((
totalrmse log10(simobsfull$simconc[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0]) -
simobsfulllog10(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0]))^2,
simobsfullna.rm = TRUE))
<- mean(abs(
totalmae log10(simobsfull$simconc[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0]) -
simobsfulllog10(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0])),
simobsfullna.rm = TRUE)
<- nrow(
totalaic
simobsfull[!is.na(simobsfull$simconc) &
$simconc >0 &
simobsfull$obsconc >
simobsfull0,]) *
log(2*pi) +
(1 +
log((sum(
$obsconc[
(simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0] -
simobsfull$simconc[
simobsfull!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0])^2,
simobsfullna.rm=TRUE) /
nrow(simobsfull[
!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0,])))) +
simobsfull44+1)*2) #44 is the number of parameters from inhalation_inits.R
((<- table(abs(
mispred log10(simobsfull$simconc) -
log10(simobsfull$obsconc))>2 &
$simconc>0)
simobsfull2]
mispred[2] / nrow(simobsfull[
mispred[!is.na(simobsfull$simconc) &
$simconc >0 &
simobsfull$obsconc > 0,])*100
simobsfull<- table(
overpred log10(simobsfull$simconc) -
log10(simobsfull$obsconc)>2 &
$simconc>0)
simobsfull2]
overpred[2] / nrow(simobsfull[
overpred[!is.na(simobsfull$simconc) &
$simconc >0 &
simobsfull$obsconc > 0,])*100
simobsfull<- table(
underpred log10(simobsfull$obsconc) -
log10(simobsfull$simconc)>2 &
$simconc>0)
simobsfull2]
underpred[2] / nrow(simobsfull[
underpred[!is.na(simobsfull$simconc) &
$simconc >0 &
simobsfull$obsconc > 0,])*100
simobsfull<- table(abs(
mispredhalf log10(simobsfull$simconc) -
log10(simobsfull$obsconc))>0.5 &
$simconc>0)
simobsfull2]
mispredhalf[2] / nrow(simobsfull[
mispredhalf[!is.na(simobsfull$simconc) &
$simconc >0 &
simobsfull$obsconc > 0,])*100
simobsfull<- table(
overpredhalf log10(simobsfull$simconc) -
log10(simobsfull$obsconc)>0.5 &
$simconc>0)
simobsfull2]
overpredhalf[2] / nrow(simobsfull[
overpredhalf[!is.na(simobsfull$simconc) &
$simconc >0 &
simobsfull$obsconc > 0,])*100
simobsfull<- table(
underpredhalf log10(simobsfull$obsconc) -
log10(simobsfull$simconc)>0.5 &
$simconc>0)
simobsfull2]
underpredhalf[2] / nrow(simobsfull[
underpredhalf[!is.na(simobsfull$simconc) &
$simconc > 0 &
simobsfull$obsconc > 0,])*100
simobsfull<- subset(simobsfull,
chemunderpred log10(simobsfull$simconc) -
log10(simobsfull$obsconc) < 0 &
$simconc > 0)
simobsfulltable(chemunderpred$chemclass) / table(simobsfull$chemclass)*100
<- ggplot(
fig2 data = simobsfull[
$simconc > 0 &
simobsfull$obsconc > 0,],
simobsfullaes(x = log10(simconc), y = log10(obsconc))) +
geom_point(
color = ifelse(
abs(
log10(simobsfull[
$simconc > 0 &
simobsfull$obsconc > 0,]$simconc) -
simobsfulllog10(simobsfull[
$simconc > 0 &
simobsfull$obsconc > 0,]$obsconc)) >2,
simobsfull'red',
'black')) +
geom_abline() +
xlab("Log(Simulated Concentrations)") +
ylab("Log(Observed Concentrations)") +
theme_bw() +
geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall')) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
geom_text(
x = Inf,
y = -Inf,
hjust = 1.05,
vjust = -0.15,
size = 8,
label = paste0(
# "Regression slope: ",
# round(summary(lmall)$coef[2,1],digits = 2),
"\nRegression R^2: ",
round(summary(lmall)$r.squared,digits = 2),
"\nRegression RMSE: ",
round(sqrt(mean(lmall$residuals^2)),digits = 2),
"\nRMSE (Identity): ",
round(totalrmse,digits = 2)
# "\n% Missing:",
# round(pmiss, digits = 2), "%")
+
)) #geom_text(
# data = simobsfull[
# abs(log10(simobsfull$simconc) - log10(simobsfull$obsconc))>7 &
# simobsfull$simconc>0 & simobsfull$obsconc > 0,],
# aes(label = paste(chem,species,matrix)),
## fontface = 'bold',
# check_overlap = TRUE,
# size = 3.5,
# hjust = 0.5,
# vjust = -0.8) +
scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
theme(
plot.title = element_text(face = 'bold', size = 15),
axis.title.x = element_text(face = 'bold', size = 30),
axis.text.x = element_text(size=16),
axis.title.y = element_text(face = 'bold', size = 30),
axis.text.y = element_text(size = 16),
legend.title = element_text(face = 'bold', size = 24),
legend.text = element_text(face = 'bold',size = 24))
#Display plot in R fig2
library(scales)
# Function for formatting tick labels:
<- function(x) {
scientific_10 <- gsub("1e", "10^", scientific_format()(x))
out <- gsub("\\+","",out)
out <- gsub("10\\^01","10",out)
out <- parse(text=gsub("10\\^00","1",out))
out
}
<- 10
font.size.large <- 8
font.size.small
<- ggplot(
figaddmodels data = simobsfull[
$simconc > 0 &
simobsfull$obsconc > 0,],
simobsfullaes(x = simconc, y = obsconc)) +
geom_point(
color = ifelse(
abs(
log10(simobsfull[
$simconc > 0 &
simobsfull$obsconc > 0,]$simconc) -
simobsfulllog10(simobsfull[
$simconc > 0 &
simobsfull$obsconc > 0,]$obsconc)) >2,
simobsfull'red',
'black'),alpha=0.15) +
geom_abline() +
scale_y_log10(label=scientific_10,limits=c(1e-4,1e4))+
scale_x_log10(label=scientific_10,limits=c(1e-4,1e4))+
xlab("Simulated Concentrations") +
ylab("Observed Concentrations") +
theme_bw() +
geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall', linetype="Overall")) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species, linetype = species)) +
geom_text(
x = 2,
y = -1,
size = 4,
label = paste0("RMSLE: ",
round(totalrmse,digits = 2)
+
)) scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
scale_linetype_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) +
theme(
plot.title = element_text(face = 'bold', size = font.size.small),
axis.title.x = element_text(face = 'bold', size = font.size.large),
axis.text.x = element_text(size=font.size.small),
axis.title.y = element_text(face = 'bold', size = font.size.large),
axis.text.y = element_text(size = font.size.small),
legend.title = element_text(face = 'bold', size = font.size.large),
legend.text = element_text(face = 'bold',size = font.size.large))
print(figaddmodels)
ggsave("c:/users/jwambaug/AddModelsFig1.tiff", width=6, height=4, dpi=300)
<- simobsfull[,c("chem","dose","explen","species")]
counts <- subset(counts,!duplicated(counts))
counts paste(length(unique(counts$chem)),
"chemicals across",dim(counts)[1],
"experimental conditions in",
length(unique(counts$species)),"species.")
pdf("Linakis2020/Figure2.pdf", width = 10, height = 10)
print(fig2)
dev.off()
# Create data and run calculations for populating plots
<- subset(simobsfull, !duplicated(simobsfull$AUCobs) & simobsfull$Cmaxobs != 0)
cmaxfull <- cmaxfull$Cmaxobs
cmaxobs <- cmaxfull$Cmaxsim
cmaxsim <- cmaxobs[!is.nan(cmaxsim)]
cmaxobs <- cmaxsim[!is.nan(cmaxsim)]
cmaxsim !is.finite(log10(cmaxsim))] <- NA
cmaxsim[<- lm(log10(cmaxobs)~log10(cmaxsim), na.action = na.exclude)
cmaxlm <- subset(cmaxfull,
cmaxvcbkg paste(
$chem,
cmaxfull$dose,
cmaxfull$explen,
cmaxfull$species,
cmaxfull$matrix) %in%
cmaxfullpaste(
$chem,
t0df$dose,
t0df$explen,
t0df$species,
t0df$matrix))
t0df$cmaxcbkgratio <- cmaxvcbkg$Cmaxobs / cmaxvcbkg$obsconc
cmaxvcbkg$adjustedCmaxsim <- cmaxvcbkg$Cmaxsim - cmaxvcbkg$obsconc
cmaxvcbkg<-subset(simobsfull,
aucfull !duplicated(simobsfull$AUCobs) &
$AUCobs != 0)
simobsfull<- aucfull$AUCobs
aucobs <- aucfull$AUCsim
aucsim <- aucobs[!is.nan(aucsim)]
aucobs <- aucsim[!is.nan(aucsim)]
aucsim !is.finite(log10(aucsim))] <- NA
aucsim[<- lm(log10(aucobs)~log10(aucsim), na.action = na.exclude)
auclm <- summary(cmaxlm)$coef[2,1]
cmaxslope <- summary(cmaxlm)$r.squared
cmaxrsq <- sqrt(mean((log10(cmaxfull$Cmaxsim) -
totalrmsecmax log10(cmaxfull$Cmaxobs))^2, na.rm = TRUE))
<- nrow(cmaxfull[
cmaxmiss abs(log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs)) > 1,])
<- nrow(cmaxfull[
cmaxmissp abs(log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs)) > 1,]) /
nrow(cmaxfull) * 100
<- table(cmaxfull[
cmaxmisschem abs(log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs)) > 1,]$chem)
<- summary(auclm)$coef[2,1]
aucslope <- summary(auclm)$r.squared
aucrsq <- sqrt(mean((
totalrmseauc log10(aucfull$AUCsim) -
log10(aucfull$AUCobs))^2, na.rm = TRUE))
<- nrow(aucfull[
aucmiss abs(log10(aucfull$AUCsim) -
log10(aucfull$AUCobs)) > 1,])
<- nrow(aucfull[
aucmissp abs(log10(aucfull$AUCsim) -
log10(aucfull$AUCobs)) > 1,]) /
nrow(aucfull) * 100
<- table(aucfull[
aucmisschem abs(log10(aucfull$AUCsim) -
log10(aucfull$AUCobs)) > 1,]$chem)
<- ggplot(data = cmaxfull, aes(x = log10(Cmaxsim), y = log10(Cmaxobs))) +
cmaxp geom_point(color =
ifelse(abs(log10(cmaxfull$Cmaxsim) -log10(cmaxfull$Cmaxobs))>=2, "red","black")) +
geom_abline() +
xlab("Log(Simulated Max Concentration)") +
ylab("Log(Observed Max Concentration)") +
theme_bw() +
geom_smooth(method = 'lm', se = FALSE, aes(color = 'Overall')) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
geom_text(x = Inf,
y = -Inf,
hjust = 1.05,
vjust = -0.15,
# size = 6,
label = paste0("Regression slope: ",
round(summary(cmaxlm)$coef[2,1],digits = 2),
"\nRegression R^2: ",
round(summary(cmaxlm)$r.squared,digits = 2))) +
geom_text_repel(
data = cmaxfull[
log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
(log10(cmaxfull$Cmaxsim) > 2,],
aes(label = paste(chem,species,matrix)),
force = 2,
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = -0.05,
vjust = 0.5) +
scale_y_continuous(lim = c (-1,5)) +
scale_x_continuous(lim = c(-1,5)) +
geom_text_repel(
data = cmaxfull[
log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=2 &
(log10(cmaxfull$Cmaxsim) <= 2,],
aes(label = paste(chem,species,matrix)),
nudge_x = 0.0,
nudge_y = -0.2,
force = 2,
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = -0.05,
vjust = 0.5) +
geom_text(
data = cmaxfull[
log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))<=-2,],
(aes(label = paste(chem,species,matrix)),
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = 0.5,
vjust = -0.7) +
scale_color_discrete(
name = 'Species',
breaks = c("Overall","Human","Rat")) #+
# theme(plot.title = element_text(face = 'bold', size = 10),
# axis.title.x = element_text(face = 'bold', size = 10),
# axis.text.x = element_text(size=8),
# axis.title.y = element_text(face = 'bold', size = 10),
# axis.text.y = element_text(size = 8),
# legend.title = element_text(face = 'bold', size = 8),
# legend.text = element_text(face = 'bold',size = 8))
cmaxp<- ggplot(
aucp data = aucfull,
aes(x = log10(AUCsim), y = log10(AUCobs))) +
geom_point(color =
ifelse(abs(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2, "red","black")) +
geom_abline() +
xlab("Log(Simulated AUC)") +
ylab("Log(Observed AUC)") +
theme_bw() +
geom_smooth(method = 'lm', se = FALSE, aes(color = "Overall")) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
geom_text(
x = Inf,
y = -Inf,
hjust = 1.05,
vjust = -0.15,
# size = 6,
label = paste0(
"Regression slope: ",
round(summary(auclm)$coef[2,1],digits = 2),
"\nRegression R^2: ",
round(summary(auclm)$r.squared,digits = 2))) +
geom_text_repel(
data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=2,],
aes(label = paste(chem,species,matrix)),
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = -0.05,
vjust = 0.5) +
scale_y_continuous(lim = c (-2,4)) +
scale_x_continuous(lim = c(-2,4)) +
geom_text(
data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))<=-2,],
aes(label = paste(chem,species,matrix)),
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = 0.5,
vjust = -0.8) +
scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+
# theme(
# plot.title = element_text(face = 'bold', size = 15),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))
aucp
pdf("Linakis2020/Figure4.pdf", width = 20, height = 10)
plot_grid(cmaxp,aucp,nrow = 1, labels = c('A','B'), label_size = 30)
dev.off()
$aggscen <- as.factor(paste(simobsfull$chem,
simobsfull$species,
simobsfull$matrix))
simobsfull<- lm(
chem.lm log10(simconc) - log10(obsconc) ~
aggscen, data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,])
<- resid(chem.lm)
chem.res # Number of observations per chemical class
<- simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,] %>%
numpt group_by(chemclass) %>% summarize(n = paste("n =", length(simconc)))
<- ggplot(
fig3 data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = aggscen, y = log10(simconc)-log10(obsconc), fill = chemclass)) +
geom_boxplot() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("Exposure Scenario") +
ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
facet_wrap(vars(chemclass), scales = 'free_x', nrow = 1) + #35 by 12 for poster
theme_bw() +
geom_text(
data = numpt,
aes(x = Inf, y = -Inf, hjust = 1.05, vjust = -0.5, label = n),
size = 10,
colour = 'black',
inherit.aes = FALSE,
parse = FALSE) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5,size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 24),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 30),
axis.title.y = element_text(face = 'bold', size = 30),
axis.text.y = element_text(face = 'bold',size = 25, color = 'black'))
fig3
pdf("Linakis2020/Figure3.pdf", width = 40, height = 13)
print(fig3)
dev.off()
<- ggplot(
figs1a data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = tquart, y = log10(simconc)-log10(obsconc))) +
geom_boxplot()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nTime Quartile\n") +
ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
theme_bw()+
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1a<- ggplot(
figs1b data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = mw, y = log10(simconc)-log10(obsconc))) +
geom_point()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nMolecular Weight (g/mol)\n") +
ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
theme_bw()+
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1b<- ggplot(
figs1c data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = logp, y = log10(simconc)-log10(obsconc))) +
geom_point()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nLog P") +
ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
theme_bw() +
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1c<- ggplot(
figs1d data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = sol, y = log10(simconc)-log10(obsconc))) +
geom_point()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nSolubility (mol/L)") +
ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
scale_x_log10()+
theme_bw() +
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1d
pdf("Linakis2020/FigureS1.pdf", width = 20, height = 20)
plot_grid(figs1a,figs1b,figs1c,figs1d,nrow = 2, labels = c('A','B','C','D'), label_size = 30)
dev.off()
<- list()
senschem <- list()
sensslope <- list()
sensrsq <- list()
sensregrmse <- list()
senstotalrmse <- list()
senspmiss <- list()
senscmaxslope <- list()
senscmaxrsq <- list()
senstotalrmsecmax <- list()
sensaucslope <- list()
sensaucrsq <- list()
senstotalrmseauc for (i in 1:nrow(simobsfull))
{<- subset(simobsfull,simobsfull$chem != simobsfull$chem[i])
simobsfullsens <- as.character(simobsfull$chem[i])
senschem[i] <- lm(
senslm log10(simobsfullsens$obsconc[
!is.na(simobsfullsens$simconc) &
$simconc > 0 &
simobsfullsens$obsconc > 0]) ~
simobsfullsenslog10(simobsfullsens$simconc[
!is.na(simobsfullsens$simconc) &
$simconc >0 &
simobsfullsens$obsconc > 0]))
simobsfullsens<- round(summary(senslm)$coef[2,1],digits = 2)
sensslope[i] <- round(summary(senslm)$r.squared,digits = 2)
sensrsq[i] <- round(sqrt(mean(senslm$residuals^2)),digits = 2)
sensregrmse[i] <- round(sqrt(mean((
senstotalrmse[i] log10(simobsfullsens$simconc[
!is.na(simobsfullsens$simconc) &
$simconc >0 &
simobsfullsens$obsconc > 0]) -
simobsfullsenslog10(simobsfullsens$obsconc[
!is.na(simobsfullsens$simconc) &
$simconc >0 &
simobsfullsens$obsconc > 0]))^2,
simobsfullsensna.rm = TRUE)), digits = 2)
<- round((nrow(simobsfullsens) -
senspmiss[i] nrow(simobsfullsens[
!is.na(simobsfullsens$simconc) &
$simconc >0 &
simobsfullsens$obsconc > 0,])) / nrow(simobsfullsens) * 100,
simobsfullsensdigits = 2)
<- subset(simobsfullsens, !duplicated(simobsfullsens$Cmaxobs))
senscmaxfull <- lm(
senscmaxlm log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]) ~
log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]),
na.action = na.exclude)
<-subset(simobsfullsens, !duplicated(simobsfullsens$AUCobs))
sensaucfull <- lm(
sensauclm log10(aucfull$AUCobs[aucfull$AUCobs>0]) ~
log10(aucfull$AUCsim[aucfull$AUCobs>0]),
na.action = na.exclude)
<- round(summary(senscmaxlm)$coef[2,1],digits = 2)
senscmaxslope[i] <- round(summary(senscmaxlm)$r.squared,digits = 2)
senscmaxrsq[i] <- sqrt(mean((log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]) - log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]))^2, na.rm = TRUE))
senstotalrmsecmax[i] <- round(summary(sensauclm)$coef[2,1],digits = 2)
sensaucslope[i] <- round(summary(sensauclm)$r.squared,digits = 2)
sensaucrsq[i] <- sqrt(mean((log10(sensaucfull$AUCsim[sensaucfull$AUCobs>0]) - log10(sensaucfull$AUCobs[sensaucfull$AUCobs>0]))^2, na.rm = TRUE))
senstotalrmseauc[i]
}<- data.frame(Chemical <- as.character(senschem),
sensitivitydf <- as.numeric(sensslope),
sensSlope <- as.numeric(sensrsq),
sensRsq <- as.numeric(sensregrmse),
sensRegrmse <- as.numeric(senstotalrmse),
sensTotrmse <- as.numeric(senspmiss),
sensPmiss <- as.numeric(senscmaxslope),
sensCmaxslope <- as.numeric(senscmaxrsq),
sensCmaxrsq <- as.numeric(senstotalrmsecmax),
sensCmaxrmse <- as.numeric(sensaucslope),
sensAUCslope <- as.numeric(sensaucrsq),
sensAUCrsq <- as.numeric(senstotalrmseauc),
sensAUCrmse stringsAsFactors=FALSE)
<- subset(sensitivitydf,!duplicated(sensitivitydf$Chemical....as.character.senschem.))
sensitivitydf names(sensitivitydf) <- c('Chemical Dropped','Regression Slope','Regression R^2','Regression RMSE','Orthogonal RMSE', 'Percent Data Censored','Cmax Regression Slope','Cmax Regression R^2','Cmax Orthogonal RMSE','AUC Regression Slope','AUC Regression R^2','AUC Orthogonal RMSE')
<- c('None',concregslope,concregr2,concregrmse,totalrmse,pmiss,cmaxslope,cmaxrsq,totalrmsecmax,aucslope,aucrsq,totalrmseauc)
notdropped <- rbind(notdropped, sensitivitydf)
sensitivitydf -1] <- sapply(sensitivitydf[,-1],as.numeric)
sensitivitydf[,-1] <- round(sensitivitydf[,-1],2)
sensitivitydf[,# Clean up and write file
rm(chem.lm, obvpredplot, p, pdata1, plot.data, plots, relconc, sensaucfull, sensauclm, sensaucrsq, sensaucslope, senschem, senscmaxfull, senscmaxlm, senscmaxrsq, senscmaxslope, senslm, senspmiss, sensregrmse, sensrsq, sensslope, senstotalrmse, senstotalrmseauc, senstotalrmsecmax, solve, AUCrmse, AUCrsq, AUCslope, chem.res, Chemical, Cmaxrmse, Cmaxrsq, Cmaxslope, colors, count, i, j, k, met_col, name, name1, Pmiss, Regrmse, Rsq, Slope, rem, Totrmse)
write.csv(sensitivitydf, 'supptab2.csv',row.names = FALSE)
<- subset(unique_scenarios, !duplicated(unique_scenarios$PREFERRED_NAME) | !duplicated(unique_scenarios$SOURCE_CVT) | !duplicated(unique_scenarios$CONC_SPECIES))
supptab1 for(i in 1:nrow(supptab1)){
tryCatch({
$Metabolism_Source[i] <- met_data$SOURCE_MET[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Log_P[i] <- met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Solubility[i] <- met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Blood_Air_Partition_Coefficient[i] <- met_data$CALC_PBA[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Chem_Class[i] <- met_data$CHEM_CLASS[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Species[i] <- met_data$SPECIES[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Vmax[i] <- met_data$VMAX[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Km[i] <- met_data$KM[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1error = function(e){})
},
}<- supptab1[c('PREFERRED_NAME','DTXSID','CASRN','Chem_Class','AVERAGE_MASS','Log_P','Solubility','Blood_Air_Partition_Coefficient','Species','Vmax','Km','Metabolism_Source','SOURCE_CVT')]
supptab1 names(supptab1) <- c('Chemical','DTXSID','CASRN','CAMEO Chemical Class','Molecular Weight (g/mol)','Log P','Solubility (mol/L)','Blood Air Partition Coefficient','Available Species Data','Vmax (pmol/min/10^6 cells)','KM (uM)','Metabolism Data Source','Concentration-Time Data Source')
write.csv(supptab1, "supptab1.csv", row.names = FALSE)