## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----echo=FALSE, results='asis'-----------------------------------------------

d = data.frame( Attribute=c("attr(f_model, 'gradient')", "attr(f_model, 'backsolve')",
                            "attr(f_model, 'start')"),
                Purpose=c("Gradient of f_model with respect to theta", 
                          "Find x such that f_model(theta, x) = y",
                          "Starting values for optimization")
)
                           
knitr::kable(d)

## ----echo=FALSE, results='asis'-----------------------------------------------

d = data.frame( Function=c("weights_varIdent(phi, mu)", "weights_varExp(phi, mu)", 
                           "weights_varPower(phi, mu)", "weights_varConstPower(phi, mu)", 
                           "weights_tukey_bw(phi=4.685, resid)", "weights_huber(phi=1.345, resid)"),
                Code=c("rep(1, length(mu))", "exp(phi*mu)", "abs(mu)^(phi)", "phi[1]+abs(mu)^(phi[2])",
                       "see below", "see below") )
                           
knitr::kable(d)

## ----echo=TRUE----------------------------------------------------------------

library(OptimModel)

set.seed(456)
x = rep( c(0, 2^(-4:4)), each=4 )
theta = c(0, 100, log(.5), 2)
y = hill_model(theta, x) + rnorm( length(x), sd=2 )

  ## No need to include a starting value.
fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y)
print(fit)

## Coefficients
coef(fit)

## Fitted values
fitted(fit)

## Residuals
residuals(fit)

## ----echo=TRUE----------------------------------------------------------------
predict(fit, x=2:5)

predict(fit, x=2:5, se.fit=TRUE)

## 95% confidence interval
predict(fit, x=2:5, interval="confidence")

## 90% confidence interval
predict(fit, x=2:5, interval="confidence", level=0.9)

## 95% prediciton interval for next observation (K=1)
predict(fit, x=2:5, interval="prediction", K=1)


d = data.frame(x=x, y=y)
pred = as.data.frame(predict(fit, x=2^seq(-6, 4, length=25), interval="confidence"))

library(ggplot2)
ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, y.hat)) +
  geom_ribbon( data=pred, aes(x=x, y=y.hat, ymin=lower, ymax=upper), alpha=0.3) +
  scale_x_log10() + ggtitle("OLS")

## Fixed weights.  In this example, weights are increasing step-wise with x
w = ifelse(x < 0.1, 0.5, ifelse(x < 2, 1, 2))
  ## No need to include a starting value.
fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y, wts=w)
print(fit)

pred = as.data.frame(predict(fit, x=2^seq(-6, 4, length=25), interval="confidence"))

ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, y.hat)) +
  geom_ribbon( data=pred, aes(x=x, y=y.hat, ymin=lower, ymax=upper), alpha=0.3) +
  scale_x_log10() + ggtitle("WLS")

## ----echo=TRUE----------------------------------------------------------------

fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y, fit.method="mle")
print(fit)

## ----echo=TRUE----------------------------------------------------------------

set.seed(876)

## Standard deviation = sigma*( Mean )^0.5
y2 = hill_model(theta, x) + rnorm( length(x), sd=0.7*sqrt(hill_model(theta, x)) )


fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y2, wts=weights_varPower,
                fit.method="mle", phi.fixed=FALSE)
print(fit)

## ----echo=TRUE----------------------------------------------------------------

fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y2, wts=weights_varPower,
                fit.method="irwls", phi.fixed=FALSE)
print(fit)


d = data.frame(x=x, y=y2)
pred = as.data.frame(predict(fit, x=2^seq(-6, 4, length=25), interval="confidence"))

ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, y.hat)) +
  geom_ribbon( data=pred, aes(x=x, y=y.hat, ymin=lower, ymax=upper), alpha=0.3) +
  scale_x_log10()


## ----echo=TRUE----------------------------------------------------------------

set.seed(456)
x = rep( c(0, 2^(-4:4)), each=4 )
theta = c(0, 100, log(.5), 2)
y = hill_model(theta, x) + rnorm( length(x), sd=2 )

## Create outliers
y[c(8, 11)] = y[c(8, 11)] + -30

## Ordinary least squares
fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y)


d = data.frame(x=x, y=y)
pred = as.data.frame(predict(fit, x=2^seq(-6, 4, length=25), interval="confidence"))

ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, y.hat)) +
  geom_ribbon( data=pred, aes(x=x, y=y.hat, ymin=lower, ymax=upper), alpha=0.3) +
  scale_x_log10() + ggtitle("OLS fit")

## IRWLS with Huber weights
fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y, wts=weights_huber,
                fit.method="irwls")

d = data.frame(x=x, y=y)
pred = as.data.frame(predict(fit, x=2^seq(-6, 4, length=25), interval="confidence"))

ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, y.hat)) +
  geom_ribbon( data=pred, aes(x=x, y=y.hat, ymin=lower, ymax=upper), alpha=0.3) +
  scale_x_log10() + ggtitle("IRWLS + Huber weights fit")



## ----echo=TRUE----------------------------------------------------------------

set.seed(123)
theta = c(0, 100, log(0.25), -3, -2)
y = hill_switchpoint_model(theta, x) + rnorm( length(x), mean=0, sd=5 )

## Try OLS, but original starting value does not provide convergence
fit1 = optim_fit(NULL, hill_switchpoint_model, x=x, y=y) 
print(fit1)


## Fixed grid of equally-spaced starting values
fit2 = optim_fit(NULL, hill_switchpoint_model, x=x, y=y, ntry=50, 
       start.method="fixed", until.converge=FALSE)
print(fit2)

## Random grid of starting values
fit3 = optim_fit(NULL, hill_switchpoint_model, x=x, y=y, ntry=50, 
       start.method="fixed", until.converge=FALSE)
print(fit3)

d = data.frame(x=x, y=y)
x.seq = 2^seq(-6, 4, length=25)
pred = data.frame(x=rep(x.seq, 3),
                  Fit=rep(c("1 starting value", "Fixed grid", "Random grid"), each=25),
                  mu=c(predict(fit1, x=x.seq)[,"y.hat"],
                       predict(fit2, x=x.seq)[,"y.hat"],
                       predict(fit3, x=x.seq)[,"y.hat"])
                )

ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, mu, col=Fit)) +
  scale_x_log10() + theme(legend.position="top") +
  guides(col=guide_legend(ncol=2))



## ----echo=TRUE----------------------------------------------------------------

set.seed(456)
x = rep( c(0, 2^(-4:4)), each=4 )
theta = c(0, log(.5), 2)

hill3_model = function(theta, x){ 
  ## Parameters are theta = (emin, lec50, m).  Assumes emax = 100.
  mu = theta[1] + (100 - theta[1])/(1+exp(theta[3]*log(x) - theta[3]*theta[2]))
  return(mu)
  }

y = hill3_model(theta, x) + rnorm( length(x), sd=2 )

fit = optim_fit(theta0=c(emin=-1, lec50=log(0.5), m=1), f.model=hill3_model, x=x, y=y)
print(fit)

## ----echo=TRUE----------------------------------------------------------------

set.seed(456)
x = rep( c(0, 2^(-4:4)), each=4 )
theta = c(0, 100, log(.5), 2)
y = hill_model(theta, x) + rnorm( length(x), sd=2 )

fit = optim_fit(theta0=NULL, f.model=hill_model, x=x, y=y)

myfunc = function(theta){ theta[3] + (1/theta[4])*log( 0.5*theta[2]/(0.5*theta[2] - theta[1])) }
myfunc( coef(fit) )
get_se_func(object=fit, Func=myfunc) 

## ----echo=TRUE----------------------------------------------------------------

set.seed(456)
x = rep( c(0, 2^(-4:4)), each=4 )
theta = c(0, 100, log(.5), 2)
y = hill_model(theta, x) + rnorm( length(x), sd=2 )

## Create outliers
y[c(8, 11)] = y[c(8, 11)] + -30

## ROUT
fit_r = rout_fitter(theta0=NULL, f.model=hill_model, x=x, y=y)
print(fit_r)

## Get the outliers and remove them
print(which(fit_r$outlier.adj))
index = !fit_r$outlier.adj
x.clean = x[index]
y.clean = y[index]

## Refit model with OLS, but outliers removed
fit = optim_fit(NULL, hill_model, x=x.clean, y=y.clean)


d = data.frame(x=x, y=y)
pred = as.data.frame(predict(fit, x=2^seq(-6, 4, length=25), interval="confidence"))

ggplot( d, aes(x=x, y=y) ) + geom_point() + 
  geom_line( data=pred, aes(x, y.hat)) +
  geom_ribbon( data=pred, aes(x=x, y=y.hat, ymin=lower, ymax=upper), alpha=0.3) +
  scale_x_log10() + ggtitle("OLS fit after ROUT")