\documentclass{article}
\usepackage{amstext}
\usepackage{amsfonts}
\usepackage{hyperref}
\usepackage[round]{natbib}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage{rotating}
\usepackage{authblk}
\usepackage[left=25mm, right=25mm, top=20mm, bottom=20mm]{geometry}
%\usepackage[nolists]{endfloat}

%\VignetteEngine{knitr::knitr}
%\VignetteDepends{FDboost, fda, fields, maps, mapdata}
%\VignetteIndexEntry{FDboost FLAM viscosity}

\newcommand{\Rpackage}[1]{{\normalfont\fontseries{b}\selectfont #1}}
\newcommand{\Robject}[1]{\texttt{#1}}
\newcommand{\Rclass}[1]{\textit{#1}}
\newcommand{\Rcmd}[1]{\texttt{#1}}
\newcommand{\Roperator}[1]{\texttt{#1}}
\newcommand{\Rarg}[1]{\texttt{#1}}
\newcommand{\Rlevel}[1]{\texttt{#1}}

\newcommand{\RR}{\textsf{R}}
\renewcommand{\S}{\textsf{S}}
\newcommand{\df}{\mbox{df}}

\RequirePackage[T1]{fontenc}
\RequirePackage{graphicx,ae,fancyvrb}
\IfFileExists{upquote.sty}{\RequirePackage{upquote}}{}
\usepackage{relsize}

\renewcommand{\baselinestretch}{1}
\setlength\parindent{0pt}


\hypersetup{%
  pdftitle = {FLAM canada},
  pdfsubject = {package vignette},
  pdfauthor = {Sarah Brockhaus},
%% change colorlinks to false for pretty printing
  colorlinks = {true},
  linkcolor = {blue},
  citecolor = {blue},
  urlcolor = {red},
  hyperindex = {true},
  linktocpage = {true},
}

\begin{document}

\setkeys{Gin}{width=\textwidth}

\title{Canadian climate: function-on-function regression}
\author{Sarah Brockhaus 
\thanks{E-mail: sarah.brockhaus@stat.uni-muenchen.de}}
\affil{\textit{Institut f\"ur Statistik, \\
Ludwig-Maximilians-Universit\"at M\"unchen, \\ 
Ludwigstra{\ss}e 33, D-80539 M\"unchen, Germany.}}
\date{}
\maketitle

The results of this vignette together with more explanations can be found in Brockhaus et al. (2015). 

<<ops, echo=FALSE>>=
knitr::opts_chunk$set(comment=NA, warning=FALSE, message=FALSE)
@

\section{Descriptive analysis}

Load FDboost package and write useful functions for plotting.
<<pkg-attach, echo = FALSE>>=
library(FDboost)

## function based on funplot() to plot values on logscale with nice labels
## for viscosity data
funplotLogscale <- function(x, y, ylim = NULL, col=1, add=FALSE, ...){
  
  dots <- list(...)
  if(!add){
    if(is.null(ylim)) ylim <- range(y, na.rm=TRUE)
    
    plot(x, rep(1, length(x)), col = "white", ylim = ylim, yaxt = "n",
         ylab = "", xlab = "",...)
    
    mtext("time [s]", 1, line=2)#, cex=1.5)
    mtext("viscosity [mPas]", 2, line=2)#, cex=1.5)
    
    abline(h = log(1*10^(0:9)), col="gray")
    axis(2, at = log(1*10^(0:9)), labels=1*10^(0:9))
    
    #axis(4, at=log(0.001*10^(0:9)), labels=FALSE)
    
    axis(2, at=log(2*10^(0:9)), labels=FALSE)
    axis(2, at=log(3*10^(0:9)), labels=FALSE)
    axis(2, at=log(4*10^(0:9)), labels=FALSE)
    axis(2, at=log(5*10^(0:9)), labels=FALSE)
    axis(2, at=log(6*10^(0:9)), labels=FALSE)
    axis(2, at=log(7*10^(0:9)), labels=FALSE)
    axis(2, at=log(8*10^(0:9)), labels=FALSE)
    axis(2, at=log(9*10^(0:9)), labels=FALSE)
    
    #axis(4, at=seq(0, 30, by=5))
    if(diff(ylim) > 5) axis(4, at=seq(-20, 20, by=2))
    if(diff(ylim) > 2 & diff(ylim) < 5) axis(4, at=seq(-10, 10, by=1))
    if(diff(ylim) > 1 & diff(ylim) < 2) axis(4, at=seq(-10, 10, by=0.5))  
    if(diff(ylim) < 1) axis(4, at=seq(-10, 10, by=0.25))
  }
  
  funplot(x, y, add=TRUE, col=col, type="l", ...) 
  
}

# function to color-code according to a factor
getCol2 <- function(x, cols = rainbow(18)[1:length(table(x))]){
  ret <- c()
  for(i in 1:length(cols)){
    ret[x==names(table(x))[i]] <- cols[i] 
  }
  return(ret)
}
@

Load data and choose the time-interval.
<<load-data, echo = TRUE>>=
# load("viscosity.RData")
data(viscosity)
str(viscosity)

## set time-interval that should be modeled
interval <- "509"

## model time until "interval"
end <- which(viscosity$timeAll==as.numeric(interval))
viscosity$vis <- log(viscosity$visAll[,1:end])
viscosity$time <- viscosity$timeAll[1:end]

## set up interactions by hand
vars <- c("T_C", "T_A", "T_B", "rspeed", "mflow")
for(v in 1:length(vars)){
  for(w in v:length(vars))
  viscosity[[paste(vars[v], vars[w], sep="_")]] <- factor(
    (viscosity[[vars[v]]]:viscosity[[vars[w]]]=="high:high")*1)
}

#str(viscosity)
names(viscosity)
@

Plot the data

<<plot-data, echo=TRUE, fig.width=4, fig.height=3, fig.cap='Viscostiy over time with temperature of tools ($T_C$) and temerature of resin ($T_A$) color coded.', fig.align='center'>>=
par(mfrow=c(1,1), mar=c(3, 3, 1, 2))#, cex=1.5)
mycol <- gray(seq(0, 0.8, l=4), alpha=0.8)[c(1,3,2,4)]
int_T_CA <- with(viscosity, paste(T_C,"-", T_A, sep=""))
with(viscosity, funplotLogscale(time, vis, 
                                col=getCol2(int_T_CA, cols=mycol[4:1])))
legend("bottomright", fill=mycol, 
       legend=c("T_C low, T_A low", "T_C low, T_A high", 
                "T_C high, T_A low", "T_C high, T_A high"), cex = 0.8)
@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newpage

\section{Model with all main effects and interactions of first order}

Fit model with all main effects and interactions. 

<<complete-model, echo = TRUE, eval=FALSE>>=
set.seed(1911)
modAll <- FDboost(vis ~ 1 
                  + bols(T_C) # main effects
                  + bols(T_A)
                  + bols(T_B)
                  + bols(rspeed)
                  + bols(mflow)
                  + bols(T_C_T_A) # interactions T_WZ
                  + bols(T_C_T_B)
                  + bols(T_C_rspeed)
                  + bols(T_C_mflow)
                  + bols(T_A_T_B) # interactions T_A
                  + bols(T_A_rspeed)
                  + bols(T_A_mflow) 
                  + bols(T_B_rspeed) # interactions T_B
                  + bols(T_B_mflow)
                  + bols(rspeed_mflow), # interactions rspeed
                  timeformula=~bbs(time, lambda=100), 
                  numInt="Riemann", family=QuantReg(), 
                  offset=NULL, offset_control = o_control(k_min = 10),
                  data=viscosity, check0=FALSE, 
                  control=boost_control(mstop = 100, nu = 0.2))
@

Get optimal stopping iteration using bootstrap over curves (better use multiple cores).
<<cv-complete, echo = TRUE, eval=FALSE>>=
set.seed(1911)
folds <- cv(weights=rep(1, modAll$ydim[1]), type="bootstrap", B=10)
cvmAll <- suppressWarnings(validateFDboost(modAll, folds = folds, 
                                  getCoefCV=FALSE,  
                                  grid=seq(10, 500, by=10), mc.cores = 1))
mstop(cvmAll) # 180
# modAll <- modAll[mstop(cvmAll)]
# summary(modAll)
# cvmAll
@

% \begin{figure}
% \begin{center}
% <<cv-plot-complete, echo = TRUE, fig = TRUE, width=10, heigth=5, eval=FALSE>>=
%  par(mfrow=c(1,2))
%  plot(cvmAll)
%@
% \end{center}
% \caption{Optimal stopping iteration for model with all main effects and interactions of first order.}
% \end{figure}


Do model selection using stability selection (better use multiple cores).
<<stabsel1, echo = TRUE, eval=FALSE>>=
set.seed(1911)
folds <- cvMa(ydim=modAll$ydim, weights=model.weights(modAll), 
              type = "subsampling", B = 50)

stabsel_parameters(q=5, PFER=2, p=16, sampling.type = "SS")
sel1 <- stabsel(modAll, q=5, PFER=2, folds=folds, grid=1:100, 
                sampling.type="SS", mc.cores = 1)
sel1
# selects effects T_C, T_A, T_C_T_A
@
The effects $T_A$, $T_C$ and their interaction are selected into the model. 


% <<stabsel2, echo = TRUE>>=
% set.seed(1911)
% folds <- cvMa(ydim=modAll$ydim, weights=model.weights(modAll), 
%               type = "subsampling", B = 50)
% 
% stabsel_parameters(q=3, PFER=1, p=16, sampling.type = "SS")
% sel2 <- stabsel(modAll, q=3, PFER=1, folds=folds, grid=1:100, 
%                 sampling.type="SS", mc.cores=10)
% sel2
% # selects effects of T_C, T_A
% @




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newpage

\section{Model with selected effects}

Estimate the model containig only the selected effects $T_C$, $T_A$, and their interaction.
<<selected-model, echo = TRUE>>=
set.seed(1911)
mod1 <- FDboost(vis ~ 1 + bols(T_C) + bols(T_A) + bols(T_C_T_A),
                timeformula = ~bbs(time, lambda = 100),  
                numInt = "Riemann", family = QuantReg(), check0 = FALSE, 
                offset = NULL, offset_control = o_control(k_min = 10),
                data = viscosity, control = boost_control(mstop = 200, nu = 0.2))
@

<<cv-selected-model0, echo = TRUE>>=
mod1 <- mod1[430]
@

Find the optimal stopping iteration (better use multiple cores).
<<cv-selected-model, echo = TRUE, eval=FALSE>>=
set.seed(1911)
folds <- cv(weights = rep(1, mod1$ydim[1]), type = "bootstrap", B = 10)
cvm1 <- validateFDboost(mod1, folds = folds, getCoefCV = FALSE, 
                        grid = seq(10, 500, by = 10), mc.cores = 1)
mstop(cvm1) # 430
mod1 <- mod1[mstop(cvm1)]
# summary(mod1)
@

% \begin{figure}
% \begin{center}
% <<cv-plot-selected-model, echo = TRUE, fig = TRUE, width=10, heigth=5, eval=FALSE>>=
%  par(mfrow=c(1,2))
%   plot(cvm1) 
% @
% \end{center}
% \caption{Optimal stopping iteration for model with selected effects.}
% \end{figure}


Center all coefficient functions at each timepoint, yielding the following model: 
\[\mbox{median} \{  \log( \mbox{vis}_i(t)) | x_i  \}  = \beta_0(t) + T_{Ai} \beta_A(t)  + T_{Ci} \beta_C(t) +  T_{ACi} \beta_{AC}(t), \]
where $\mbox{vis}_i(t)$ is the viscosity of observation $i$ at time $t$, $T_{Ai}$ and $T_{Ci}$ are the temperatures of resin and of tools, respectively, each coded as -1 for the lower and 1 for the higher temperature. The interaction $T_{ACi}$ is 1 if both temperatures are in the higher category and -1 otherwise.
<<coefs-selected-model, echo = TRUE>>=
# set up dataframe containing systematically all variable combinations
newdata <- list(T_C=factor(c(1,1,2,2), levels=1:2, labels=c("low","high")) ,  
             T_A=factor(c(1, 2, 1, 2), levels=1:2, labels=c("low","high")), 
             T_C_T_A=factor(c(1, 1, 1, 2)), time=mod1$yind)
intercept <- 0

## effect of T_C
pred2 <- predict(mod1, which=2, newdata=newdata)
intercept <- intercept + colMeans(pred2)
pred2 <- t(t(pred2)-intercept)

## effect of T_A
pred3 <- predict(mod1, which=3, newdata=newdata)
intercept <- intercept + colMeans(pred3)
pred3 <- t(t(pred3)-colMeans(pred3))

## interaction effect T_C_T_A
pred4 <- predict(mod1, which=4, newdata=newdata)
intercept <- intercept + colMeans(pred4[3:4,])
pred4 <- t(t(pred4)-colMeans(pred4[3:4,]))

# offset+intercept 
smoothIntercept <- mod1$predictOffset(newdata$time) + intercept 
@

Plot the centered coefficient functions.
<<plot-selected-model, echo = 11:16, fig.cap='Viscosity over time and estimated coefficient functions. On the left hand side the viscosity measures are plotted over time with temperature of tools ($T_C$) and temperature of resin ($T_A$) color-coded. On the right hand side the estimated coefficient functions are plotted.', fig.height=4, fig.width=10>>=
par(mfrow=c(1,2), mar=c(3, 3, 1, 2))#, cex=1.5)

mycol <- gray(seq(0, 0.8, l=4), alpha=0.8)[c(1,3,2,4)]
int_T_CA <- with(viscosity, paste(T_C,"-", T_A, sep=""))
with(viscosity, funplotLogscale(time, vis, 
                                col=getCol2(int_T_CA, cols=mycol[4:1])))
legend("bottomright", fill=mycol, 
       legend=c("T_C low, T_A low", "T_C low, T_A high", 
                "T_C high, T_A low", "T_C high, T_A high"))

mycol <- gray(seq(0, 0.5, l=3), alpha=0.8)
funplotLogscale(mod1$yind, pred2[3:4,], col=mycol[1], ylim=c(-0.5,6), lty=2, lwd=2)
lines(mod1$yind, pred3[2,], col=mycol[2], lty=3, lwd=2)
lines(mod1$yind, pred4[4,], col=mycol[3], lty=4, lwd=2)
legend("topright", lty=2:4, lwd=2, col=mycol,
       legend=c("effect T_C high", "effect T_A high", "effect T_C, T_A high"))

@

\section*{References}
\begin{itemize}
\item[] Brockhaus S, Scheipl, F., Hothor, T., and Greven, S. (2015), The functional linear array model, 
        \textit{Statistical Modelling}, 15(3), 279--300. 
\end{itemize}


\end{document}