\documentclass[11pt]{article}
% !Rnw weave = knitr
%% \VignetteIndexEntry{DFT benchmarks: fft vs FFT}
%% \VignetteEngine{knitr::knitr}

\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{fancyvrb}
\usepackage[pdfborder={0 0 0}]{hyperref}
\usepackage{url}
\usepackage{upquote}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{float}
\usepackage{natbib}
\usepackage{geometry}
\geometry{verbose,tmargin=3cm,bmargin=5cm,lmargin=2.5cm,rmargin=2.5cm}
\usepackage[font=sf, labelfont={sf,bf}, margin=2cm]{caption}
\usepackage{color}
\usepackage{txfonts}
%%
\input{supp_mathsyms}
%%
\newcommand{\SC}[1]{\textsc{#1}}
\newcommand{\SCY}[0]{\SC{Yes}}
\newcommand{\SCN}[0]{\SC{No}}
\newcommand{\Rcmd}[1]{\texttt{#1}}
\newcommand{\psd}[0]{\href{https://github.com/abarbour/psd/}{\color{blue}\Rcmd{psd}}}
\newcommand{\naive}[0]{na\"{\i}ve}
\newcommand{\bidx}[1]{\index{#1}{\textbf{#1}}} 
\newcommand{\idx}[1]{\index{#1}{#1}} 
%
\title{Benchmarks for Discrete Fourier Transform (DFT) calculations in R}
\author{Andrew J. Barbour}
%
\begin{document}
\maketitle
%
\begin{abstract}
%
\begin{quote}
The DFT calculator in R, \Rcmd{stats::fft}, uses
the Mixed-Radix algorithm of \citet{singleton1969}.
In this vignette we show how this calculator compares
to \Rcmd{FFT} in the \Rcmd{fftw} package \citep{fftw}, which uses the
FFTW algorithm of \citet{frigo2005}.
For univariate DFT computations,
the two methods are nearly equivalent, with two exceptions that
are not mutually exclusive: 
(1) the series to be transformed is very long (on the order of $10^6$ terms), and 
\emph{especially} (2) the series length is not highly composite.
In both cases \Rcmd{FFT} outperforms \Rcmd{fft}.
\end{quote}
%
\begin{quote}
\textit{\textbf{Update:} I have decided that (for now) \psd{} will not
use \Rcmd{fftw::FFT}, despite its advantage over \Rcmd{stats::fft} for 
large-$n$ `NHC' series, simply because
the binaries on CRAN have not been reliably built for some time now.
If they do become reliable, I may consider using \Rcmd{fftw::FFT} instead.
}
\end{quote}
%
\end{abstract}

\tableofcontents
\clearpage

\section{Benchmarking function}
We use both functions with their default settings, and ask them
to transform the same univariate random series.
Benchmark timings come from the \Rcmd{rbenchmark}
package, and the versatile \Rcmd{plyr} and
\Rcmd{reshape2} packages are used to manipulate the results
for this presentation; \Rcmd{ggplot2} is used for plotting. 
First we load the libraries needed:
<<eval=FALSE, echo=TRUE>>=
rm(list=ls())
library(fftw)
library(rbenchmark)
library(plyr)
library(reshape2)
library(ggplot2)
@
and create a benchmark function:
<<eval=FALSE, echo=TRUE, label="Benchmarkfunction">>=
reps <- 10
dftbm <- function(nd, repls=reps){
	# generate a reproducible random series of length 'nd'
	set.seed(1234)
	x <- rnorm(nd, mean=0, sd=1)
	# time both DFT calculators on the same series
	bmd <- benchmark(replications=repls, fftw::FFT(x), stats::fft(x))
	bmd$num_dat <- nd
	# 'relative' can be NA when a timing is effectively zero
	bmd$relative[is.na(bmd$relative)] <- 1
	return(bmd)
}
@
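As a quick (unevaluated) check, the function can be applied to a single
series length; the length used here is purely illustrative:
<<eval=FALSE, echo=TRUE, label="Benchmarkfunctiondemo">>=
# benchmark a single, highly composite length (illustrative only)
dftbm(1024)
@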

\section{Highly composite (HC) series}
It is well known that DFT algorithms are most efficient
for ``Highly Composite Numbers''\footnote{
This is the reason for the \Rcmd{stats::nextn} function.
}, specifically lengths whose only prime factors are small (here 2, 3, and 5).
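As a quick illustration, \Rcmd{stats::nextn} returns the smallest such
length at or above a given value (the lengths below are arbitrary):
<<eval=TRUE, echo=TRUE, label="Nextndemo">>=
# smallest length >= 50001 whose only factors are 2, 3, and 5 (50625 = 3^4 * 5^4)
stats::nextn(50001)
# restrict to powers of two instead
stats::nextn(50001, factors=2)
@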

So, we create a vector of the series lengths we wish to benchmark:
<<eval=TRUE, echo=TRUE, label="Numtermstobench">>=
(nterms.even <- round(2**seq.int(from=4,to=20,by=1)))
@
and use it with \Rcmd{lapply} and the benchmark function
previously defined.
These data are further distilled into a usable format
with \Rcmd{ldply}:
<<eval=FALSE, echo=TRUE, label="Uselapplytodobenching">>=
bench.even <- function(){
  # use global assignment so the results are available to the plotting code below
  benchdat.e <<- plyr::ldply(lapply(X=nterms.even, FUN=dftbm))
}
bench.even()
@

\section{Non highly composite (NHC) series}
DFT algorithms can have drastically reduced performance
if the series length is not highly composite (NHC).
We now test NHC series by adding one to the HC series-length
vector (also restricting the total length for sanity's sake):
<<eval=FALSE, echo=TRUE, label="Setupnonhighlycompositelengths">>=
nterms.odd <- nterms.even + 1
nterms.odd <- nterms.odd[nterms.odd < 50e3] # painfully long otherwise!
@
and performing the full set of benchmarks again:
<<eval=FALSE, echo=TRUE, label="Dobenching">>=
bench.odd <- function(){
  # again, assign globally so the plotting code can find the results
  benchdat.o <<- plyr::ldply(lapply(X=nterms.odd, FUN=dftbm))
}
bench.odd() # FAIR WARNING: this can take a while!!
@

\section{Visualization}
To plot the results, we first
perform some map/reduce operations on the data
\citep{wickham2010}. We show faceted \Rcmd{ggplot2}-based
figures with row-wise summary information\footnote{
Based on this post:\\
{\small
\url{https://geokook.wordpress.com/2012/12/29/row-wise-summary-curves-in-faceted-ggplot2-figures/}
}
} so the benchmark data can be compared easily.
The quantities we show
are \Rcmd{user.self}, \Rcmd{sys.self}, \Rcmd{elapsed}, and \Rcmd{relative}.
The results are shown
in Figure \ref{fig:results}.

<<eval=FALSE, echo=TRUE, label="MapReduceSummarize">>=
pltbench <- function(lentyp=c("even","odd")){
  benchdat <- switch(match.arg(lentyp), even=benchdat.e, odd=benchdat.o)
  stopifnot(exists("benchdat"))
  tests <- unique(benchdat$test)
  ## subset only information we care about
  allbench.df.drp <- subset(benchdat, 
        select=c(test, num_dat, user.self, sys.self, elapsed, relative))
  ## reduce data.frame with melt
  allbench.df.mlt <- reshape2::melt(allbench.df.drp, 
                                    id.vars=c("test","num_dat"))
  ## calculate the summary information to be plotted:
  tmpd <- plyr::ddply(allbench.df.mlt, 
                      .(variable,  num_dat),
                      summarise, 
                      summary="mean",  # a normal-approximation mean, not a median
                      value=ggplot2::mean_cl_normal(value)[1,1])
  ## create copies for each test and map to data.frame
  allmeds <<- plyr::ldply(lapply(X=tests, 
                                 FUN=function(x,df=tmpd){
                                       df$test <- x; return(df)
                                     }))
  ## plot the benchmark data
  # 1/sqrt(n) standard errors [assumes N(0,1)]
  g <- ggplot(data=allbench.df.mlt,
              aes(x=log10(num_dat),
                  y=log2(value),
                  ymin=log2(value*(1-1/sqrt(reps))),
                  ymax=log2(value*(1+1/sqrt(reps))),
                  colour=test,
                  group=test)) +
       scale_colour_discrete(guide="none") + 
       theme_bw()+
       ggtitle(sprintf("DFT benchmarks of %s length series",toupper(lentyp))) +
       ylim(c(-11,11))+
       xlim(c(0.5,6.5))
       
  ## add previous summary curves if exist
  if (exists("allmeds.prev")){
     g <- g + geom_path(size=1.5, colour="dark grey", data=allmeds.prev, 
                        aes(group=test))
                        }
  ## create a facetted version
  g2 <- g + facet_grid(variable~test) #, scales="free_y")
  ## add the summary data as a line
  g3 <- g2 + geom_path(colour="black", data=allmeds, aes(group=test))
  ## and finally the data
  print(g4 <<- g3 + geom_pointrange())
}
@

<<eval=FALSE, echo=TRUE,  label="Plotnonhighlycompositeresults">>=
pltbench("even")
allmeds.prev <- allmeds
pltbench("odd")
@

\begin{figure}[htb!]
\begin{center}
\includegraphics[width=0.5\textwidth]{fftw_bench_even}%
\includegraphics[width=0.5\textwidth]{fftw_bench_odd}
\caption{ DFT benchmark results for HC series lengths (left),
and NHC series lengths (right) as a function of logarithmic
series length.  In each figure, the left
facet-column is for results from \Rcmd{fftw::FFT} and the right
column is for \Rcmd{stats::fft}.
We also show the summary curves from the HC results
in the NHC frames (thick grey curve)
to highlight the drastic degradation in performance.}
\label{fig:results}
\end{center}
\end{figure}

\section{Conclusion}

Figure \ref{fig:results} compares the DFT
computation times for HC- and NHC-length series.
For univariate DFT computations,
the two methods are nearly equivalent, with two exceptions that
are not mutually exclusive: 
(A) the series to be transformed is very long, and 
especially (B) the series length is not highly composite.
In both cases \Rcmd{FFT} outperforms \Rcmd{fft}.
In case (B), both methods show
drastically increased computation times; hence, the series should be
padded with zeros to a highly composite length so that the length does not
adversely affect the efficiency of the DFT calculator.
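
As a minimal sketch of that padding step (the series and its length below
are purely illustrative), one can pad to the next highly composite length
before transforming:
<<eval=FALSE, echo=TRUE, label="Zeropaddemo">>=
x <- rnorm(50001)                      # an NHC length
nfft <- stats::nextn(length(x))        # next length with factors 2, 3, and 5 only
xpad <- c(x, rep(0, nfft - length(x))) # zero pad up to nfft
Xdft <- stats::fft(xpad)               # DFT of the padded series
@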

%\pagebreak

\section*{Session Info}
<<eval=TRUE, echo=TRUE, label=SI>>=
utils::sessionInfo()
@

%% bib and index
\bibliographystyle{apalike}
\bibliography{REFS}

\end{document}