%\VignetteIndexEntry{RationalExp vignette}
%\VignetteDepends{RationalExp,knitr,rmarkdown}
%\VignettePackage{RationalExp}
%\VignetteEngine{knitr::knitr}
%\VignetteEncoding{UTF-8}
\documentclass[article, nojss]{jss}

%% -- LaTeX packages and custom commands ---------------------------------------

%% recommended packages
\usepackage{thumbpdf,lmodern,amsmath,amssymb}
%% new custom commands
\newcommand{\class}[1]{`\code{#1}'}
\newcommand{\fct}[1]{\code{#1()}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\R}{\mathbb{R}}
\def\indic{{\rm {\large 1}\hspace{-2.3pt}{\large l}}}


%% -- Article metainformation (author, title, ...) -----------------------------

\author{Xavier D'Haultf\oe{}uille\\CREST
   \And Christophe Gaillac \\CREST and TSE \And Arnaud Maurel \\ Duke University and NBER}
\Plainauthor{Xavier D'Haultf\oe{}uille, Christophe Gaillac, Arnaud Maurel }

\title{RationalExp: Tests of and Deviations from Rational Expectations}
\Plaintitle{Tests of and Deviations from Rational Expectations}
\Shorttitle{Tests of and Deviations from Rational Expectations}

%% - \Abstract{} almost as usual
\Abstract{
This vignette presents the \proglang{R} package \pkg{RationalExp} associated to \cite{DGM} (DGM hereafter). This package implements a test of the rational expectations hypothesis  based on the marginal distributions of realizations and subjective beliefs. This test can be used in cases where realizations and subjective beliefs are observed in two different datasets that cannot be matched, or when they are observed in the same dataset. The test can be implemented with covariates and survey weights. The package also computes the estimator of the  minimal deviations from rational expectations than can be rationalized by the data. \proglang{R}  and the package \pkg{RationalExp} are open-source software projects and can be freely downloaded from CRAN: \texttt{http://cran.r-project.org}.
}

\Keywords{Rational expectations, test, minimal deviations, \proglang{R}}
\Plainkeywords{Rational expectations, test, minimal deviations, R}

\Address{
  Xavier D'Haultf\oe{}uille \\
  CREST\\
  5, avenue Henry Le Chatelier\\
  91 120 Palaiseau, France \\
  E-mail: \email{xavier.dhaultfoeuille@ensae.fr}\\[2mm]

  Christophe Gaillac\\
  CREST \\
  5, avenue Henry Le Chatelier\\
  91 120 Palaiseau, France \\
  and TSE\\
  E-mail: \email{christophe.gaillac@ensae.fr}\\[2mm]

Arnaud Maurel \\
Department of Economics \\
Duke University \\
213 Social Sciences \\
Durham, NC 27708-0097 \\
and NBER\\
 E-mail: \email{arnaud.maurel@duke.edu}
}




\begin{document}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{How to get started}

\proglang{R}  is an open source software project and can be freely downloaded from the CRAN website. The \proglang{R} package \pkg{RationalExp} can be downloaded from \texttt{cran.r-project.org}. To install the \pkg{RationalExp} package from \proglang{R} use

\begin{Code}
install.packages("RationalExp")
\end{Code}

\noindent
Provided that your machine has a proper internet connection and you have write permission in the appropriate system directories, the installation of the package should proceed automatically.
Once the \pkg{RationalExp} package is installed, it can be loaded to the current \proglang{R}  session by the command
\begin{Code}
library(RationalExp)
\end{Code}

\medskip
Online help is available in two ways: either \proglang{help(package="RationalExp")} or \proglang{?test}. The first gives an overview over the available commands in the package. The second gives detailed information about a specific command. A valuable feature of \proglang{R} help files is that the examples used to illustrate commands are executable, so they can be pasted into an \proglang{R} session or run as a group with a command like \proglang{example(test)}.

\medskip
The \proglang{R} package \pkg{RationalExp} can be also downloaded from Github \url{https://github.com/cgaillac/RationalExp}. To
install the \pkg{RationalExp} package from Github, the \proglang{devtools} library is required. Then, use the command

\begin{Code}
library("devtools")
install_github('RationalExp','cgaillac')
\end{Code}


\section{Theory} % (fold)
\label{sec:theory}

\subsection{Testing rational expectations} % (fold)
\label{sub:statistical_principles}

We first explain the test procedure proposed in DGM for the test with covariates:
\begin{align*}
\text{H}_{0X}: & \text{ there exists a pair of random variables } (Y',\psi') \text{ and a sigma-algebra } \mathcal{I}' \\
& \text{ such that } \sigma(\psi', X) \subset \mathcal{I}', \  Y'|X\sim Y|X , \, \psi'|X \sim \psi|X \text{ and } \mathbb{E}\left[ Y'|\mathcal{I}'\right]=\psi'.
\end{align*}

To simplify notation,  we use as in DGM a potential outcome framework to describe our data combination problem. Specifically, instead of observing $(Y,\psi)$, we suppose to observe only the covariates $X$,  $\widetilde{Y} = DY + (1-D)\psi$ and $D$, where $D=1$ (resp. $D=0$) if the unit belongs to the dataset of $Y$ (resp. $\psi$). We assume that the two samples are drawn from the same population, which amounts to supposing that  $D\perp\!\!\!\perp (X, Y,\psi)$. In order to build  DGM's test,  DGM use the following characterization of $\text{H}_{0X}$:
$$  \mathbb{E}\left[ \left(y-Y\right)^+  - \left(y-\psi\right)^+\middle| X \right] \geq 0 \quad \forall y\in \mathbb{R} \text{ and}  \quad \mathbb{E}\left[  Y - \psi\middle| X\right] =0,$$
where $u^+ =\max(0,u)$. Equivalently but written with $\widetilde{Y}$ only,
$$\mathbb{E}\left[ W \left(y-\widetilde{Y}\right)^+\middle|X\right]\geq 0  \quad \forall y\in \mathbb{R} \text{ and} \quad \mathbb{E}\left[W\widetilde{Y}\middle|X\right]= 0,$$
where $W=D/\mathbb{E}(D)-(1-D)/\mathbb{E}(1-D)$. This formulation of the null hypothesis allows one to apply the instrumental functions approach of \cite{andrews2016inference} (AS hereafter), who consider the issue of testing many conditional moment inequalities and equalities. The initial step is to transform the conditional moments into the following unconditional moments conditions:
$$  \E\left[W\left(y-\widetilde{Y}\right)^+ h(X)\right] \geq 0,  \quad \E\left[\left(  Y - \psi\right)h(X)\right] =0 .$$
for all $y\in \R$ and $h\geq 0$ belonging to a suitable class of functions.

\medskip
We suppose to observe here a sample $(D_i, X_i, \widetilde{Y}_i)_{i=1...n}$ of $n$ i.i.d. copies of $(D,X,\widetilde{Y})$. For notational convenience, we let $\widetilde{X}_i$ denote the nontransformed vector of covariates and redefine $X_i$ as the transformed vector in the following way:
$$X_i=\Phi_0\left( \widehat{\Sigma}_{ \widetilde{X},n}^{-1/2}\left(  \widetilde{X}_{i} - \overline{ \widetilde{X}}_{i}\right)\right),$$
where, for any $x=(x_1,\dots,x_{d_X})$, we let $\Phi_0(x)=\left( \Phi(x_1) , \dots, \Phi\left(x_{d_X} \right)\right)^{\top}$. Here $ \Phi $ denotes the standard normal cdf,  $  \widehat{\Sigma}_{\widetilde{X},n} $ is the sample covariance matrix of $ \left(\widetilde{X}_i\right)_{i=1...n}$ and  $ \overline{\widetilde{X}}_n $ its sample mean.

\medskip
Now that $X_i \in [0,1]^{d_X}$, we consider $h$ functions that are indicators of belonging to specific hypercubes within $[0,1]^{d_X}$. Namely, we consider the class of functions $\mathcal{H}_r= \left\{h_{a,r}, \; a\in A_r\right\}$, with
$A_r= \left\{1,2,\dots, 2r\right\}^{d_X}$ ($r\geq 1$), $h_{a,r}(x) = \indic\left\{x \in  C_{a,r}\right\}$ and, for any $a=(a_1,...,a_{d_X})^{\top}\in A_r$,
$$C_{a,r} = \prod_{u=1}^{d_X} \left(\frac{a_u -1 }{2r}, \frac{a_u }{2r}\right].$$

\medskip
To define the test statistic $T$, we need to introduce additional notation. First, we define, for any given $y$,
	\begin{eqnarray}\label{eq:mn}
 m\left(D_i,\widetilde{Y}_i, X_i, h,y\right) =\left( \begin{array}{c}  m_{1}\left(D_i,\widetilde{Y}_i, X_i, h,y\right) \\  m_{2}\left(D_i,\widetilde{Y}_i, X_i, h,y\right)
 	\end{array} \right)
	= \left( \begin{array}{c} w_i\left(y - \widetilde{Y}_i\right)^+h\left(X_i\right) \\
	 w_i\widetilde{Y}_i h\left(X_i\right)
	\end{array} \right),
\end{eqnarray}
where $w_i=nD_i/\sum_{j=1}^n D_j - n(1-D_i)/\sum_{j=1}^n( 1-D_j)$. Let $\overline{m}_n (h,y)= \sum_{i=1}^{n} m\left(D_i,\widetilde{Y}_i, X_i, h,y\right)/n $ and define similarly $\overline{m}_{n,j}$ for $j=1,2$. For any function $h$ and any $y\in\R$, let us also define, for some $\epsilon>0$,
 $$ \overline{\Sigma}_n(h,y) = \widehat{\Sigma}_n(h,y) + \epsilon \mathrm{Diag}\left(   \widehat{\mathbb{V}}\left(\widetilde{Y} \right)  , \widehat{\mathbb{V}}\left(\widetilde{Y} \right)     \right),$$
  where $\widehat{\Sigma}_n(h,y) $ is the sample covariance matrix of $ \sqrt{n}\overline{m}_n\left( h,y\right) $ and
  $\widehat{\mathbb{V}}\left(\widetilde{Y} \right)$ is the empirical variance of $\widetilde{Y}$. We then denote by $\overline{\Sigma}_{n,jj}(h,y) (j=1,2)$ the $j$-th diagonal term of $\overline{\Sigma}_{n}(h,y)$.

  \medskip
  Then the (Cram\'{e}r-von-Mises) test statistic $T$ is defined by:
$$ T= \sup_{y \in \widehat{\mathcal{Y}}} \sum_{r=1}^{r_n}\frac{(2r)^{-d_X}}{\left(r^2 + 100\right) } \sum_{a\in A_r} \left( \left(1-p\right)\left( - \frac{\sqrt{n} \overline{m}_{n,1}\left(h_{a,r},y\right)}{ \overline{\Sigma}_{n,11}(h_{a,r},y)^{1/2} } \right)^{+2} + p \left(  \frac{\sqrt{n} \overline{m}_{n,2}\left(h_{a,r},y\right)}{\overline{\Sigma}_{n,22}(h_{a,r},y)^{1/2}}  \right)^2\right),$$
where $ \widehat{\mathcal{Y}} =\left[\min_{i=1,\dots,n}\widetilde{Y}_i, \max_{i=1,\dots,n}\widetilde{Y}_i \right] $, $p$ is a parameter that weights the moments inequalities versus equalities and $(r_n)_{n\in\N}$ is a deterministic sequence tending to infinity.

\medskip
To test for rational expectations in the absence of covariates, we simply restrict ourselves to the constant function $h(X)=1$, and the test statistic is simply
$$ T= \sup_{y \in \widehat{\mathcal{Y}}} \left(1-p\right)\left( - \frac{\sqrt{n} \overline{m}_{n,1}(y)}{\overline{\Sigma}_{n,11}(y)^{1/2} } \right)^{+2} + p \left(  \frac{\sqrt{n} \overline{m}_{n,2}(y)}{\overline{\Sigma}_{n,22}(y)^{1/2}}  \right)^2,$$
where  $\overline{m}_{n,j}(y)$ and $\overline{\Sigma}_{n,jj}(y)$ $(j=1,2)$ are defined as above but with $h(x)=1$.

\medskip
Whether or not covariates are included, the resulting test is of the form $\varphi_{n,\alpha} = \indic\left\{T> c^*_{n,\alpha}\right\}$ where the estimated critical value $c^*_{n,\alpha}$ is obtained by bootstrap using as in AS the Generalized Moment Selection method. Specifically, we follow these three steps:
\begin{enumerate}
\item Compute the function $ \overline{\varphi}_n\left(y,h\right) = \left(\overline{\varphi}_{n,1}\left(y,h\right), 0  \right)^{\top} $ for $ (y,h) $ in $ \widehat{\mathcal{Y}}\times\cup_{r=1}^{r_n} \mathcal{H}_{r} $, with
\[  \overline{\varphi}_{n,1}\left(y,h\right) = \overline{\Sigma}_{n,11}^{1/2} B_n\indic\left\{   \frac{n^{1/2}}{\kappa_n} \overline{\Sigma}_{n,11}^{-1/2}\overline{m}_{n,1}(y,h)  >1  \right\} , \]
and where $ B_n =  \left(b_0\ln(n)/\ln(\ln(n))\right)^{1/2} $, $ b_0 >0 $, $ \kappa_n =(\kappa\ln(n))^{1/2}$, and $\kappa>0$. To compute $\overline{\Sigma}_{n,11}$, we fix $\epsilon$ to $0.05$, as in AS.
\item Let $\left(D_i^*,\widetilde{Y}_i^*, X_i^*\right)_{i=1,...,n}$ denote a bootstrap sample, i.e., an i.i.d. sample from the empirical cdf of $\left(D,\widetilde{Y}, X\right)$, and compute from this sample $ \overline{m}_n^* $ and $ \overline{\Sigma}_{n}^* $. Then compute $T^*$ like $T$, replacing $ \overline{\Sigma}_{n}\left(y,h_{a,r}\right)$  and $ \sqrt{n}\overline{m}_n\left(y,h_{a,r}\right) $ by  $ \overline{\Sigma}_{n}^*\left(y,h_{a,r}\right)$ and $$\sqrt{n}\left( \overline{m}_n^* - \overline{m}_n \right)\left(y,h_{a,r}\right) +  \overline{\varphi}_n\left(y,h_{a,r}\right).$$
\item The threshold $c^*_{n,\alpha}$ is the (conditional) quantile of order $1-\alpha + \eta$ of $T^* +\eta$ for some $ \eta >0 $. Following AS, we set $\eta$ to $10^{-6} $.
\end{enumerate}

% subsection statistical_principles (end)

\subsection{Minimal Deviations from Rational Expectations} % (fold)
\label{sub:minimal_deviations_from_rational_expectations}

In DGM, we also introduce the minimal deviations from rational expectations, as the unique function $g^*$ satisfying: $$\mathbb{E}[\rho(|\psi-g^*(\psi)|)] = \inf_{(Y',\psi',\psi'')\in \mathbf{\Psi}}\mathbb{E}[\rho(|\psi'-\psi''|)].$$
We refer the user to DGM for the motivation behind the computation of $g^*$ and the formal existence and unicity result. Though $g^*$ does not have a simple form in general, DGM propose in the following a simple procedure to construct a consistent estimator of it, based on i.i.d. copies $(Y_i)_{i=1...L}$ and $(\psi_i)_{i=1...L}$ of $Y$ and $\psi$. For simplicity, we suppose hereafter that the two samples have equal size.\footnote{If both samples do not have equal size, one can first apply our analysis after taking a random subsample of the larger one, with the same size as the smaller one. Then we can compute the average of the estimates over a large number of such random subsamples.}

\medskip
To define the DGM's estimator, note first that we have
\begin{equation}\label{eq:pb_ref}
g^*= \arg\min_{g\in \mathcal{G}_0} \mathbb{E}\left[\left( \psi - g(\psi)\right)^2 \right],
\end{equation}
where the set $\mathcal{G}_0$ is defined by
$$\mathcal{G}_0=\left\{g\  \text{non-decreasing}: \mathbb{E}\left[(y-Y)^+-(y-g(\psi))^+\right]\geq 0 \; \forall y\in\mathbb{R}, \; \mathbb{E}[g(\psi)]  = \mathbb{E}[Y]\right\}.$$
In other words, $g^*$ is the (increasing) function such that (i) $g^*(\psi)$ is closest to $\psi$ for the $L^2$ norm; (ii) $g^*$ belongs to $\mathcal{G}_0$, which means that we can rationalize $\mathbb{E}(Y|g^*(\psi))=g^*(\psi)$.

\medskip
To estimate $g^*$, we replace expectations and cdfs by their empirical counterpart. Letting $\psi_{(1)}<...< \psi_{(L)}$ denote the ordered statistic, our estimator of $\left(g^*(\psi_{(1)}),...,g^*(\psi_{(L)})\right)$ is the solution of:
\begin{align}
\left(\widehat{g}^*(\psi_{(1)}),...,\widehat{g}^*(\psi_{(L)})\right) =  \arg\min_{ \widetilde{\psi}_{(1)} < \dots < \widetilde{\psi}_{(L)}   } \sum_{i=1}^L\left( \psi_{(i)} - \widetilde{\psi}_{(i)}\right)^2 \ \text{s.t.} & \sum_{i=j}^L Y_{(i)} - \widetilde{\psi}_{(i)} \geq 0, \; j=2...L, \nonumber \\
& \sum_{i=1}^L Y_{(i)} - \widetilde{\psi}_{(i)} = 0.
 \label{eq:prgm_quad}
\end{align}
Then, for any $t\in \mathbb{R}$, we let
$$\widehat{g}^*(t)=\widehat{g}^*\left(\min\{(\psi_i)_{i=1...L}:\psi_i\geq \min\{t,\psi_{(L)}\}\right).$$
We solve the convex quadratic programming problem \eqref{eq:prgm_quad} using the algorithm proposed in \cite{suehiro2012online}. We refer to DGM for more details.

% subsection min_dev (end)

\section{The functions in the RationalExp package} % (fold)
\label{sec:the_functions_in_the_pkg_rationalexp}

\subsection{The test function} % (fold)
\label{sub:the_fct_test_function}

This function implements the RE tests proposed in DGM. The code of the \fct{test} function is based on the Stata code \proglang{cmi\_test} from \cite{andrews2017commands}. The syntax of the function \fct{test} is as follows:

\begin{Code}
test(Y_tilde,D,X,weights,generalized,nbCores,tuningParam)
\end{Code}

\begin{tabular}{lp{370pt}}
\proglang{Y\_tilde} & a vector of size $n_Y+n_\psi$ stacking first the $(Y_i)_{i=1,...,n_Y}$, then the $(\psi_i)_{i=1,...,n_\psi}$. \\
\proglang{D} & a vector stacking the $(D_i)_{i=1...n}$: $n_Y$ ones, then $n_\psi$ zeros. \\
\proglang{X} & the matrix of covariates. Equal to a vector of ones by default (in which case the test without covariates is performed). \\
\proglang{weights} & the vector of survey weights. Uniform by default. \\
\proglang{generalized} & whether a generalized test should be performed or not: "Add" for additive shocks (default),  "Mult" for multiplicative shocks. Set by default to "No" (no generalized test). \\
\proglang{nbCores} & the number of cores used by the program. To reduce the computational time, this function can use several cores, in which case the library \proglang{snowfall} should be loaded first. By default, \proglang{nbCores} is set to 1.\\
\proglang{tuningParam} & a dictionnary, including the parameters \proglang{p}, \proglang{epsil}, \proglang{B}, \proglang{c},  \proglang{kap}, and \proglang{y\_grid}. The first four corresponds to the parameters $p$, $\epsilon$, $B$, $c$, and $\kappa$ above (with default values equal to 0.05, 0.05, 500, 0.3 and 0.001 respectively). Following AS, the interval $\widehat{\mathcal{Y}}=\left[\min_{i=1...n} \widetilde{Y}_i,\max_{i=1,...,n} \widetilde{Y}_i \right]$ is approximated by a grid denoted by \proglang{y\_grid}. By default, \proglang{y\_grid} is equal to the empirical quantiles of  $\widetilde{Y}$ of order $0$,  $1/29$ $2/29$,..., and 1.
\end{tabular}

\medskip
We refer to Section \ref{sub:example_without_covariates} below for an example of the use of several cores, and to Section \ref{sub:example_with_covariates} for how to modify \proglang{tuningParam}. We also refer to the reference manuel or help file for additional details.

\subsection{The estimDev function} % (fold)
\label{sub:estimDev_function}

This function estimates the minimal deviations from RE. The \proglang{estimDev}() function has the following syntax:
\begin{Code}
estimDev(psi, y)
\end{Code}

\begin{tabular}{lp{370pt}}
\proglang{psi} & vector of subjective expectations \\
\proglang{y} & vector of realizations of an individual outcome.
\end{tabular}

\medskip
Both vectors should have the same length. If not, one can randomly select a subset of the longer vector with length equal to that of the shorter one. The function returns a function via the \proglang{approxfun} of the package \proglang{stats}. This function can then be evaluated directly on a desired grid. We refer the user to Section \ref{sub:the_fct_estimdev_function} for an example.

% subsect
% subsection estimDev_function (end)


% section the_functions_in_the_pkg_rationalexp (end)

\section{Examples} % (fold)
\label{sec:examples}


\subsection{Test without covariates} % (fold)
\label{sub:example_without_covariates}

We consider the same DGP as in DGM (Section 5), namely we suppose that the outcome $Y$ is given by
$$ Y = \rho \psi  + \epsilon,$$
with $\rho \in [0,1]$, $\psi \sim \mathcal{N}(0,1)$ and
$$\epsilon =  \zeta \left(-1\{U \leq 0.1\} + 1\{U \geq 0.9\}\right),$$
where $\zeta$, $U$ and $\psi$ are mutually independent, $\zeta \sim \mathcal{N}(2, 0.1)$ and $U\sim \mathcal{U}[0,1]$. We consider 1,200 observations and $\rho=0.29$.

\begin{Code}
rm(list=ls())
### load packages
library(RationalExp)
library(snowfall)
set.seed(1829384)

### Data generating process
n_p=1200 # number of observations
n_y=n_p
N <- n_y + n_p
rho <-0.29 # parameter rho
sig=0.1 # parameter sigma
u=1
b=0.10
a=2
psi <-rnorm(n_p,0,u) ## vector of psi's
pp_y <- runif(n_y,0,1)
zeta <- rnorm(n_y,a,sig)
zeta1 <- rnorm(n_y,-a,sig)
pp1_y <- 1*(pp_y <b)
pp2_y <- 1*(pp_y >1-b)
pp3_y <- 1*(pp_y <=(1-b) & pp_y >=b)
psi_y <-rnorm(n_y,0,u)
y =  rho*psi_y+ pp1_y*zeta + pp2_y*zeta1 ## vector of y's
\end{Code}

We then concatenate the two datasets:
\begin{Code}
y_tilde <- rbind(matrix(y,n_y,1),matrix(psi,n_p,1))  ## concatenation of y then psi
D <- rbind(matrix(1,n_y,1),matrix(0,n_p,1)) ## vector of D's
\end{Code}

By default, the function \proglang{test} runs the test without covariates, where \proglang{system.time}() is used to compute the elapsed time:
\begin{Code}
system.time(res <- test(y_tilde ,D))
#> Conditional Moment Inequalities Test   Number of obs :  2400
#> Test Statistic :  4.697479
#>  Critical Value (1%)  2.51336
#> Critical Value (5%)  0.9971287
#> Critical Value (10%)  0.7172106
#> p-value  :  0
#>    user  system elapsed
#>   27.27    0.09   28.74
\end{Code}
The test prints the total number of observations,  the test statistic, the different critical values,  and the p-value. It returns a list containing all these informations and the vector of bootstraped test statistics (see the reference manual) and should be obtained using:
\begin{Code}
N <- res[[1]]
cv01 <- res[[2]]
cv05 <- res[[3]]
cv10 <- res[[4]]
T_n<- res[[5]]
p_value <- res[[7]]
T_reps <- res[[8]]
\end{Code}

We now show how to modify the tuning parameter of the number of cores \proglang{nbClust} (in tuningParam) to 3 and run the test of the parallelized version of the test:
\begin{Code}
system.time(res <- test(y_tilde ,D,NULL,NULL,NULL,3,NULL))
#> Warning in searchCommandline(parallel, cpus = cpus, type = type,
#> socketHosts = socketHosts, : Unknown option on commandline:
#> rmarkdown::render('C:/Users/gaillac/Dropbox/Package_R/RationalExp/
#> vignettes/how-to-use-RationalExp-package.Rmd', encoding
#> R Version:  R version 3.4.4 (2018-03-15)
#> snowfall 1.84-6.1 initialized (using snow 0.4-3): parallel execution on 3 CPUs.
#>
#> Stopping cluster
#>Conditional Moment Inequalities Test   Number of obs :  2400
#>Test Statistic :  4.697479
#> Critical Value (1%)  2.532613
#>Critical Value (5%)  1.28576
#>Critical Value (10%)  0.8261835
#>p-value  :  0
#> user system elapsed
#> 1.12   0.36  15.34
\end{Code}
Note that elapsed time has been divided by 1.63.


% subsection example_without_covariates (end)

\subsection{Test with covariates} % (fold)
\label{sub:example_with_covariates}

We now present an example of the test with covariates. We consider the same DGP as in DGM (Appendix D), namely we suppose that the outcome $Y$ is given by the following DGP:
$$ Y = \rho \psi  + \sqrt{X}\epsilon,$$
with $\rho \in [0,1]$, $\psi \sim \mathcal{N}(0,1)$, $X\sim \text{Beta}(0.1, 10)$ and
$$\epsilon=  \zeta \left(-1\{U \leq 0.1\} + 1\{U \geq 0.9\}\right),$$
where $\zeta \sim \mathcal{N}(2, 0.1)$ and $U\sim \mathcal{U}[0,1]$. $(\psi, \zeta, U, X)$ are supposed to be mutually independent. Again we start with the data generating process:
\begin{Code}
n_p=1200
n_y=n_p
N <- n_y + n_p
sig=0.1
u=1
b=0.10
a=2
alp = 0.1
bet = 10

# Data Generating process
X_p = rbeta(n_p,alp ,  bet)+1
X_y =  rbeta(n_y,alp ,  bet)+1
transf <- function(X_y,   f0){
  res <-f0*sqrt(X_y)
  return(res)
}
psi <-rnorm(n_p,0,u)
pp_y <- runif(n_y,0,1)
zeta <- rnorm(n_y,a,sig)
zeta1 <- rnorm(n_y,-a,sig)
pp1_y <- 1*(pp_y <b)
pp2_y <- 1*(pp_y >1-b)
pp3_y <- 1*(pp_y <=(1-b) & pp_y >=b)
psi_y <-rnorm(n_y,0,u)
y =  rho*psi_y+ transf(X_y, 1)*(pp1_y*zeta + pp2_y*zeta1)
\end{Code}

We concatenate the two datasets as above:
\begin{Code}
y_tilde <- rbind(matrix(y,n_y,1),matrix(psi,n_p,1))## concatenation of y then psi
D <- rbind(matrix(1,n_y,1),matrix(0,n_p,1)) ## vector of D's
\end{Code}

Then we concatenate the covariates by rows, those associated with y coming first.
\begin{Code}
X_tilde <- rbind(matrix(X_y,n_y,1),matrix(X_p,n_p,1))
\end{Code}

Then run the test, after modifying the \proglang{X} parameter, the number of core (from 1 to 3), and the number of grid points (from 30 to 5):
\begin{Code}
tuningParam<- vector(mode="list", length=6)
tuningParam[["p"]] <- 0.05 # the parameter p in  Section 3 in DGM
tuningParam[["epsilon"]] <- 0.05 # the parameter c in  Section 3 in DGM
tuningParam[["B"]] <-500 #the number of bootstrap samples
tuningParam[["y_grid"]] <- quantile(y_tilde,seq(0,1,length.out=5))
tuningParam[["c"]] <- 0.3 # the parameter c in  Section 3 in DGM
tuningParam[["kappa"]] <- 0.001  #the parameter kappa  in  Section 3 in DGM

system.time(res <- test(y_tilde ,D,X,NULL,NULL,NULL,3,tuningParam ))
#> Warning in searchCommandline(parallel, cpus = cpus, type = type,
#> socketHosts = socketHosts, : Unknown option on commandline:
#> rmarkdown::render('C:/Users/gaillac/Dropbox/Package_R/RationalExp/
#> vignettes/how-to-use-RationalExp-package.Rmd', encoding
#> snowfall 1.84-6.1 initialized (using snow 0.4-3): parallel execution on 3 CPUs.
#>
#> Stopping cluster
#> Conditional Moment Inequalities Test   Number of obs :  2400
#>Test Statistic :  2.929523
#> Critical Value (1%)  1.807603
#>Critical Value (5%)  1.162568
#>Critical Value (10%)  0.9045701
#>p-value  :  0
#>user  system  elapsed
#> 0.87  0.36    38.50
#>
#>T_n<-  res[[5]]
#>p_value <- res[[7]]
\end{Code}

%We note that the function \fct{MC\_test} in the \pkg{RationalExp} package  allows to implement a complete parallised version of the Monte Carlo set up illustrated in DGM.

% subsection example_with_covariates (end)

\subsection{Estimation of minimal deviations} % (fold)
\label{sub:the_fct_estimdev_function}


The data generating process is the same as for the test without covariates in Section \ref{sub:example_without_covariates}.
%{r, fig.show='hold'}
\begin{Code}
sig=0.1
u=1
b=0.10
a=2
rho= 0.4
psi <- rnorm(n_p,0,u)
pp_y <- runif(n_y,0,1)
zeta <- rnorm(n_y,a,sig)
zeta1 <- rnorm(n_y,-a,sig)
pp1_y <- 1*(pp_y <b)
pp2_y <- 1*(pp_y >1-b)
pp3_y <- 1*(pp_y <=(1-b) & pp_y >=b)
psi_y <-rnorm(n_p,0,u)
y =  rho*psi_y+ pp1_y*zeta + pp2_y*zeta1
\end{Code}

Then we estimate $g^*$ using the \fct{estimDev} function, and the two vectors \proglang{psi} and \proglang{y} with same length
\begin{Code}
system.time(g_star <- estimDev(psi,y))
#>    user  system elapsed
#>    4.32    0.03    4.62
\end{Code}
We plot the result on a grid (we refer to \cite{DGM} for a detailled analysis and an enhanced plot):
%{r, fig.show='hold'}
\begin{Code}
t<- seq(-2.2,2.2, length.out=300)
plot( t, t- g_star(t),type="l",col=1 , lwd=2, xlim=c(-2.2,2.2),
ylim=c(min(t- g_star(t))-0.1,max(t- g_star(t))+0.1))
abline(h=0)
\end{Code}

\begin{figure}[!h]
\centering
\includegraphics[width=0.8\linewidth, height=0.40\textheight]{./Images/gg.png}
\begin{minipage}{0.9 \textwidth}
{\footnotesize
Note: The plain black curve corresponds to the average of $\psi - \widehat{g}^*(\psi)$ over 1, 000 simulations (with $n=800$)}
\end{minipage}
\caption{Estimation of $\psi - g^*(\psi)$.}
\label{fig:5}
\end{figure}

% subsection the_fct_estimdev_function (end)

% section examples (end)
\bibliography{mybib}
\end{document}