\documentclass[article,nojss]{jss} %% NOTA BENE: More definitions --> further down %%%%%%%%%%%% % %\author{Martin M\"achler \\ Seminar f\"ur Statistik, \ ETH Zurich} \author{Martin M\"achler~\orcidlink{0000-0002-8685-9910} \\ Seminar f\"ur Statistik, \ ETH Zurich\\ Nov.\ 2022 \ {\tiny \LaTeX'ed \today}} \title{Asymptotic Tail Formulas For Gaussian Quantiles}% in R % w/ jss, no effect:\date{Nov 2022 {\tiny \LaTeX'ed \today}} %% %%%\def\mythanks{a version of this paper ... has been published in JSS, \url{http://www.jstatsoft.org/....}.} %% %% for pretty printing and a nice hypersummary also set: \Plainauthor{Martin M\"achler} % \Plaintitle{Asymptotic Tail Formulas For Gaussian Quantiles}% in R % without formatting % \Shorttitle{} % %\VignetteIndexEntry{Asymptotic Tail Formulas For Gaussian Quantiles}% in R% << shown on CRAN page! %\VignetteDepends{DPQ} %\VignetteDepends{sfsmisc} %\VignetteDepends{stats} \newif\ifJSS \JSSfalse %--not-yet--\VignetteDepends{Rmpfr} \SweaveOpts{engine=R,pdf=FALSE,grdevice=pdfDB, width=7,height=4, strip.white=true,keep.source=TRUE} \Abstract{ \R's Gaussian quantile function \code{qnorm(p, ..)} has been based on the published algorithm AS~241 of \cite{AS241:Wichura1988}, which is fully accurate only on the regular scale, for $p$ down to the smallest double precision numbers $> 0$. When probabilities are specified on the log scale, i.e., via \code{qnorm(lp, log.p=TRUE)} where the argument is a log probability and \code{lp}$=\log p \to -\infty$ when $p\to 0$, \fct{qnorm} using AS~241 has been very inaccurate in the very extreme tails. I have derived novel asymptotic formulas for that case, using recursive plug-in to the asymptotic formula for $\Phi(x)$ (which \fct{qnorm} should invert). Using these formulas for order $k=0, 1, \dots, 5$, on six different regions (adjacent intervals), makes it possible to provide fully accurate \fct{qnorm} computations also on the log scale. Pure \R{} implementations of these are provided in % our \R{} package \CRANpkg{DPQ} \citep{DPQ}, as functions \fct{qnormAsymp} and \fct{qnormR}, and have also been prepared to be added to (the C code in \file{Rmathlib} in) the next version of \R's \fct{qnorm}. 
} \Keywords{asymptotic, approximation, extreme tail, Gaussian, Normal Quantile, \R} % ditto, without formatting: \Plainkeywords{asymptotic, approximation, extreme tail, Gaussian, Normal Quantile, R} %% publication information %% NOTE: Typically, this can be left commented and will be filled out by the technical editor %% \Volume{13} %% \Issue{9} %% \Month{September} %% \Year{2004} %% \Submitdate{2004-09-29} %% \Acceptdate{2004-09-29} %% The address of (at least) one author should be given %% in the following format: \Address{ Martin M\"achler\\ Seminar f\"ur Statistik, HG G~16\\ ETH Zurich\\ 8092 Zurich, Switzerland\\ E-mail: \email{maechler@stat.math.ethz.ch}\\ URL: \url{http://stat.ethz.ch/~maechler} } %% for those who use Sweave please include the following line (with % symbols): %% MM: this is "substituted" by jss.cls: %% need no \usepackage{Sweave.sty} %% JSS{ ..../examples/JSS/article.Rnw } recommended: \usepackage{orcidlink,thumbpdf,lmodern} %% Marius' packages \usepackage[american]{babel}%for American English %\usepackage{microtype}%for character protrusion and font expansion (only with pdflatex) \usepackage{amsmath}%sophisticated mathematical formulas with amstex (includes \text{}) % \usepackage{mathtools}%fix amsmath deficiencies \usepackage{amssymb}%sophisticated mathematical symbols with amstex (includes \mathbb{}) \usepackage{amsthm}%theorem environments \usepackage{bm}%for bold math symbols: \bm (= bold math) %NON-STANDARD:\RequirePackage{bbm}%only for indicator functions % \usepackage{enumitem}%for automatic numbering of new enumerate environments \usepackage{relsize}%-> \larger and \smaller %%MM: jss.cls load xcolors, but *not* with option [x11names] .. work around \definecolorstrue\input{x11nam.def}\definecolorstrue \usepackage[ format=hang, % NOT for JSS: labelsep=space, justification=justified, singlelinecheck=false%, % NOT for JSS: labelfont=bf ]{caption}%for captions \usepackage{float} \usepackage{afterpage} % \usepackage{tikz}%sophisticated graphics package % \usepackage{tabularx}%for special table environment (tabularx-table) % \usepackage{booktabs}%for table layout \usepackage{fancyvrb}% to use \verb|...| inside \footnote{} --> \VerbatimFootnotes \DeclareMathOperator{\logIp}{log1p} % This is already in jss above -- but withOUT the fontsize=\small part !! \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontsize=\small,fontshape=sl} \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small} \DefineVerbatimEnvironment{Scode}{Verbatim}{fontsize=\small,fontshape=sl} %% makes space between Sinput and Soutput smaller: \fvset{listparameters={\setlength{\topsep}{0pt}}}% !! quite an effect! %% %---{m-floating}: Relaxed Floating FIGUREs options (=> more figures on page w/o text): % defaults from REPORT/ARTICLE.STY \renewcommand{\topfraction}{1} %\def\topfraction{.7} \renewcommand{\bottomfraction}{1} %\def\bottomfraction{.3} \renewcommand{\textfraction}{0} %\def\textfraction{.2} \renewcommand{\floatpagefraction}{.7} %\def\floatpagefraction{.5} %\def\dblfloatpagefraction{.5} \setcounter{bottomnumber}{4} %default: 1 %-- end{m-floating} ----------------------------- % \long\def\symbolfootnote[#1]#2{\begingroup% % \def\thefootnote{\fnsymbol{footnote}}\footnote[#1]{#2}\endgroup} %% and \symbolfootnote[1]{footnote} to get an * , 2: dagger, 3: double dagger... 
% \newcommand*{\R}{\proglang{R}}%{\textsf{R}} %% Marius' commands \newcommand*{\eps}{\varepsilon} %NON-STANDARD{bbm}:\newcommand*{\I}{\mathbbm{1}} \newcommand*{\I}{\mathbf{1}} \newcommand*{\IN}{\mathbb{N}} \newcommand*{\IR}{\mathbb{R}} \newcommand*{\IP}{\Prob} \newcommand*{\IE}{\E} \newcommand*{\abs}{\operatorname*{abs}} \newcommand*{\sgn}{\operatorname*{sgn}} \newcommand*{\sign}{\operatorname*{sign}} \newcommand{\tr}{\ensuremath{^\mathsf{T}}}% or ^{\intercal} % \renewcommand*{\O}{\mathcal{O}}% big O(.) \newcommand{\CRANpkg}[1]{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}} \newcommand*{\file}[1]{\code{#1}} \newcommand*{\fct}[1]{\code{#1()}} %% journal specific aliases \newcommand*{\setcapwidth}[1]{} \newtheorem*{AS}{A.\ \& S.}% for formulas cited from Abramowitz & Stegun \defcitealias{AbrMS72}{A.\&S.}% for citing them shortly %% end of declarations %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{document} %% include your article here, just as usual %% Note that you should use the \pkg{}, \proglang{} and \code{} commands. % \section[About Java]{About \proglang{Java}} \VerbatimFootnotes% <-- {fancyvrb} below %% Note: If there is markup in \(sub)section, then it has to be escaped as above. %% Note: These are explained in '?RweaveLatex' : <>= pdfDB <- function(name, width, height, ...) { grDevices::pdf(paste0(name, ".pdf"), ## "DB": vvvvvvvvvvvvvvvv width=width, height=height, onefile=FALSE, useDingbats=TRUE) } op.orig <- options(width = 70, useFancyQuotes = FALSE ## , SweaveHooks = list(fig=function() par(mar=c(5.1, 4.1, 1.1, 2.1))) ## JSS--ugly! , prompt="R> ", continue="+ " , continue = " " ) Sys.setenv(LANGUAGE = "en") if(.Platform$OS.type != "windows")#$ Sys.setlocale("LC_MESSAGES","C") if(Sys.getenv("USER") == "maechler") # my latest checked development version require("DPQ", lib="~/R/Pkgs/DPQ.Rcheck-64b") ## take CRAN's version, not development one: ## require("DPQ", lib="~/R/Pkgs/CRAN_lib") @ % \section[Introduction]{Introduction} % \dots\dots \section[Gaussian Quantiles in R]{% Gaussian quantiles in \R\ -- okay on regular probability range} %% see ./qnorm-svn-log.md for the "history" notes (svn log) and %% ^^^^^^^^^^^^^^^^ %% ~/R/D/r-devel/R/src/nmath/qnorm.c* for versions of the C source Gaussian or normal quantiles have been made available in \R\ \citep{R} from the very beginning. Ross Ihaka (one of the two ``fathers'' of \R) wrote the first version, visible in R's subversion (svn) repository, rev 574, dated Jan.\ 14, 1998, % ~/R/D/r-devel/R/src/nmath/qnorm.c.~r574~ %% TODO? maybe have even earlier version? basically interfacing \R{} with a C version of the published AS~111 algorithm (which was in Fortran 66 with \code{GOTO} etc), \cite{AS111:BeaSpr1977}, but already improving on AS~111 by using a more accurate formula from Wichura for the ``outer'' tail (defined to have $p' := \min(p, 1-p)$ close to zero, specifically, when $p' \in (10^{-300}, \epsilon_c ]$), where $\epsilon_c$, the computer epsilon ($=$ \code{DBL\_EPSILON} in C's \code{math} library $=$ \R's \code{.Machine\$double.eps}), is nowadays always $\epsilon_c = 2^{-52} = 2.220446..\cdot 10^{-16}$. For this first algorithm AS~111, e.g., in \R~1.0.0, Feb.~29, 2000 (svn rev~7639, 2000-01-18), % ~/R/D/r-devel/R/src/nmath/qnorm.c.~r7639~ the version of \file{\emph{}/src/nmath/qnorm.c} contained the description \begin{quotation}\it \noindent Compute the quantile function for the normal distribution. 
\\[1ex] For small to moderate probabilities, algorithm referenced below is used to obtain an initial approximation which is polished with a final Newton step. \\[1ex] For very large arguments, an algorithm of Wichura is used. \end{quotation} and the reference to \cite{AS111:BeaSpr1977}. Also, already before releasing R 1.0.0 on Feb.~29, 2000, as the R Core team we had introduced the \code{log.p} and \code{lower.tail} logical switches, \begin{quotation}\smaller \noindent r7615 | maechler | 2000-01-17 12:18:30 +0100 (Mo, 17 Jan 2000)\\ \verb|add new argument lower.tail and log[p]; at first only to [dpq]pois()| \\[1ex] \noindent r7639 | maechler | 2000-01-18 12:10:44 +0100 (Di, 18 Jan 2000)\\ \verb|[dpq]norm() & [dpq]lnorm() have new args| \end{quotation} Already a few months after releasing R 1.0.0 (June 6, svn r9464), I had switched \fct{qnorm} to use the more recent and accurate AS~241 % r9464 | maechler | 2000-06-06 16:03:37 +0200 (Di, 06 Jun 2000) | 2 lines % new qnorm() AS241 with NEWS entry \begin{verbatim} o qnorm() is now based on AS 241 instead of AS 111, and should give precise results up to 16 digits precision. \end{verbatim} Algorithm AS~241 is by \cite{AS241:Wichura1988} %--> ./qnorm-litt.bib Wichura, M. J. (1988) Algorithm AS 241: .... which contains the promise of 16 digits precision\footnote{16 digits precision, i.e., about the usual IEEE 52-bit double precision ($\epsilon_c = 2^{-52} \approx 2.22 \cdot 10^{-16}$)} in the last sentence on p.~477: \ \ \emph{\dots\ \ for $10^{-316} < \min(p, 1-p)$. The second routine, PPND16, is accurate to about 16 figures over the same range.} % In section \textbf{Accuracy}, Wichura specifies that for the ``Monte Carlo'' relative error, $p$ was constrained % to the interval $[10^{-70}, 1 - 10^{-15})$ (for \code{PPND16}, the more accurate of the two algorithms). Also in \cite{AS241:Wichura1988}, \begin{equation}\label{r.def} r := \sqrt{- \log(\min(p, 1-p))} \ \ (\Longleftrightarrow \min(p, 1-p) = e^{-r^2}). \end{equation} For ease of notation, we assume $p < \frac 1 2$ for now, and hence the quantile \code{qnorm(p)}$ = \Phi^{-1}(p) = \Phi^{-1}\bigl(\exp(-r^2)\bigr)$ is negative. The ``outermost'' minimax rational approximation to $-\Phi^{-1}(p)$ in AS~241 covers the interval $r \in (5, 27] \Longleftrightarrow r^2 \in (25, 729]$, or equivalently, \begin{align} p \in \bigl[e^{-729}, e^{-25}\bigr) \approx \bigl[2.51 \cdot 10^{-317}, 1.389 \cdot 10^{-11} \bigr). \end{align} At first, the above seems sufficient, since indeed the lower bound is already ``de-normalized'' in double precision: $e^{-27^2} = e^{-729} \approx 2.51 \cdot 10^{-317}$ is smaller than \code{DBL\_XMIN} in C's \code{math} library $=$ \R's \code{.Machine\$double.xmin}$=2^{-1022} \approx 2.225 \cdot 10^{-308}$. \emph{However}, as mentioned above, in the R Core team we had already seen that it is often advisable to work on the \emph{$\log$}--scale with probabilities and therefore had introduced the option \code{log.p = TRUE} for all our (cumulative) distribution and quantile functions. %% Now this changes the picture of ``sufficient'' approximation dramatically, as, indeed, on the log scale, the AS~241 algorithm only goes up to $-\log p = r^2 = 729$, and then quickly loses precision (see below). The goal of the remaining part of this paper is to describe the search for accurate approximations in these outermost tails. 
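To illustrate these numbers, here is a small check in plain \R{} (an illustration added here, not part of the original computations): the lower bound $e^{-729}$ is indeed still representable as a denormalized double, while on the log scale one gets beyond $r = 27$ very easily:
\begin{Schunk}
\begin{Sinput}
exp(-729) < .Machine$double.xmin # TRUE: ~ 2.5e-317 is denormalized (xmin = 2^-1022 ~ 2.2e-308)
pnorm(-40)                       # 0   : the true value, ~ 3.6e-350, underflows to zero
pnorm(-40, log.p=TRUE)           # -804.6..., i.e., r = sqrt(804.6) ~ 28.4, already beyond 27
\end{Sinput}
\end{Schunk}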
\subsection[DPQ's qnormR()]{\pkg{DPQ}'s \fct{qnormR} -- documenting R history} Note that in \pkg{DPQ}, pure \R{} code implementations of \R's \fct{qnorm} are provided by function \fct{qnormR} which has (almost\footnote{`mu' $\neq$ `mean'}) the same arguments \code{p, mu = 0, sd = 1, lower.tail = TRUE, log.p = FALSE} as \R's \fct{qnorm} and additionally \code{trace = 0, version = c("4.0.x", "2020-10-17", "2022-08-04")}, where the default \code{version = "4.0.x"} corresponds to \R{} versions up to 4.0.5 (2021-03-31), which use basically the above AS~241, additionally treating extreme cases including $\pm$\code{Inf} and \code{NA, NaN} well. These versions are explained subsequently, starting with the \code{"4.0.x"} version, which may be considered ``catastrophically wrong'' if we look closely in log scale: \section[Correcting qnorm(., log.p=TRUE)]{Correcting \code{qnorm(.., log.p=TRUE)}} %% ~/R/MM/NUMERICS/dpq-functions/qnorm-extreme-bad.R %% ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^^^^^^^^^^^^^^^^^^^ %% ~/R/MM/NUMERICS/dpq-functions/qnorm-asymptotic.R (which was part of the above) %% ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^^^^^^^^^^^^^^^^^^ %% ==> plots 37720 qnormLog-status_relErr_R-4.0.5.pdf %% 38145 qnormLog-status_relErr_R-4.1.3.pdf %% 9906 qnormLog-status_relErr_R-devel_2022-09-11_r82834.pdf %% more history <---> ./qnorm-svn-log.md and ~/R/D/r-devel/R/src/nmath/qnorm.c.~* In order to compare versions of \fct{qnorm} approximations with their ``true'' values, we use the fact that $x = \Phi^{-1}(p)=$\code{qnorm(p)} is defined as the \emph{inverse} of $p = \Phi(x) =$\code{pnorm(x)} \emph{and} we additionally assume that \code{pnorm(x)} is ``fully accurate'', which it basically is, also on the log scale, demonstrably, e.g., using % our CRAN pkg \CRANpkg{Rmpfr} \citep{Rmpfr}, with its own very accurate \fct{pnorm}% \ifJSS{}\else \footnote{\code{Rmpfr::pnorm()} is limited in range because it currently has no log scale (or otherwise scaled) version, and we need to take the explicit \code{log(.)} of the values it computes from its \code{pnorm(x) := erfc}$(-x/\sqrt{2})/2$, values which underflow to zero before \code{x == 1e6} even with extended mpfr erange.} \fi , but we are not providing the evidence here. With this assumption, the error of \fct{qnorm} is the deviation from the identity $\Phi^{-1}(\Phi(x)) \equiv x$. If $x \ne 0$, the \emph{relative} error is \begin{align} \label{eq:relErr} \frac{\widehat{\Phi^{-1}}(\Phi(x)) - x }{x} &= \widehat{\Phi^{-1}}(\Phi(x)) / x - 1, \end{align} and we ``define'' the relative error of \fct{qnorm} as \code{qnorm(pnorm(x)) / x - 1}, where we need to adjust for cases where $x$ is (very close to) zero or not finite, etc. This is done by function \fct{relErrV} from package \CRANpkg{sfsmisc} \citep{sfsmisc}, %-- appendix "Function relErrV" -- only in non-JSS \ifJSS{}\else shown in the appendix~\ref{sec:relErrV}, \fi which takes care of all special or boundary cases. And as a matter of fact, we will work in log scale, hence using \code{log.p = TRUE} in both \fct{pnorm} and \fct{qnorm}, and we want to use positive numbers both for argument and result (and nicer formulae), so we work with the \emph{upper tail}, i.e., use \code{lower.tail = FALSE}. Consequently, instead of computing and inverting $\Phi(x)$, our \code{qnorm(.)} should compute the inverse of $\log\left(1 - \Phi(x)\right)$. %% from qnorm-asymptotic.R <>= qs <- 2^seq( 0, 29, by=1/256) # => s >= 1.84 lp <- pnorm(qs, lower.tail=FALSE, log.p=TRUE) s <- -lp # = -pnorm(..) 
= -log(1 - Phi(qs)) > 0 require("DPQ") # --> qnormR(): qnrm <- qnorm (-s, lower.tail=FALSE, log.p=TRUE) qnrm405 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version= "4.0.x") # R <= 4.0.5 qnrm410 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version= "2020-10-17") qnrm43 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version= "2022") Rver <- sfsmisc::shortRversion() if(getRversion() <= "4.0.5") { # our qnormR(.., version="4.0.x") cat(sprintf("%s, \"4.0.5\",\n all.equal(*, tol=0): %s; identical(): %s\n", Rver, all.equal(qnrm, qnrm405, tolerance=0), identical(qnrm, qnrm405))) stopifnot(all.equal(qnrm, qnrm405, tolerance = 1e-12)) } else if(getRversion() < "4.3") { # our qnormR(*, version="2020-10-17") matches: cat(sprintf("%s, \"4.1.0\",\n all.equal(*, tol=0): %s; identical(): %s\n", Rver, all.equal(qnrm, qnrm410, tolerance=0), identical(qnrm, qnrm410))) stopifnot(all.equal(qnrm, qnrm410, tolerance = 1e-12)) } else { # R version >= 4.3.x cat(sprintf("%s, >= 4.3.x,\n all.equal(*, tol=0): %s; identical(): %s\n", Rver, all.equal(qnrm, qnrm43, tolerance=0), identical(qnrm, qnrm43))) rE6 <- qnorm(-1e6, log.p=TRUE)/-1414.2077829910174 - 1 cat(sprintf(" rE(-1e6) = %g\n", rE6)) if(abs(rE6) < 7e-16) # have R-devel with new 2022 code: stopifnot(all.equal(qnrm, qnrm43, tolerance = 1e-14)) } @ Computing a version of the above (with larger range for $s$, % starting from % not enough: \linebreak[3] \verb|qs <- 2^seq(0,70,by=1/8)|) in \R\ version 4.0.5 and plotting in log-log scale, \ifJSS% not showing the R code below, explaining `true` and for the \textsf{true} curve %(almost a line), plotting \code{qs ~ s}% \fi \begin{figure}[H]% 'H' provided by {float} \centering <>= par(mar = c(3.6, 3.8, 1, .1), mgp = c(2.5, .75, 0)) <>= par(mar = c(3.5, 3.8, 0, .1), mgp = c(2.5, .75, 0)) <>= plot(qnrm405 ~ s, type="l", log="xy", col=2, ylim = c(1, max(qs)), asp = 1, xaxt="n", yaxt="n"); require("sfsmisc"); eaxis(1); eaxis(2) lines(qs ~ s, col=(c4 <- adjustcolor(4, 1/4)), lwd=4) legend("top", c("qnorm(-s, lower.tail=FALSE, log.p=TRUE)", "true"), col=c(palette()[2], c4), lwd=c(1,4), bty="n") @ <>= local({ # larger range for s -- qnorm-extreme-bad.R.~1~ (Sep 25, 2020): qs <- 2^seq(0, 70, by=1/8) s <- -(lp <- pnorm(qs, lower.tail=FALSE, log.p=TRUE)) ## with R 4.0.5: qp <- qnorm(pp, lower.tail=FALSE, log.p=TRUE) qnrm405 <- qnormR(lp, lower.tail=FALSE, log.p=TRUE, version= "4.0.x") <> <> }) @ \caption{Extreme tail log scale \code{qnorm(-s, ..)} in R 4.0.5 or earlier, i.e., up to 2021% ; note that {\sffamily \textcolor{LightBlue3}{true}} is just \code{ \textcolor{LightBlue3}{lines(qs\~{}s, ..)}}} \label{fig:qnorm405} \end{figure} Figure~\ref{fig:qnorm405} looks good up to about $s = 10^{12}$, i.e., \fct{qnorm} coincides with the true $x =$ \code{qs}, but beyond $10^{14}$ it diverges for larger $s$, and for even larger $s$ it shows complete loss of accuracy as \code{qnorm(-s, *)} converges (to 98340296.6, % 98340296.60657 after $s=10^{43}$), even though the true function should go to $+\infty$. We will see that indeed, asymptotically, $\mathtt{qnorm}(|s|, ..) \sim \sqrt{2|s|}$, which in log-log scale is a line (with intercept $\log\sqrt{2}$ and slope $1/2$). 
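As a quick numerical illustration of this asymptote (a sketch added for exposition, not the code used for the figure), compare $\sqrt{2s}$ with the largest quantile used above:
\begin{Schunk}
\begin{Sinput}
x <- 2^29                                    # the largest (true) quantile used above
s <- -pnorm(x, lower.tail=FALSE, log.p=TRUE) # s = -log(1 - Phi(x)), here ~ 1.44e17
sqrt(2*s) / x - 1   # ~ 0 : the true deviation, ~ 7e-17, is below double precision
\end{Sinput}
\end{Schunk}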
A closer inspection shows the relative errors in Figure~\ref{fig:relErr405}: <>= ablaxis1 <- function(x) { abline(v = x^2, col=4, lty=2) axis(1, at=x^2, labels = substitute(X^2, list(X=x)), col=4, col.axis=4, line=-.61, cex.axis=0.75) } <>= if(!exists("version.txt")) version.txt <- R.version.string plot(abs(relE_qn) ~ s, type="l", log="xy", main = "inaccuracy of qnorm(-s, log.p=TRUE, lower.tail=F)", axes=FALSE) eaxis(1, nintLog = 13, sub10 = 2); eaxis(2); ablaxis1(x=27) mtext(version.txt, line = -0.8, cex=.8, adj = 0.75) <>= <> <> @ for <>= relE_qn <- relErrV(qs, qnrm405) ; version.txt <- "R versions up to R 4.0.5" @ \begin{figure}[H] \centering <>= <> @ \caption{Relative error of \fct{qnorm} in extreme tails in \R\ versions before R 4.1.0} \label{fig:relErr405} \end{figure} From this, in September 2020, I started to investigate the visually obvious asymptotic behavior of the \emph{correct} inversion of \fct{qnorm}, using the classical first order asymptotic \begin{align} \label{eq:Phi-asym-0} 1 - \Phi(x) \sim \frac{\phi(x)}{x}, \quad \mathrm{for \ } x \to \infty, \end{align} for the standard normal / Gaussian density $\phi(x) := \frac{1}{\sqrt{2\pi}} e^{-x^2/2}$ and cumulative distribution function $\Phi(x) := \int_{-\infty}^x \phi(t)\; dt$. On the log scale, this is equivalent to \begin{align} \label{eq:log-Phi-asym-0} \log(1 - \Phi(x)) &= \log\phi(x) - \log x + o(x),\\ \nonumber &= -x^2/2 - 1/2 \log{2\pi} - \log{x} + o(x)\\ \nonumber &= -x^2/2 + o(x), \end{align} as $O(\log x) = o(x)$, i.e., $l_p := \log(1 - \Phi(x)) \approx -x^2/2$ for large $x$ and hence, \begin{align} \label{eq:x-asym-0} x \approx \sqrt{-2 l_p}, \end{align} for large $|x|$ or large $|l_p| = -l_p =: s$ (using notation as in the \R{} code above with \code{lp} and \code{s <- -lp}). Consequently, a first order remedy against the ``catastrophic'' precision loss for extreme tail \fct{qnorm} was to use the above $\sqrt{2s}$ approximation for upper tail probabilities specified in log scale. Computer experimentation was used to find a numerically optimal (for the double precision implementation of AS~241) cutoff. Computing the difference (``delta'') between the absolute values of the relative errors for R~4.0.x's \fct{qnorm} and for the asymptotic approximation (\ref{eq:x-asym-0})% \ifJSS{,}\else{:}\fi <>= delta.relE <- function(q, qNorm = function(...) qnormR(..., version = "4.0.x")) { lp <- pnorm(q, lower.tail=FALSE, log.p=TRUE) # <==> q = true qnorm(lp, *) ## the "delta" of the two relative errors qnorm() vs sqrt(2*s) approx: abs(1 - qNorm(lp, lower.tail=FALSE, log.p=TRUE) / q) - abs(1 - sqrt(-2*lp) / q) } plot(delta.relE(qs) ~ qs, subset = 10 < qs & qs < 4e6, type="l", log="x") abline(h=0, col = adjustcolor(2, 1/2)) @ shows a well defined zero crossing; we now determine the root location as the optimal cutoff, as it will minimize the absolute value of the relative error of computing \code{qnorm(..)}. At first: <>= cutP. <- uniroot(function(logq) delta.relE(exp(logq)) , c(3, 13)) exp(cutP.$root) @ %% 1153.223 then, getting more accurate once we approximately know the region: <>= str(cP. 
<- uniroot(delta.relE, interval = c(1000, 1300), tol = 1e-12)) qC <- cP.$root # 1153.242 (lpC <- pnorm(qC, lower.tail=FALSE, log.p=TRUE)) @ %%[1] -664991 so the optimal cutoff for switching to the sqrt-approximation is at \code{lp}$ = \log p = -664991$ or $r = \sqrt{s} = \sqrt{-\log p} = 815.470$ (with $r$ defined in Equation~\ref{r.def}); for convenience (a round number), we use the cutoff $r \ge 816$ in \fct{qnorm}, i.e., basically \begin{Schunk} \begin{Sinput} if(r >= 816) value = sqrt(2) * r; \end{Sinput} \end{Schunk} Consequently, after more testing, this was added to the \R{} (i.e., ``R-devel'') sources a few weeks later \\[-5ex] \begin{verbatim} svn r79346 | maechler | 2020-10-17 21:42:17 +0200 \end{verbatim} \par\vspace*{-1ex} to be in R 4.1.0 with NEWS entry \\ \quad $\bullet$\hspace*{-1em}\begin{minipage}[t]{0.85\textwidth} \begin{quote} \code{qnorm(\emph{}, log.p=TRUE)} is now correct to at least five digits where it was catastrophically wrong, previously. \end{quote} \end{minipage} Indeed, \fct{qnorm} was now ``first order accurate'' even in the extreme tails, and in a plot such as Figure~\ref{fig:qnorm405} one would not notice any inaccuracy. But then, in such a plot one would only notice deviations on the order of $1\;\%$, i.e., only about 2 digits of accuracy are visible there. Looking at the relative errors directly in Figure~\ref{fig:relErr410} (cf.\ Figure~\ref{fig:relErr405} for the original version) indeed shows that the relative error is now smaller than $10^{-5}$ and maximal at the cutoff $s = 816^2 = 665856$: <>= relE_qn <- relErrV(qs, qnrm410); version.txt <- "R 4.1.0 to 4.2.x" @ \begin{figure}[H] \centering <>= <> ablaxis1(x=816) @ \caption{\fct{qnorm} relative error in extreme tails, R ver.~4.1.0--4.2.x, ca.\ 2021--'22} \label{fig:relErr410} \end{figure} \section[Fully accurate asymptotic qnorm(., log.p=TRUE)]{% Fully accurate asymptotic \code{qnorm(., log.p=TRUE)}} In \R, the function \fct{qnorm} and notably the underlying C API function \fct{qnorm}\footnote{\R's C API \fct{qnorm} is an alias for \fct{qnorm5} in the source file \file{/src/nmath/qnorm.c}} are used in other places, not least to (approximately) compute quantiles of other distributions, such as the (non central) t. In addition, since $\Phi^{-1}(x)$ is a very smooth monotone function, it may naturally be desirable that \fct{qnorm} computes its values to the same full (double precision) accuracy as most other mathematically well defined functions in \R. Now, the classical simple first order asymptotic (\ref{eq:Phi-asym-0}) for $\Phi(.)$, i.e., \R's \fct{pnorm}, has long been known to many more terms. \citet{Mills1926} already builds on work by Laplace, who is said to have derived some of the two asymptotic series given in \citet[p.~932]{AbrMS72}, \begin{AS}[\textbf{26.2.12}] \begin{align} \label{eq:AS_26.2.12} 1 - \Phi(x) = \Phi(-x) &= \frac{\phi(x)}{x} \cdot\left(1 - \frac 1{x^2} + \frac{1\cdot 3}{x^4} + \cdots + \frac{(-1)^n\; 1\cdot 3\cdots(2n-1)}{x^{2n}}\right) \ + \ R_n, \end{align} \end{AS} where the remainder term $R_n$ (which can be represented exactly as an integral) is smaller than the first neglected term. 
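To get a feeling for the accuracy of such truncated asymptotic series, here is a small numerical check (added for illustration only, not part of the original code), comparing (\ref{eq:AS_26.2.12}) truncated after the $x^{-6}$ term with \fct{pnorm} at $x = 10$:
\begin{Schunk}
\begin{Sinput}
x  <- 10
tt <- c(1, -1, 3, -15) / x^(2*(0:3))  # terms 1, -1/x^2, +3/x^4, -15/x^6 of A.&S. 26.2.12
Q  <- dnorm(x)/x * sum(tt)            # truncated series for 1 - Phi(10)
Q / pnorm(x, lower.tail=FALSE) - 1    # ~ -1e-6, about the size of the first neglected term
\end{Sinput}
\end{Schunk}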
Note that \citetalias{AbrMS72} % \textbf{A.\&S.}%\citet{AbrMS72} use notation $Q(x) \equiv 1 - \Phi(x)$ and $Z(x) \equiv \phi(x)$, also in the subsequent asymptotic series which is slightly more accurate numerically (but without an explicit remainder term): \begin{AS}[\textbf{26.2.13}] \begin{align} \label{eq:AS_26.2.13} 1 - \Phi(x) \sim \frac{\phi(x)}{x} \cdot &\left( 1 - \frac{a_1}{x^2+2} + \frac{a_2}{(x^2+2)(x^2+4)} - \frac{a_3}{(x^2+2)(x^2+4)(x^2+6)} + \cdots \right), \\ & \mathrm{where \ } a_1=a_2=1, a_3=5, a_4=9, a_5=129, \nonumber \end{align} \end{AS} and the general coefficient $a_n$ is defined via the coefficients of a polynomial expansion. As previously, we need to use this on the $\log$ scale, \begin{align} l_p= \log(1-\Phi(x)) &\approx \log(\phi(x)) - \log(x) + \log\bigl(1 - q(x^2)\bigr), \nonumber \\ &= -\frac{x^2}{2} - \frac{1}{2}\log(2\pi) - \log(x) + \log\bigl(1 - q(x^2)\bigr), \label{eq:log-ser}\\ \quad \mathrm{where \ } q(x^2) &= \frac{1}{x^2+2} - \frac{1}{(x^2+2)(x^2+4)} + \frac{5}{(x^2+2)(x^2+4)(x^2+6)} + \cdots, \nonumber \\ %\label{q:def}\\ &= \frac{1}{x^2+2} \Bigl(1 - \frac{1}{x^2+4} \Bigl(1- \frac{1}{x^2+6} \Bigl(5 - \frac{9}{x^2+8} + \cdots\Bigr)\Bigr)\Bigr), \label{q:nested} \end{align} and $l_p$ is the log probability (given as the first argument to \fct{qnorm}), and we would like to \emph{solve} for $x$, as in the simple $1^{st}$ order case in (\ref{eq:log-Phi-asym-0}) and (\ref{eq:x-asym-0}) above, but of course that is not possible. However, an amazingly versatile idea of recursive ``plug-in'' will work here: For the first step, we may neglect $q(x^2) \approx 1/(x^2+2)$ entirely as we know that $x^2 \approx 2s$ for relatively large $s$, and hence drop $\log\bigl(1 - q(x^2)\bigr) \approx \log(1) = 0$, such that $(-2)$ times Equation~\ref{eq:log-ser} becomes \begin{align} -2 l_p = 2s \approx&\: x^2 + \log(2\pi) + 2\log(x) = \nonumber\\ &\: x^2 + \log(2\pi\; x^2) \label{eq:ser1}, % 2s \approx& x^2 + \log(2\pi\; x^2) \label{eq:ser1}, \end{align} now subtracting the $\log$ term \emph{and} replacing \emph{its} $x^2$ by its asymptotic approximation $x_0^2 = 2s$ gives \begin{align} 2s - \log(2\pi\; 2s) \approx&\: x^2, \ \ \mathrm{or, with \ } \nonumber\\ &\: x_0^2 := 2s, \label{x0sq}\\ x^2 \approx&\: x_1^2 := 2s - \log(2\pi\; x_0^2) = 2s - \log(4\pi s),\label{x1sq} \end{align} and we do have a substantially better approximation, verified empirically in Figure~\ref{fig:qnormAsym} below ($k=0$ vs $k=1$), where we show further steps, continuing to recursively plug in $x^2$ itself, now no longer neglecting $q(x^2)$ but still only using a first term, from Equation~\ref{eq:log-ser}, \begin{align} -2 \log(1-\Phi(x)) = 2s \approx&\: x^2 + \log(2\pi\; x^2) - 2\log\bigl(1 - q(x^2)\bigr) \approx \nonumber\\ &\: x^2 + \log(2\pi\; x^2) + 2 q(x^2), \label{eq:log-s} \end{align} where the 2nd ``$\approx$'' uses $\log(1 - q) \approx -q$ for $|q| \ll 1$, and $q(x^2) \approx 1/(x^2+2)$ is assumed to be very small here. 
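As a small numerical illustration of this first plug-in step (again only a sketch added here, not the package code), compare $x_0$ and $x_1$ at a known quantile, using the accurate \fct{pnorm} to produce the matching $s$:
\begin{Schunk}
\begin{Sinput}
x  <- 50                                      # the true quantile
s  <- -pnorm(x, lower.tail=FALSE, log.p=TRUE) # s = -log(1 - Phi(50)) ~ 1254.83
x0 <- sqrt(2*s)                               # zero-th order x_0(s)
x1 <- sqrt(2*s - log(4*pi*s))                 # first order  x_1(s)
c(x0 = x0/x - 1, x1 = x1/x - 1)               # rel. errors: ~ 1.9e-3  vs. ~ -6e-7
\end{Sinput}
\end{Schunk}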
Again solving for the first $x^2$ and replacing the other $x^2$ by our current best approximation $x_1^2$ leads to \begin{align} x^2 \approx&\: x_2^2 := 2s - \log(2\pi\; x_1^2) - 2/(x_1^2+2), \label{x2sq} \end{align} and continuing recursively, always taking one more term for $q(x^2)$, but no longer replacing $\log(1 - q)$ by $-q$ but rather the fully accurate $\logIp(-q)$, \begin{align} \label{x3sq} x^2 \approx&\: x_3^2 := 2s - \log(2\pi\; x_2^2) + 2 \logIp\bigl(- \bigl(1 - 1/(4+x_2^2)\bigr)/(2+x_2^2)\bigr), \ \ \mathrm{and} \ \ x^2 \approx \\ %%% & x_4^2 := 2s - \log(2\pi\; x_3^2) + 2 \logIp\bigl(- \bigl(1 - \bigl(1 - 5/(6 + x_3^2)\bigr)/(4+x_3^2)\bigr)/(2+x_3^2)\bigr), \ \ \mathrm{and} \label{x4sq} \\ % \ \ \ x^2 \approx \\ %%% & x_5^2 := 2s - \log(2\pi\; x_4^2) + 2 \logIp\bigl(- \bigl(1 - \bigl(1 - \bigl(5 - 9/(8 + x_4^2)\bigr)/(6 + x_4^2)\bigr)/(4+x_4^2)\bigr)/(2+x_4^2)\bigr). \label{x5sq} \end{align} Taking the square roots of these 6 approximations for $x^2$ gives approximations for the inverse cumulative normal, $\Phi^{-1}\bigl(e^{-s}\bigr)$, namely \begin{align*} x_0(s) =& \sqrt{2s}, \mathrm{\ \ from\ } (\ref{x0sq}) \\ x_1(s) =& \sqrt{2s - \log(4\pi s)}, \mathrm{\ \ from\ } (\ref{x1sq}) \\ x_2(s) =& \sqrt{2s - \log(2\pi\; x_1^2) - 2/(x_1^2+2)}, \mathrm{\ \ from\ } (\ref{x2sq}) \\ x_3(s) =& \sqrt{2s - \log(2\pi\; x_2^2) + 2 \logIp( r(x_2) )}, \mathrm{\ \ see\ } (\ref{x3sq}) \\ x_4(s) =& \dots\dots,\quad x_5(s) = \dots\dots, \mathrm{\ \ see\ } (\ref{x4sq}),\ (\ref{x5sq}), \\ \end{align*} These $x_k(s)$ are provided as a plain \R{} function \fct{qnormAsymp} in our \CRANpkg{DPQ} package; specifically, $x_k(s) = $ \code{qnormAsymp(lp = -s, order = k)}\footnote{% which is the short form; indeed, \code{qnormAsymp(lp = lp, order = k)} is identical to \code{qnormAsymp(p = lp, lower.tail=FALSE, log.p=TRUE, order = k)}.} <>= k.s <- 0:5; nks <- paste0("k=", k.s) qnAsym <- sapply(setNames(k.s, nks), function(k) qnormAsymp(lp=lp, order = k)) relEasym <- apply(qnAsym, 2, relErrV, target = qs) # rel.errors for all @ In Figure~\ref{fig:qnormAsym} we depict the absolute values of their respective relative errors (in log-log scale against $s = -lp = -\log(1-\Phi(x))$), and then zoom in more closely in Figure~\ref{fig:qnormAsymZoom}: \begin{figure}[H] \centering \setkeys{Gin}{width=0.88\textwidth}% default 0.8 <>= matplot(-lp, abs(relEasym), log="xy", type="l", lwd=2, axes=FALSE, xlab = quote(s == -lp)) eaxis(1, sub10=2); eaxis(2, sub10=c(-2,2), nintLog=16); grid(col="gray75") legend("right", nks, col=1:6, lty=1:5, lwd=2, bty="n") <>= <> <> @ \caption{$|$relative errors$|$ of asymptotic approximations in log-log scale}%vs $s = -lp = -\log P[..]$ \label{fig:qnormAsym} \end{figure} \enlargethispage{2ex} Figure~\ref{fig:qnormAsymZoom} then zooms in more closely: %%------------------- \begin{figure}[H] \centering \setkeys{Gin}{width=0.88\textwidth}% default 0.8 <>= matplot(-lp, abs(relEasym), log="xy", type="l", lwd=2, axes=FALSE, xlab = quote(s == -lp), xlim = c(40, 1e9), ylim = 10^c(-16, -3)) eaxis(1, sub10=2); eaxis(2, sub10=c(-2,2), nintLog=16); grid(col="gray80") legend(4e7, 1e-9, nks, col=1:6, lty=1:5, lwd=2, bty="n")#, cex=.75, bg=adjustcolor("bisque", 3/4)) <>= <> <> @ \caption{(Zoomed Fig.~\ref{fig:qnormAsym}) $|$relative errors$|$ of asymptotic approx.\ $x_0(s)$, $x_1(s)$, \dots, $x_5(s)$} \label{fig:qnormAsymZoom} \end{figure} \subsection*{Fully accurate \fct{qnorm}} Our package \CRANpkg{DPQ}'s \code{qnormR(... 
version = "2022-08")} now provides a pure \R{} implementation of what is also to be used (in C) in the next version of \R: it uses the ``optimal'' asymptotic approximation $x_k(s)$ for $s = r^2 > 27^2$ and $k \in \{0,1,\dots,5\}$ as defined above in (\ref{x0sq})--(\ref{x5sq}). ``Optimal'' here means the smallest $k$ which still provides full accuracy; e.g., when $s > 10^{18}$, clearly $x_0(s) = \sqrt{2s}$ is sufficient and hence optimal in that sense. %% for these, see also ~/R/MM/NUMERICS/dpq-functions/qnorm-asymptotic.R Consequently, we have determined approximate \textbf{optimal cut points / regions for the different approximation orders $k$} and found the following ``round number'' values, \begin{table}[H] \centering \begin{tabular}{l|rrrrrr} $ k $ & 5 & 4 & 3 & 2 & 1 & 0 \\ \hline $r\ge$ & 27 & 55 & 109 & 840 & 36000 & 6.4e8 \\ $s=r^2\ge$ & 729 & 3025 & 11881 & 705600 & 1296$\cdot 10^6$ & 4.096e17 % k 5 4 3 2 1 0 \end{tabular} \caption[Optimal cutpoints]{% Optimal cutpoints to determine $k$ to use $x_k(s)$ for $r=\sqrt{s} > 27$.} \label{tab:cutpoints} \end{table} e.g., $k = 0$ is fully accurate, hence optimal, and is used for $r \ge 6.4e8 = 640\cdot 10^6$, or equivalently, for $s = -\mathrm{lp} \ge 4.096e17$, whereas for $r \in [55, 109) \Longleftrightarrow s\in [3025, 11881)$ one needs (and uses) $k = 4$. %% see also R code ../R/norm_f.R {and R's qnorm.c eventually} These were determined using function \fct{p.qnormAsy2} in appendix~\ref{sec:p.qnormAsy}, which visualizes the optimal region for switching from $x_{k-1}$ to $x_k$, for $k = 1,2,3,4,5$; see Figure~\ref{fig:cutp-Asym}. Empirically, we see that now most relative errors are numerically zero and the others are at most $\epsilon_c = 2^{-52} \approx 2.22 \cdot 10^{-16}$; indeed, the asymptotic $x_5(s)$, see Equation~\ref{x5sq}, may be seen to be even better than algorithm AS~241 in the small region $s \in [22.8^2, 27^2]=[519.84, 729]$. % <>= absE <- function(e) pmax(abs(e), 2^-54) # = 1/4 eps_c local({ # larger range for s -- qnorm-extreme-bad.R.~1~ (Sep 25, 2020): qs <- seq(27, 40, by=1/256) lp <- pnorm(qs, lower.tail=FALSE, log.p=TRUE) s <- -lp # = -pnorm(..) = -log(1 - Phi(qs)) > 0 qnrm43 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version= "2022") relEq43 <- relErrV(qs, qnrm43) <> par(mar=c(2, par("mar")[-1])) plot(absE(relEq43) ~ s, type="l", log="xy", ylim = 10^c(-16.3,-15.05), col=adjustcolor(1, 1/2), axes=FALSE) qnAsym5 <- qnormAsymp(lp=lp, order = 5) relE5 <- relErrV(qs, qnAsym5) lines(absE(relE5) ~ s, col=adjustcolor(2, 1/2), lwd = 2) ## smoothing "discrete" relative errors: lines(smooth.spline(s, abs(relEq43)), col=adjustcolor(1, 2/3), lwd=4, lty="6132") lines(smooth.spline(s, abs(relE5) ), col=adjustcolor(2, 2/3), lwd=4, lty="72") ## nice axes etc axis(1, at=c(17,19,20)^2) eaxis(1, nintLog = 16, sub10 = 2); eaxis(2); ablaxis1(27) r. <- c(17,19:23,25,27, 30, 35, 40) ; ablaxis1(21.5); ablaxis1(22.8) axis(3, at=r.^2, label=r., col=4, col.axis=4, line=-.5, cex.axis=.75) axis(3, at=19.5^2, label=quote(r == {}), col=4, col.axis=4, col.ticks=NA, cex.axis=1.4, line=-.5) if(FALSE) {## visually inspect "table" values cbind(qs, r = sqrt(s), s, relE5, relEq43)[21^2 <= s & s <= 27^2, ] cbind(qs, r=sqrt(s), s)[ which(abs(relE5) > 0),] # very last one is r = 23.9081 } }) @ % , but for back compatibility we keep using it .. 
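Expressed in code, choosing $k$ from Table~\ref{tab:cutpoints} is a simple interval lookup; the following sketch (a hypothetical helper for illustration only, not the actual \pkg{DPQ} or \file{qnorm.c} code) shows the idea:
\begin{Schunk}
\begin{Sinput}
r0 <- c(27, 55, 109, 840, 36000, 6.4e8)      # lower cutpoints for k = 5, 4, 3, 2, 1, 0
kOpt <- function(r) 6L - findInterval(r, r0) # assumes r >= 27; e.g. r in [55,109) |-> k = 4
kOpt(c(30, 100, 1000, 1e5, 1e9))             # 5 4 2 1 0
\end{Sinput}
\end{Schunk}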
<>= relE_qn <- relErrV(qs, qnrm43) @ We \emph{tabulate} the values as multiples of $\epsilon_c$, so no visualization is needed: <>= table(2^52 * relE_qn) # all in [-2.5, 3] table(2^52 * relE_qn[s > 27^2]) # in [-1, 1] @ % \begin{figure}[H] % \centering % *NOT* doing it: vvvvvvvvvv <>= version.txt <- "R > 4.2.x (after 2022)" <> @ % \caption{qnormR(-s, .., version= "2022") relative error in extreme tails}% planned R ver.~$> 4.2.x$ % \label{fig:relErr43} % \end{figure} To see how the final \fct{qnormR} is implemented, you can look at the \code{length-1} version \fct{qnormR1}\footnote{indeed, in the package source file \verb|DPQ/R/norm_f.R|, \fct{qnormR} is defined to correctly vectorize in its main arguments \code{p}, \code{mu}, and \code{sd}, by \ \code{qnormR <- Vectorize(qnormR1, c("p", "mu", "sd"))}}. \afterpage{\clearpage} \section{Concluding summary} We have derived novel asymptotic formulas for \code{qnorm(lp, lower.tail=FALSE, log.p=TRUE)}, i.e., $\Phi^{-1}(e^{-s}) = \Phi^{-1}(e^{-r^2})$ for large $s = r^2$, notably for $r > 27$ which is beyond the range where the published algorithm AS~241, \cite{AS241:Wichura1988}, is accurate, see (\ref{x0sq}), (\ref{x1sq}), and (\ref{x2sq})--(\ref{x5sq}). % (12),(13), and (15)-(18). For these formulas of order $k=0, 1, \dots, 5$, implemented in \pkg{DPQ}'s \R\ function \code{qnormAsymp(*, order=k)}, we have derived optimal regions, i.e., intervals for $r$ partitioning $(27, \infty)$, see Table~\ref{tab:cutpoints}, and implemented them in the \R{} function \code{qnormR(*, version = "2022-08")} for reproducibility and to be used in (the C code in \file{Rmathlib} in) the next version of \R's \fct{qnorm}. \section{Computational details, session information} %% ------------------------ For most of our plots we made use of utilities for log scale axis drawing, notably \fct{eaxis} and also \fct{mult.fig} (for \fct{p.qnormAsy2} in appendix~\ref{sec:p.qnormAsy}) from our package \CRANpkg{sfsmisc} \citep{sfsmisc}, from which also \fct{relErrV} was used for computing \code{relEasym}, plotted in Figures~\ref{fig:qnormAsym} and \ref{fig:qnormAsymZoom}. <>= toLatex(sessionInfo(), locale=FALSE) @ %% <>= unlist(packageDescription("DPQ")[c("Package", "Version", "Date")]) @ % \section{Conclusion} % ... 
\appendix%============================================================================= %====================================================================================== \ifJSS{}\else \section[relErrV() (from sfsmisc)]{% Function \fct{relErrV} (package \pkg{sfsmisc})}\label{sec:relErrV} To compute relative (approximation) errors, in a way that works correctly, also with \code{Inf}, \code{NA}, and \code{NaN}s, we make use of the function \fct{relErrV} from (our own) CRAN package \CRANpkg{sfsmisc}, defined\footnote{currently; for updates, see \url{https://github.com/mmaechler/sfsmisc/blob/master/R/relErr.R}} as <>= ## Componentwise aka "Vectorized" relative error: ## Must not be NA/NaN unless one of the components is ==> deal with {0, Inf, NA} relErrV <- function(target, current, eps0 = .Machine$double.xmin) { n <- length(target <- as.vector(target)) ## assert( is multiple of ) : lc <- length(current) if(!n) { if(!lc) return(numeric()) # everything length 0 else stop("length(target) == 0 differing from length(current)") } else if(!lc) stop("length(current) == 0 differing from length(target)") ## else n, lc > 0 if(lc %% n) stop("length(current) must be a multiple of length(target)") recycle <- (lc != n) # explicitly recycle R <- if(recycle) target[rep(seq_len(n), length.out=lc)] else target # (possibly "mpfr") R[] <- 0 ## use *absolute* error when target is zero {and deal with NAs}: t0 <- abs(target) < eps0 & !(na.t <- is.na(target)) R[t0] <- current[t0] ## absolute error also when it is infinite, as (-Inf, Inf) would give NaN: dInf <- is.infinite(E <- current - target) R[dInf] <- E[dInf] useRE <- !dInf & !t0 & (na.t | is.na(current) | (current != target)) R[useRE] <- (current/target)[useRE] - 1 ## preserve {dim, dimnames, names} from 'current' : if(!is.null(d <- dim(current))) array(R, dim=d, dimnames=dimnames(current)) else if(!is.null(nm <- names(current)) && is.null(names(R))) # not needed for mpfr `names<-`(R, nm) else R } @ \fi%----Function relErrV() -- only in non-JSS vignette -------------------------------- \section[p.qnormAsy2() for optimal cutpoints]{% Function \fct{p.qnormAsy2} for showing optimal cutpoints}\label{sec:p.qnormAsy} This function, currently also used in \pkg{DPQ}'s \code{example(qnormAsymp)}, was used by the author and may be used for reproducibility to visualize the five ``cutpoint - regions'', Table~\ref{tab:cutpoints}, to switch from approximation $x_{k-1}(r)$ to $x_k(r)$, for $k=1,\dots,5$ and $r = \sqrt{s} = \sqrt{- \log p}$, using <>= r0 <- c(27, 55, 109, 840, 36000, 6.4e8) # <-- cutoffs <--> in ../R/norm_f.R # use k = 5 4 3 2 1 0 e.g. k = 0 good for r >= 6.4e8 @ <>= <> for(ir in 2:length(r0)) { p.qnormAsy2(r0[ir], k = 5 +2-ir) # k = 5, 4, .. if(interactive() && ir < length(r0)) { cat("[Enter] to continue: "); cat(readLines(stdin(), n=1), "\n") } } @ <>= ## Zoom into each each cut-point region : p.qnormAsy2 <- function(r0, k, # use k-1 and k in region around r0 n = 2048, verbose=TRUE, ylim = c(-1,1) * 2.5e-16, rr = seq(r0 * 0.5, r0 * 1.25, length = n), ...) { stopifnot(is.numeric(rr), !is.unsorted(rr), # the initial 'r' length(k) == 1L, is.numeric(k), k == as.integer(k), k >= 1) k.s <- (k-1L):k; nks <- paste0("k=", k.s) if(missing(r0)) r0 <- quantile(rr, 2/3)# allow specifying rr instead of r0 if(verbose) cat("Around r0 =", r0,"; k =", deparse(k.s), "\n") lp <- (-rr^2) # = -r^2 = -s <==> rr = sqrt(- lp) q. <- qnormR(lp, lower.tail=FALSE, log.p=TRUE, version="2022-08")# *not* depending on R ver! 
pq <- pnorm (q., lower.tail=FALSE, log.p=TRUE) # ~= lp ## the arg of pnorm() is the true qnorm(pq, ..) == q. by construction r <- sqrt(- pq) stopifnot(all.equal(rr, r, tol=1e-15)) qnAsy <- sapply(setNames(k.s, nks), function(ord) qnormAsymp(pq, lower.tail=FALSE, log.p=TRUE, order=ord)) relE <- qnAsy / q. - 1 m <- cbind(r, pq, relE) if(verbose) { print(head(m, 9)); for(j in 1:2) cat(" ..........\n") print(tail(m, 4)) } ## matplot(r, relE, type = "b", main = paste("around r0 = ", r0)) matplot(r, relE, type = "l", ylim = ylim, main = paste("Relative error of qnormAsymp(*, k) around r0 = ", r0, "for k =", deparse(k.s)), xlab = quote(r == sqrt(-log(p))), ...) legend("topleft", nks, horiz = TRUE, col=1:2, lty=1:2, bty="n", lwd=2) for(j in seq_along(k.s)) lines(smooth.spline(r, relE[,j]), col=adjustcolor(j, 2/3), lwd=4, lty="6132") cc <- "blue2"; lab <- substitute(r[0] == R, list(R = r0)) abline(v = r0, lty=2, lwd=2, col=cc) axis(3, at= r0, labels=lab, col=cc, col.axis=cc, line=-1) abline(h = (-1:1)*.Machine$double.eps, lty=c(3,1,3), col=c("green3", "gray", "tan2")) invisible(cbind(r = r, qn = q., relE)) } @ \begin{figure}[H] \centering \setkeys{Gin}{width=1.05\textwidth}% default 0.8 %% using A4 portrait => ratio = sqrt(2) <>= sfsmisc::mult.fig(5, main = "qnormAsymp(*, k) approximations in the 5 cutpoint regions") <> for(ir in 2:length(r0)) p.qnormAsy2(r0[ir], k = 5 +2-ir, n = 1024, verbose=FALSE, cex.main = .90) @ \caption{\code{qnormAsymp(*, k)} approximation in the 5 cutpoint regions:\\ \code{r0 <- c(27, 55, 109, 840, 36000, 6.4e8)}\\ % sync w/ % <>= above! \code{for(ir in 2:length(r0)) p.qnormAsy2(r0[ir], k = 5 + 2-ir, ..)} } \label{fig:cutp-Asym} \end{figure} <>= options(op.orig) @ \bibliography{qnorm-litt} \end{document}