From noreply at r-forge.r-project.org Mon Mar 7 15:58:22 2016 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 7 Mar 2016 15:58:22 +0100 (CET) Subject: [Distr-commits] r1086 - pkg/distrMod/man Message-ID: <20160307145822.C37DB187EFC@r-forge.r-project.org> Author: ruckdeschel Date: 2016-03-07 15:58:22 +0100 (Mon, 07 Mar 2016) New Revision: 1086 Modified: pkg/distrMod/man/MDEstimator.Rd pkg/distrMod/man/internalmdeHelpers.Rd pkg/distrMod/man/internals-qqplot.Rd pkg/distrMod/man/qqplot.Rd Log: distrMod: fixed some Rd files about qqplot Modified: pkg/distrMod/man/MDEstimator.Rd =================================================================== --- pkg/distrMod/man/MDEstimator.Rd 2016-01-21 11:52:57 UTC (rev 1085) +++ pkg/distrMod/man/MDEstimator.Rd 2016-03-07 14:58:22 UTC (rev 1086) @@ -74,7 +74,7 @@ %\note{} \seealso{\code{\link{ParamFamily-class}}, \code{\link{ParamFamily}}, \code{\link{MCEstimator}}, \code{\link{MCEstimate-class}}, - \code{\link[MASS]{fitdistr}}, \code{\link{.CvMMDCovariance} } + \code{\link[MASS]{fitdistr}}, \code{\link{.CvMMDCovariance}}} \examples{ ## (empirical) Data x <- rgamma(50, scale = 0.5, shape = 3) Modified: pkg/distrMod/man/internalmdeHelpers.Rd =================================================================== --- pkg/distrMod/man/internalmdeHelpers.Rd 2016-01-21 11:52:57 UTC (rev 1085) +++ pkg/distrMod/man/internalmdeHelpers.Rd 2016-03-07 14:58:22 UTC (rev 1086) @@ -28,13 +28,13 @@ \item{mu}{an object of class \code{UnivariateDistribution}; the distribution on the reals at which to integrate the squared distance of the cdf's in the CvM-distance} - \item{withplot}{logical; defaults to \code{FALSE}; if \code{TRUE} for diagnostic} + \item{withplot}{logical; defaults to \code{FALSE}; if \code{TRUE} for diagnostic purposes plots the influence function of the CvM-MDE} \item{withpreIC}{logical; should corresponding IC of the CvM-MDE be returned?} \item{\dots}{currently not used} \item{N}{integer; the 
number of grid points at which to evaluate the antiderivative - in case of an absolutely continuous distribution; more precisely, internally} - this becomes \eq{2N+1}} + in case of an absolutely continuous distribution; more precisely, internally + this becomes \eqn{2N+1}} \item{rel.tol}{numeric; relative tolerance; currently not used} \item{TruncQuantile}{numeric in (0,1); in case of an unbounded support of the distribution the quantile level at which to cut the distribution} Modified: pkg/distrMod/man/internals-qqplot.Rd =================================================================== --- pkg/distrMod/man/internals-qqplot.Rd 2016-01-21 11:52:57 UTC (rev 1085) +++ pkg/distrMod/man/internals-qqplot.Rd 2016-03-07 14:58:22 UTC (rev 1086) @@ -9,7 +9,7 @@ These functions are used internally by qqplot of package distrMod.} \usage{ -.labelprep(x,y,lab.pts,col.lbl,cex.lbl,which.lbs,which.Order,order.traf) +.labelprep(x,y,lab.pts,col.lbl,cex.lbl,adj.lbl,which.lbs,which.Order,order.traf) } @@ -18,6 +18,7 @@ \item{y}{a (numeric) vector of same length as \code{x}} \item{cex.lbl}{magnification factor for the plotted observation labels} \item{col.lbl}{color for the plotted observation labels} +\item{adj.lbl}{adj parameter for the plotted observation labels} \item{lab.pts}{character or \code{NULL}; observation labels to be used} \item{which.lbs}{integer or \code{NULL}; which observations shall be labelled} \item{which.Order}{integer or \code{NULL}; which of the ordered (remaining) observations shall be labelled} @@ -44,6 +45,7 @@ \code{lab} (the thinned out and ordered vector of labels \code{lab.pts}), \code{col} (the thinned out and ordered vector of colors \code{col.lbs}), \code{cex} (the thinned out and ordered vector of magnification factors \code{cex.lbs}). +\code{adj} (the thinned out and ordered vector of adjacencies \code{adj.lbs}). 
} } Modified: pkg/distrMod/man/qqplot.Rd =================================================================== --- pkg/distrMod/man/qqplot.Rd 2016-01-21 11:52:57 UTC (rev 1085) +++ pkg/distrMod/man/qqplot.Rd 2016-03-07 14:58:22 UTC (rev 1086) @@ -18,7 +18,7 @@ col.sCI = "tomato2", lty.sCI = 4, lwd.sCI = 2, pch.sCI = par("pch"), cex.sCI = par("cex"), added.points.CI = TRUE, cex.pch = par("cex"), col.pch = par("col"), - cex.lbl = par("cex"), col.lbl = par("col"), adj.lbl = NULL, + cex.lbl = par("cex"), col.lbl = par("col"), adj.lbl = par("adj"), alpha.trsp = NA, jit.fac = 0, jit.tol = .Machine$double.eps, check.NotInSupport = TRUE, col.NotInSupport = "red", with.legend = TRUE, legend.bg = "white", @@ -144,12 +144,54 @@ } } \value{ - As for function \code{\link[stats:qqnorm]{qqplot}} from package \pkg{stats}: a - list with components + A list of elements containing the information needed to compute the + respective QQ plot, in particular it extends the elements of the + return value of function \code{\link[stats:qqnorm]{qqplot}} + from package \pkg{stats}, i.e., a + list with components \code{x} and \code{y} for x and y coordinates + of the plotted points; this list is of S3 class + \code{c("qqplotInfo","DiagnInfo")}, and more specifically it contains \item{x}{The x coordinates of the points that were/would be plotted} \item{y}{The corresponding quantiles of the second distribution, \emph{including \code{\link{NA}}s}.} - \item{crit}{A matrix with the lower and upper confidence bounds + + \item{xy.0}{xy} + \item{y.0}{y} + \item{datax}{argument \code{datax} from the call.} + \item{withConf.pw}{argument \code{withConf.pw} from the call.} + \item{withConf.sim}{argument \code{withConf.sim} from the call.} + \item{alpha.CI}{argument \code{alpha.CI } from the call.} + \item{col.pCI}{argument \code{col.pCI} from the call.} + \item{lty.pCI}{argument \code{lty.pCI} from the call.} + \item{lwd.pCI}{argument \code{lwd.pCI} from the call.} + \item{pch.pCI}{argument \code{pch.pCI} 
from the call.} + \item{cex.pCI}{argument \code{cex.pCI} from the call.} + \item{col.sCI}{argument \code{col.sCI} from the call.} + \item{lty.sCI}{argument \code{lty.sCI} from the call.} + \item{lwd.sCI}{argument \code{lwd.sCI} from the call.} + \item{pch.sCI}{argument \code{pch.sCI} from the call.} + \item{cex.sCI}{argument \code{cex.sCI} from the call.} + \item{n}{argument \code{n} from the call.} + \item{exact.sCI}{argument \code{exact.sCI} from the call.} + \item{exact.pCI}{argument \code{exact.pCI} from the call.} + \item{nosym.pCI}{argument \code{nosym.pCI} from the call.} + \item{with.legend}{argument \code{with.legend} from the call.} + \item{legend.bg}{argument \code{legend.bg} from the call.} + \item{legend.pos}{argument \code{legend.pos} from the call.} + \item{legend.cex}{argument \code{legend.cex} from the call.} + \item{legend.pref}{argument \code{legend.pref} from the call.} + \item{legend.postf}{argument \code{legend.postf} from the call.} + \item{legend.alpha}{argument \code{legend.alpha} from the call.} + \item{debug}{argument \code{debug} from the call.} + \item{args.stats.qqplot}{the arguments of the call to \code{mcl} from the call.} + \item{withLab}{argument \code{withLab} from the call to + \code{stats::qqplot} from within this QQ plot method.} + \item{lbprep}{the return value of the label preparation from within + this function, i.e., a list with elements + \code{x0}, \code{y0}, \code{lab}, \code{col}, \code{cex}, + \code{adj}.} + + \item{crit}{A matrix with the lower and upper confidence bounds (computed by \code{qqbounds}).} \item{err}{logical vector of length 2.} (elements \code{crit} and \code{err} are taken from the return From noreply at r-forge.r-project.org Mon Mar 7 16:00:29 2016 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 7 Mar 2016 16:00:29 +0100 (CET) Subject: [Distr-commits] r1087 - pkg Message-ID: <20160307150029.17667187612@r-forge.r-project.org> Author: ruckdeschel Date: 2016-03-07 
16:00:28 +0100 (Mon, 07 Mar 2016) New Revision: 1087 Added: pkg/confidencebandsQQ.txt Log: note on confidence bands for qqplot committed Added: pkg/confidencebandsQQ.txt =================================================================== --- pkg/confidencebandsQQ.txt (rev 0) +++ pkg/confidencebandsQQ.txt 2016-03-07 15:00:28 UTC (rev 1087) @@ -0,0 +1,98 @@ +%----------------------------------------------------------------- +CIs for QQ plot at level beta (e.g. beta = 0.95) +P. Ruckdeschel 20160304 +%----------------------------------------------------------------- + +notation: +qn: empirical a-quantile = order statistics +q: population a-quantile + +let F be a cdf, F^-(a) the quantile fct, i.e. + F^-(a) = inf{t in R| F(t)>= a} +F_n empirical cdf, F_n^- empirical quantile fct. + +basic fact (*) +{s in R| F(s)>= a} = {s in R| s >= F^-(a)} + + +Two distinct approaches +********************************************* +(a) pointwise: +********************************************* + +On a grid of a values a_j j=1..m in (0,1) do + +(a1) exact, i.e., with binomial probabilities: + +P(qn - q <= t) = P(qn <= t+q ) = P(F_n^-(a) <= t+q) =(*) +P(a <= F_n(t+q)) = P (sum I(X_i<=t+q) >= an) = +P(Binom(n,F(t+q))>=na) = [in R] + pbinom(n*a,size=n,prob=F(t+q),lower.tail=FALSE)+dbinom(n*a,size=n,prob=F(t+q)) + ++++++ (a11) "symmetric on P-level" + +=> search +t1<0 minimal so that P(Binom(n,F(t1+q)) >=na) <= (1-beta)/2 +t2>0 minimal so that P(Binom(n,F(t2+q)) >=na) <= (1+beta)/2 +=> interval [t1+qn,qn+t2] + +(a12) "minimal length" +=> on a grid a_i i=1..m of a-values from a=0 (or almost 0 for finite values) to a=1-beta +for i in 1.. 
m + +find ti1<0 minimal so that P(Binom(n,F(ti1+q)) >=na) <= a_i +find ti2>0 minimal so that P(Binom(n,F(ti2+q)) >=na) <= a_i+beta +use interval [ti01+qn,qn+ti02] where i0 is such that ti02-ti01 = min_i ti2-ti1 + +in R + +let invF be the quantile function of F + +fsearch <- function(level) + uniroot(function(t)qbinom(level,size=n,prob=F(t+q))-n*a,interval=c(-q+0.0001,invF(.9999)-q))$root + +ti1 <- fsearch((1-beta)/2) +ti2 <- fsearch((1+beta)/2) + + ++++++ (a2) pointwise CLT based + ++ set t=s/sqrt(n) then + +P( qn - q <= t) = P(qn <= t+q )=P(F_n^-(a) <= t+q)=(*) +P(a <= F_n(t+q)) = P (sum I(X_i<=t+q)/n >= a) = P (sum I(X_i<=t+q)/n -F(t+q) >= a -F(t+q)) = +P (sqrt(n/(F(t+q)(1-F(t+q)))) (sum I(X_i<=t+q)/n - F(t+q)) >= sqrt(n)(a-F(t+q))/sqrt(F(t+q)(1-F(t+q)))) = + LHS is asymptotically N(0,1); for RHS do Taylor approximations + => assume F differentiable in q with deriv f(q)>0 + => F(q+t) = a+f(q)s/sqrt(n) + o(1/sqrt(n)) + => sqrt(n)(a-F(t+q)) = -f(q)s + o(1), F(t+q)(1-F(t+q)) = a(1-a) + o(1) +=> P( qn - q <= t) =. P(N(0,1)>= -f(q) s /sqrt(a(1-a))) + +=> Interval [qn - c sqrt(a(1-a))/(f(q) sqrt(n)), qn + c sqrt(a(1-a))/(f(q) sqrt(n))], + for c = Phi^(-1)((1+beta)/2) (in R: qnorm() ...) + +----------------------------------------------------------------------- +for all possibilities (a11),(a12),(a2) we now have lower and upper + confidence bounds t1j resp t2j + + link the points (x=F^-(aj),y=q+t1j) j=1..m by lines -> lower CI bound + link the points (x=F^-(aj),y=q+t2j) j=1..m by lines -> upper CI bound +----------------------------------------------------------------------- + +********************************************* +(b) simultaneously +********************************************* + +let c be the beta-quantile of the statistic of the Kolmogorov-Smirnov test, i.e. 
+ + P( sup_t sqrt(n) |F_n(t)-F(t)| <= c ) = beta + + then P( F_n(t)-c/sqrt(n) <= F(t) <= F_n(t)+c/sqrt(n) simultaneously for all t) = beta + <=> (*) P(F^-(F_n(t)-c/sqrt(n)) <= t <= F^-(F_n(t)+c/sqrt(n)) for all t in R) = beta + +evaluate this on a t grid t_j j=1..m in R + +we now have lower and upper confidence bounds + link the points (x=tj,y=F^-(F_n(tj)-c/sqrt(n))) j=1..m by lines -> lower CI bound + link the points (x=tj,y=F^-(F_n(tj)+c/sqrt(n))) j=1..m by lines -> upper CI bound +