[Distr-commits] r583 - branches/distr-2.2/pkg

Thu Sep 10 15:19:02 CEST 2009

Author: ruckdeschel
Date: 2009-09-10 15:19:02 +0200 (Thu, 10 Sep 2009)
New Revision: 583

Modified:
   branches/distr-2.2/pkg/qqplot.R
Log:
further improvements in qqplot.R 
--> now exact[for a.c.Distr] and approx. CIs should work; 
   (Binomial CIs were wrong before; subtly trying to use "correct" p.l resp. q.r were needed)
--> observations not in the support of a distribution are recognized and colored differently
--> jittering for Discrete (Parts of) Distributions is supported


Modified: branches/distr-2.2/pkg/qqplot.R
===================================================================

--- branches/distr-2.2/pkg/qqplot.R	2009-09-10 00:51:21 UTC (rev 582)
+++ branches/distr-2.2/pkg/qqplot.R	2009-09-10 13:19:02 UTC (rev 583)
@@ -24,7 +24,17 @@
 ## into distr
 
 ## helpers
+.inGaps <- function(x,gapm){
+  fct <- function(x,m){ m[,2]>=x & m[,1]<=x}
+  sapply(x, function(y) length(which(fct(y,gapm)))>0)
+}
 
+.isReplicated <- function(x){
+  tx <- table(x)
+  rx <- as.numeric(names(tx[tx>1]))
+  sapply(x, function(y) any(abs(y-rx)<.Machine$double.eps))
+}
+
 .q2kolmogorov <- function(alpha,n,exact=(n<100)){ ## Kolmogorovstat
  if(exact){
  fct <- function(p0){
@@ -48,7 +58,7 @@
         }
         return(p)
     }
- ###
+ ###  end of code from package stats
  fct <- function(p0){
       1 - pkstwo(p0)-alpha  }
  res <- uniroot(fct,lower=0,upper=sqrt(n))$root
@@ -56,52 +66,89 @@
  return(res)
 }
 
-.BinomCI <- function(p,n,alpha){
-  fct <- function(t,p1.bi){
-   pbinom(q = pmin(n*p1.bi+t*sqrt(n),n+1), size = n, prob = p1.bi) -
-   pbinom(q = pmax(n*p1.bi-t*sqrt(n),-1), size = n, prob = p1.bi)-alpha
+.BinomCI <- function(x,p.b,D,n,alpha){
+  if(length(x)==0) return(NA)
+  fct <- function(t,p.bi,x.i){
+   p.bi.u <- p(D)(x.i+t/sqrt(n))
+   p.bi.l <- p.l(D)(x.i-t/sqrt(n))
+   d.r <- if(n*p.bi>floor(n*p.bi)) 0 else
+        dbinom(x = n*p.bi, size = n, prob = pmax(p.bi.u,1))
+   p.r <- pbinom(q = n*p.bi, size = n, prob = pmin(p.bi.u,1),lower.tail=FALSE)+d.r
+   p.l <- pbinom(q = n*p.bi, size = n, prob = pmax(p.bi.l,0),lower.tail=FALSE)
+   r <- p.r -p.l - alpha
+#   print(c(r=r,p=p.bi,x=x.i,p.u=p.bi.u,p.l=p.bi.l,r.r=p.r,r.l=p.l,t=t,np=n*p.bi))
+   r
   }
-  sapply(p, function(p2) uniroot(fct, lower=0, upper=sqrt(n)+1,
-         p1.bi = p2, tol = 1e-9)$root)
+  sapply(1:length(x), function(i) uniroot(fct, lower=0, upper=sqrt(n)*max(x[i],n-x[i]),
+         p.bi = p.b[i], x.i = x[i], tol = 1e-9)$root)
 }
+.q2pw <- function(x,p.b,D,n,alpha,exact=(n<100)){
+ if(exact)
+    return(.BinomCI(x,p.b,D,n,alpha))
+ pq <- p.b*(1-p.b)
+ if(is(D,"AbscontDistribution")){
+    dp <- d(D)(x)
+ }else{
+    supp.ind <- sapply(x, function(y)
+                 which.min(abs(y-support(D))))
+    nx <- length(support(D))
+    supp.ind.p <- pmax(supp.ind + 1 ,nx)
+    supp.ind.m <- pmax(supp.ind - 1 ,1)
+    dsupp.p <- support(D)[supp.ind.p] - support(D)[supp.ind]
+    dsupp.m <- support(D)[supp.ind] - support(D)[supp.ind.m]
+    s <- sd(D)
+    m <- E(D)
+#    print(c(pq[1:3], x[1:3],dsupp.p[1:3],dsupp.m[1:3],m,s))
+    dp <- pnorm((x+dsupp.p/2-m)/s) - pnorm((x-dsupp.m/2-m)/s)
+ }
+ return(sqrt(pq)/dp*qnorm((1+alpha)/2))
+}
 
 .confqq <- function(x,D,alpha,col.pCI,lty.pCI,lwd.pCI,col.sCI,lty.sCI,lwd.sCI,
-                    n,exact=(n<100)){
-   p <- p(D)(x)
-   c.crit <- .q2kolmogorov(alpha,n,exact)
-   c.crit.i <- .BinomCI(p,n,alpha)
-
-   lines(x, q(D)(pmin(p+c.crit.i/sqrt(n),1-1e-9)),
+                    n,exact.sCI=(n<100),exact.pCI=(n<100)){
+   p.r <- p(D)(x)
+   p.l <- p.l(D)(x)
+   c.crit <- #q.r(D)(p(D)(
+             .q2kolmogorov(alpha,n,exact.sCI)
+             #))
+   c.crit.i <- #q.r(D)(p(D)(
+              .q2pw(x,p.r,D,n,alpha,exact.pCI)
+              #))
+#   print(c.crit.i)
+   lines(x, x+c.crit.i/sqrt(n),
          col=col.pCI,lty=lty.pCI,lwd=lwd.pCI)
-   lines(x, q(D)(pmax(p-c.crit.i/sqrt(n),0+1e-9)),
+   lines(x, x-c.crit.i/sqrt(n),
          col=col.pCI,lty=lty.pCI,lwd=lwd.pCI)
 
-   lines(x, q(D)(pmin(p+c.crit/sqrt(n),1-1e-9)),
+   lines(x, q.r(D)(pmax(1-p.r-c.crit/sqrt(n),getdistrOption("DistrResolution")),lower.tail=FALSE),
          col=col.sCI,lty=lty.sCI,lwd=lwd.sCI)
-   lines(x, q(D)(pmax(p-c.crit/sqrt(n),0+1e-9)),
+   lines(x, q(D)(pmax(p.l-c.crit/sqrt(n),getdistrOption("DistrResolution"))),
          col=col.sCI,lty=lty.sCI,lwd=lwd.sCI)
    legend("topleft", legend = eval(substitute(expression(
-      "pointw. exact"~alpha==alpha0~"%- conf. interval",
-      "simult."~ex0~alpha==alpha0~"%- conf. interval"#,
-      ),list(ex0=if(exact) "exact" else "asympt.",
+      "symm. pointw."~ex.p~alpha==alpha0~"%- conf. interval",
+      "simult."~ex.s~alpha==alpha0~"%- conf. interval"#,
+      ),list(ex.s=if(exact.sCI) "exact" else "asympt.",
+             ex.p=if(exact.pCI) "exact" else "asympt.",
              alpha0=alpha*100))), lty=c(lty.pCI,lty.sCI), col=c(col.pCI,col.sCI),
             lwd=2,cex=.8)
 }
 
 .deleteItemsMCL <- function(mcl){
     mcl$n <- NULL
-    mcl$col.IdL <- mcl$alpha.CI <- mcl$lty.IdL <- mcl$exact.sCI <- NULL
+    mcl$col.IdL <- mcl$alpha.CI <- mcl$lty.IdL <-  NULL
+    mcl$col.NotInSupport <- mcl$check.NotInSupport <- NULL
+    mcl$exact.sCI <- mcl$exact.pCI <- NULL
     mcl$withConf <- mcl$withIdLine <- mcl$distance <- NULL
     mcl$col.pCI <- mcl$lty.pCI <- mcl$col.sCI <- mcl$lty.sCI <- NULL
     mcl$lwd.IdL <- mcl$lwd.pCI <- mcl$lwd.sCI <- NULL
     mcl$withLab <- mcl$lab.pts <- mcl$which.lbs <- NULL
     mcl$which.Order <- mcl$order.traf  <- NULL
-    mcl$col.pch <- mcl$cex.pch  <- NULL
+    mcl$col.pch <- mcl$cex.pch  <- mcl$jit.fac <- NULL
     mcl$col.lbl <- mcl$cex.lbl  <- mcl$adj.lbl <- NULL
 mcl}
 
 ## helper into distrMod
-.labelprep <- function(x,y,lab.pts,which.lbs,which.Order,order.traf){
+.labelprep <- function(x,y,lab.pts,col.lbl,which.lbs,which.Order,order.traf){
       n <- length(x)
       xys <- cbind(x,y[rank(x)])
       if(is.null(which.lbs)) which.lbs <- 1:n
@@ -119,7 +166,7 @@
       if(is.null(lab.pts)) lab.pts <- paste(oN)
       else {lab.pts <-  rep(lab.pts, length.out=n)
             lab.pts <- lab.pts[oN]}
-      return(list(x0=x0,y0=y0,lab=lab.pts))
+      return(list(x0=x0,y0=y0,lab=lab.pts,col=col.lbl[oN]))
 }
 
 ## into distr:
@@ -129,20 +176,53 @@
     plot.it = TRUE, xlab = deparse(substitute(x)),
     ylab = deparse(substitute(y)), ...,
     col.IdL = "red", lty.IdL = 2, lwd.IdL = 2,
-    alpha.CI = .95, exact.sCI = (n<100),
+    alpha.CI = .95, exact.sCI = (n<100), exact.pCI = (n<100),
     col.pCI = "orange", lty.pCI = 3, lwd.pCI = 2,
     col.sCI = "tomato2", lty.sCI = 4, lwd.sCI = 2,
-    cex.pch=par("cex"), col.pch = par("col")){
+    cex.pch=par("cex"), col.pch = par("col"),
+    jit.fac = 0, check.NotInSupport = TRUE,
+    col.NotInSupport = "red"){
 
     mc <- match.call(call = sys.call(sys.parent(1)))
     if(missing(xlab)) mc$xlab <- as.character(deparse(mc$x))
     if(missing(ylab)) mc$ylab <- as.character(deparse(mc$y))
     mcl <- as.list(mc)[-1]
+    force(x)
 
     pp <- ppoints(n)
     xc <- q(x)(pp)
     yc <- q(y)(pp)
 
+    col.pch <- rep(col.pch,length.out=n)
+
+    if(check.NotInSupport){
+       xco <- sort(xc)
+       nInSupp <- which(xco < q(y)(0))
+       nInSupp <- unique(sort(c(nInSupp,which( ! xco > q(y)(1)))))
+       if("support" %in% names(getSlots(class(y))))
+          nInSupp <- unique(sort(c(nInSupp,which( ! xco %in% support(y)))))
+       if("gaps" %in% names(getSlots(class(y))))
+          nInSupp <- unique(sort(c(nInSupp,which( .inGaps(xco,gaps(y))))))
+       if(length(nInSupp)){
+          col.pch[nInSupp] <- col.NotInSupport
+       }
+    }
+
+
+    oxc <- 1:length(xc)
+    xc.o <- xc
+    yc.o <- yc
+
+    if("support" %in% names(getSlots(class(x)))){
+       xc <- jitter(xc, factor=jit.fac)
+       oxc <- order(xc)
+       xc <- xc[oxc]
+       col.nInSupp <- col.nInSupp [oxc]
+       }
+
+    if("support" %in% names(getSlots(class(y))))
+       yc <- sort(jitter(yc, factor=jit.fac))
+
     mcl$x <- xc
     mcl$y <- yc
 
@@ -156,10 +236,15 @@
        abline(0,1,col=col.IdL,lty=lty.IdL,lwd=lwd.IdL)
        if(#is(y,"AbscontDistribution") &&
        withConf){
-          xy <- sort(c(xc,yc))
+          xy <- unique(sort(c(xc.o,yc.o)))
+          lxy <- length(xy)
+          if(lxy<n){
+             xy0 <- seq(min(xy),max(xy),length=n-lxy)
+             xy <- sort(c(xy,xy0))
+          }
           .confqq(xy, y, alpha.CI, col.pCI, lty.pCI, lwd.pCI,
                       col.sCI, lty.sCI, lwd.sCI,
-                  length(xc), exact = exact.sCI)
+                  length(xc), exact.sCI = exact.sCI, exact.pCI = exact.pCI)
        }
     }
     return(ret)
@@ -188,6 +273,7 @@
              lwd.IdL = 2,         ## line width for the identity line
              alpha.CI = .95,      ## confidence level
              exact.sCI = (n<100), ## shall simultaneous CIs be determined with exact kolmogorov distribution?
+             exact.pCI = (n<100), ## shall pointwise CIs be determined with exact Binomial distribution?
              col.pCI = "orange",  ## color for the pointwise CI
              lty.pCI = 3,         ## line type for the pointwise CI
              lwd.pCI = 2,         ## line width for the pointwise CI
@@ -198,21 +284,55 @@
              col.pch = par("col"),## color for the plotted symbols
              cex.lbl = par("cex"),## magnification factor for the plotted observation labels
              col.lbl = par("col"),## color for the plotted observation labels
-             adj.lbl = NULL       ## adj parameter for the plotted observation labels
+             adj.lbl = NULL,      ## adj parameter for the plotted observation labels
+             jit.fac = 0,         ## jittering factor used for discrete distributions
+             check.NotInSupport = TRUE, ## shall we check if all x lie in support(y)
+             col.NotInSupport = "red" ## if preceding check TRUE color of x if not in support(y)
     ){ ## return value as in stats::qqplot
 
     mc <- match.call(call = sys.call(sys.parent(1)))
     if(missing(xlab)) mc$xlab <- as.character(deparse(mc$x))
     if(missing(ylab)) mc$ylab <- as.character(deparse(mc$y))
     mcl <- as.list(mc)[-1]
+    force(x)
 
+    xj <- x
+    if(any(.isReplicated(x)))
+       xj[.isReplicated(x)] <- jitter(x[.isReplicated(x)], factor=jit.fac)
 
     pp <- ppoints(n)
     yc <- q(y)(pp)
 
+    yc.o <- yc
+
+    if("support" %in% names(getSlots(class(y))))
+       yc <- sort(jitter(yc, factor=jit.fac))
+
+    col.pch <- rep(col.pch,length.out=n)
+    col.lbl <- rep(col.lbl,length.out=n)
+
+    ordx <- order(x)
+
+    if(check.NotInSupport){
+       xo <- x[ordx]
+       nInSupp <- which(xo < q(y)(0))
+
+       nInSupp <- unique(sort(c(nInSupp,which( xo > q(y)(1)))))
+       if("support" %in% names(getSlots(class(y))))
+          nInSupp <- unique(sort(c(nInSupp,which( ! xo %in% support(y)))))
+       if("gaps" %in% names(getSlots(class(y))))
+          nInSupp <- unique(sort(c(nInSupp,which( .inGaps(xo,gaps(y))))))
+       if(length(nInSupp)){
+          col.pch[nInSupp] <- col.NotInSupport
+          if(withLab)
+             col.lbl[ordx[nInSupp]] <- col.NotInSupport
+       }
+    }
+
+
     if(n!=length(x)) withLab <- FALSE
 
-
+    mcl$x <- xj
     mcl$y <- yc
     mcl <- .deleteItemsMCL(mcl)
     mcl$cex <- cex.pch
@@ -222,19 +342,25 @@
     ret <- do.call(stats::qqplot, args=mcl)
 
     if(withLab&& plot.it){
-      lbprep <- .labelprep(x,yc,lab.pts,which.lbs,which.Order,order.traf)
+       lbprep <- .labelprep(xj,yc,lab.pts,
+                            col.lbl,which.lbs,which.Order,order.traf)
        text(x = lbprep$x0, y = lbprep$y0, labels = lbprep$lab,
-            cex = cex.lbl, col = col.lbl, adj = adj.lbl)
+            cex = cex.lbl, col = lbprep$col, adj = adj.lbl)
     }
 
     if(withIdLine&& plot.it){
        abline(0,1,col=col.IdL,lty=lty.IdL,lwd=lwd.IdL)
        if(#is(y,"AbscontDistribution")&&
        withConf){
-          xy <- sort(c(x,yc))
+          xy <- unique(sort(c(x,yc.o)))
+          lxy <- length(xy)
+          if(lxy<n){
+             xy0 <- seq(min(xy),max(xy),length=n-lxy+2)
+             xy <- unique(sort(c(xy,xy0)))
+          }
           .confqq(xy, y, alpha.CI, col.pCI, lty.pCI, lwd.pCI,
                   col.sCI, lty.sCI, lwd.sCI,
-                  length(x), exact = exact.sCI)
+                  length(x), exact.sCI = exact.sCI, exact.pCI = exact.pCI)
        }
     }
     return(ret)