[Dream-commits] r12 - in pkg: R demos

Fri Feb 26 05:31:57 CET 2010

Author: josephguillaume
Date: 2010-02-26 05:31:57 +0100 (Fri, 26 Feb 2010)
New Revision: 12

Added:
   pkg/R/possibility.envelope.R
Modified:
   pkg/R/CompDensity.R
   pkg/R/coef.dream.R
   pkg/R/dream.R
   pkg/R/plot.dream.R
   pkg/R/summary.dream.R
   pkg/demos/FME nonlinear model.R
Log:
Added possibility.envelope function. Added example of usage to FME fn.


Modified: pkg/R/CompDensity.R
===================================================================

--- pkg/R/CompDensity.R	2010-02-25 23:44:36 UTC (rev 11)
+++ pkg/R/CompDensity.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -23,7 +23,9 @@
 CompDensity <- function(pars,control,FUN,func.type,
                         measurement=NULL,...){
 
-  stopifnot(!is.null(measurement) || func.type%in% c("posterior.density","logposterior.density"))
+  ## Should be guaranteed by dream
+  ## stopifnot(!is.null(measurement) || func.type%in% c("posterior.density","logposterior.density"))
+  
   stopifnot(!any(is.na(pars)))
   
   ## dimensions:
@@ -86,6 +88,6 @@
   }           ## for rows
 
   stopifnot(!any(is.na(p)))
-  stopifnot(!any(is.na(logp)))
+  ##stopifnot(!any(is.na(logp))) ##Not used anyway
   return(list(p=p,logp=logp))
 } ##CompDensity

Modified: pkg/R/coef.dream.R
===================================================================
--- pkg/R/coef.dream.R	2010-02-25 23:44:36 UTC (rev 11)
+++ pkg/R/coef.dream.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -3,12 +3,14 @@
 ##' @last.prop proportion of total sequence to use (0,1]
 ##'  if 1, use whole sequence
 ##' @return named vector of parameter values
-## TODO: potentially use reduced.seq instead
-coef.dream <- function(dream.obj,last.prop=.5){
+coef.dream <- function(dream.obj,last.prop=.5,use.thinned=FALSE){
   stopifnot(last.prop>0)
-  if (last.prop==1) return(maxLikPars(dream.obj$Sequences))
+  if (use.thinned) sss <- dream.obj$Reduced.Seq
+  else sss <- dream.obj$Sequences
+  
+  if (last.prop==1) return(maxLikPars(sss))
   else {
-    ss <- window(dream.obj$Sequences, start = end(dd$Sequences)*(1-last.prop) + 1)
+    ss <- window(dream.obj$Sequences, start = end(sss)*(1-last.prop) + 1)
     return(maxLikPars(ss))
   }
 }##coef.dream

Modified: pkg/R/dream.R
===================================================================
--- pkg/R/dream.R	2010-02-25 23:44:36 UTC (rev 11)
+++ pkg/R/dream.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -9,21 +9,23 @@
          outlierTest = 'IQR_test', ## What kind of test to detect outlier chains?
          pCR.Update = TRUE,      ## Adaptive tuning of crossover values
          boundHandling = 'reflect', ## Boundary handling: "reflect", "bound", "fold", "none"
-##Termination criteria. TODO: are the 2nd two valid, given that ndraw is used in adaptive pcr
+### Termination criteria. TODO: are the 2nd two valid, given that ndraw is used in adaptive pcr
          ndraw = 1e5,            ## maximum number of function evaluations
          maxtime = Inf,           ## maximum duration of optimization in seconds
-         Rthres=1.01,            ## R value at which to stop. 
-## Thinning
+         Rthres=1.01,            ## R value at which to stop. Vrugt suggests 1.2
+### Thinning
          thin=FALSE,             ## do reduced sample collection
-          thin.t=NA,            ## parameter for reduced sample collection
-## Parameters with auto-set values
-	   ndim=NA,			 ## number of parameters (automatically set from length of pars)
+         thin.t=NA,            ## parameter for reduced sample collection
+### Reporting
+         REPORT = 1000,            ## approximate number of function evaluations between reports. >0. 0=none  TODO: when trace >= 1
+### Parameters with auto-set values
+         ndim=NA,			 ## number of parameters (automatically set from length of pars)
          DEpairs = NA,          ## Number of DEpairs. defaults to max val floor((nseq-1)/2)
-	   nseq = NA              ## Number of Markov Chains / sequences (defaults to N)
-## Currently unused parameters
+         nseq = NA,              ## Number of Markov Chains / sequences (defaults to N)
+         Cb=NA,Wb=NA
+         ## Currently unused parameters
 ##         trace = 0,              ## level of user feedback
-##         REPORT = 10            ## number of iterations between reports when trace >= 1
-         )            
+         )
 
 library(coda)
 
@@ -67,7 +69,7 @@
   
   ## dimensions
   ##  hist.logp matrix. ndraw/nseq x nseq. length nearly ndraw.
-  ##    TODO: removed Iter for simplicity. should have been kept?
+  ##    TODO: removed counter.fun.evals for simplicity. should have been kept?
   ##  CR nseq x steps
   ##  pCR length nCR or scalar
   ##  lCR length nCR or scalar
@@ -77,9 +79,9 @@
   ## Sequences. array n.elem*1.125 x ndim+2 x nseq
   ## Reduced.Seq array n.elem*1.125 x ndim+2 x nseq
 
-  ##  teller [2,ndraw/nseq]
-  ## Iter [nseq,ndraw(+steps*nseq)],
-  ## counter [2,ndraw/nseq]
+  ##  counter.outloop [2,ndraw/nseq]. count number of outside loops
+  ## counter.fun.evals [nseq,ndraw(+steps*nseq)],
+  ## counter [2,ndraw/nseq] . number of generations - iterations of inner loop
   ## iloc
 
 ############################
@@ -124,6 +126,8 @@
             
   if (control$boundHandling == 'none') warning("No bound handling in use, parameters may cause errors elsewhere")
 
+  stopifnot(control$REPORT>=0)
+  control$REPORT <- (control$REPORT%/%control$nseq) * control$nseq
   
 ############################
   ## Initialize variables
@@ -133,11 +137,11 @@
   NSEQ <- control$nseq
   
   ## for each iteration...
-  Iter <- NSEQ                          #? 1
+  counter.fun.evals <- NSEQ                          #? 1
   counter <- 2
   iloc <- 1
-  teller <- 2
-  new_teller <- 1
+  counter.outloop <- 2
+  counter.thin <- 1
   
   ## Calculate the parameters in the exponential power density function of Box and Tiao (1973)
   cbwb <- CalcCbWb(control$gamma)
@@ -181,17 +185,20 @@
   ## Number of times through while loop
   n.elem<-floor(control$ndraw/NSEQ)+1
 
-  ## Iter + AR at each step
+  ## counter.fun.evals + AR at each step
   obj$AR<-matrix(NA,n.elem,2)
   obj$AR[1,2]<-NSEQ-1 ##Number if only one rejected
   colnames(obj$AR) <- c("fun.evals","AR")
   
-  ##Iter + R statistic for each variable at each step
+  ##counter.fun.evals + R statistic for each variable at each step
+  ## TODO: now using counter.report
   obj$R.stat<-matrix(NA,ceiling(n.elem/control$steps),NDIM+1)
+  ##  n<10 matlab: -2 * ones(1,MCMCPar.n);
+  obj$R.stat[1,] <- c(counter.fun.evals,rep(-2,NDIM))
   if (!is.null(names(pars))) colnames(obj$R.stat) <- c("fun.evals",names(pars))
   else   colnames(obj$R.stat) <- c("fun.evals",paste("p",1:length(pars),sep=""))
-  
-  ##Iter + pCR for each CR
+
+  ##counter.fun.evals + pCR for each CR
   obj$CR <- matrix(NA,ceiling(n.elem/control$steps),length(pCR)+1)
   colnames(obj$CR) <- c("fun.evals",paste("CR",1:length(pCR),sep=""))
 
@@ -215,6 +222,7 @@
   ## initialize timer
   tic <- as.numeric(Sys.time())
   toc <- 0
+  counter.report <- 1
 
 ################################
   
@@ -239,26 +247,21 @@
   }
 
   ##Save N_CR in memory and initialize delta.tot
-  obj$CR[1,] <- c(Iter,pCR)
+  obj$CR[1,] <- c(counter.fun.evals,pCR)
   delta.tot <- rep(0,NCR)
   
   ##Save history log density of individual chains
   hist.logp[1,] <- X[,"logp"]
   
-  ##Compute R-statistic. Using coda package
-  ## TODO: more elegant way of using coda. And check for correctness
-  ## TODO: alternatively, convert matlab implementation
-  ##  n<10 matlab: -2 * ones(1,MCMCPar.n);
-  obj$R.stat[1,] <- c(Iter,rep(-2,NDIM))
 
 ################################
   ##Start iteration
-  while (Iter < control$ndraw) {
+  while (counter.fun.evals < control$ndraw) {
 
     for (gen.number in 1:control$steps) {
 
       ## Initialize DR properties
-      new_teller <- new_teller + 1 ## counter for thinning
+      counter.thin <- counter.thin + 1 
 
       ## Define the current locations and associated posterior densities
       x.old <- X[,1:NDIM]
@@ -299,10 +302,10 @@
       Sequences[iloc,,] <- t(newgen)
 
       ## Check reduced sample collection
-      if (control$thin && new_teller == control$thin.t){
-        ## Update iloc_2 and new_teller
+      if (control$thin && counter.thin == control$thin.t){
+        ## Update iloc_2 and counter.thin
         iloc.2 <- iloc.2+1
-        new_teller <- 0
+        counter.thin <- 0
         ## Reduced sample collection
         Reduced.Seq[iloc.2,,] <- t(newgen)
       }
@@ -330,23 +333,23 @@
       hist.logp[counter,] <- X[,NDIM+2]
       
       ## Save Acceptance Rate
-      obj$AR[counter,] <- c(Iter,100 * sum(accept) / NSEQ)
+      obj$AR[counter,] <- c(counter.fun.evals,100 * sum(accept) / NSEQ)
 
       ## CompDensity executes function NSEQ times per loop
-      Iter <- Iter + NSEQ
+      counter.fun.evals <- counter.fun.evals + NSEQ
       counter <- counter + 1
     } ##for gen.number steps
 
     ## ---------------------------------------------------------------------
 
     ## Store Important Diagnostic information -- Probability of individual crossover values
-    obj$CR[teller, ] <- c(Iter,pCR)
+    obj$CR[counter.outloop, ] <- c(counter.fun.evals,pCR)
 
     ## Do this to get rounded iteration numbers
-    if (teller == 2) control$steps <- control$steps + 1
+    if (counter.outloop == 2) control$steps <- control$steps + 1
 
     ## Check whether to update individual pCR values
-    if (Iter <= 0.1 * control$ndraw) {
+    if (counter.fun.evals <= 0.1 * control$ndraw) {
       if (control$pCR.Update) {
         ## Update pCR values
         tmp <- AdaptpCR(CR, delta.tot, lCR, control)
@@ -367,10 +370,11 @@
         ## Jump outlier chain to r_idx -- X
         X[out.id,1:(NDIM+2)] <- X[r.idx,]
         ## Add to chainoutlier
-        obj$outlier <- rbind(obj$outlier,c(Iter,out.id))
+        obj$outlier <- rbind(obj$outlier,c(counter.fun.evals,out.id))
       } ##for remove outliers
     }   ##else
 
+    
     if (control$pCR.Update) {
       ## Generate CR values based on current pCR values
       CR <- GenCR(pCR, control)
@@ -380,22 +384,29 @@
 
     ## Calculate Gelman and Rubin convergence diagnostic
     ## Compute the R-statistic using 50% burn-in from Sequences
-    try(
-             obj$R.stat[teller,] <- c(Iter,gelman.diag(
-                   as.mcmc.list(lapply(1:NSEQ,function(i) as.mcmc(Sequences[1:iloc,1:NDIM,i]))),
-                                              autoburnin=TRUE)$psrf[,1])
+    ## TODO: alternatively, convert matlab implementation
+    if (control$REPORT>0 && counter.fun.evals %% control$REPORT==0) {
+
+      counter.report <- counter.report+1
+            
+      try(
+          obj$R.stat[counter.report,] <- c(counter.fun.evals,gelman.diag(
+                    as.mcmc.list(lapply(1:NSEQ,function(i) as.mcmc(Sequences[1:iloc,1:NDIM,i]))),
+                       autoburnin=TRUE)$psrf[,1])
         )
+      
+      if (all(!is.na(obj$R.stat[counter.report,])) &&
+          all(obj$R.stat[counter.report,-1]<control$Rthres)) {
+        obj$EXITMSG <- 'Convergence criteria reached'
+        break
+        ## obj$EXITFLAG <- 3
+      }
 
-    ## Update the teller
-    teller = teller + 1
+    }##counter.report
+    
+    ## Update the counter.outloop
+    counter.outloop = counter.outloop + 1
 
-    ##Additional Exit conditions
-    
-    if (all(!is.na(obj$R.stat[teller-1,])) && all(obj$R.stat[teller-1,-1]<control$Rthres)) {
-      obj$EXITMSG <- 'Convergence criteria reached'
-      break
-     ## obj$EXITFLAG <- 3
-    }
       
     ## break if maximum time exceeded
     toc <- as.numeric(Sys.time()) - tic
@@ -409,7 +420,7 @@
 
   toc <- as.numeric(Sys.time()) - tic
   
-  if (Iter>= control$ndraw){
+  if (counter.fun.evals>= control$ndraw){
     obj$EXITMSG <- "Maximum function evaluations reached"
    ## obj$EXITFLAG <- 4
   }
@@ -430,13 +441,13 @@
                                            ))
   }
 
-  obj$R.stat <- obj$R.stat[1:(teller-1),]
+  obj$R.stat <- obj$R.stat[1:counter.report,,drop=FALSE]
   obj$AR <- obj$AR[1:(counter-1),]
-  obj$CR <- obj$CR[1:(teller-1),]
+  obj$CR <- obj$CR[1:(counter.outloop-1),]
   
   ## store number of function evaluations
   ## store number of iterations
-  obj$fun.evals <- Iter
+  obj$fun.evals <- counter.fun.evals
   ## store the amount of time taken
   obj$time <- toc
 

Modified: pkg/R/plot.dream.R
===================================================================
--- pkg/R/plot.dream.R	2010-02-25 23:44:36 UTC (rev 11)
+++ pkg/R/plot.dream.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -7,7 +7,7 @@
 plot.dream <- function(dream.obj,interactive=TRUE){
   devAskNewPage(interactive)
   
-  ss <- window(dream.obj$Sequences, start = end(dd$Sequences)/2 + 1)
+  ss <- window(dream.obj$Sequences, start = end(dream.obj$Sequences)/2 + 1)
 
   ## Trace and parameter density
   

Added: pkg/R/possibility.envelope.R
===================================================================
--- pkg/R/possibility.envelope.R	                        (rev 0)
+++ pkg/R/possibility.envelope.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -0,0 +1,31 @@
+
+##' Obtain prediction confidence intervals for model, around new input values
+##' @param dd dream object
+##' @param FUN.pars new par values. if missing, use those from dream object
+##' @param ndraw number of new iterations
+##' @param conf % two-sided confidence interval
+## TODO: use caching rather than a second application of FUN to parameter sets
+possibility.envelope <- function(dd,FUN.pars=NULL,ndraw=1000,conf=99){
+  stopifnot(is.null(FUN.pars) || is.list(FUN.pars))
+  
+  ## Generate more results from converged chains
+  dd$control$REPORT <- 0
+  dd$control$Rthres <- 0
+  dd$control$ndraw <- ndraw
+  dd$call$control <- dd$control
+  dd$call$INIT <- function(pars,nseq) dd$X[,1:dd$control$ndim]
+  ee <- eval(dd$call)
+
+  if (is.null(FUN.pars)) FUN.pars <- eval(dd$call$FUN.pars)
+  par.name <- names(formals(eval(dd$call$FUN)))[1]
+
+  ff <- apply(as.matrix(ee$Reduced.Seq),1,
+              function(p) {
+                FUN.pars[[par.name]] <- p
+                do.call(eval(dd$call$FUN),FUN.pars)
+              })
+
+  gg <- t(apply(ff,1,quantile,c((100-conf)/200,1-(100-conf)/200)))
+
+  return(gg)
+}##possibility.envelope


Property changes on: pkg/R/possibility.envelope.R
___________________________________________________________________
Name: svn:executable
   + *

Modified: pkg/R/summary.dream.R
===================================================================
--- pkg/R/summary.dream.R	2010-02-25 23:44:36 UTC (rev 11)
+++ pkg/R/summary.dream.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -23,7 +23,7 @@
   cat("
 CODA summary for last 50% of MCMC chains:
 ")
-  print(summary(window(dream.obj$Sequences, start = end(dd$Sequences)/2 + 1)))
+  print(summary(window(dream.obj$Sequences, start = end(dream.obj$Sequences)/2 + 1)))
 
   cat("
 Acceptance Rate

Modified: pkg/demos/FME nonlinear model.R
===================================================================
--- pkg/demos/FME nonlinear model.R	2010-02-25 23:44:36 UTC (rev 11)
+++ pkg/demos/FME nonlinear model.R	2010-02-26 04:31:57 UTC (rev 12)
@@ -4,10 +4,6 @@
 ## TODO: document more clearly, better outputs
 ## 
 
-
-unloadNamespace("dream")
-library(dream)
-
 ## http://r-forge.r-project.org/plugins/scmsvn/viewcvs.php/pkg/FME/inst/doc/FMEmcmc.Rnw?rev=96&root=fme&view=markup
 
 Obs <- data.frame(x=c(   28,  55,   83,  110,  138,  225,  375),   # mg COD/l
@@ -16,6 +12,39 @@
                    y=c(0.05,0.07,0.09,0.10,0.11,0.122,0.125))   # 1/hour
 obs.all <- rbind(Obs,Obs2)
 
+
+###########################
+### FME results
+
+Model <- function(p,x) return(data.frame(x=x,y=p[1]*x/(x+p[2])))
+##Model(c(0.1,1),obs.all$x)
+
+
+library(FME)
+Residuals  <- function(p) {
+   cost<-modCost(model=Model(p,Obs$x),obs=Obs,x="x")
+   modCost(model=Model(p,Obs2$x),obs=Obs2,cost=cost,x="x")
+}
+
+P      <- modFit(f=Residuals,p=c(0.1,1))
+print(P$par)
+
+plotFME <- function(){
+plot(Obs,xlab="mg COD/l",ylab="1/hour", pch=16, cex=1.5,
+     xlim=c(25,400),ylim=c(0,0.15))
+points(Obs2,pch=18,cex=1.5, col="red")
+lines(Model(p=P$par,x=0:375))
+}
+
+
+##########################
+## DREAM results
+
+
+unloadNamespace("dream")
+library(dream)
+
+
 Model.y <- function(p,x) p[1]*x/(x+p[2])
 
 control <- list(
@@ -54,30 +83,58 @@
 
 plot(dd)
 
+plotFME()
+lines(Model(p=coef(dd),x=0:375),col="green")
 
 
-### Comparison to FME results
+########################
+## Calculate bounds around output estimates
 
-Model <- function(p,x) return(data.frame(x=x,y=p[1]*x/(x+p[2])))
-##Model(c(0.1,1),obs.all$x)
+plotCIs <- function(x,cis,...){
+  em <- strwidth("M")/2
+  segments(x0=x,y0=cis[,1],
+           x1=x,y1=cis[,2],
+           ...
+           )
+  segments(x0=x-em,y0=cis[,1],
+           x1=x+em,y1=cis[,1],
+           ...
+           )
+  segments(x0=x-em,y0=cis[,2],
+           x1=x+em,y1=cis[,2],
+           ...
+           )
+}##plotCIs
 
+## Calibrate with Obs
+dd <- dream(
+            FUN=Model.y, func.type="calc.rmse",
+            pars = pars,
+            FUN.pars=list(
+              x=Obs$x
+              ),
+            INIT = LHSInit,
+            measurement=list(data=obs.all$y),
+            control = control
+            )
 
-library(FME)
-Residuals  <- function(p) {
-   cost<-modCost(model=Model(p,Obs$x),obs=Obs,x="x")
-   modCost(model=Model(p,Obs2$x),obs=Obs2,cost=cost,x="x")
-}
+plotFME()
+lines(Model(p=coef(dd),x=0:375),col="green")
 
-P      <- modFit(f=Residuals,p=c(0.1,1))
-print(P$par)
+## Naive 95% bounds from residuals
+resid <- Model.y(p=coef(dd),x=Obs$x)-Obs$y
+##densityplot(resid)
+qq <- quantile(resid,c(0.005,.995))
+gg <- t(sapply(Model.y(p=coef(dd),x=Obs$x),function(v) v+qq))
+plotCIs(Obs$x,gg,col="grey")
 
-## rbind(
-##       pars.maxp-maxp.res,
-##       P$par,
-##       pars.maxp+maxp.res
-##       )
+## Bounds on Obs
+cis.1 <- possibility.envelope(dd)
+plotCIs(Obs$x,cis.1,col="black")
 
-plot(Obs,xlab="mg COD/l",ylab="1/hour", pch=16, cex=1.5)
-points(Obs2,pch=18,cex=1.5, col="red")
-lines(Model(p=P$par,x=0:375))
-lines(Model(p=coef(dd),x=0:375),col="green")
+## Test on Obs2
+cis.2 <- possibility.envelope(dd,list(x=Obs2$x))
+plotCIs(Obs2$x,cis.2,col="red")
+
+
+## TODO: add residual error, using method p6, vrugt. equifinality