[Dream-commits] r10 - in pkg: . R demos

Thu Feb 25 07:57:42 CET 2010

Author: josephguillaume
Date: 2010-02-25 07:57:41 +0100 (Thu, 25 Feb 2010)
New Revision: 10

Added:
   pkg/R/maxLikPars.R
   pkg/demos/FME nonlinear model.R
Modified:
   pkg/NAMESPACE
   pkg/R/CompDensity.R
   pkg/R/dream.R
   pkg/R/handleBounds.R
   pkg/R/metrop.R
Log:
Added demo of nonlinear model parameter estimation. E.g from FME vignette. Fixed bugs, strengthened assertions on parameter values. HandleBounds now has extra rand step to guarantee compliance with bounds.


Modified: pkg/NAMESPACE
===================================================================

--- pkg/NAMESPACE	2010-02-24 07:04:34 UTC (rev 9)
+++ pkg/NAMESPACE	2010-02-25 06:57:41 UTC (rev 10)
@@ -3,4 +3,4 @@
 export(dream)
 export(CovInit)
 export(LHSInit)
-
+export(maxLikPars)

Modified: pkg/R/CompDensity.R
===================================================================
--- pkg/R/CompDensity.R	2010-02-24 07:04:34 UTC (rev 9)
+++ pkg/R/CompDensity.R	2010-02-25 06:57:41 UTC (rev 10)
@@ -1,6 +1,6 @@
-##' Computes the density of each x value
+##' Computes the density of each pars value
 ##'
-##' @param x matrix nseq x ndim
+##' @param pars matrix nseq x ndim
 ##' @param control. list containing gamma,Wb,Cb,nseq
 ##' @param FUN - the model to run
 ##'   R function with first argument a vector of length ndim.
@@ -20,11 +20,11 @@
 ## TODO: p may be erroneously equal to logp?
 ## TODO: more appropriate naming of options?
 ## TODO: allow shortenings of option?
-CompDensity <- function(x,control,FUN,func.type,
+CompDensity <- function(pars,control,FUN,func.type,
                         measurement=NULL,...){
 
   stopifnot(!is.null(measurement) || func.type%in% c("posterior.density","logposterior.density"))
-  stopifnot(!any(is.na(x)))
+  stopifnot(!any(is.na(pars)))
   
   ## dimensions:
   ##  i. iter 1:nseq
@@ -36,10 +36,10 @@
   logp <- rep(NA,control$nseq)
   
   ## Sequential evaluation
-  for (ii in 1:nrow(x)){
+  for (ii in 1:nrow(pars)){
     ## Call model to generate simulated data
     ## TODO: correct use of optional pars?
-    modpred <- FUN(x[ii,],...)
+    modpred <- FUN(pars[ii,],...)
 
     switch(func.type,
            ## Model directly computes posterior density
@@ -76,7 +76,7 @@
              logp[ii] <- modpred
            },
            ## Similar as 3, but now weights with the Measurement Sigma
-           ## TODO: appears to be no difference to calc.rmse
+           ## TODO: identical to rmse because difference is in metrop
            calc.weighted.rmse={
              ## Define the error
              err <- measurement$data-modpred

Modified: pkg/R/dream.R
===================================================================
--- pkg/R/dream.R	2010-02-24 07:04:34 UTC (rev 9)
+++ pkg/R/dream.R	2010-02-25 06:57:41 UTC (rev 10)
@@ -1,38 +1,41 @@
 
 
 dreamDefaults <- function()
-    list(nseq = NA,              ## Number of Markov Chains / sequences (defaults to N)
+    list(
          nCR = 3,                ## Crossover values used to generate proposals (geometric series)
          gamma = 0,              ## Kurtosis parameter Bayesian Inference Scheme
-         DEpairs = 3,            ## Number of DEpairs. <=nseq-1 TODO: check
          steps = 10,             ## Number of steps in sem
          eps = 5e-2,             ## Random error for ergodicity
          outlierTest = 'IQR_test', ## What kind of test to detect outlier chains?
          pCR.Update = TRUE,      ## Adaptive tuning of crossover values
          boundHandling = 'reflect', ## Boundary handling: "reflect", "bound", "fold", "none"
-         ndraw = Inf,            ## maximum number of function evaluations
+##Termination criteria. TODO: are the 2nd two valid, given that ndraw is used in adaptive pcr
+         ndraw = 1e5,            ## maximum number of function evaluations
          maxtime = Inf,           ## maximum duration of optimization in seconds
-         Rthres=1.1,            ## R value at which to stop
+         Rthres=1.01,            ## R value at which to stop. 
+## Thinning
+         thin=FALSE,             ## do reduced sample collection
+          thin.t=NA,            ## parameter for reduced sample collection
+## Parameters with auto-set values
+	   ndim=NA,			 ## number of parameters (automatically set from length of pars)
+         DEpairs = NA,          ## Number of DEpairs. defaults to max val floor((nseq-1)/2)
+	   nseq = NA,              ## Number of Markov Chains / sequences (defaults to N)
+## Currently unused parameters
          trace = 0,              ## level of user feedback
-         REPORT = 10,            ## number of iterations between reports when trace >= 1
-         thin=FALSE,             ## do reduced sample collection
-         thin.t=NA,               ## parameter for reduced sample collection
-	   ndim=NA ## number of parameters (automatically set from length of pars)
+         REPORT = 10            ## number of iterations between reports when trace >= 1
          )            
 
 library(coda)
 
-## MATLAB function:
-## function [Sequences,Reduced_Seq,X,output,hist_logp] = dream(MCMCPar,ParRange,Measurement,ModelName,Extra,option)
 
 
-
-##' @param FUN model function with first argument a vector of length ndim
+##' @param FUN model function with first argument a vector of parameter values of length ndim
 ##' @param func.type. one of posterior.density, logposterior.density 
 ##' @param pars a list of variable ranges
 ##' @param INIT f(pars,nseq,...) returns nseq x ndim matrix of initial parameter values
 ##' @param control
-##' @param measurement list N,sigma,data. must be included unless func.type=posterior.density or logposterior.density is selected
+##' @param measurement list. must be included unless func.type=posterior.density or logposterior.density is selected
+##'  for calc.rmse: must have element data
 
 ##' @return ...
 ##'   TODO
@@ -46,6 +49,9 @@
 ##'
 
 ##' Terminates either when control$ndraw or control$maxtime is reached
+##'
+##' MATLAB function:
+##' function [Sequences,Reduced_Seq,X,output,hist_logp] = dream(MCMCPar,ParRange,Measurement,ModelName,Extra,option)
 
 dream <- function(FUN, func.type,
                   pars = list(x = range(0, 1e6)),
@@ -75,26 +81,26 @@
   ## counter [2,ndraw/nseq]
   ## iloc
 
-  
+############################
+  ## Process parameters
+
   if (is.character(FUN))  FUN <- get(FUN, mode = "function")
   stopifnot(is.function(FUN))
   stopifnot(is.list(pars))
   stopifnot(length(pars) > 0)
   stopifnot(!is.null(measurement) || func.type %in% c("posterior.density","logposterior.density"))
-
+  stopifnot(func.type!="calc.rmse" || "data" %in% names(measurement))
+  
   stopifnot(control$boundHandling %in% c("reflect", "bound", "fold", "none"))
 
   req.args.init <- names(formals(INIT))
   req.args.FUN <- names(formals(FUN))
     
   if(!all(req.args.init %in% c("pars","nseq",names(INIT.pars)))) stop(paste(c("INIT Missing extra arguments:",req.args.init[!req.args.init %in% c("pars","nseq",names(INIT.pars))]),sep=" "))
-  if(!all(req.args.FUN %in% c("x",names(FUN.pars)))) stop(paste(c("FUN Missing extra arguments:",req.args.FUN[!req.args.FUN %in% c("x",names(FUN.pars))]),sep=" "))
-
+  if(!all(req.args.FUN[2:length(req.args.FUN)] %in% c(names(FUN.pars)))) stop(paste(c("FUN Missing extra arguments:",req.args.FUN[!req.args.FUN %in% c("x",names(FUN.pars))]),sep=" "))
+ 
   pars <- lapply(pars, function(x) if (is.list(x)) x else list(x))
 
-############################
-  ## Initialize variables
-  
   ## update default options with supplied options
   stopifnot(is.list(control))
   control <- modifyList(dreamDefaults(), control)
@@ -106,7 +112,16 @@
   ## determine number of variables to be optimized
   control$ndim<-length(pars)
   if (is.na(control$nseq)) control$nseq <- control$ndim
+  if (is.na(control$DEpairs)) control$DEpairs <- floor((control$nseq-1)/2)
 
+  stopifnot(control$DEpairs<=(control$nseq-1)/2) ## Requirement of offde
+            
+  if (control$boundHandling == 'none') warning("No bound handling in use, parameters may cause errors elsewhere")
+
+  
+############################
+  ## Initialize variables
+
   NDIM <- control$ndim
   NCR <- control$nCR
   NSEQ <- control$nseq
@@ -202,7 +217,7 @@
   upper <- sapply(pars, function(x) max(x[[1]]))
 
   ##Step 2: Calculate posterior density associated with each value in x
-  tmp<-do.call(CompDensity,modifyList(FUN.pars,list(x=x,control=control,FUN=FUN,func.type=func.type,measurement=measurement)))
+  tmp<-do.call(CompDensity,modifyList(FUN.pars,list(pars=x,control=control,FUN=FUN,func.type=func.type,measurement=measurement)))
 
   ##Save the initial population, density and log density in one list X
   X<-cbind(x=x,p=tmp$p,logp=tmp$logp)
@@ -251,7 +266,7 @@
       CR[,gen.number] <- tmp$CR
 
       ## Now compute the likelihood of the new points
-      tmp<-do.call(CompDensity,modifyList(FUN.pars,list(x=x.new,control=control,FUN=FUN,func.type=func.type,measurement=measurement)))
+      tmp<-do.call(CompDensity,modifyList(FUN.pars,list(pars=x.new,control=control,FUN=FUN,func.type=func.type,measurement=measurement)))
       p.new <- tmp$p
       logp.new <- tmp$logp
 
@@ -360,8 +375,15 @@
                    as.mcmc.list(lapply(1:NSEQ,function(i) as.mcmc(Sequences[1:iloc,1:NDIM,i]))),
                                               autoburnin=TRUE)$psrf[,1])
         )
-    if (!is.na(obj$R.stat[teller,1]) && all(obj$R.stat[teller,-1]<control$Rthres)) {
+
+    ## Update the teller
+    teller = teller + 1
+
+    ##Additional Exit conditions
+    
+    if (all(!is.na(obj$R.stat[teller-1,])) && all(obj$R.stat[teller-1,-1]<control$Rthres)) {
       obj$EXITMSG <- 'Convergence criteria reached'
+      break
      ## obj$EXITFLAG <- 3
     }
       
@@ -373,9 +395,9 @@
       break
     }
 
-    ## Update the teller
-    teller = teller + 1
   } ##while
+
+  toc <- as.numeric(Sys.time()) - tic
   
   if (Iter>= control$ndraw){
     obj$EXITMSG <- "Maximum function evaluations reached"
@@ -397,6 +419,7 @@
                                                                    thin=control$thin.t)
                                            ))
   }
+
   obj$R.stat <- obj$R.stat[1:(teller-1),]
   obj$AR <- obj$AR[1:(counter-1),]
   obj$CR <- obj$CR[1:(teller-1),]

Modified: pkg/R/handleBounds.R
===================================================================
--- pkg/R/handleBounds.R	2010-02-24 07:04:34 UTC (rev 9)
+++ pkg/R/handleBounds.R	2010-02-25 06:57:41 UTC (rev 10)
@@ -17,6 +17,7 @@
     too.high<-which(x[,p]>upper[p])
     switch(bound.handling,
            reflect = {
+             ## TODO: may still violate bounds if x>2*upper
              x[too.low,p] <- 2*lower[p]-x[too.low,p]
              x[too.high,p] <- 2*upper[p]-x[too.high,p]
            },
@@ -26,6 +27,7 @@
            },
            fold = {
              ## ------- New approach that maintains detailed balance ----------
+             ## TODO: may still violate bounds if x>2*upper
              x[too.low,p] <- upper[p]-(lower[p]-x[too.low,p])
              too.high<-which(x[,p]>upper[p])
              x[too.high,p] <- lower[p]+(x[too.high,p]-upper[p])
@@ -36,7 +38,11 @@
            },
            stop("Unrecognised value of 'bound.handling'")
            )#switch
-    if (bound.handling!="none") stopifnot(all(x[,p]>=lower[p] & x[,p]<=upper[p]))
+    ##if (bound.handling!="none") stopifnot(all(x[,p]>=lower[p] & x[,p]<=upper[p]))
+    if (bound.handling!="none" && !all(x[,p]>=lower[p] & x[,p]<=upper[p])) {
+      warning("Bounds violated after correction, using random")
+      x <- handleBounds(x,lower,upper,"rand")
+    }
   } ##for p
   return(x)
 }#handleBounds

Added: pkg/R/maxLikPars.R
===================================================================
--- pkg/R/maxLikPars.R	                        (rev 0)
+++ pkg/R/maxLikPars.R	2010-02-25 06:57:41 UTC (rev 10)
@@ -0,0 +1,19 @@
+##' Naive maximum density parameter selection
+##' @param ss MCMC chains. mcmc or mcmclist object
+##' @return named character vector of parameter values
+##'
+##' Uses which.max and density function with default parameters
+maxLikPars <- function(ss){
+  xx <- as.matrix(ss)
+  pars.maxp <- rep(NA,ncol(xx))
+  names(pars.maxp) <- colnames(xx)
+  maxp.res <- rep(NA,ncol(xx))
+  names(maxp.res) <- colnames(xx)
+  for (n in colnames(xx)){
+    den <- density(xx[,n])
+    ii <- which.max(den$y)
+    pars.maxp[n] <- den$x[ii]
+    maxp.res[n] <- mean(diff(den$x))
+  }
+  return(pars.maxp)
+} ##maxLikPars
\ No newline at end of file


Property changes on: pkg/R/maxLikPars.R
___________________________________________________________________
Name: svn:executable
   + *

Modified: pkg/R/metrop.R
===================================================================
--- pkg/R/metrop.R	2010-02-24 07:04:34 UTC (rev 9)
+++ pkg/R/metrop.R	2010-02-25 06:57:41 UTC (rev 10)
@@ -50,7 +50,7 @@
            alpha <- pmin(exp(p.x-p.old),1)
          },
          calc.rmse = { ## SSE probability evaluation
-           alpha <- pmin((p.x/p.old)^(-measurement$N*(1+control$gamma)/2),1)
+           alpha <- pmin((p.x/p.old)^(-length(measurement$data)*(1+control$gamma)/2),1)
          },
          logposterior.density = { ## Lnp probability evaluation
            alpha <- pmin(exp(p.x-p.old),1)

Added: pkg/demos/FME nonlinear model.R
===================================================================
--- pkg/demos/FME nonlinear model.R	                        (rev 0)
+++ pkg/demos/FME nonlinear model.R	2010-02-25 06:57:41 UTC (rev 10)
@@ -0,0 +1,92 @@
+
+## Example from FME vignette, p21, section 7. Soetaert & Laine
+## Nonlinear model parameter estimation
+## TODO: document more clearly, better outputs
+## 
+
+
+unloadNamespace("dream")
+library(dream)
+
+## http://r-forge.r-project.org/plugins/scmsvn/viewcvs.php/pkg/FME/inst/doc/FMEmcmc.Rnw?rev=96&root=fme&view=markup
+
+Obs <- data.frame(x=c(   28,  55,   83,  110,  138,  225,  375),   # mg COD/l
+                  y=c(0.053,0.06,0.112,0.105,0.099,0.122,0.125))   # 1/hour
+Obs2<- data.frame(x=c(   20,  55,   83,  110,  138,  240,  325),   # mg COD/l
+                   y=c(0.05,0.07,0.09,0.10,0.11,0.122,0.125))   # 1/hour
+obs.all <- rbind(Obs,Obs2)
+
+Model <- function(p,x) return(data.frame(x=x,y=p[1]*x/(x+p[2])))
+##Model(c(0.1,1),obs.all$x)
+
+Model.y <- function(p,x) p[1]*x/(x+p[2])
+
+
+control <- list(
+                nseq=5,
+                gamma=0,
+                nCR=3,
+                ndraw=1e5,
+                steps=10,
+                eps=5e-2,
+                outlierTest='IQR_test',
+                pCR.Update=TRUE,
+                thin=TRUE,
+                thin.t=10,
+                boundHandling="fold"
+                )
+
+pars <- list(p1=c(0,1),p2=c(0,100))
+
+dd <- dream(
+            FUN=Model.y, func.type="calc.rmse",
+            pars = pars,
+            FUN.pars=list(
+              x=obs.all$x
+              ),
+            INIT = LHSInit,
+            measurement=list(data=obs.all$y),
+            control = control
+            )
+
+cat(sprintf("
+Exit message:  %s
+Num fun evals: %d
+Time (secs):   %.1f
+",
+            dd$EXITMSG,
+            dd$fun.evals,
+            dd$time
+            ))
+tail(dd$R.stat,1)
+maxLikPars(window(dd$Sequences, start = 0.75*end(dd$Sequences) + 1))
+
+ss <- window(dd$Sequences, start = end(dd$Sequences)/2 + 1)
+pars.maxp <- maxLikPars(ss)
+print(pars.maxp)
+
+plot(ss)
+gelman.plot(ss)
+
+
+### FME
+
+library(FME)
+Residuals  <- function(p) {
+   cost<-modCost(model=Model(p,Obs$x),obs=Obs,x="x")
+   modCost(model=Model(p,Obs2$x),obs=Obs2,cost=cost,x="x")
+}
+
+P      <- modFit(f=Residuals,p=c(0.1,1))
+print(P$par)
+
+## rbind(
+##       pars.maxp-maxp.res,
+##       P$par,
+##       pars.maxp+maxp.res
+##       )
+
+plot(Obs,xlab="mg COD/l",ylab="1/hour", pch=16, cex=1.5)
+points(Obs2,pch=18,cex=1.5, col="red")
+lines(Model(p=P$par,x=0:375))
+lines(Model(p=pars.maxp,x=0:375),col="green")


Property changes on: pkg/demos/FME nonlinear model.R
___________________________________________________________________
Name: svn:executable
   + *