[Pomp-commits] r270 - in pkg: R man tests

Wed Jun 23 19:20:19 CEST 2010

Author: kingaa
Date: 2010-06-23 19:20:19 +0200 (Wed, 23 Jun 2010)
New Revision: 270

Added:
   pkg/R/bsmc.R
   pkg/man/bsmc.Rd
   pkg/tests/ou2-bsmc.R
   pkg/tests/ou2-bsmc.Rout.save
Log:
- add functionality for the Bayesian sequential Monte Carlo algorithm of Liu & West, implemented as 'bsmc'.


Added: pkg/R/bsmc.R
===================================================================

--- pkg/R/bsmc.R	                        (rev 0)
+++ pkg/R/bsmc.R	2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,292 @@
+## Bayesian particle filtering codes
+##
+## in annotation L&W AGM == Liu & West "A General Algorithm"
+## 
+## params = the initial particles for the parameter values
+##          these are drawn from the prior distribution for the parameters
+##          if a parameter is being held fixed, it is given as a row of NA's
+## est = names of parameters to estimate.  Other parameters are not updated.
+## Np = number of particles
+## discount = delta, introduced in section 3.3 in Liu & West
+## ntries = number of samplesto draw from x_t+1|x(k)_t to estimate mean of mu(k)_t+1 as in sect 2.2 Liu & West
+## ntimes = number of timesteps in observation vector
+## lower  = lower bounds on prior
+## upper  = upper bounds on prior
+
+bsmc <- function (object, ...)
+  stop("function ",sQuote("bsmc")," is undefined for objects of class ",sQuote(class(object)))
+setGeneric('bsmc')
+
+setMethod(
+          "bsmc",
+          "pomp",
+          function (object, params, est,
+                    smooth = 0.1,
+                    ntries = 1,
+                    tol = 1e-17,
+                    lower = -Inf, upper = Inf,
+                    seed = NULL,
+                    verbose = getOption("verbose"),
+                    max.fail = 0,
+                    ...) {
+
+            if (missing(seed)) seed <- NULL
+            if (!is.null(seed)) {
+              if (!exists(".Random.seed",where=.GlobalEnv)) { # need to initialize the RNG
+                runif(1)
+              }
+              save.seed <- get(".Random.seed",pos=.GlobalEnv)
+              set.seed(seed)
+            }
+
+            error.prefix <- paste(sQuote("bsmc"),"error: ")
+
+            if (missing(params)) {
+              if (length(object at params)>0) {
+                params <- object at params
+              } else {
+                stop(error.prefix,sQuote("params")," must be supplied",call.=FALSE)
+              }
+            }
+
+            Np <- NCOL(params)
+            ntimes <- length(time(object))
+            if (is.null(dim(params))) {
+              params <- matrix(
+                               params,
+                               nrow=length(params),
+                               ncol=Np,
+                               dimnames=list(
+                                 names(params),
+                                 NULL
+                                 )
+                               )
+            }
+            prior <- params
+
+            npars <- nrow(params)
+            paramnames <- rownames(params)
+            estind <- match(est,paramnames)
+            npars.est <- length(estind)
+            
+            if (npars.est<1)
+              stop(error.prefix,"no parameters to estimate",call.=FALSE)
+
+            if (is.null(paramnames))
+              stop(error.prefix,sQuote("params")," must have rownames",call.=FALSE)
+
+            if ((length(smooth)!=1)||(smooth>1)||(smooth<=0))
+              stop(error.prefix,sQuote("smooth")," must be a scalar in [0,1)",call.=FALSE)
+
+            hsq <- smooth^2             #  see Liu & West eq(3.6) p10
+            shrink <- sqrt(1-hsq)
+
+            if (
+                ((length(lower)>1)&&(length(lower)!=npars.est))||
+                ((length(upper)>1)&&(length(upper)!=npars.est))
+                ) {
+              stop(
+                   error.prefix,
+                   sQuote("lower")," and ",sQuote("upper"),
+                   " must each have length 1 or length equal to that of ",sQuote("est"),
+                   call.=FALSE                   
+                   )
+            }
+
+            for (j in seq_len(Np)) {
+              if (any((params[estind,j]<lower)|(params[estind,j]>upper))) {
+                ind <- which((params[estind,j]<lower)|(params[estind,j]>upper))
+                stop(
+                     error.prefix,
+                     "parameter(s) ",paste(paramnames[estind[ind]],collapse=","),
+                     " in column ",j," in ",sQuote("params"),
+                     " is/are outside the box defined by ",
+                     sQuote("lower")," and ",sQuote("upper"),
+                     call.=FALSE
+                     )
+              }
+            }
+
+            xstart <- init.state(object,params=params)
+            statenames <- rownames(xstart)
+            nvars <- nrow(xstart)
+            
+            times <- time(object,t0=TRUE)
+            x <- xstart
+
+            loglik <- rep(NA,ntimes)
+            eff.sample.size <- rep(NA,ntimes)
+            nfail <- 0
+            
+            mu <- array(data=NA,dim=c(nvars,Np,1)) 
+            rownames(mu) <- rownames(xstart)
+            m  <- array(data=NA,dim=c(npars,Np))
+            rownames(m) <- rownames(params)
+            
+            for (nt in seq_len(ntimes)) {
+              
+              ## calculate particle means ; as per L&W AGM (1)
+              params.mean <- apply(params,1,mean) 
+              ## calculate particle covariances : as per L&W AGM (1)
+              params.var  <- cov(t(params[estind,,drop=FALSE]))
+
+              if (verbose) {
+                cat("at step",nt,"(time =",times[nt+1],")\n")
+                print(
+                      rbind(
+                            prior.mean=params.mean[estind],
+                            prior.sd=sqrt(diag(params.var))
+                            )
+                      )
+              }
+
+              for ( j in seq_len(Np) ) {
+                x.tmp <- matrix(data=x[,j],nrow=nvars,ncol=ntries)
+                rownames(x.tmp) <- statenames
+                p.tmp <- matrix(data=params[,j],nrow=npars,ncol=ntries)
+                rownames(p.tmp) <- paramnames
+
+                ## update mean of states at time nt as per L&W AGM (1) 
+                tries <- rprocess( 
+                                  object,
+                                  xstart=x.tmp,
+                                  times=times[c(nt,nt+1)],
+                                  params=p.tmp
+                                  )
+
+                mu[,j,1] <- apply(tries[,,2,drop=FALSE],1,mean)			
+
+                ## shrink parameters towards mean as per Liu & West eq (3.3) and L&W AGM (1)
+                m[,j] <- shrink*params[,j] + (1-shrink)*params.mean 
+              }
+              ## evaluate probability of obervation given mean value of parameters and states (used in L&W AGM (5) below)
+              g <- dmeasure( 
+                            object,
+                            y=object at data[,nt,drop=FALSE],
+                            x=mu,
+                            times=times[nt+1],
+                            params=m											
+                            )	
+              ## sample indices -- From L&W AGM (2)
+              k <- sample.int(n=Np,size=Np,replace=TRUE,prob=g)
+              params <- params[,k]
+              m <- m[,k]
+
+              ## sample new parameter vector as per L&W AGM (3) and Liu & West eq(3.2)
+              pvec <- try(
+                          mvtnorm::rmvnorm(
+                                           n=Np,
+                                           mean=rep(0,npars.est),
+                                           sigma=hsq*params.var,
+                                           method="eigen"
+                                           ),
+                          silent=FALSE
+                          )
+              if (inherits(pvec,"try-error"))
+                stop(error.prefix,"error in ",sQuote("rmvnorm"),call.=FALSE)
+              if (any(!is.finite(pvec)))
+                stop(error.prefix,"extreme particle depletion",call.=FALSE)
+              params[estind,] <- m[estind,]+t(pvec)
+
+              ## sample current state vector x^(g)_(t+1) as per L&W AGM (4)
+              X <- rprocess(
+                            object,
+                            xstart=x[,k],
+                            times=times[c(nt,nt+1)],
+                            params=params
+                            )[,,2,drop=FALSE]
+
+              ## evaluate likelihood of observation given X (from L&W AGM (4))
+              numer <- dmeasure(
+                                object,
+                                y=object at data[,nt,drop=FALSE],
+                                x=X,
+                                times=times[nt+1],
+                                params=params
+                                )
+              ## evaluate weights as per L&W AGM (5)
+              weights <- numer/g[k]
+              
+              ## apply box constraints as per the priors          
+              for (j in seq_len(Np)) {
+                ## the following seems problematic: will it tend to make the boundaries repellors
+                if (any((params[estind,j]<lower)|(params[estind,j]>upper))) {
+                  weights[j] <- 0 
+                }
+                ## might this rejection method be preferable?
+                ## while (any((params[estind,j]<lower)|(params[estind,j]>upper))) {
+                ##   ## rejection method
+                ##   pvec <- try(
+                ##               mvtnorm::rmvnorm(
+                ##                                n=1,
+                ##                                mean=rep(0,npars.est),
+                ##                                sigma=hsq*params.var,
+                ##                                method="eigen"
+                ##                                ),
+                ##               silent=FALSE
+                ##               )
+                ##   if (inherits(pvec,"try-error"))
+                ##     stop(error.prefix,"error in ",sQuote("rmvnorm"),call.=FALSE)
+                ##   if (any(!is.finite(pvec)))
+                ##     stop(error.prefix,"extreme particle depletion",call.=FALSE)
+                ##   params[estind,j] <- m[estind,j]+pvec[1,]
+                ## }
+              }
+
+              x[,] <- X                
+              
+              ## test for failure to filter
+              dim(weights) <- NULL
+              failures <- ((weights<tol)|(!is.finite(weights))) # test for NA weights
+              all.fail <- all(failures)
+              if (all.fail) {                     # all particles are lost
+                if (verbose) {
+                  message("filtering failure at time t = ",times[nt+1])
+                }
+                nfail <- nfail+1
+                if (nfail > max.fail)
+                  stop(error.prefix,"too many filtering failures",call.=FALSE)
+                loglik[nt] <- log(tol)          # worst log-likelihood
+                weights <- rep(1/Np,Np)
+                eff.sample.size[nt] <- 0
+              } else {                  # not all particles are lost
+                ## compute log-likelihood
+                loglik[nt] <- log(mean(weights))  
+                weights[failures] <- 0
+                weights <- weights/sum(weights)
+                ## compute effective sample-size
+                eff.sample.size[nt] <- 1/(weights%*%weights) 
+              }
+
+              if (verbose) {
+                cat("effective sample size =",round(eff.sample.size[nt],1),"\n")
+              }
+
+              ## Matrix with samples (columns) from filtering distribution theta.t | Y.t
+              if (!all.fail) {
+                ## smp <- .Call(systematic_resampling,weights,PACKAGE="pomp.devel")
+                smp <- sample.int(n=Np,size=Np,replace=TRUE,prob=weights)
+                x <- x[,smp,drop=FALSE]
+                params[estind,] <- params[estind,smp,drop=FALSE]
+              }
+              
+            }
+            
+            if (!is.null(seed)) {
+              assign(".Random.seed",save.seed,pos=.GlobalEnv)
+              seed <- save.seed
+            }
+            
+            list(
+                 post=params,
+                 prior=prior,
+                 eff.sample.size=eff.sample.size,
+                 cond.loglik=loglik,
+                 smooth=smooth,
+                 seed=seed,
+                 nfail=nfail,
+                 loglik=sum(loglik),
+                 weights=weights
+                 )
+          }
+          )

Added: pkg/man/bsmc.Rd
===================================================================
--- pkg/man/bsmc.Rd	                        (rev 0)
+++ pkg/man/bsmc.Rd	2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,107 @@
+\name{bsmc}
+\alias{bsmc}
+\alias{bsmc-pomp}
+\alias{bsmc,pomp-method}
+\title{Liu and West Bayesian Particle Filter}
+\description{
+  Generates draws from the posterior distribution for the parameters using the Liu and West algorithm.
+  \code{bsmc} gives draws from the posterior.
+}
+\usage{
+bsmc(object, \dots)
+\S4method{bsmc}{pomp}(object, params, est, smooth = 0.1, ntries = 1,
+     tol = 1e-17, lower = -Inf, upper = Inf, seed = NULL,
+     verbose = getOption("verbose"), max.fail = 0, \dots)
+}
+\arguments{
+\item{object}{
+    An object of class \code{pomp} or inheriting class \code{pomp}.
+  }
+  \item{params}{
+    A \code{npars} x \code{np} matrix containing the parameters corresponding to the initial state values in \code{xstart}.
+    The matrix should be Np columns long, where Np is the number of particles.  The values for each row should be Np draws from the prior distribution for the parameter. 
+    It is permissible to supply \code{params} as a named numeric vector, i.e., without a \code{dim} attribute.
+    In this case, all particles will inherit the same parameter values, which is equivalent to a degenerate prior.
+    If some parameter values are to be held fixed in the fitting (i.e. params.fixed = TRUE), then those elements of params (rows if params is given as a matrix), must be set to NA.
+  }
+  \item{est}{
+    Names of the rows of \code{params} that are to be estimated.
+    No updates will be made to the other parameters.
+  }
+  \item{smooth}{
+    Kernel density smoothing parameters.
+    The compensating shrinkage factor will be \code{sqrt(1-smooth^2)}.
+    Thus, \code{smooth=0} means that no noise will be added to parameters.
+    Generally, the value of \code{smooth} should be chosen close to 0 (i.e., \code{shrink~0.1}).
+  }
+  \item{ntries}{
+    Number of draws from \code{rprocess} per particle used to estimate the expected value of the state process at time \code{t+1} given the state and parameters at time \code{t}.
+  }
+  \item{tol}{
+    Particles with log likelihood below \code{tol} are considered to be "lost".
+    A filtering failure occurs when, at some time point, all particles are lost.
+    When all particles are lost, the conditional log likelihood at that time point is set to be \code{log(tol)}.
+  }
+  \item{lower, upper}{
+    optional; lower and upper bounds on the priors.
+    This is useful in case there are box constraints satisfied by the priors.
+    The posterior is guaranteed to lie within these bounds.
+  }
+  \item{seed}{
+    optional; an object specifying if and how the random number generator should be initialized (\sQuote{seeded}).
+    If \code{seed} is an integer, it is passed to \code{set.seed} prior to any simulation and is returned as the \dQuote{seed} element of the return list.
+    By default, the state of the random number generator is not changed and the value of \code{.Random.seed} on the call is stored in the \dQuote{seed} element of the return list.
+  }
+  \item{verbose}{
+    logical; if \code{TRUE}, print diagnostic messages.
+  }
+  \item{max.fail}{
+    The maximum number of filtering failures allowed.
+    If the number of filtering failures exceeds this number, execution will terminate with an error.
+  }
+  \item{\dots}{
+    currently ignored.
+  }
+}
+\value{
+  A list with the following elements:
+  \item{post}{
+    A matrix containing draws from the approximate posterior distribution.
+  }
+  \item{prior}{
+    A matrix containing draws from the prior distribution (identical to \code{params} on call).
+  }
+  \item{eff.sample.size}{
+    A vector containing the effective number of particles at each time point.
+  }
+  \item{cond.loglik}{
+    A vector containing the conditional log likelihoods at each time point.
+  }
+  \item{smooth}{
+    The smoothing parameter used (see above).
+  }
+  \item{seed}{
+    The state of the random number generator at the time \code{bsmc} was called.
+    If the argument \code{seed} was specified, this is a copy;
+    if not, this is the internal state of the random number generator at the time of call.
+  }
+  \item{nfail}{
+    The number of filtering failures encountered.
+  }
+  \item{loglik}{
+    The estimated log-likelihood.
+  }
+  \item{weights}{
+    The resampling weights for each particle.
+  }
+}
+\author{
+  Michael Lavine (lavine at math dot umass dot edu),
+  Matthew Ferrari (mferrari at psu dot edu),
+  Aaron A. King
+}
+\examples{
+## See the vignettes for examples.
+}
+\seealso{\link{pomp-class}}
+\keyword{ts}

Added: pkg/tests/ou2-bsmc.R
===================================================================
--- pkg/tests/ou2-bsmc.R	                        (rev 0)
+++ pkg/tests/ou2-bsmc.R	2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,51 @@
+library(pomp)
+
+set.seed(398585L)
+data(ou2)
+
+time(ou2) <- 1:10
+
+Np <- 10000
+
+prior.bounds <- rbind(
+                      alpha.2=c(-0.55,-0.45),
+                      alpha.3=c(0.25,0.35)
+                      )
+colnames(prior.bounds) <- c("lower","upper")
+
+estnames <- rownames(prior.bounds)
+
+prior <- matrix(data=coef(ou2),nrow=length(coef(ou2)),ncol=Np)
+rownames(prior) <- names(coef(ou2))
+for (n in estnames) {
+  prior[n,] <- runif(n=Np,min=prior.bounds[n,1],max=prior.bounds[n,2])
+}
+
+##Run Liu & West particle filter
+tic <- Sys.time()
+smc <- bsmc(
+            ou2,
+            params=prior,
+            est=estnames,
+            ntries=5,
+            smooth=0.02,
+            lower=prior.bounds[estnames,"lower"],
+            upper=prior.bounds[estnames,"upper"]
+            )
+toc <- Sys.time()
+
+prior <- smc$prior
+post <- smc$post
+
+print(etime <- toc-tic)
+
+print(
+      cbind(
+            prior.mean=apply(prior,1,mean),
+            posterior.mean=apply(post,1,mean),
+            truth=coef(ou2),
+            t(apply(post,1,quantile,c(0.025,0.5,0.975)))
+            )
+      )
+
+print(min(smc$eff.sample.size))

Added: pkg/tests/ou2-bsmc.Rout.save
===================================================================
--- pkg/tests/ou2-bsmc.Rout.save	                        (rev 0)
+++ pkg/tests/ou2-bsmc.Rout.save	2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,85 @@
+
+R version 2.11.1 (2010-05-31)
+Copyright (C) 2010 The R Foundation for Statistical Computing
+ISBN 3-900051-07-0
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(pomp)
+> 
+> set.seed(398585L)
+> data(ou2)
+> 
+> time(ou2) <- 1:10
+> 
+> Np <- 10000
+> 
+> prior.bounds <- rbind(
++                       alpha.2=c(-0.55,-0.45),
++                       alpha.3=c(0.25,0.35)
++                       )
+> colnames(prior.bounds) <- c("lower","upper")
+> 
+> estnames <- rownames(prior.bounds)
+> 
+> prior <- matrix(data=coef(ou2),nrow=length(coef(ou2)),ncol=Np)
+> rownames(prior) <- names(coef(ou2))
+> for (n in estnames) {
++   prior[n,] <- runif(n=Np,min=prior.bounds[n,1],max=prior.bounds[n,2])
++ }
+> 
+> ##Run Liu & West particle filter
+> tic <- Sys.time()
+> smc <- bsmc(
++             ou2,
++             params=prior,
++             est=estnames,
++             ntries=5,
++             smooth=0.02,
++             lower=prior.bounds[estnames,"lower"],
++             upper=prior.bounds[estnames,"upper"]
++             )
+Warning message:
+In sample.int(n = Np, size = Np, replace = TRUE, prob = g) :
+  Walker's alias method used: results are different from R < 2.2.0
+> toc <- Sys.time()
+> 
+> prior <- smc$prior
+> post <- smc$post
+> 
+> print(etime <- toc-tic)
+Time difference of 41.44052 secs
+> 
+> print(
++       cbind(
++             prior.mean=apply(prior,1,mean),
++             posterior.mean=apply(post,1,mean),
++             truth=coef(ou2),
++             t(apply(post,1,quantile,c(0.025,0.5,0.975)))
++             )
++       )
+        prior.mean posterior.mean truth       2.5%        50%      97.5%
+alpha.1  0.8000000      0.8000000   0.8  0.8000000  0.8000000  0.8000000
+alpha.2 -0.4999287     -0.5254444  -0.5 -0.5480088 -0.5461081 -0.4576629
+alpha.3  0.2996065      0.2964690   0.3  0.2827214  0.3009050  0.3012988
+alpha.4  0.9000000      0.9000000   0.9  0.9000000  0.9000000  0.9000000
+sigma.1  3.0000000      3.0000000   3.0  3.0000000  3.0000000  3.0000000
+sigma.2 -0.5000000     -0.5000000  -0.5 -0.5000000 -0.5000000 -0.5000000
+sigma.3  2.0000000      2.0000000   2.0  2.0000000  2.0000000  2.0000000
+tau      1.0000000      1.0000000   1.0  1.0000000  1.0000000  1.0000000
+x1.0    -3.0000000     -3.0000000  -3.0 -3.0000000 -3.0000000 -3.0000000
+x2.0     4.0000000      4.0000000   4.0  4.0000000  4.0000000  4.0000000
+> 
+> print(min(smc$eff.sample.size))
+[1] 5.987632
+>