[Pomp-commits] r270 - in pkg: R man tests
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jun 23 19:20:19 CEST 2010
Author: kingaa
Date: 2010-06-23 19:20:19 +0200 (Wed, 23 Jun 2010)
New Revision: 270
Added:
pkg/R/bsmc.R
pkg/man/bsmc.Rd
pkg/tests/ou2-bsmc.R
pkg/tests/ou2-bsmc.Rout.save
Log:
- add functionality for the Bayesian sequential Monte Carlo algorithm of Liu & West, implemented as 'bsmc'.
Added: pkg/R/bsmc.R
===================================================================
--- pkg/R/bsmc.R (rev 0)
+++ pkg/R/bsmc.R 2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,292 @@
+## Bayesian particle filtering codes
+##
+## in annotation L&W AGM == Liu & West "A General Algorithm"
+##
+## params = the initial particles for the parameter values
+## these are drawn from the prior distribution for the parameters
+## if a parameter is being held fixed, it is given as a row of NA's
+## est = names of parameters to estimate. Other parameters are not updated.
+## Np = number of particles
+## discount = delta, introduced in section 3.3 in Liu & West
+## ntries = number of samplesto draw from x_t+1|x(k)_t to estimate mean of mu(k)_t+1 as in sect 2.2 Liu & West
+## ntimes = number of timesteps in observation vector
+## lower = lower bounds on prior
+## upper = upper bounds on prior
+
+bsmc <- function (object, ...)
+ stop("function ",sQuote("bsmc")," is undefined for objects of class ",sQuote(class(object)))
+setGeneric('bsmc')
+
+setMethod(
+ "bsmc",
+ "pomp",
+ function (object, params, est,
+ smooth = 0.1,
+ ntries = 1,
+ tol = 1e-17,
+ lower = -Inf, upper = Inf,
+ seed = NULL,
+ verbose = getOption("verbose"),
+ max.fail = 0,
+ ...) {
+
+ if (missing(seed)) seed <- NULL
+ if (!is.null(seed)) {
+ if (!exists(".Random.seed",where=.GlobalEnv)) { # need to initialize the RNG
+ runif(1)
+ }
+ save.seed <- get(".Random.seed",pos=.GlobalEnv)
+ set.seed(seed)
+ }
+
+ error.prefix <- paste(sQuote("bsmc"),"error: ")
+
+ if (missing(params)) {
+ if (length(object at params)>0) {
+ params <- object at params
+ } else {
+ stop(error.prefix,sQuote("params")," must be supplied",call.=FALSE)
+ }
+ }
+
+ Np <- NCOL(params)
+ ntimes <- length(time(object))
+ if (is.null(dim(params))) {
+ params <- matrix(
+ params,
+ nrow=length(params),
+ ncol=Np,
+ dimnames=list(
+ names(params),
+ NULL
+ )
+ )
+ }
+ prior <- params
+
+ npars <- nrow(params)
+ paramnames <- rownames(params)
+ estind <- match(est,paramnames)
+ npars.est <- length(estind)
+
+ if (npars.est<1)
+ stop(error.prefix,"no parameters to estimate",call.=FALSE)
+
+ if (is.null(paramnames))
+ stop(error.prefix,sQuote("params")," must have rownames",call.=FALSE)
+
+ if ((length(smooth)!=1)||(smooth>1)||(smooth<=0))
+ stop(error.prefix,sQuote("smooth")," must be a scalar in [0,1)",call.=FALSE)
+
+ hsq <- smooth^2 # see Liu & West eq(3.6) p10
+ shrink <- sqrt(1-hsq)
+
+ if (
+ ((length(lower)>1)&&(length(lower)!=npars.est))||
+ ((length(upper)>1)&&(length(upper)!=npars.est))
+ ) {
+ stop(
+ error.prefix,
+ sQuote("lower")," and ",sQuote("upper"),
+ " must each have length 1 or length equal to that of ",sQuote("est"),
+ call.=FALSE
+ )
+ }
+
+ for (j in seq_len(Np)) {
+ if (any((params[estind,j]<lower)|(params[estind,j]>upper))) {
+ ind <- which((params[estind,j]<lower)|(params[estind,j]>upper))
+ stop(
+ error.prefix,
+ "parameter(s) ",paste(paramnames[estind[ind]],collapse=","),
+ " in column ",j," in ",sQuote("params"),
+ " is/are outside the box defined by ",
+ sQuote("lower")," and ",sQuote("upper"),
+ call.=FALSE
+ )
+ }
+ }
+
+ xstart <- init.state(object,params=params)
+ statenames <- rownames(xstart)
+ nvars <- nrow(xstart)
+
+ times <- time(object,t0=TRUE)
+ x <- xstart
+
+ loglik <- rep(NA,ntimes)
+ eff.sample.size <- rep(NA,ntimes)
+ nfail <- 0
+
+ mu <- array(data=NA,dim=c(nvars,Np,1))
+ rownames(mu) <- rownames(xstart)
+ m <- array(data=NA,dim=c(npars,Np))
+ rownames(m) <- rownames(params)
+
+ for (nt in seq_len(ntimes)) {
+
+ ## calculate particle means ; as per L&W AGM (1)
+ params.mean <- apply(params,1,mean)
+ ## calculate particle covariances : as per L&W AGM (1)
+ params.var <- cov(t(params[estind,,drop=FALSE]))
+
+ if (verbose) {
+ cat("at step",nt,"(time =",times[nt+1],")\n")
+ print(
+ rbind(
+ prior.mean=params.mean[estind],
+ prior.sd=sqrt(diag(params.var))
+ )
+ )
+ }
+
+ for ( j in seq_len(Np) ) {
+ x.tmp <- matrix(data=x[,j],nrow=nvars,ncol=ntries)
+ rownames(x.tmp) <- statenames
+ p.tmp <- matrix(data=params[,j],nrow=npars,ncol=ntries)
+ rownames(p.tmp) <- paramnames
+
+ ## update mean of states at time nt as per L&W AGM (1)
+ tries <- rprocess(
+ object,
+ xstart=x.tmp,
+ times=times[c(nt,nt+1)],
+ params=p.tmp
+ )
+
+ mu[,j,1] <- apply(tries[,,2,drop=FALSE],1,mean)
+
+ ## shrink parameters towards mean as per Liu & West eq (3.3) and L&W AGM (1)
+ m[,j] <- shrink*params[,j] + (1-shrink)*params.mean
+ }
+ ## evaluate probability of obervation given mean value of parameters and states (used in L&W AGM (5) below)
+ g <- dmeasure(
+ object,
+ y=object at data[,nt,drop=FALSE],
+ x=mu,
+ times=times[nt+1],
+ params=m
+ )
+ ## sample indices -- From L&W AGM (2)
+ k <- sample.int(n=Np,size=Np,replace=TRUE,prob=g)
+ params <- params[,k]
+ m <- m[,k]
+
+ ## sample new parameter vector as per L&W AGM (3) and Liu & West eq(3.2)
+ pvec <- try(
+ mvtnorm::rmvnorm(
+ n=Np,
+ mean=rep(0,npars.est),
+ sigma=hsq*params.var,
+ method="eigen"
+ ),
+ silent=FALSE
+ )
+ if (inherits(pvec,"try-error"))
+ stop(error.prefix,"error in ",sQuote("rmvnorm"),call.=FALSE)
+ if (any(!is.finite(pvec)))
+ stop(error.prefix,"extreme particle depletion",call.=FALSE)
+ params[estind,] <- m[estind,]+t(pvec)
+
+ ## sample current state vector x^(g)_(t+1) as per L&W AGM (4)
+ X <- rprocess(
+ object,
+ xstart=x[,k],
+ times=times[c(nt,nt+1)],
+ params=params
+ )[,,2,drop=FALSE]
+
+ ## evaluate likelihood of observation given X (from L&W AGM (4))
+ numer <- dmeasure(
+ object,
+ y=object at data[,nt,drop=FALSE],
+ x=X,
+ times=times[nt+1],
+ params=params
+ )
+ ## evaluate weights as per L&W AGM (5)
+ weights <- numer/g[k]
+
+ ## apply box constraints as per the priors
+ for (j in seq_len(Np)) {
+ ## the following seems problematic: will it tend to make the boundaries repellors
+ if (any((params[estind,j]<lower)|(params[estind,j]>upper))) {
+ weights[j] <- 0
+ }
+ ## might this rejection method be preferable?
+ ## while (any((params[estind,j]<lower)|(params[estind,j]>upper))) {
+ ## ## rejection method
+ ## pvec <- try(
+ ## mvtnorm::rmvnorm(
+ ## n=1,
+ ## mean=rep(0,npars.est),
+ ## sigma=hsq*params.var,
+ ## method="eigen"
+ ## ),
+ ## silent=FALSE
+ ## )
+ ## if (inherits(pvec,"try-error"))
+ ## stop(error.prefix,"error in ",sQuote("rmvnorm"),call.=FALSE)
+ ## if (any(!is.finite(pvec)))
+ ## stop(error.prefix,"extreme particle depletion",call.=FALSE)
+ ## params[estind,j] <- m[estind,j]+pvec[1,]
+ ## }
+ }
+
+ x[,] <- X
+
+ ## test for failure to filter
+ dim(weights) <- NULL
+ failures <- ((weights<tol)|(!is.finite(weights))) # test for NA weights
+ all.fail <- all(failures)
+ if (all.fail) { # all particles are lost
+ if (verbose) {
+ message("filtering failure at time t = ",times[nt+1])
+ }
+ nfail <- nfail+1
+ if (nfail > max.fail)
+ stop(error.prefix,"too many filtering failures",call.=FALSE)
+ loglik[nt] <- log(tol) # worst log-likelihood
+ weights <- rep(1/Np,Np)
+ eff.sample.size[nt] <- 0
+ } else { # not all particles are lost
+ ## compute log-likelihood
+ loglik[nt] <- log(mean(weights))
+ weights[failures] <- 0
+ weights <- weights/sum(weights)
+ ## compute effective sample-size
+ eff.sample.size[nt] <- 1/(weights%*%weights)
+ }
+
+ if (verbose) {
+ cat("effective sample size =",round(eff.sample.size[nt],1),"\n")
+ }
+
+ ## Matrix with samples (columns) from filtering distribution theta.t | Y.t
+ if (!all.fail) {
+ ## smp <- .Call(systematic_resampling,weights,PACKAGE="pomp.devel")
+ smp <- sample.int(n=Np,size=Np,replace=TRUE,prob=weights)
+ x <- x[,smp,drop=FALSE]
+ params[estind,] <- params[estind,smp,drop=FALSE]
+ }
+
+ }
+
+ if (!is.null(seed)) {
+ assign(".Random.seed",save.seed,pos=.GlobalEnv)
+ seed <- save.seed
+ }
+
+ list(
+ post=params,
+ prior=prior,
+ eff.sample.size=eff.sample.size,
+ cond.loglik=loglik,
+ smooth=smooth,
+ seed=seed,
+ nfail=nfail,
+ loglik=sum(loglik),
+ weights=weights
+ )
+ }
+ )
Added: pkg/man/bsmc.Rd
===================================================================
--- pkg/man/bsmc.Rd (rev 0)
+++ pkg/man/bsmc.Rd 2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,107 @@
+\name{bsmc}
+\alias{bsmc}
+\alias{bsmc-pomp}
+\alias{bsmc,pomp-method}
+\title{Liu and West Bayesian Particle Filter}
+\description{
+ Generates draws from the posterior distribution for the parameters using the Liu and West algorithm.
+ \code{bsmc} gives draws from the posterior.
+}
+\usage{
+bsmc(object, \dots)
+\S4method{bsmc}{pomp}(object, params, est, smooth = 0.1, ntries = 1,
+ tol = 1e-17, lower = -Inf, upper = Inf, seed = NULL,
+ verbose = getOption("verbose"), max.fail = 0, \dots)
+}
+\arguments{
+\item{object}{
+ An object of class \code{pomp} or inheriting class \code{pomp}.
+ }
+ \item{params}{
+ A \code{npars} x \code{np} matrix containing the parameters corresponding to the initial state values in \code{xstart}.
+ The matrix should be Np columns long, where Np is the number of particles. The values for each row should be Np draws from the prior distribution for the parameter.
+ It is permissible to supply \code{params} as a named numeric vector, i.e., without a \code{dim} attribute.
+ In this case, all particles will inherit the same parameter values, which is equivalent to a degenerate prior.
+ If some parameter values are to be held fixed in the fitting (i.e. params.fixed = TRUE), then those elements of params (rows if params is given as a matrix), must be set to NA.
+ }
+ \item{est}{
+ Names of the rows of \code{params} that are to be estimated.
+ No updates will be made to the other parameters.
+ }
+ \item{smooth}{
+ Kernel density smoothing parameters.
+ The compensating shrinkage factor will be \code{sqrt(1-smooth^2)}.
+ Thus, \code{smooth=0} means that no noise will be added to parameters.
+ Generally, the value of \code{smooth} should be chosen close to 0 (i.e., \code{shrink~0.1}).
+ }
+ \item{ntries}{
+ Number of draws from \code{rprocess} per particle used to estimate the expected value of the state process at time \code{t+1} given the state and parameters at time \code{t}.
+ }
+ \item{tol}{
+ Particles with log likelihood below \code{tol} are considered to be "lost".
+ A filtering failure occurs when, at some time point, all particles are lost.
+ When all particles are lost, the conditional log likelihood at that time point is set to be \code{log(tol)}.
+ }
+ \item{lower, upper}{
+ optional; lower and upper bounds on the priors.
+ This is useful in case there are box constraints satisfied by the priors.
+ The posterior is guaranteed to lie within these bounds.
+ }
+ \item{seed}{
+ optional; an object specifying if and how the random number generator should be initialized (\sQuote{seeded}).
+ If \code{seed} is an integer, it is passed to \code{set.seed} prior to any simulation and is returned as the \dQuote{seed} element of the return list.
+ By default, the state of the random number generator is not changed and the value of \code{.Random.seed} on the call is stored in the \dQuote{seed} element of the return list.
+ }
+ \item{verbose}{
+ logical; if \code{TRUE}, print diagnostic messages.
+ }
+ \item{max.fail}{
+ The maximum number of filtering failures allowed.
+ If the number of filtering failures exceeds this number, execution will terminate with an error.
+ }
+ \item{\dots}{
+ currently ignored.
+ }
+}
+\value{
+ A list with the following elements:
+ \item{post}{
+ A matrix containing draws from the approximate posterior distribution.
+ }
+ \item{prior}{
+ A matrix containing draws from the prior distribution (identical to \code{params} on call).
+ }
+ \item{eff.sample.size}{
+ A vector containing the effective number of particles at each time point.
+ }
+ \item{cond.loglik}{
+ A vector containing the conditional log likelihoods at each time point.
+ }
+ \item{smooth}{
+ The smoothing parameter used (see above).
+ }
+ \item{seed}{
+ The state of the random number generator at the time \code{bsmc} was called.
+ If the argument \code{seed} was specified, this is a copy;
+ if not, this is the internal state of the random number generator at the time of call.
+ }
+ \item{nfail}{
+ The number of filtering failures encountered.
+ }
+ \item{loglik}{
+ The estimated log-likelihood.
+ }
+ \item{weights}{
+ The resampling weights for each particle.
+ }
+}
+\author{
+ Michael Lavine (lavine at math dot umass dot edu),
+ Matthew Ferrari (mferrari at psu dot edu),
+ Aaron A. King
+}
+\examples{
+## See the vignettes for examples.
+}
+\seealso{\link{pomp-class}}
+\keyword{ts}
Added: pkg/tests/ou2-bsmc.R
===================================================================
--- pkg/tests/ou2-bsmc.R (rev 0)
+++ pkg/tests/ou2-bsmc.R 2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,51 @@
+library(pomp)
+
+set.seed(398585L)
+data(ou2)
+
+time(ou2) <- 1:10
+
+Np <- 10000
+
+prior.bounds <- rbind(
+ alpha.2=c(-0.55,-0.45),
+ alpha.3=c(0.25,0.35)
+ )
+colnames(prior.bounds) <- c("lower","upper")
+
+estnames <- rownames(prior.bounds)
+
+prior <- matrix(data=coef(ou2),nrow=length(coef(ou2)),ncol=Np)
+rownames(prior) <- names(coef(ou2))
+for (n in estnames) {
+ prior[n,] <- runif(n=Np,min=prior.bounds[n,1],max=prior.bounds[n,2])
+}
+
+##Run Liu & West particle filter
+tic <- Sys.time()
+smc <- bsmc(
+ ou2,
+ params=prior,
+ est=estnames,
+ ntries=5,
+ smooth=0.02,
+ lower=prior.bounds[estnames,"lower"],
+ upper=prior.bounds[estnames,"upper"]
+ )
+toc <- Sys.time()
+
+prior <- smc$prior
+post <- smc$post
+
+print(etime <- toc-tic)
+
+print(
+ cbind(
+ prior.mean=apply(prior,1,mean),
+ posterior.mean=apply(post,1,mean),
+ truth=coef(ou2),
+ t(apply(post,1,quantile,c(0.025,0.5,0.975)))
+ )
+ )
+
+print(min(smc$eff.sample.size))
Added: pkg/tests/ou2-bsmc.Rout.save
===================================================================
--- pkg/tests/ou2-bsmc.Rout.save (rev 0)
+++ pkg/tests/ou2-bsmc.Rout.save 2010-06-23 17:20:19 UTC (rev 270)
@@ -0,0 +1,85 @@
+
+R version 2.11.1 (2010-05-31)
+Copyright (C) 2010 The R Foundation for Statistical Computing
+ISBN 3-900051-07-0
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> library(pomp)
+>
+> set.seed(398585L)
+> data(ou2)
+>
+> time(ou2) <- 1:10
+>
+> Np <- 10000
+>
+> prior.bounds <- rbind(
++ alpha.2=c(-0.55,-0.45),
++ alpha.3=c(0.25,0.35)
++ )
+> colnames(prior.bounds) <- c("lower","upper")
+>
+> estnames <- rownames(prior.bounds)
+>
+> prior <- matrix(data=coef(ou2),nrow=length(coef(ou2)),ncol=Np)
+> rownames(prior) <- names(coef(ou2))
+> for (n in estnames) {
++ prior[n,] <- runif(n=Np,min=prior.bounds[n,1],max=prior.bounds[n,2])
++ }
+>
+> ##Run Liu & West particle filter
+> tic <- Sys.time()
+> smc <- bsmc(
++ ou2,
++ params=prior,
++ est=estnames,
++ ntries=5,
++ smooth=0.02,
++ lower=prior.bounds[estnames,"lower"],
++ upper=prior.bounds[estnames,"upper"]
++ )
+Warning message:
+In sample.int(n = Np, size = Np, replace = TRUE, prob = g) :
+ Walker's alias method used: results are different from R < 2.2.0
+> toc <- Sys.time()
+>
+> prior <- smc$prior
+> post <- smc$post
+>
+> print(etime <- toc-tic)
+Time difference of 41.44052 secs
+>
+> print(
++ cbind(
++ prior.mean=apply(prior,1,mean),
++ posterior.mean=apply(post,1,mean),
++ truth=coef(ou2),
++ t(apply(post,1,quantile,c(0.025,0.5,0.975)))
++ )
++ )
+ prior.mean posterior.mean truth 2.5% 50% 97.5%
+alpha.1 0.8000000 0.8000000 0.8 0.8000000 0.8000000 0.8000000
+alpha.2 -0.4999287 -0.5254444 -0.5 -0.5480088 -0.5461081 -0.4576629
+alpha.3 0.2996065 0.2964690 0.3 0.2827214 0.3009050 0.3012988
+alpha.4 0.9000000 0.9000000 0.9 0.9000000 0.9000000 0.9000000
+sigma.1 3.0000000 3.0000000 3.0 3.0000000 3.0000000 3.0000000
+sigma.2 -0.5000000 -0.5000000 -0.5 -0.5000000 -0.5000000 -0.5000000
+sigma.3 2.0000000 2.0000000 2.0 2.0000000 2.0000000 2.0000000
+tau 1.0000000 1.0000000 1.0 1.0000000 1.0000000 1.0000000
+x1.0 -3.0000000 -3.0000000 -3.0 -3.0000000 -3.0000000 -3.0000000
+x2.0 4.0000000 4.0000000 4.0 4.0000000 4.0000000 4.0000000
+>
+> print(min(smc$eff.sample.size))
+[1] 5.987632
+>
More information about the pomp-commits
mailing list