[Dream-commits] r4 - pkg/R

noreply at r-forge.r-project.org
Mon Feb 15 00:42:17 CET 2010


Author: josephguillaume
Date: 2010-02-15 00:42:16 +0100 (Mon, 15 Feb 2010)
New Revision: 4

Modified:
   pkg/R/ACR.R
   pkg/R/AdaptpCR.R
   pkg/R/CalcCbWb.R
   pkg/R/CalcDelta.R
   pkg/R/CompDensity.R
   pkg/R/CovInit.R
   pkg/R/DEStrategy.R
   pkg/R/GenCR.R
   pkg/R/LHSInit.R
   pkg/R/RemOutlierChains.R
   pkg/R/dream.R
   pkg/R/handleBounds.R
   pkg/R/metrop.R
   pkg/R/offde.R
Log:
Numerous errors corrected by reviewing the dimensions and ranges of arguments and return values. Outlier removal moved from RemOutlierChains to the main dream loop.

Modified: pkg/R/ACR.R
===================================================================
--- pkg/R/ACR.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/ACR.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -10,12 +10,12 @@
 ## Multivariate Analysis'.$$
 ## 
 ## Inputs:
-## p - number of independent variables.
-## n - sample size.
-## alpha - significance level (default = 0.05).
+##' @param p number of independent variables.
+##' @param n sample size.
+##' @param alpha significance level (default = 0.05).
 ## 
 ## Output:
-## x - critical value of the maximun squared Mahalanobis distance.
+##' @return ACR critical value of the maximum squared Mahalanobis distance.
 ## 
 ## We can generate all the critical values of the maximum squared Mahalanobis
 ## distance presented in Table XXXII of Barnett and Lewis (1978) and 
@@ -59,17 +59,17 @@
 ##
 ACR <- function(p,n,alpha){
 
-if (missing(alpha)) alpha <- 0.05
+  if (missing(alpha)) alpha <- 0.05
 
-if (alpha<=0 | alpha>=1) stop("Significance level must be between 0 and 1")
+  if (alpha<=0 | alpha>=1) stop("Significance level must be between 0 and 1")
 
-if (missing(p)|missing(n)) stop("Requires args p and n")
+  if (missing(p)|missing(n)) stop("Requires args p and n")
 
-a <- alpha
-## F distribution critical value with p and n-p-1 degrees of freedom using the Bonferroni correction
-Fc <- qf(1-a/n,p,n-p-1)
-ACR <- (p*(n-1)^2*Fc)/(n*(n-p-1)+(n*p*Fc))
+  a <- alpha
+  ## F distribution critical value with p and n-p-1 degrees of freedom using the Bonferroni correction
+  Fc <- qf(1-a/n,p,n-p-1)
+  ACR <- (p*(n-1)^2*Fc)/(n*(n-p-1)+(n*p*Fc))
 
-return(ACR)
+  return(ACR)
   
-}##ACR
+} ##ACR
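
A quick usage sketch of the revised function (illustrative values, not taken from the package's tests):

  ## critical value for p = 2 variables, n = 30 observations, default alpha = 0.05
  ACR(p = 2, n = 30)
  ## a stricter significance level
  ACR(p = 2, n = 100, alpha = 0.01)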

Modified: pkg/R/AdaptpCR.R
===================================================================
--- pkg/R/AdaptpCR.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/AdaptpCR.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,7 +1,20 @@
-## Updates the probabilities of the various crossover values
-## in:
-## lCR.old vector of length nCR
+##' Updates the probabilities of the various crossover values
+##'
+##' @param CR matrix nseq x steps
+##' @param delta.tot vector of length nCR
+##' @param lCR.old vector of length nCR
+##' @param control list with elements nCR,nseq
+##'
+##' @return list with elements:
+##'   pCR vector of length nCR
+##'   lCR vector of length nCR
 AdaptpCR <- function(CR,delta.tot,lCR.old,control){
+
+  ## dimensions:
+  ##  CR vector of length nseq*steps
+  ##  zz iter. 1:nCR
+  ##  cr.count. scalar. range [0,nseq*steps]
+  
   ## Make CR to be a single vector
   CR <- c(CR)
   
@@ -10,15 +23,15 @@
   for (zz in 1:control$nCR){
     
     ## Determine how many times a particular CR value is used
-    ## TODO: shouldn't this be which(CR==CR[z]])?
-    idx <- which(CR==zz/control$nCR)
+    ## TODO: shouldn't this be which(CR==CR[zz])?
+    cr.count <- length(which(CR==zz/control$nCR))
     
     ## This is used to weight delta.tot
-    lCR[zz] <- lCR.old[zz]+length(idx)
+    lCR[zz] <- lCR.old[zz]+cr.count
   }                                     #for CRs
   
   ## Adapt pCR using information from averaged normalized jumping distance
-  pCR <- control$nseq*delta.tot/lCR / sum(delta.tot)
+  pCR <- control$nseq * (delta.tot / lCR) / sum(delta.tot)
     
   ## Normalize pCR
   pCR <- pCR/sum(pCR)
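
Assuming the remainder of the function returns the documented list(pCR, lCR), a minimal sketch of a call (all inputs fabricated):

  control <- list(nCR = 3, nseq = 10)
  CR <- matrix(rep(1:3/3, length.out = 50), 10, 5)  ## nseq x steps crossover values
  delta.tot <- c(5, 12, 8)                          ## accumulated jump distance per CR value
  tmp <- AdaptpCR(CR, delta.tot, lCR.old = rep(0, 3), control = control)
  tmp$pCR  ## updated selection probabilities, normalized to sum to 1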

Modified: pkg/R/CalcCbWb.R
===================================================================
--- pkg/R/CalcCbWb.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/CalcCbWb.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,7 +1,10 @@
+##' This function calculates the parameters for the exponential power density,
+##' Equation [20] of Thiemann et al. WRR 2001, Vol 37, No 10, 2521-2535
+##' @param beta scalar
+##' @return list with scalar elements cb and wb
+## TODO: valid input range?
 CalcCbWb <- function(beta){
-  ## This function calculates the parameters for the exponential power density
-  ## Equation [20] paper by Thiemann et al. WRR 2001, Vol 37, No 10, 2521-2535
-
+ 
   ## First calculate some dummy variables
   A1 <- gamma(3*(1+beta)/2)
   A2 <- gamma((1+beta)/2)
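
As a sanity check on the elided remainder of the function: for beta = 0 the exponential power density reduces to the Gaussian, so (if the body follows Thiemann et al.) one would expect roughly

  cbwb <- CalcCbWb(0)
  cbwb$cb  ## ~0.5, the Gaussian exponent coefficient
  cbwb$wb  ## ~1/sqrt(2*pi) ~ 0.399, the Gaussian normalizing constant

(element names cb and wb as consumed by dream.R below).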

Modified: pkg/R/CalcDelta.R
===================================================================
--- pkg/R/CalcDelta.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/CalcDelta.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,18 +1,26 @@
-## Calculate total normalized Euclidean distance for each crossover value
-## in:
-##   delta.tot	vector of length nCR ? TODO
-##   delta.normX	vector of length ndim ? TODO
-##   CR		vector of length nseq ? TODO
-CalcDelta <- function(control,delta.tot,delta.normX,CR){
+##' Calculate total normalized Euclidean distance for each crossover value
 
+##' @param nCR scalar
+##' @param delta.tot vector of length nCR
+##' @param delta.normX vector of length nseq
+##' @param CR vector of length nseq
+
+##' @return delta.tot vector of length nCR
+CalcDelta <- function(nCR,delta.tot,delta.normX,CR){
+
+  ## Dimensions:
+  ##  zz. iter 1:nCR
+  ##  idx. vector. length [0,nseq]. range [1,nseq]
+  
   ## Derive sum_p2 for each different CR value
-  for (zz in 1:control$nCR){
+  for (zz in 1:nCR){
     ## Find which chains are updated with zz/MCMCPar.nCR
     ## TODO: possible that floating point error prevents exact comparison?
-    idx <- which(CR==zz/control$NCR)
+    idx <- which(CR==zz/nCR)
     
     ## Add the normalized squared distance tot the current delta_tot;
     delta.tot[zz] <- delta.tot[zz]+sum(delta.normX[idx])
+    
   } ## for CRs
   return(delta.tot)
 } ##CalcDelta
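
A worked call against the new signature (inputs fabricated; note the comparison CR==zz/nCR relies on both sides being computed identically):

  nCR <- 3
  CR <- c(1/3, 2/3, 2/3, 1)           ## crossover value used by each of nseq = 4 chains
  delta.normX <- c(0.5, 1.2, 0.3, 2)  ## normalized jump distance of each chain
  CalcDelta(nCR, rep(0, nCR), delta.normX, CR)
  ## -> c(0.5, 1.5, 2.0): distances accumulated per crossover value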

Modified: pkg/R/CompDensity.R
===================================================================
--- pkg/R/CompDensity.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/CompDensity.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,33 +1,57 @@
+##' Computes the density of each x value
+##'
+##' @param x matrix nseq x ndim
+##' @param control list containing gamma,Wb,Cb
+##' @param FUN the model to run:
+##'   an R function whose first argument is a vector of length ndim,
+##'   returning a single scalar corresponding to the chosen option
+##' @param option unambiguous abbrev of:
+##'   posterior.density, calc.loglik, calc.rmse, logposterior.density,calc.weighted.rmse
+##' @param measurement list containing TODO: not sure
+##'   data: vector of observations corresponding to model output
+##'   sigma: scalar
+##' @param ... additional arguments to FUN
+##' @return list with components:
+##'   p vector of length nseq
+##'   logp vector of length nseq
+##
+## TODO: p may be erroneously equal to logp?
 CompDensity <- function(x,control,FUN,option,
                         measurement,...){
-  ## This function computes the density of each x value
-  ## option is unambiguous abbrev of:
-  ##   posterior.density, calc.loglik, calc.rmse, logposterior.density,calc.weighted.rmse
+
+  ## dimensions:
+  ##  i. iter 1:nseq
+  ##  modpred. scalar or vector commensurate to measurement$data
+  ##  err. vector of same length as modpred
+  ##  SSR scalar
+
+  p <- rep(NA,nrow(x))
+  logp <- rep(NA,nrow(x))
   
   ## Sequential evaluation
   for (ii in 1:nrow(x)){
     ## Call model to generate simulated data
     ## TODO: correct use of optional pars?
-    modpred <- FUN(...)
+    modpred <- FUN(x[ii,],...)
 
     switch(option,
            ## Model directly computes posterior density
            posterior.density={
-             p[ii,1:2] <- c(ModPred,ii)
-             logp <- log(p[ii,1])
+             p[ii] <- modpred
+             logp[ii] <- log(modpred)
            },
            ## Model computes output simulation           
            calc.loglik={
              err <- measurement$data-modpred
                  
              ## Compute the number of measurement data
-             N <- nrow(measurement)
+             N <- length(measurement$data)
              
              ## Derive the log likelihood
-             logp[ii,1] <- N*log(control$Wb/measurement$sigma)-
-               control$CB*(sum((abs(Err/measurement$sigma))^(2/(1+control$gamma))))
+             logp[ii] <- N*log(control$Wb/measurement$sigma)-
+               control$Cb*(sum((abs(err/measurement$sigma))^(2/(1+control$gamma))))
              ## And retain in memory
-             p[ii,1:2] <- c(logp[ii,1],ii)
+             p[ii] <- logp[ii]
            },
            ## Model computes output simulation
            calc.rmse={
@@ -35,14 +59,14 @@
              ## Derive the sum of squared error
              SSR <- sum(abs(err)^(2/(1+control$gamma)))
              ## And retain in memory
-             p[ii,1:2] <- c(-SSR,ii)
-             logp[ii,1] <- -0.5*SSR
+             p[ii] <- -SSR
+             logp[ii] <- -0.5*SSR
              
            },
            ## Model directly computes log posterior density
            logposterior.density={
-             p[ii,1:2] <- [modpred ii]
-             logp[ii,1] <- p[ii,1]
+             p[ii] <- modpred
+             logp[ii] <- modpred
            },
            ## Similar as 3, but now weights with the Measurement Sigma
            ## TODO: appears to be no difference to calc.rmse
@@ -52,10 +76,9 @@
              ## Derive the sum of squared error
              SSR <- sum(abs(err)^(2/(1+control$gamma)))
              ## And retain in memory
-             p[ii,2] <- c(-SSR,ii)
-             logp[ii,1] <- -0.5*SSR
-           }
-           )
-  }## for rows
-return(list(p=p,logp=logp))
-}##CompDensity
+             p[ii] <- -SSR
+             logp[ii] <- -0.5*SSR
+           }) ##switch
+  }           ## for rows
+  return(list(p=p,logp=logp))
+} ##CompDensity
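
A toy sketch of the corrected calling convention (model and data fabricated):

  FUN <- function(theta) sum(theta^2)  ## trivial model returning one scalar
  x <- matrix(runif(10), 5, 2)         ## nseq = 5 points, ndim = 2
  control <- list(gamma = 0)
  measurement <- list(data = 0, sigma = 1)
  CompDensity(x, control, FUN, option = "calc.rmse", measurement = measurement)
  ## -> list(p = -SSR per point, logp = -SSR/2 per point)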

Modified: pkg/R/CovInit.R
===================================================================
--- pkg/R/CovInit.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/CovInit.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,16 +1,27 @@
-## in:
-##  muX. vector of length ndim
-## out:
-##  x. dim nseq x ndim
-CovInit <- function(pars, nseq,muX,qcov,bound.handling)
+##' Alternative sampling method
+##'  used in examples, may not be useful in practice
+##'  @param pars list of parameter vectors
+##'  @param nseq scalar
+##'  @param muX vector of length ndim
+##'  @param qcov covariance matrix, ndim x ndim
+##'  @param bound.handling character. one of: none,reflect,bound,fold,rand
+##'  @return
+##'    x. matrix nseq x ndim. parameter-wise range [xmin,xmax] ensured by handleBounds
+##
+## depends on:
+##   handleBounds.R
+##   rand_utils.R
+##
+##' When used as input to dream, muX, qcov and bound.handling must be passed as extra parameters
+CovInit <- function(pars,nseq,muX,qcov,bound.handling)
 {
 
   ##[x] = repmat(Extra.muX,MCMCPar.seq,1) + randn(MCMCPar.seq,MCMCPar.n) * chol(Extra.qcov);
-  x <- t(matrix(rep(a,nseq),length(a)))+randn(control$nseq,length(pars)) %*% chol(qcov)
+  x <- t(matrix(rep(muX,nseq),length(muX)))+randn(nseq,length(pars)) %*% chol(qcov)
   
   lower <- sapply(pars, function(x) min(x[[1]]))
   upper <- sapply(pars, function(x) max(x[[1]]))
 
   x <- handleBounds(x,lower,upper,bound.handling)
   return(x)
-}
\ No newline at end of file
+}
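
A usage sketch (assumes randn from rand_utils.R and handleBounds are loaded; each pars element is a list holding the parameter's range, as dream arranges):

  pars <- list(a = list(c(0, 10)), b = list(c(-1, 1)))
  x0 <- CovInit(pars, nseq = 4, muX = c(5, 0), qcov = diag(c(1, 0.1)),
                bound.handling = "reflect")
  dim(x0)  ## 4 x 2, each row a candidate starting point within bounds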

Modified: pkg/R/DEStrategy.R
===================================================================
--- pkg/R/DEStrategy.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/DEStrategy.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,7 +1,16 @@
+##' Determine which sequences to evolve with what DE strategy
+##' @param control list with elements DEpairs, nseq
+##' @return DEversion 
+##'   vector of length control$nseq. range [1,DEpairs]
+##
+##  selects, per chain, the largest number of pairs whose cumulative probability lies below a uniform random draw
+##  TODO: alternative implementation?
 DEStrategy<-function(control){
-  ## Determine which sequences to evolve with what DE strategy
-  ## in: control$ DEpairs, nseq
-  ## out: DEversion vector of length control$nseq
+
+  ## dimensions:
+  ##  p.pair vector length DEpairs+1. range [0,1]
+  ##  Z vector length nseq. range [0,1]
+  ##  qq. iter 1:nseq
   
   ## Determine probability of selecting a given number of pairs
   p.pair <- (1/control$DEpairs)*rep(1,control$DEpairs)
@@ -11,10 +20,9 @@
   Z <- runif(control$nseq)
   
   ## Select number of pairs
-  DEversion<-rep(0,control$nseq)
+  DEversion<-rep(NA,control$nseq)
   for (qq in 1:control$nseq){
-    z <- which(Z[qq]>p.pair)
-    DEversion[qq]<-tail(z,1)
+    DEversion[qq]<-tail(which(p.pair<Z[qq]),1)
   }
   return(DEversion)
 }#DEStrategy
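
An illustrative call (per the dimension notes above, p.pair is a cumulative probability vector of length DEpairs+1, e.g. c(0, 1/3, 2/3, 1) for DEpairs = 3, so each chain draws a pair count in [1, DEpairs]):

  control <- list(DEpairs = 3, nseq = 6)
  DEStrategy(control)
  ## -> e.g. c(2, 1, 3, 1, 2, 3): number of DE pairs for each chain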

Modified: pkg/R/GenCR.R
===================================================================
--- pkg/R/GenCR.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/GenCR.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,13 +1,27 @@
+##' Generates CR values based on current probabilities
+##' @param pCR vector of length nCR, summing to 1 (?)
+##' @param control must have pars nseq,steps,nCR
+##' @return
+##'   CR matrix nseq x steps. range [1/nCR,1]
 GenCR <- function(pCR,control){
-  ## Generates CR values based on current probabilities
+
+  ##dimensions:
+  ## L vector of length nCR
+  ## L2 vector of length nCR+1
+  ## r vector of length nseq*steps of range [1,nseq*steps]
+  ## idx vector of variable length. range of r
+  ## CR vector of length nseq*steps. range [1/nCR,1]
   
   ## How many candidate points for each crossover value?
-                                        # TODO: rmultinom may not be equivalent to multrnd
-  L <- rmultinom(1,size=control$nseq*control$steps,p=pCR)
+  ## TODO: verify result matches matlab
+  ## MATLAB: [L] = multrnd(MCMCPar.seq * MCMCPar.steps,pCR);
+  L <- as.numeric(rmultinom(1,size=control$nseq*control$steps,p=pCR))
   L2 <- c(0,cumsum(L))
   
   ## Then select which candidate points are selected with what CR
   r <- sample(control$nseq*control$steps)
+
+  CR <- rep(NA,control$nseq*control$steps)
   
   ## Then generate CR values for each chain
   for (zz in 1:control$nCR){
@@ -18,11 +32,13 @@
     ## Take the appropriate elements of r
     idx <- r[i.start:i.end]
     
-    ## Assign these indices MCMCPar.CR(zz)
+    ## Assign these indices the CR value zz/control$nCR
     CR[idx] <- zz/control$nCR
     
-    ## Now reshape CR
-    CR <- array(CR,control$nseq,control$steps)\
   } ## for nCR
-return(CR)
-}   ## GenCR
+    
+  ## Now reshape CR
+  CR <- matrix(CR,control$nseq,control$steps)
+
+  return(CR)
+} ## GenCR
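
A minimal sketch of the repaired function:

  control <- list(nseq = 4, steps = 5, nCR = 3)
  CR <- GenCR(pCR = rep(1/3, 3), control = control)
  dim(CR)  ## 4 x 5, entries drawn from {1/3, 2/3, 1} in multinomial proportions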

Modified: pkg/R/LHSInit.R
===================================================================
--- pkg/R/LHSInit.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/LHSInit.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,28 +1,39 @@
-## Latin Hypercube sampling
+##' Latin Hypercube sampling
+##' @param pars list. length ndim - one element per parameter
+##' @param nseq scalar = number of sequences
+##' @return x matrix nseq x ndim. parameter-wise range [xmin, xmax]
+##
+## depends on rand_utils.R
 LHSInit <- function(pars, nseq){
-  ##TODO: R method?
+  ## TODO: R approach?
   ## lapply(pars, function(r)
   ##        sample(seq(min(r), max(r), length = nseq))
   ##        )
 
+  ## dimensions
+  ## xmin,xmax. vectors of length ndim
+  ## ran. matrix nseq x ndim. range [0,1]
+  ## idx. vector length nseq range [1,nseq]
+  ## P. vector length nseq. range [0,1]
+  
   xmin <- sapply(pars, function(x) min(x[[1]]))
   xmax <- sapply(pars, function(x) max(x[[1]]))
 
-  ## Define the size of xmin
-  nvar <- length(xmin)
+  ## Number of parameters
+  ndim <- length(pars)
   ## Initialize array ran with random numbers
-  ran <- rand(nseq,nvar)
+  ran <- rand(nseq,ndim)
   
   ## Initialize array s with zeros
-  s <- matrix(0,nseq,nvar)
+  s <- matrix(0,nseq,ndim)
   
-  ## Now fill s
-  for (j in 1:nvar){
+  ## Fill s by iterating through parameters
+  for (j in 1:ndim){
     ## Random permutation
     idx <- sample(nseq)
-    P <- idx-ran[,j]/nseq
+    P <- (idx-ran[,j])/nseq
     s[,j] <- xmin[j]+P*(xmax[j]-xmin[j])
   } ##for pars
 
-  return(x)
-} ## LHSInit
\ No newline at end of file
+  return(s)
+} ## LHSInit
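
A usage sketch (assumes rand from rand_utils.R is loaded; pars elements wrapped in lists, as in dream):

  pars <- list(p1 = list(c(0, 1)), p2 = list(c(-5, 5)))
  x0 <- LHSInit(pars, nseq = 10)
  dim(x0)  ## 10 x 2; one stratified sample per sequence, within each parameter's range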

Modified: pkg/R/RemOutlierChains.R
===================================================================
--- pkg/R/RemOutlierChains.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/RemOutlierChains.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -1,40 +1,58 @@
-## Finds outlier chains and removes them when needed
-## in:
-## hist.logp matrix dim ndraw/nseq x nseq = length ndraw
-RemOutlierChains <- function(X,Sequences,hist.logp,Iter,outlier,control){
+##' Finds outlier chains
+##' Note: differs from matlab implementation in that it does not remove outliers
+##'  removal is done in main dream function
+##
+##' @param X matrix nseq x ndim+2
+##' @param hist.logp matrix, up to ndraw/nseq rows x nseq columns
+##' @param control list with elements outlierTest,nseq,ndim
+
+##' @return list with elements:
+##'   chain.id. vector/scalar. length [0,nseq]. range [1,nseq]
+##'   mean.hist.logp vector. length nseq
+##
+## depends: ACR.R
+##
+## TODO: Could pass only x, not X. Could return r.idx, not mean.hist.logp
+RemOutlierChains <- function(X,hist.logp,control){
+  
+  ## Dimensions:
+  ##
+  ##  idx.end, idx.start. scalars
+  ##  q13. vector of length 2
+  ##  iqr, upper.range. scalars
+  ##  G, t2, Gcrit. scalars
+  ##  alpha, idx, d1. scalars
+
+  
   ## Determine the number of elements of L_density
   idx.end <- nrow(hist.logp)
   idx.start <- floor(0.5*idx.end)
   
   ## Then determine the mean log density of the active chains
-  ## vector length nseq
   mean.hist.logp <- colMeans(hist.logp[idx.start:idx.end,])
   
   ## Initialize chain_id and Nid
   chain.id <- NULL
-  Nid <- 0
   
   ## Check whether any of these active chains are outlier chains
   switch(control$outlierTest,
-         'IQR_test'={
-           ## TODO: shouldn't upper.range be Q3+3*IQR?
-           
+         'IQR_test'={         
            ## Derive the upper and lower quantile of the data
            q13<-quantile(mean.hist.logp,c(0.75,0.25))
            ## Derive the Inter quartile range
           iqr <- q13[1]-q13[2]
            ## Compute the upper range -- to detect outliers
+           ## TODO: shouldn't upper.range be Q3+3*IQR?
            upper.range <- q13[2]-2*iqr
            ## See whether there are any outlier chains
            chain.id <- which(mean.hist.logp<upper.range)
-           Nid <- length(chain.id)
          },
          'Grubbs_test'={
            ## Test whether minimum log_density is outlier
            G <- (mean(mean.hist.logp)-min(mean.hist.logp))/sd(mean.hist.logp)
            
            ## Determine t-value of one-sided interval
-           t2 = tinv(1 - 0.01/control$nseq,control$nseq-2)^2; ## 95% interval
+           t2 <- qt(1 - 0.01/control$nseq,control$nseq-2)^2 ## one-sided 99% interval, Bonferroni-corrected
            
            ## Determine the critical value
            Gcrit <- ((control$nseq-1)/sqrt(control$nseq))*sqrt(t2/(control$nseq-2+t2))
@@ -42,7 +60,6 @@
            ## Then check this
            if (G > Gcrit) { ## Reject null-hypothesis
              chain.id <- which.min(mean.hist.logp)
-             Nid <- 1
            }
          },
          'Mahal_test'={
@@ -52,35 +69,20 @@
            ## Find which chain has minimum log_density
            idx <- which.min(mean.hist.logp)
            ## Then check the Mahalanobis distance
-           d1 <- mahalanobis(X[idx,control$ndim],X[-idx,control$ndim])
+           ## TODO: MATLAB: d1 = mahal(X(idx,1:MCMCPar.n),X(ii,1:MCMCPar.n));
+           d1 <- mahalanobis(X[idx,1:control$ndim],
+                             center=colMeans(X[-idx,1:control$ndim]),
+                             cov=cov(X[-idx,1:control$ndim])
+                             )
            ## Then see whether idx is an outlier in X
            if (d1>upper.range) {
              chain.id <- idx
-             Nid <- 1
            }
-         }
-         )
+         },
+         stop("Unknown outlierTest specified")
+         ) ##switch
 
-  if (Nid>0){
-    ## Loop over each outlier chain
-    for (qq in 1:Nid){
-      ## Draw random other chain -- cannot be the same as current chain
-      r.idx <- which.max(mean.hist.logp)
-      ## Added -- update hist_logp -- chain will not be considered as an outlier chain then
-      hist.logp[,chain.id[qq]] <- hist.logp[,r.idx]
-      ## Jump outlier chain to r_idx -- Sequences
-      Sequences[1,1:(control$nseq+2),chain.id[qq]] <- X[r.idx,1:(control$nseq+2)]
-      ## Jump outlier chain to r_idx -- X
-      X[chain.id[qq],1:(control$nseq+2)]] <- X[r.idx,1:(control$nseq+2)]
-    ## Add to chainoutlier
-    outlier <- rbind(outlier,c(Iter,chain.id[qq]))
-  }
 
-return(list(
-X=X,
-Sequences=Sequences,
-hist.logp=hist.logp,
-outlier=outlier
-))
+  return(list(chain.id=chain.id,mean.hist.logp=mean.hist.logp))
 
 } ##RemOutlierChains
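
A fabricated example of the new report-only behaviour, using the IQR test:

  set.seed(1)
  hist.logp <- matrix(rnorm(200), 50, 4)  ## log-density history, one column per chain
  hist.logp[, 4] <- hist.logp[, 4] - 10   ## make chain 4 an obvious outlier
  X <- cbind(matrix(runif(8), 4, 2), rnorm(4), rnorm(4))
  control <- list(outlierTest = "IQR_test", nseq = 4, ndim = 2)
  RemOutlierChains(X, hist.logp, control)$chain.id  ## -> 4; removal is left to dream()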

Modified: pkg/R/dream.R
===================================================================
--- pkg/R/dream.R	2010-02-09 23:57:49 UTC (rev 3)
+++ pkg/R/dream.R	2010-02-14 23:42:16 UTC (rev 4)
@@ -15,24 +15,46 @@
          maxtime = 60,           ## maximum duration of optimization in seconds
          trace = 0,              ## level of user feedback
          REPORT = 10,            ## number of iterations between reports when trace >= 1
-         thin=FALSE,              ## do reduced sample collection
-         thin.t=NA               ## parameter for reduced sample collection
+         thin=FALSE,             ## do reduced sample collection
+         thin.t=NA,               ## parameter for reduced sample collection
+         metrop.opt=NA            ## which option to use for Metropolis acceptance/rejection; some options also require measurement$sigma
          )            
 
+library(coda)
+
 ## MATLAB function:
-# function [Sequences,Reduced_Seq,X,output,hist_logp] = dream(MCMCPar,ParRange,Measurement,ModelName,Extra,option)
+## function [Sequences,Reduced_Seq,X,output,hist_logp] = dream(MCMCPar,ParRange,Measurement,ModelName,Extra,option)
 
 dream <- function(FUN, pars = list(x = range(0, 1e6)),
+                  measurement,
                   ...,
                   INIT = LHSInit,
                   control = list()) #, do.SSE = FALSE)
 {
+
+  
+  ## dimensions
+  ##  x points in parameter space. matrix nseq x ndim
+  ##  hist.logp matrix. ndraw/nseq x nseq. length nearly ndraw.
+  ##    TODO: removed Iter for simplicity. should have been kept?
+  ##  CR nseq x steps
+  ##  pCR length nCR or scalar
+  ##  lCR length nCR or scalar
+  ##  Table.JumpRate ndim x DEpairs. range (0,~1.683]
+  ##  delta.tot vector of length nCR
+
+  
+  if (is.character(FUN))
   FUN <- get(FUN, mode = "function")
   stopifnot(is.function(FUN))
   stopifnot(is.list(par))
   stopifnot(length(par) > 0)
 
+  pars <- lapply(pars, function(x) if (is.list(x)) x else list(x))
+
+############################
+  ## Initialize variables
+  
   ## update default options with supplied options
   stopifnot(is.list(control))
   control <- modifyList(dreamDefaults(), control)
@@ -41,13 +63,16 @@
     stop("unrecognised options: ",
          toString(names(control)[!isValid]))
 
-  ## Initialize variables
-  
   ## determine number of variables to be optimized
   control$ndim<-length(par)
+  if (is.na(control$nseq)) control$nseq <- control$ndim
 
+  NDIM <- control$ndim
+  NCR <- control$nCR
+  NSEQ <- control$nseq
+  
   ## for each iteration...
-  Iter <- nseq                          #? 1
+  Iter <- NSEQ                          #? 1
   counter <- 2
   iloc <- 1
   teller <- 2
@@ -55,16 +80,42 @@
   
   ## Calculate the parameters in the exponential power density function of Box and Tiao (1973)
   cbwb <- CalcCbWb(control$gamma)
-  control$Cb<-cbwb$cb
+  control$Cb <- cbwb$cb
   control$Wb <- cbwb$wb
+
+  ## Generate the Table with JumpRates (dependent on number of dimensions and number of pairs)
+  Table.JumpRate<-matrix(NA,NDIM,control$DEpairs)
+  for (zz in 1:control$DEpairs) Table.JumpRate[,zz] <- 2.38/sqrt(2*zz*1:NDIM)
   
   ## Initialize the array that contains the history of the log_density of each chain
-  hist.logp<-matrix(NA,floor(control$ndraw/control$nseq),control$nseq)
+  hist.logp<-matrix(NA,floor(control$ndraw/NSEQ),NSEQ)
   
-  ## Derive the number of elements in the output file
-  n.elem<-floor(control$ndraw/control$seq)+1
+  if (control$pCR.Update){
+    ## Calculate multinomial probabilities of each of the nCR CR values
+    pCR <- rep(1/NCR,NCR)
+    
+    ## Calculate the actual CR values based on p
+    CR <- GenCR(pCR,control)
+    lCR <- rep(0,NCR)
+  } else {
+    pCR <- 1/NCR
+    ## Define
+    CR <- matrix(pCR,NSEQ,control$steps)
+    lCR <- NSEQ*control$steps
+  } ##pCR.Update
 
-## the output object
+
+############################
+  ## Initialise output object
+
+  ## dimensions:
+  ## AR matrix n.elem x 2
+  ## outlier vector of variable length
+  ## R.stat matrix n.elem/steps x 1+ndim
+  ## CR matrix n.elem/steps x 1+length(pCR)
+  ## Sequences array 1.25*n.elem x ndim+2 x nseq
+  ## Reduced.Seq array n.elem/thin.t x ndim+2 x nseq
+  
   obj <- list()
   class(obj) <- c("dream", class(obj))
   obj$call <- match.call()
@@ -72,50 +123,32 @@
 
   EXITFLAG <- NA
   EXITMSG <- NULL
- 
-  ## Initialize output information -- AR
-  obj$AR<-matrix(0,n.elem,2)
-  obj$AR[1,1:2]<-control$nseq-1
+
+  ## Derive the number of elements in the output file
+  n.elem<-floor(control$ndraw/NSEQ)+1
+
+  ## Iter + AR at each step
+  obj$AR<-matrix(NA,n.elem,2)
+  obj$AR[1,1]<-NSEQ-1
   
-  ## Initialize output information -- Outlier chains
-  obj$outlier<-c()
-  
-  ## Initialize output information -- R statistic
-  obj$R.stat<-matrix(floor(n.elem/control$steps),control$ndim+1)
+  obj$outlier<-NULL
 
-  if (control$pCR.Update){
-    ## Calculate multinomial probabilities of each of the nCR CR values
-    pCR <- (1/control$nCR)*rep(1,control$nCR)
-    
-    ## Calculate the actual CR values based on p
-    CR <- GenCR(control,pCR)
-    lCR <- rep(0,control$nCR)
-  } else {
-    pCR <- 1/control$NCR
-    ## Define
-    CR <- pCR*matrix(1,control$nseq,control$steps)
-    lCR <- control$nseq*control$steps
-  } ##pCR.Update
+  ##Iter + R statistic for each variable at each step
+  obj$R.stat<-matrix(NA,floor(n.elem/control$steps),NDIM+1)
   
-  ## Initialize output information -- N_CR
-  obj$CR <- matrix(0,floor(n.elem/control$steps),length(pCR)+1)
+  ##Iter + pCR for each CR
+  obj$CR <- matrix(NA,floor(n.elem/control$steps),length(pCR)+1)
+  
+  Sequences <- array(NA, c(floor(1.25*n.elem),NDIM+2,NSEQ))
+  if (!is.null(names(pars))) colnames(Sequences) <- c(names(pars),"p","logp")
+  ## Sequences[1,] <- sapply(pars, mean) ## TODO: include?
 
-  if (isTRUE(control$saveInMemory)) {
-    ## TODO: support this case?
-    Sequences <- array(NA, c(floor(1.25*n.elem),control$ndim+1,control$nseq))
-    ## TODO: embellishments. accurate?
-    if (!is.null(names(pars))) colnames(Sequences) <- names(pars)
-    Sequences[1,] <- sapply(pars, mean)
-  } else {
-    Sequences<-NULL
-  }
+  ## Check whether will save a reduced sample
+  if (thin){
+    Reduced.Seq <- array(NA,c(floor(n.elem/thin.t),NDIM+2,NSEQ))
+  } else Reduced.Seq <- NULL
 
-  ## Generate the Table with JumpRates (dependent on number of dimensions and number of pairs)
-  Table.JumpRate<-matrix(NA,control$ndim,control$DEpairs)
-  for (zz in 1:control$DEpairs) Table.JumpRate[,zz] <- 2.38/sqrt(2*zz*1:control$ndim)
-
-  ## Check whether will save a reduced sample
-  # TODO
+############################
   
   ## Change MCMCPar.steps to make sure to get nice iteration numbers in first loop
   control$steps<-control$steps-1
@@ -124,36 +157,39 @@
   tic <- as.numeric(Sys.time())
   toc <- 0
 
+################################
   
   ## Step 1: Sample s points in the parameter space
   x <- INIT(pars, nseq,...)
 
   ## make each element of pars a list and extract lower / upper
-  pars <- lapply(pars, function(x) if (is.list(x)) x else list(x))
   lower <- sapply(pars, function(x) min(x[[1]]))
   upper <- sapply(pars, function(x) max(x[[1]]))
 
-  #Step 2: Calculate posterior density associated with each value in x
-  X<-CompDensity(x, control = control, FUN = FUN, ...)
-  #Save the initial population, density and log density in one list X
-  X<-cbind(x=x,p=X$p[,1],logp=X$logp)
+  ##Step 2: Calculate posterior density associated with each value in x
+  tmp<-CompDensity(x, control = control, FUN = FUN, ...)
+  ##Save the initial population, density and log density in one list X
+  X<-cbind(x=x,p=tmp$p,logp=tmp$logp)
+  if (!is.null(names(pars))) colnames(X) <- c(names(pars),"p","logp")
     
-  #Initialise the sequences
-  for (qq in 1:control$nseq){
-    Sequences[1,1:(control$ndim+2),qq] <- X[qq,]
+  ##Initialise the sequences
+  for (qq in 1:NSEQ){
+    Sequences[1,,qq] <- X[qq,]
   }
 
-  #Save N_CR in memory and initialize delta_tot
-  obj$CR[1,1:(length(pCR)+1)] <- c(Iter,pCR)
-  delta.tot <- rep(NA,control$nCR)
+  ##Save N_CR in memory and initialize delta.tot
+  obj$CR[1,] <- c(Iter,pCR)
+  delta.tot <- rep(NA,NCR)
   
-  #Save history log density of individual chains
-  hist.logp[1,1:control$nseq] <- c(Iter, X$logp)
+  ##Save history log density of individual chains
+  hist.logp[1,] <- X[,"logp"]
   
-  #Compute R-statistic. Using coda package
-  # TODO: gelman.diag takes mcmc.list as input
-  obj$R.stat[1,1:(control$ndim+1)] <- c(Iter,gelman.diag(Sequences(1:iloc,1:control$ndim,1:control$nseq)))
-  
+  ##Compute R-statistic. Using coda package
+  ## TODO: more elegant way of using coda. And check for correctness
+  ## TODO: alternatively, convert matlab implementation
+  obj$R.stat[1,] <- c(Iter,gelman.diag(as.mcmc.list(lapply(1:NSEQ,function(i) as.mcmc(Sequences[1:iloc,1:NDIM,i])))))
+
+################################
   ##Start iteration
   while (Iter < control$ndraw) {
 
@@ -163,27 +199,27 @@
       new_teller <- new_teller + 1 ## counter for thinning
 
       ## Define the current locations and associated posterior densities
-      x.old <- X[,1:control$ndim]
-      p.old <- X[,1:(control$ndim+1)]
-      logp.old <- X[,1:(control$ndim+2)]
+      x.old <- X[,1:NDIM]
+      p.old <- X[,NDIM+1]
+      logp.old <- X[,NDIM+2]
 
       ## Now generate candidate in each sequence using current point and members of X
       tmp <- offde(x.old, control = control,
-                   CR=CR,
-                   lower = lower, upper = upper,bound.handling=bound.handling,
+                   CR=CR[,gen.number],
+                   lower = lower, upper = upper,
                    Table.JumpRate=Table.JumpRate)
       x.new <- tmp$x.new
       CR[,gen.number] <- tmp$CR
 
       ## Now compute the likelihood of the new points
-      tmp <- CompDensity(xnew, control = control, FUN = FUN, ...)
+      tmp <- CompDensity(x.new, control = control, FUN = FUN, ...)
       p.new <- tmp$p
       logp.new <- tmp$logp
 
       ## Now apply the acceptance/rejectance rule for the chain itself
       tmp <- metrop(x.new,p.new,logp.new,
                     x.old,p.old,logp.old,
-                    N,sigma,control,option
+                    measurement,control
                     )
       newgen <- tmp$newgen
       alpha12 <- tmp$alpha
@@ -194,9 +230,8 @@
       ## accept2,ItExtra not required
 
       ## Update location in sequence and update the locations of the Sequences with the current locations
-      if (isTRUE(control$saveInMemory)) iloc <- iloc + 1
-      ##Sequences[iloc, 1:(NDIM+2), 1:nseq] <- matrix(newgen, NDIM+2, nseq)
-      Sequences[1:(NDIM+2), 1:nseq] <- matrix(newgen, NDIM+2, nseq)
+      iloc <- iloc + 1
+      Sequences[iloc,,] <- t(newgen)
 
       ## Check reduced sample collection
       if (thin && new_teller == thin.t){
@@ -204,8 +239,7 @@
         iloc.2 <- iloc.2+1
         new_teller <- 0
         ## Reduced sample collection
-        Reduced.Seq[iloc.2,1:(control$ndim+2),1:control$nseq] <-
-          array(newgen,c(1,control$ndim+2,control$nseq))
+        Reduced.Seq[iloc.2,,] <- t(newgen)
       }
 
       ## And update X using current members of Sequences
@@ -214,21 +248,21 @@
       if (control$pCR.Update) {
         ## Calculate the standard deviation of each dimension of X
         ## TODO: matlab syntax is unclear - seems to be elementwise?
-        r <- sd(c(X[,1:control$ndim]))
+        r <- sd(c(X[,1:NDIM]))
         ## Compute the Euclidean distance between new X and old X
-        delta.normX <- colSums(((x.old-X[,1:control$ndim])/r)^2,2)
+        delta.normX <- rowSums(((x.old-X[,1:NDIM])/r)^2)
         ## Use this information to update sum_p2 to update N_CR
-        delta.tot <- CalcDelta(control,delta.tot,delta.normX,CR[,gen.number])
+        delta.tot <- CalcDelta(NCR,delta.tot,delta.normX,CR[,gen.number])
       }
 
       ## Update hist.logp
-      hist.logp[counter,1:(control$nseq+1)] <- c(Iter,X[,control$ndim+2])
+      hist.logp[counter,] <- X[,NDIM+2]
       
       ## Save some important output -- Acceptance Rate
       obj$AR[counter] <- 100 * sum(accept) / nseq
 
       ## Update Iteration and counter
-      Iter <- Iter + control$nseq
+      Iter <- Iter + NSEQ
       counter <- counter + 1
     } ##for gen.number steps
 
@@ -244,33 +278,38 @@
     if (Iter <= 0.1 * control$ndraw) {
       if (control$pCR.Update) {
         ## Update pCR values
-        tmp <- AdaptpCR(CR, delta_tot, lCR, control)
+        tmp <- AdaptpCR(CR, delta.tot, lCR, control)
         pCR <- tmp$pCR
         lCR <- tmp$lCR
       }
     } else {
       ## See whether there are any outlier chains, and remove them to current best value of X
-      tmp <- RemOutlierChains(X,
-                              Sequences[iloc,,drop=FALSE],
-                              hist.logp[1:(counter-1),2:(control$nseq+1)],Iter,outlier,control
-                              )
-      Sequences[iloc,,] <- tmp$Sequences
-      X <- tmp$X
-      hist.logp[1:(counter-1),2:(control$nseq+1)] <- tmp$hist.logp
-      obj$outlier <- tmp$outlier
-    }
+      tmp <- RemOutlierChains(X,hist.logp[1:(counter-1),],control)
+      ## Loop over each outlier chain (if length>0)
+      for (out.id in tmp$chain.id){
+        ## Draw random other chain -- cannot be the same as current chain
[TRUNCATED]
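
For reference, the jump-rate table built near the top of dream() follows the usual DE-MC scaling 2.38/sqrt(2*d), tabulated by number of dimensions and DE pairs; a standalone sketch of that loop:

  NDIM <- 3; DEpairs <- 2
  Table.JumpRate <- matrix(NA, NDIM, DEpairs)
  for (zz in 1:DEpairs) Table.JumpRate[, zz] <- 2.38/sqrt(2*zz*1:NDIM)
  round(Table.JumpRate, 3)
  ##       [,1]  [,2]
  ## [1,] 1.683 1.190
  ## [2,] 1.190 0.841
  ## [3,] 0.972 0.687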

To get the complete diff run:
    svnlook diff /svnroot/dream -r 4

