[Vegan-commits] r1935 - in pkg/vegan: R inst man tests/Examples

Wed Oct 5 19:31:59 CEST 2011

Author: jarioksa
Date: 2011-10-05 19:31:59 +0200 (Wed, 05 Oct 2011)
New Revision: 1935

Modified:
   pkg/vegan/R/simulate.rda.R
   pkg/vegan/inst/ChangeLog
   pkg/vegan/man/simulate.rda.Rd
   pkg/vegan/tests/Examples/vegan-Ex.Rout.save
Log:
implement 'nsim' for simulate.rda/cca: returns a 'simmat' object

Modified: pkg/vegan/R/simulate.rda.R
===================================================================

--- pkg/vegan/R/simulate.rda.R	2011-10-05 15:32:23 UTC (rev 1934)
+++ pkg/vegan/R/simulate.rda.R	2011-10-05 17:31:59 UTC (rev 1935)
@@ -12,24 +12,58 @@
         RNGstate <- structure(seed, kind = as.list(RNGkind()))
         on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
     }
-    ## Proper simulation: very similar for simulate.lm, but produces a
-    ## response matrix.
-    if (nsim > 1)
-        .NotYetUsed("nsim")
+    ## indx can be an output of permute::shuffleSet in which case it
+    ## is a nsim x nrow matrix, or it may be a single vector, in which
+    ## case it will changed to shuffleSet
+    if (!is.null(indx))
+        if (is.vector(indx))
+            dim(indx) <- c(1, length(indx))
+    ## If nsim is missing, take it from indx (if given)
+    if (missing(nsim) && !is.null(indx))
+        nsim <- nrow(indx)
+    ## Check that dims match
+    if (!is.null(indx))
+        if(nrow(indx) != nsim)
+            stop(gettextf("'nsim' (%d) and no. of 'indx' rows (%d) do not match",
+                          nsim, nrow(indx)))
+    ## Proper simulation: very similar for simulate.lm, but produces
+    ## an array of response matrices
+    
     ftd <- predict(object, type = "response", rank = rank)
     ## pRDA: add partial Fit to the constrained
     if (!is.null(object$pCCA))
         ftd <- ftd + object$pCCA$Fit
-    if (is.null(indx))
-        ans <- as.data.frame(ftd + matrix(rnorm(length(ftd), 
+    ## Generate an array
+    ans <- array(0, c(dim(ftd), nsim))
+    for (i in seq_len(nsim)) {
+        if (is.null(indx))
+            ans[,,i] <- as.matrix(ftd + matrix(rnorm(length(ftd), 
                sd = outer(rep(1,nrow(ftd)), sd(object$CA$Xbar))), 
                nrow = nrow(ftd)))
-    else
-        ans <- as.data.frame(ftd + object$CA$Xbar[indx,])
+        else
+            ans[,,i] <- as.matrix(ftd + object$CA$Xbar[indx[i,],])
+    }
+    ## set RNG attributes
     if (is.null(indx))
         attr(ans, "seed") <- RNGstate
-    else
-        attr(ans, "seed") <- indx
+    else 
+        attr(ans, "seed") <- "index"
+    ## set commsim attributes if nsim > 1, else return a 2-dim matrix
+    if (nsim == 1) {
+        ans <- ans[,,1]
+        attributes(ans) <- attributes(ftd)
+    } else {
+        attr(ans, "data") <- ftd
+        attr(ans, "method") <- paste("simulate", ifelse(is.null(indx),
+                                                        "parametric", "index"))
+        attr(ans, "binary") <- FALSE
+        attr(ans, "isSeq") <- FALSE
+        attr(ans, "mode") <- "double"
+        attr(ans, "start") <- 1L
+        attr(ans, "end") <- as.integer(nsim)
+        attr(ans, "thin") <- 1L
+        class(ans) <- c("simulate.rda", "simmat", "array")
+    }
     ans
 }
 
@@ -52,10 +86,16 @@
         RNGstate <- structure(seed, kind = as.list(RNGkind()))
         on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
     }
-    ## Proper simulation: very similar for simulate.lm, but produces a
-    ## response matrix.
-    if (nsim > 1)
-        .NotYetUsed("nsim")
+    ## Preparations like in simulate.rda()
+    if (!is.null(indx))
+        if (is.vector(indx))
+            dim(indx) <- c(1, length(indx))
+    if (missing(nsim) && !is.null(indx))
+        nsim <- nrow(indx)
+    if (!is.null(indx))
+        if(nrow(indx) != nsim)
+            stop(gettextf("'nsim' (%d) and no. of 'indx' rows (%d) do not match",
+                          nsim, nrow(indx)))   
     ## Need sqrt of rowsums for weighting
     sq.r <- sqrt(object$rowsum)
     ## Fitted value
@@ -65,21 +105,43 @@
         ftd <- ftd + object$pCCA$Fit
     ## Residual Xbar need weighting and back-weighting
     Xbar <- sweep(object$CA$Xbar, 1, sq.r, "*")
-    if (is.null(indx)) {
-        ans <- matrix(rnorm(length(ftd), 
+    ## Simulation
+    ans <- array(0, c(dim(ftd), nsim))
+    for (i in seq_len(nsim)) {
+        if (is.null(indx)) {
+            tmp <- matrix(rnorm(length(ftd), 
                sd = outer(rep(1,nrow(ftd)), sd(Xbar))), 
                nrow = nrow(ftd))
-        ans <- as.data.frame(ftd + sweep(ans, 1, sq.r, "/"))
+            ans[,,i] <- as.matrix(ftd + sweep(tmp, 1, sq.r, "/"))
+        }
+        else 
+            ans[,,i] <- as.matrix(ftd + sweep(Xbar[indx[i,],], 1, sq.r, "/"))
     }
-    else 
-        ans <- as.data.frame(ftd + sweep(Xbar[indx,], 1, sq.r, "/"))
     ## From internal form to the original form with fixed marginal totals
     rc <- object$rowsum %o% object$colsum
-    ans <- (ans * sqrt(rc) + rc) * object$grand.total
+    for (i in seq_len(nsim))
+        ans[,,i] <- (ans[,,i] * sqrt(rc) + rc) * object$grand.total
+    ## RNG attributes
     if (is.null(indx))
         attr(ans, "seed") <- RNGstate
     else
-        attr(ans, "seed") <- indx
+        attr(ans, "seed") <- "index"
+    ## set commsim attributes if nsim > 1, else return a 2-dim matrix
+    if (nsim == 1) {
+        ans <- ans[,,1]
+        attributes(ans) <- attributes(ftd)
+    } else {
+        attr(ans, "data") <- ftd
+        attr(ans, "method") <- paste("simulate", ifelse(is.null(indx),
+                                                        "parametric", "index"))
+        attr(ans, "binary") <- FALSE
+        attr(ans, "isSeq") <- FALSE
+        attr(ans, "mode") <- "double"
+        attr(ans, "start") <- 1L
+        attr(ans, "end") <- as.integer(nsim)
+        attr(ans, "thin") <- 1L
+        class(ans) <- c("simulate.cca", "simmat", "array")
+    }    
     ans
 }
 

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2011-10-05 15:32:23 UTC (rev 1934)
+++ pkg/vegan/inst/ChangeLog	2011-10-05 17:31:59 UTC (rev 1935)
@@ -4,6 +4,18 @@
 
 Version 2.1-3 (opened October 4, 2011)
 
+	* simulate.rda/cca: implemented 'nsim' or an option to generate an
+	array of simulated matrices inheriting from "simmat" object and
+	using print.simmat() for a compact display. The attr(x, "data")
+	needs consideration: now it is the fitted value, but this makes
+	little sense if we are using the result in oecosimu(). If 'nsim =
+	1', similar 2-dim matrix is returned as before so that
+	cca(simulate(mod)) still work. For 'nsim > 1', the 'indx' argument
+	should be compliant with permute::shuffleSet and the number of
+	rows in 'indx' gives the number of simulations. Implemented for
+	rda() and cca() results, but not for capscale() where simulate
+	returns a "dist" object which is nasty to pack into an array.
+
 	* oecosimu: An attempt to set 'parallel' processing in evaluating
 	the statistic, and only evaluating the statistic -- the simulation
 	of null models is not influenced. This uses socket cluster and is

Modified: pkg/vegan/man/simulate.rda.Rd
===================================================================
--- pkg/vegan/man/simulate.rda.Rd	2011-10-05 15:32:23 UTC (rev 1934)
+++ pkg/vegan/man/simulate.rda.Rd	2011-10-05 17:31:59 UTC (rev 1935)
@@ -15,18 +15,29 @@
 \method{simulate}{rda}(object, nsim = 1, seed = NULL, indx = NULL, rank = "full", ...)
 }
 \arguments{
-  \item{object}{an object representing a fitted \code{\link{rda}} model.}
-  \item{nsim}{number of response vectors to simulate. (Not yet used, and 
-    values above 1 will give an error). }
+
+  \item{object}{an object representing a fitted \code{\link{rda}},
+    \code{\link{cca}} or \code{\link{capscale}} model.}
+
+  \item{nsim}{number of response matrices to be simulated. Only one
+    dissimilarity matrix is returned for \code{\link{capscale}}, and
+    larger \code{nsim} is an error.}
+
   \item{seed}{an object specifying if and how the random number
     generator should be initialized (\sQuote{seeded}). See 
     \code{\link{simulate}} for details. }
+
   \item{indx}{Index of residuals added to the fitted values, such as
-    produced by  \code{\link{permuted.index}},
-    \code{\link{shuffle}} or \code{\link{sample}}. The index can
-    have duplicate entries so that bootstrapping is allowed. If null,
-    parametric simulation is used and Gaussian error is added to the
-    fitted values.}
+    produced by \code{\link[permute]{shuffleSet}} or
+    \code{\link{sample}}.  The index can have duplicate entries so
+    that bootstrapping is allowed. If \code{nsim} \eqn{>1}, the output
+    should be compliant with \code{\link[permute]{shuffleSet}} with
+    one line for each simulation.  If \code{nsim} is missing, the
+    number of rows of \code{indx} is used to define the number of
+    simulations, but if \code{nsim} is given, it should match number
+    of rows in \code{indx}. If null, parametric simulation is used and
+    Gaussian error is added to the fitted values.}
+
   \item{rank}{The rank of the constrained component: passed to
     \code{\link{predict.rda}} or \code{\link{predict.cca}}. }
   \item{\dots}{additional optional arguments (ignored). }
@@ -40,8 +51,8 @@
   fitting the constraints. Alternatively, the function can take a
   permutation index that is used to add permuted residuals
   (unconstrained component) to the fitted values. Raw data are used in
-  \code{\link{rda}}. Internal Chi-square transformed data in
-  \code{\link{cca}} within the function, but the returned data frame is 
+  \code{\link{rda}}. Internal Chi-square transformed data are used in
+  \code{\link{cca}} within the function, but the returned matrix is 
   similar to the original input data. The simulation is performed on
   internal metric scaling data in \code{\link{capscale}}, but the
   function returns the Euclidean distances calculated from the simulated
@@ -49,17 +60,20 @@
   dimensions are ignored.
 }
 
-\value{ Returns a data frame with similar additional arguments on
-  random number seed as \code{\link{simulate}}.  }
+\value{ If \code{nsim = 1}, returns a matrix or dissimilarities (in
+  \code{\link{capscale}}) with similar additional arguments on random
+  number seed as \code{\link{simulate}}. If \code{nsim > 1}, returns a
+  similar array as returned by \code{\link{simulate.nullmodel}} with
+  similar attributes.  }
 
 \author{Jari Oksanen}
 
 
 \seealso{ \code{\link{simulate}} for the generic case and for
-  \code{\link{lm}} objects. Functions \code{\link{fitted.rda}} and
-  \code{\link{fitted.cca}} return fitted values without the error
-  component.  
-}
+  \code{\link{lm}} objects, and \code{\link{simulate.nullmodel}} for
+  community null model simulation. Functions \code{\link{fitted.rda}}
+  and \code{\link{fitted.cca}} return fitted values without the error
+  component.  }
 
 \examples{
 data(dune)
@@ -70,6 +84,8 @@
 ## An impression of confidence regions of site scores
 plot(mod, display="sites")
 for (i in 1:5) lines(procrustes(mod, update(mod, simulate(mod) ~ .)), col="blue")
+## Simulate a set of null communities with permutation of residuals
+simulate(mod, indx = shuffleSet(nrow(dune), 99))
 }
 \keyword{ models }
 \keyword{ datagen }

Modified: pkg/vegan/tests/Examples/vegan-Ex.Rout.save
===================================================================
--- pkg/vegan/tests/Examples/vegan-Ex.Rout.save	2011-10-05 15:32:23 UTC (rev 1934)
+++ pkg/vegan/tests/Examples/vegan-Ex.Rout.save	2011-10-05 17:31:59 UTC (rev 1935)
@@ -23,7 +23,7 @@
 > options(warn = 1)
 > library('vegan')
 Loading required package: permute
-This is vegan 2.1-2
+This is vegan 2.1-3
 > 
 > assign(".oldSearch", search(), pos = 'CheckExEnv')
 > cleanEx()
@@ -161,7 +161,7 @@
 
 Formula:
 y ~ poly(x1, 1) + poly(x2, 1)
-<environment: 0x102616318>
+<environment: 0x101f4a668>
 Total model degrees of freedom 3 
 
 GCV score: 0.0427924
@@ -4748,7 +4748,7 @@
 
 Formula:
 y ~ s(x1, x2, k = knots)
-<environment: 0x1076efd50>
+<environment: 0x107369758>
 
 Estimated degrees of freedom:
 6.4351  total = 7.435071 
@@ -4764,7 +4764,7 @@
 
 Formula:
 y ~ s(x1, x2, k = knots)
-<environment: 0x107d2c520>
+<environment: 0x107373118>
 
 Estimated degrees of freedom:
 6.1039  total = 7.103853 
@@ -4920,7 +4920,7 @@
 
 Formula:
 y ~ s(x1, x2, k = knots)
-<environment: 0x1071b6878>
+<environment: 0x106e7fd88>
 
 Estimated degrees of freedom:
 8.9275  total = 9.927492 
@@ -4933,7 +4933,7 @@
 
 Formula:
 y ~ s(x1, x2, k = knots)
-<environment: 0x107335828>
+<environment: 0x1075d88e0>
 
 Estimated degrees of freedom:
 7.7529  total = 8.75294 
@@ -4946,7 +4946,7 @@
 
 Formula:
 y ~ s(x1, x2, k = knots)
-<environment: 0x107559a78>
+<environment: 0x104c35a78>
 
 Estimated degrees of freedom:
 8.8962  total = 9.89616 
@@ -6038,6 +6038,13 @@
 > ## An impression of confidence regions of site scores
 > plot(mod, display="sites")
 > for (i in 1:5) lines(procrustes(mod, update(mod, simulate(mod) ~ .)), col="blue")
+> ## Simulate a set of null communities with permutation of residuals
+> simulate(mod, indx = shuffleSet(nrow(dune), 99))
+An object of class “simulate.rda” 
+‘simulate index’ method (abundance, non-sequential)
+20 x 30 matrix
+Number of permuted matrices = 99 
+
 > 
 > 
 > 
@@ -7208,7 +7215,7 @@
 
 Formula:
 y ~ s(x1, x2, k = knots)
-<environment: 0x107b75200>
+<environment: 0x1051f2b18>
 
 Estimated degrees of freedom:
 2  total = 3 
@@ -7684,7 +7691,7 @@
 > ### * <FOOTER>
 > ###
 > cat("Time elapsed: ", proc.time() - get("ptime", pos = 'CheckExEnv'),"\n")
-Time elapsed:  101.036 1.382 104.093 0 0 
+Time elapsed:  104.669 1.275 107.14 0 0 
 > grDevices::dev.off()
 null device 
           1