[Vegan-commits] r1873 - in branches/2.0: . R inst man

Thu Sep 22 17:05:46 CEST 2011

Author: jarioksa
Date: 2011-09-22 17:05:46 +0200 (Thu, 22 Sep 2011)
New Revision: 1873

Added:
   branches/2.0/R/raupcrick.R
   branches/2.0/man/raupcrick.Rd
Modified:
   branches/2.0/NAMESPACE
   branches/2.0/R/centroids.cca.R
   branches/2.0/R/commsimulator.R
   branches/2.0/R/meandist.R
   branches/2.0/R/nesteddisc.R
   branches/2.0/R/permuted.index.R
   branches/2.0/R/poolaccum.R
   branches/2.0/inst/ChangeLog
   branches/2.0/man/MDSrotate.Rd
   branches/2.0/man/designdist.Rd
   branches/2.0/man/oecosimu.Rd
   branches/2.0/man/vegdist.Rd
Log:
merge r1835,72 (raupcrick), r1838,45,46 (speed-up), r1869 (meandist bug fix)

Modified: branches/2.0/NAMESPACE
===================================================================

--- branches/2.0/NAMESPACE	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/NAMESPACE	2011-09-22 15:05:46 UTC (rev 1873)
@@ -21,7 +21,7 @@
 ordixyplot, orglpoints, orglsegments, orglspider, orgltext, 
 pcnm, permatfull, permatswap, permutest, poolaccum, postMDS, prc,
 prestondistr, prestonfit, procrustes, protest, radfit, radlattice,
-rankindex, rarefy, rda, renyiaccum, renyi, rrarefy, scores,
+rankindex, rarefy,raupcrick, rda, renyiaccum, renyi, rrarefy, scores,
 showvarparts, spandepth, spantree, specaccum, specnumber,
 specpool2vect, specpool, spenvcor, stepacross, stressplot, swan,
 taxa2dist, taxondive, tolerance, treedist, treedive, treeheight,

Modified: branches/2.0/R/centroids.cca.R
===================================================================
--- branches/2.0/R/centroids.cca.R	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/centroids.cca.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -1,24 +1,34 @@
-"centroids.cca" <-
-    function (x, mf, wt) 
+`centroids.cca` <-
+    function(x, mf, wt)
 {
-    mf <- mf[, unlist(lapply(mf, is.factor)), drop = FALSE]
-    if (ncol(mf) == 0) 
+    facts <- sapply(mf, is.factor)
+    if (!any(facts))
         return(NULL)
+    mf <- mf[, facts, drop = FALSE]
     if (missing(wt)) 
         wt <- rep(1, nrow(mf))
-    x <- sweep(x, 1, wt, "*")
-    workhorse <- function(mf, x, wt) {
-        sw <- tapply(wt, mf, sum)
-        swx <- apply(x, 2, tapply, mf, sum)
-        sweep(swx, 1, sw, "/")
-    }
-    tmp <- lapply(mf, workhorse, x, wt)
+    ind <- seq_len(nrow(mf))
+    workhorse <- function(x, wt)
+        colSums(x * wt) / sum(wt)
+    tmp <- lapply(mf, function(fct)
+                  tapply(ind, fct, function(i) workhorse(x[i,, drop=FALSE], wt[i])))
+    tmp <- lapply(tmp, function(z) sapply(z, rbind))
     pnam <- labels(tmp)
     out <- NULL
-    for (i in 1:length(tmp)) {
-        rownames(tmp[[i]]) <- paste(pnam[i], rownames(tmp[[i]]), 
-                                    sep = "")
-        out <- rbind(out, tmp[[i]])
+    if (ncol(x) == 1) {
+        for(i in 1:length(tmp)) {
+            names(tmp[[i]]) <- paste(pnam[i], names(tmp[[i]]), sep="")
+            out <- c(out, tmp[[i]])
+            out <- matrix(out, nrow=1)
+        }  
+    } else {
+        for (i in 1:length(tmp)) {
+            colnames(tmp[[i]]) <- paste(pnam[i], colnames(tmp[[i]]), 
+                                        sep = "")
+            out <- cbind(out, tmp[[i]])
+        }
     }
+    out <- t(out)
+    colnames(out) <- colnames(x)
     out
 }

Modified: branches/2.0/R/commsimulator.R
===================================================================
--- branches/2.0/R/commsimulator.R	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/commsimulator.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -18,18 +18,18 @@
             p <- p*p
         out <- matrix(0, nrow=nr, ncol=nc)
         for (i in 1:nr)
-            out[i,sample(nc, rs[i], prob=p)] <- 1 
+            out[i,sample.int(nc, rs[i], prob=p)] <- 1 
     }
     else if (method == "r00") {
         out <- numeric(nr*nc)
-        out[sample(length(out), sum(x))] <- 1
+        out[sample.int(length(out), sum(x))] <- 1
         dim(out) <- dim(x)
     }
     else if (method == "c0") {
         cs <- colSums(x)
         out <- matrix(0, nrow=nr, ncol=nc)
         for (j in 1:nc)
-            out[sample(nr, cs[j]), j] <- 1
+            out[sample.int(nr, cs[j]), j] <- 1
     } else if (method == "swap") {
         x <- as.matrix(x)
         out <- .C("swap", m = as.integer(x), as.integer(nrow(x)),

Modified: branches/2.0/R/meandist.R
===================================================================
--- branches/2.0/R/meandist.R	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/meandist.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -1,26 +1,27 @@
 `meandist` <-
     function(dist, grouping, ...)
 {
+    if (!inherits(dist, "dist"))
+        stop("'dist' must be dissimilarity object inheriting from", dQuote(dist))
     ## check that 'dist' are dissimilarities (non-negative)
     if (any(dist < -sqrt(.Machine$double.eps)))
         warning("some dissimilarities are negative -- is this intentional?")
-    ## merge levels so that lower is always first (filling lower triangle)
-    mergenames <- function(X, Y, ...) {
-        xy <- cbind(X, Y)
-        xy <- apply(xy, 1, sort)
-        apply(xy, 2, paste, collapse = " ")
-    }
     grouping <- factor(grouping, exclude = NULL)
-    cl <- outer(grouping, grouping, mergenames)
-    cl <- cl[lower.tri(cl)]
+    ## grouping for rows and columns
+    grow <- grouping[as.dist(row(as.matrix(dist)))]
+    gcol <- grouping[as.dist(col(as.matrix(dist)))]
+    ## The row index must be "smaller" of the factor levels so that
+    ## all means are in the lower triangle, and upper is NA
+    first <- as.numeric(grow) >= as.numeric(gcol)
+    cl1 <- ifelse(first, grow, gcol)
+    cl2 <- ifelse(!first, grow, gcol)
     ## Cannot have within-group dissimilarity for group size 1
     n <- table(grouping)
     take <- matrix(TRUE, nlevels(grouping), nlevels(grouping))
     diag(take) <- n > 1
     take[upper.tri(take)] <- FALSE
     ## Get output matrix
-    out <- matrix(NA, nlevels(grouping), nlevels(grouping))
-    out[take] <- tapply(dist, cl, mean)
+    out <- tapply(dist, list(cl1, cl2), mean)
     out[upper.tri(out)] <- t(out)[upper.tri(out)]
     rownames(out) <- colnames(out) <- levels(grouping)
     class(out) <- c("meandist", "matrix")

Modified: branches/2.0/R/nesteddisc.R
===================================================================
--- branches/2.0/R/nesteddisc.R	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/nesteddisc.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -42,7 +42,7 @@
                 perm <- matrix(allPerms(le[i]), ncol=le[i]) + cle[i]
                 ## Take at maximum NITER cases from complete enumeration
                 if (nrow(perm) >= NITER) {
-                    perm <- perm[sample(nrow(perm), NITER),]
+                    perm <- perm[sample.int(nrow(perm), NITER),]
                     ties <- TRUE
                 }
             }

Modified: branches/2.0/R/permuted.index.R
===================================================================
--- branches/2.0/R/permuted.index.R	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/permuted.index.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -2,7 +2,7 @@
     function (n, strata) 
 {
     if (missing(strata) || is.null(strata)) 
-        out <- sample(n, n)
+        out <- sample.int(n, n)
     else {
         out <- 1:n
         inds <- names(table(strata))

Modified: branches/2.0/R/poolaccum.R
===================================================================
--- branches/2.0/R/poolaccum.R	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/poolaccum.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -10,7 +10,7 @@
     ## specpool() is slow, but the vectorized versions below are
     ## pretty fast
     for (i in 1:permutations) {
-        take <- sample(n)
+        take <- sample.int(n, n)
         tmp <- apply(x[take,] > 0, 2, cumsum)
         S[,i] <- rowSums(tmp > 0)
         ## All-zero species are taken as *known* to be missing in

Copied: branches/2.0/R/raupcrick.R (from rev 1835, pkg/vegan/R/raupcrick.R)
===================================================================
--- branches/2.0/R/raupcrick.R	                        (rev 0)
+++ branches/2.0/R/raupcrick.R	2011-09-22 15:05:46 UTC (rev 1873)
@@ -0,0 +1,27 @@
+`raupcrick` <-
+    function(comm, null = "r1", nsimul = 999, chase = FALSE)
+{
+    comm <- as.matrix(comm)
+    comm <- ifelse(comm > 0, 1, 0)
+    ## 'tri' is a faster alternative to as.dist(): it takes the lower
+    ## diagonal, but does not set attributes of a "dist" object
+    N <- nrow(comm)
+    tri <- matrix(FALSE, N, N)
+    tri <- row(tri) > col(tri)
+    ## function(x) designdist(x, "J", terms="binary") does the same,
+    ## but is much slower
+    sol <- oecosimu(comm, function(x) tcrossprod(x)[tri], method = null,
+                    nsimul = nsimul,
+                    alternative = if (chase) "greater" else "less")
+    ## Chase et al. way, or the standard way
+    if (chase)
+        out <- 1 - sol$oecosimu$pval
+    else
+        out <- sol$oecosimu$pval
+    ## set attributes of a "dist" object
+    attributes(out) <- list("class"=c("raupcrick", "dist"), "Size"=N,
+                            "Labels" = rownames(comm), "call" =
+                            match.call(), "Diag" = FALSE, "Upper" = FALSE,
+                            "method" = "raupcrick")
+    out
+}

Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/inst/ChangeLog	2011-09-22 15:05:46 UTC (rev 1873)
@@ -4,6 +4,12 @@
 
 Version 2.0-1 (opened September 8, 2011)
 
+	* merge r1872: raupcrick doc fixes.
+	* merge r1869: meandist re-ordering bug fix.
+	* merge r1846: tweak speed (a bit).
+	* merge r1845: faster centroids.cca.
+	* merge r1838: a bit faster example(MDSrotate).
+	* merge r1835: add raupcrick.
 	* merge r1811: permatswap bug fix in nestedness.c.
 	* merge r1810: -pedatic -Wall fixes in monoMDS.f, ordering.f
 	

Modified: branches/2.0/man/MDSrotate.Rd
===================================================================
--- branches/2.0/man/MDSrotate.Rd	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/MDSrotate.Rd	2011-09-22 15:05:46 UTC (rev 1873)
@@ -57,7 +57,7 @@
 mod <- monoMDS(vegdist(varespec))
 mod <- with(varechem, MDSrotate(mod, pH))
 plot(mod)
-ef <- envfit(mod ~ pH, varechem)
+ef <- envfit(mod ~ pH, varechem, permutations = 0)
 plot(ef)
 ordisurf(mod ~ pH, varechem, knots = 1, add = TRUE)
 }

Modified: branches/2.0/man/designdist.Rd
===================================================================
--- branches/2.0/man/designdist.Rd	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/designdist.Rd	2011-09-22 15:05:46 UTC (rev 1873)
@@ -59,7 +59,7 @@
     \code{1-J/sqrt(A*B)} \tab \code{"binary"} \tab Ochiai \cr
     \code{1-J/sqrt(A*B)} \tab \code{"quadratic"} \tab cosine
     complement \cr
-    \code{1-phyper(J-1, A, P-A, B)} \tab \code{"binary"} \tab Raup-Crick 
+    \code{1-phyper(J-1, A, P-A, B)} \tab \code{"binary"} \tab Raup-Crick (but see \code{\link{raupcrick}})
   }
 
   The function \code{designdist} can implement most dissimilarity

Modified: branches/2.0/man/oecosimu.Rd
===================================================================
--- branches/2.0/man/oecosimu.Rd	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/oecosimu.Rd	2011-09-22 15:05:46 UTC (rev 1873)
@@ -58,21 +58,23 @@
 \details{
   
   Function \code{oecosimu} is a wrapper that evaluates a nestedness
-  statistic using function given by \code{nestfun}, and then simulates a
-  series of null models using \code{commsimulator} or 
-  other functions (depending on method argument), and evaluates the
-  statistic on these null models. The \pkg{vegan} packages contains some
-  nestedness functions that are described separately
+  statistic using function given by \code{nestfun}, and then simulates
+  a series of null models using \code{commsimulator} or other
+  functions (depending on method argument), and evaluates the
+  statistic on these null models. The \pkg{vegan} packages contains
+  some nestedness functions that are described separately
   (\code{\link{nestedchecker}}, \code{\link{nesteddisc}},
   \code{\link{nestedn0}}, \code{\link{nestedtemp}}), but many other
-  functions can be used as long as they are meaningful with binary
-  or quantitative community models. An applicable function must return either the
-  statistic as a plain number, or as a list element \code{"statistic"}
-  (like \code{\link{chisq.test}}), or in an item whose name is given in
-  the argument \code{statistic}.  The statistic can be a single number
-  (like typical for a nestedness index), or it can be a vector. The
-  vector indices can be used to analyse site (row) or species (column)
-  properties, see \code{\link{treedive}} for an example.
+  functions can be used as long as they are meaningful with binary or
+  quantitative community models.  An applicable function must return
+  either the statistic as a plain number, or as a list element
+  \code{"statistic"} (like \code{\link{chisq.test}}), or in an item
+  whose name is given in the argument \code{statistic}.  The statistic
+  can be a single number (like typical for a nestedness index), or it
+  can be a vector. The vector indices can be used to analyse site
+  (row) or species (column) properties, see \code{\link{treedive}} for
+  an example. Raup-Crick index (\code{\link{raupcrick}}) gives an
+  example of using a dissimilarities index.
 
   Function \code{commsimulator} implements binary (presence/absence) 
   null models for community composition.

Copied: branches/2.0/man/raupcrick.Rd (from rev 1835, pkg/vegan/man/raupcrick.Rd)
===================================================================
--- branches/2.0/man/raupcrick.Rd	                        (rev 0)
+++ branches/2.0/man/raupcrick.Rd	2011-09-22 15:05:46 UTC (rev 1873)
@@ -0,0 +1,118 @@
+\name{raupcrick}
+\alias{raupcrick}
+
+\title{
+  Raup-Crick Dissimilarity with Unequal Sampling Densities of Species
+}
+
+\description{ Function finds the Raup-Crick dissimilarity which is a
+  probability of number of co-occurring species with species
+  occurrence probabilities proportional to species frequencies.  }
+
+\usage{
+raupcrick(comm, null = "r1", nsimul = 999, chase = FALSE)
+}
+
+\arguments{
+
+  \item{comm}{Community data which will be treated as presence/absence data.}
+
+  \item{null}{Null model used as the \code{method} in
+     \code{\link{oecosimu}}.}
+
+  \item{nsimul}{Number of null communities for assessing the
+     dissimilarity index.}
+
+  \item{chase}{Use the Chase et al. (2011) method of tie handling (not
+     recommended except for comparing the results against the Chase
+     script).}
+
+}
+
+\details{Raup-Crick index is the probability that compared sampling
+  units have non-identical species composition.  This probability can
+  be regarded as a dissimilarity, although it is not metric: identical
+  sampling units can have dissimilarity slightly above \eqn{0}, the
+  dissimilarity can be nearly zero over a range of shared species, and
+  sampling units with no shared species can have dissimilarity
+  slightly below \eqn{1}. Moreover, communities sharing rare species
+  appear as more similar (lower probability of finding rare species
+  together), than communities sharing the same number of common
+  species.
+
+  The function will always treat the data as binary (presence/
+  absence).
+
+  The probability is assessed using simulation with
+  \code{\link{oecosimu}} where the test statistic is the observed
+  number of shared species between sampling units evaluated against a
+  community null model (see Examples).  The default null model is
+  \code{"r1"} where the probability of selecting species is
+  proportional to the species frequencies.
+
+  The \code{\link{vegdist}} function implements a variant of the
+  Raup-Crick index with equal sampling probabilities for species using
+  exact analytic equations without simulation. This corresponds to
+  \code{null} model \code{"r0"} which also can be used with the
+  current function.  All other null model methods of
+  \code{\link{oecosimu}} can be used with the current function, but
+  they are new unpublished methods.  }
+
+\value{The function returns an object inheriting from
+  \code{\link{dist}} which can be interpreted as a dissimilarity
+  matrix.}
+
+\references{
+  Chase, J.M., Kraft, N.J.B., Smith, K.G., Vellend, M. and Inouye,
+  B.D. (2011). Using null models to disentangle variation in community
+  dissimilarity from variation in \eqn{\alpha}{alpha}-diversity.
+  \emph{Ecosphere} 2:art24 [doi:10.1890/ES10-00117.1]
+}
+
+\author{The function was developed after Brian Inouye contacted us and
+  informed us about the method in Chase et al. (2011), and the
+  function takes its idea from the code that was published with their
+  paper. The current function was written by Jari Oksanen.  }
+
+\note{ The test statistic is the number of shared species, and this is
+  typically tied with a large number of simulation results. The tied
+  values are handled differently in the current function and in the
+  function published with Chase et al. (2011). In \pkg{vegan}, the
+  index is the number of simulated values that are smaller \emph{or
+  equal} than the observed value, but smaller than observed value is
+  used by Chase et al. (2011) with option \code{split = FALSE} in
+  their script; this can be achieved with \code{chase = TRUE} in
+  \pkg{vegan}.  Chase et al. (2011) script with \code{split = TRUE}
+  uses half of tied simulation values to calculate a distance measure,
+  and that choice cannot be directly reproduced in vegan (it is the
+  average of \pkg{vegan} \code{raupcrick} results with \code{chase =
+  TRUE} and \code{chase = FALSE}).}
+
+\seealso{The function is based on \code{\link{oecosimu}}. Function
+  \code{\link{vegdist}} with {method = "raup"} implements a related
+  index but with equal sampling densities of species, and
+  \code{\link{designdist}} demonstrates its calculation.}
+
+\examples{
+## data set with variable species richness
+data(sipoo)
+## default raupcrick
+dr1 <- raupcrick(sipoo)
+## use null model "r0" of oecosimu
+dr0 <- raupcrick(sipoo, null = "r0")
+## vegdist(..., method = "raup") corresponds to 'null = "r0"'
+d <- vegdist(sipoo, "raup")
+op <- par(mfrow=c(2,1), mar=c(4,4,1,1)+.1)
+plot(dr1 ~ d, xlab = "Raup-Crick with Null R1", ylab="vegdist")
+plot(dr0 ~ d, xlab = "Raup-Crick with Null R0", ylab="vegdist")
+par(op)
+
+## The calculation is essentially as in the following oecosimu() call,
+## except that designdist() is replaced with faster code
+\dontrun{%
+oecosimu(sipoo, function(x) designdist(x, "J", "binary"), method = "r1")
+}
+}
+
+\keyword{ multivariate }
+

Modified: branches/2.0/man/vegdist.Rd
===================================================================
--- branches/2.0/man/vegdist.Rd	2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/vegdist.Rd	2011-09-22 15:05:46 UTC (rev 1873)
@@ -163,10 +163,12 @@
   parameter values.  The index is nonmetric: two communities with no
   shared species may have a dissimilarity slightly below one, and two
   identical communities may have dissimilarity slightly above
-  zero. Please note that this index does not implement the Raup--Crick
-  dissimilarity as discussed by Chase et al. (2011): the current index
-  uses equal probabilities for all species, but the probabilities
-  should be inequal and based on species frequencies.
+  zero. The index uses equal occurrence probabilities for all species,
+  but Raup and Crick originally suggested that sampling probabilities
+  should be proportional to species frequencies (Chase et al. 2011). A
+  simulation approach with unequal species sampling probabilities is
+  implemented in \code{\link{raupcrick}} function following Chase et
+  al. (2011).
   
   Chao index tries to take into account the number of unseen species
   pairs, similarly as in \code{method = "chao"} in