[Vegan-commits] r1873 - in branches/2.0: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Sep 22 17:05:46 CEST 2011
Author: jarioksa
Date: 2011-09-22 17:05:46 +0200 (Thu, 22 Sep 2011)
New Revision: 1873
Added:
branches/2.0/R/raupcrick.R
branches/2.0/man/raupcrick.Rd
Modified:
branches/2.0/NAMESPACE
branches/2.0/R/centroids.cca.R
branches/2.0/R/commsimulator.R
branches/2.0/R/meandist.R
branches/2.0/R/nesteddisc.R
branches/2.0/R/permuted.index.R
branches/2.0/R/poolaccum.R
branches/2.0/inst/ChangeLog
branches/2.0/man/MDSrotate.Rd
branches/2.0/man/designdist.Rd
branches/2.0/man/oecosimu.Rd
branches/2.0/man/vegdist.Rd
Log:
merge r1835,72 (raupcrick), r1838,45,46 (speed-up), r1869 (meandist bug fix)
Modified: branches/2.0/NAMESPACE
===================================================================
--- branches/2.0/NAMESPACE 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/NAMESPACE 2011-09-22 15:05:46 UTC (rev 1873)
@@ -21,7 +21,7 @@
ordixyplot, orglpoints, orglsegments, orglspider, orgltext,
pcnm, permatfull, permatswap, permutest, poolaccum, postMDS, prc,
prestondistr, prestonfit, procrustes, protest, radfit, radlattice,
-rankindex, rarefy, rda, renyiaccum, renyi, rrarefy, scores,
+rankindex, rarefy,raupcrick, rda, renyiaccum, renyi, rrarefy, scores,
showvarparts, spandepth, spantree, specaccum, specnumber,
specpool2vect, specpool, spenvcor, stepacross, stressplot, swan,
taxa2dist, taxondive, tolerance, treedist, treedive, treeheight,
Modified: branches/2.0/R/centroids.cca.R
===================================================================
--- branches/2.0/R/centroids.cca.R 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/centroids.cca.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -1,24 +1,34 @@
-"centroids.cca" <-
- function (x, mf, wt)
+`centroids.cca` <-
+ function(x, mf, wt)
{
- mf <- mf[, unlist(lapply(mf, is.factor)), drop = FALSE]
- if (ncol(mf) == 0)
+ facts <- sapply(mf, is.factor)
+ if (!any(facts))
return(NULL)
+ mf <- mf[, facts, drop = FALSE]
if (missing(wt))
wt <- rep(1, nrow(mf))
- x <- sweep(x, 1, wt, "*")
- workhorse <- function(mf, x, wt) {
- sw <- tapply(wt, mf, sum)
- swx <- apply(x, 2, tapply, mf, sum)
- sweep(swx, 1, sw, "/")
- }
- tmp <- lapply(mf, workhorse, x, wt)
+ ind <- seq_len(nrow(mf))
+ workhorse <- function(x, wt)
+ colSums(x * wt) / sum(wt)
+ tmp <- lapply(mf, function(fct)
+ tapply(ind, fct, function(i) workhorse(x[i,, drop=FALSE], wt[i])))
+ tmp <- lapply(tmp, function(z) sapply(z, rbind))
pnam <- labels(tmp)
out <- NULL
- for (i in 1:length(tmp)) {
- rownames(tmp[[i]]) <- paste(pnam[i], rownames(tmp[[i]]),
- sep = "")
- out <- rbind(out, tmp[[i]])
+ if (ncol(x) == 1) {
+ for(i in 1:length(tmp)) {
+ names(tmp[[i]]) <- paste(pnam[i], names(tmp[[i]]), sep="")
+ out <- c(out, tmp[[i]])
+ out <- matrix(out, nrow=1)
+ }
+ } else {
+ for (i in 1:length(tmp)) {
+ colnames(tmp[[i]]) <- paste(pnam[i], colnames(tmp[[i]]),
+ sep = "")
+ out <- cbind(out, tmp[[i]])
+ }
}
+ out <- t(out)
+ colnames(out) <- colnames(x)
out
}
Modified: branches/2.0/R/commsimulator.R
===================================================================
--- branches/2.0/R/commsimulator.R 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/commsimulator.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -18,18 +18,18 @@
p <- p*p
out <- matrix(0, nrow=nr, ncol=nc)
for (i in 1:nr)
- out[i,sample(nc, rs[i], prob=p)] <- 1
+ out[i,sample.int(nc, rs[i], prob=p)] <- 1
}
else if (method == "r00") {
out <- numeric(nr*nc)
- out[sample(length(out), sum(x))] <- 1
+ out[sample.int(length(out), sum(x))] <- 1
dim(out) <- dim(x)
}
else if (method == "c0") {
cs <- colSums(x)
out <- matrix(0, nrow=nr, ncol=nc)
for (j in 1:nc)
- out[sample(nr, cs[j]), j] <- 1
+ out[sample.int(nr, cs[j]), j] <- 1
} else if (method == "swap") {
x <- as.matrix(x)
out <- .C("swap", m = as.integer(x), as.integer(nrow(x)),
Modified: branches/2.0/R/meandist.R
===================================================================
--- branches/2.0/R/meandist.R 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/meandist.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -1,26 +1,27 @@
`meandist` <-
function(dist, grouping, ...)
{
+ if (!inherits(dist, "dist"))
+ stop("'dist' must be dissimilarity object inheriting from", dQuote(dist))
## check that 'dist' are dissimilarities (non-negative)
if (any(dist < -sqrt(.Machine$double.eps)))
warning("some dissimilarities are negative -- is this intentional?")
- ## merge levels so that lower is always first (filling lower triangle)
- mergenames <- function(X, Y, ...) {
- xy <- cbind(X, Y)
- xy <- apply(xy, 1, sort)
- apply(xy, 2, paste, collapse = " ")
- }
grouping <- factor(grouping, exclude = NULL)
- cl <- outer(grouping, grouping, mergenames)
- cl <- cl[lower.tri(cl)]
+ ## grouping for rows and columns
+ grow <- grouping[as.dist(row(as.matrix(dist)))]
+ gcol <- grouping[as.dist(col(as.matrix(dist)))]
+ ## The row index must be "smaller" of the factor levels so that
+ ## all means are in the lower triangle, and upper is NA
+ first <- as.numeric(grow) >= as.numeric(gcol)
+ cl1 <- ifelse(first, grow, gcol)
+ cl2 <- ifelse(!first, grow, gcol)
## Cannot have within-group dissimilarity for group size 1
n <- table(grouping)
take <- matrix(TRUE, nlevels(grouping), nlevels(grouping))
diag(take) <- n > 1
take[upper.tri(take)] <- FALSE
## Get output matrix
- out <- matrix(NA, nlevels(grouping), nlevels(grouping))
- out[take] <- tapply(dist, cl, mean)
+ out <- tapply(dist, list(cl1, cl2), mean)
out[upper.tri(out)] <- t(out)[upper.tri(out)]
rownames(out) <- colnames(out) <- levels(grouping)
class(out) <- c("meandist", "matrix")
Modified: branches/2.0/R/nesteddisc.R
===================================================================
--- branches/2.0/R/nesteddisc.R 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/nesteddisc.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -42,7 +42,7 @@
perm <- matrix(allPerms(le[i]), ncol=le[i]) + cle[i]
## Take at maximum NITER cases from complete enumeration
if (nrow(perm) >= NITER) {
- perm <- perm[sample(nrow(perm), NITER),]
+ perm <- perm[sample.int(nrow(perm), NITER),]
ties <- TRUE
}
}
Modified: branches/2.0/R/permuted.index.R
===================================================================
--- branches/2.0/R/permuted.index.R 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/permuted.index.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -2,7 +2,7 @@
function (n, strata)
{
if (missing(strata) || is.null(strata))
- out <- sample(n, n)
+ out <- sample.int(n, n)
else {
out <- 1:n
inds <- names(table(strata))
Modified: branches/2.0/R/poolaccum.R
===================================================================
--- branches/2.0/R/poolaccum.R 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/R/poolaccum.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -10,7 +10,7 @@
## specpool() is slow, but the vectorized versions below are
## pretty fast
for (i in 1:permutations) {
- take <- sample(n)
+ take <- sample.int(n, n)
tmp <- apply(x[take,] > 0, 2, cumsum)
S[,i] <- rowSums(tmp > 0)
## All-zero species are taken as *known* to be missing in
Copied: branches/2.0/R/raupcrick.R (from rev 1835, pkg/vegan/R/raupcrick.R)
===================================================================
--- branches/2.0/R/raupcrick.R (rev 0)
+++ branches/2.0/R/raupcrick.R 2011-09-22 15:05:46 UTC (rev 1873)
@@ -0,0 +1,27 @@
+`raupcrick` <-
+ function(comm, null = "r1", nsimul = 999, chase = FALSE)
+{
+ comm <- as.matrix(comm)
+ comm <- ifelse(comm > 0, 1, 0)
+ ## 'tri' is a faster alternative to as.dist(): it takes the lower
+ ## diagonal, but does not set attributes of a "dist" object
+ N <- nrow(comm)
+ tri <- matrix(FALSE, N, N)
+ tri <- row(tri) > col(tri)
+ ## function(x) designdist(x, "J", terms="binary") does the same,
+ ## but is much slower
+ sol <- oecosimu(comm, function(x) tcrossprod(x)[tri], method = null,
+ nsimul = nsimul,
+ alternative = if (chase) "greater" else "less")
+ ## Chase et al. way, or the standard way
+ if (chase)
+ out <- 1 - sol$oecosimu$pval
+ else
+ out <- sol$oecosimu$pval
+ ## set attributes of a "dist" object
+ attributes(out) <- list("class"=c("raupcrick", "dist"), "Size"=N,
+ "Labels" = rownames(comm), "call" =
+ match.call(), "Diag" = FALSE, "Upper" = FALSE,
+ "method" = "raupcrick")
+ out
+}
Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/inst/ChangeLog 2011-09-22 15:05:46 UTC (rev 1873)
@@ -4,6 +4,12 @@
Version 2.0-1 (opened September 8, 2011)
+ * merge r1872: raupcrick doc fixes.
+ * merge r1869: meandist re-ordering bug fix.
+ * merge r1846: tweak speed (a bit).
+ * merge r1845: faster centroids.cca.
+ * merge r1838: a bit faster example(MDSrotate).
+ * merge r1835: add raupcrick.
* merge r1811: permatswap bug fix in nestedness.c.
* merge r1810: -pedatic -Wall fixes in monoMDS.f, ordering.f
Modified: branches/2.0/man/MDSrotate.Rd
===================================================================
--- branches/2.0/man/MDSrotate.Rd 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/MDSrotate.Rd 2011-09-22 15:05:46 UTC (rev 1873)
@@ -57,7 +57,7 @@
mod <- monoMDS(vegdist(varespec))
mod <- with(varechem, MDSrotate(mod, pH))
plot(mod)
-ef <- envfit(mod ~ pH, varechem)
+ef <- envfit(mod ~ pH, varechem, permutations = 0)
plot(ef)
ordisurf(mod ~ pH, varechem, knots = 1, add = TRUE)
}
Modified: branches/2.0/man/designdist.Rd
===================================================================
--- branches/2.0/man/designdist.Rd 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/designdist.Rd 2011-09-22 15:05:46 UTC (rev 1873)
@@ -59,7 +59,7 @@
\code{1-J/sqrt(A*B)} \tab \code{"binary"} \tab Ochiai \cr
\code{1-J/sqrt(A*B)} \tab \code{"quadratic"} \tab cosine
complement \cr
- \code{1-phyper(J-1, A, P-A, B)} \tab \code{"binary"} \tab Raup-Crick
+ \code{1-phyper(J-1, A, P-A, B)} \tab \code{"binary"} \tab Raup-Crick (but see \code{\link{raupcrick}})
}
The function \code{designdist} can implement most dissimilarity
Modified: branches/2.0/man/oecosimu.Rd
===================================================================
--- branches/2.0/man/oecosimu.Rd 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/oecosimu.Rd 2011-09-22 15:05:46 UTC (rev 1873)
@@ -58,21 +58,23 @@
\details{
Function \code{oecosimu} is a wrapper that evaluates a nestedness
- statistic using function given by \code{nestfun}, and then simulates a
- series of null models using \code{commsimulator} or
- other functions (depending on method argument), and evaluates the
- statistic on these null models. The \pkg{vegan} packages contains some
- nestedness functions that are described separately
+ statistic using function given by \code{nestfun}, and then simulates
+ a series of null models using \code{commsimulator} or other
+ functions (depending on method argument), and evaluates the
+ statistic on these null models. The \pkg{vegan} packages contains
+ some nestedness functions that are described separately
(\code{\link{nestedchecker}}, \code{\link{nesteddisc}},
\code{\link{nestedn0}}, \code{\link{nestedtemp}}), but many other
- functions can be used as long as they are meaningful with binary
- or quantitative community models. An applicable function must return either the
- statistic as a plain number, or as a list element \code{"statistic"}
- (like \code{\link{chisq.test}}), or in an item whose name is given in
- the argument \code{statistic}. The statistic can be a single number
- (like typical for a nestedness index), or it can be a vector. The
- vector indices can be used to analyse site (row) or species (column)
- properties, see \code{\link{treedive}} for an example.
+ functions can be used as long as they are meaningful with binary or
+ quantitative community models. An applicable function must return
+ either the statistic as a plain number, or as a list element
+ \code{"statistic"} (like \code{\link{chisq.test}}), or in an item
+ whose name is given in the argument \code{statistic}. The statistic
+ can be a single number (like typical for a nestedness index), or it
+ can be a vector. The vector indices can be used to analyse site
+ (row) or species (column) properties, see \code{\link{treedive}} for
+ an example. Raup-Crick index (\code{\link{raupcrick}}) gives an
+ example of using a dissimilarities index.
Function \code{commsimulator} implements binary (presence/absence)
null models for community composition.
Copied: branches/2.0/man/raupcrick.Rd (from rev 1835, pkg/vegan/man/raupcrick.Rd)
===================================================================
--- branches/2.0/man/raupcrick.Rd (rev 0)
+++ branches/2.0/man/raupcrick.Rd 2011-09-22 15:05:46 UTC (rev 1873)
@@ -0,0 +1,118 @@
+\name{raupcrick}
+\alias{raupcrick}
+
+\title{
+ Raup-Crick Dissimilarity with Unequal Sampling Densities of Species
+}
+
+\description{ Function finds the Raup-Crick dissimilarity which is a
+ probability of number of co-occurring species with species
+ occurrence probabilities proportional to species frequencies. }
+
+\usage{
+raupcrick(comm, null = "r1", nsimul = 999, chase = FALSE)
+}
+
+\arguments{
+
+ \item{comm}{Community data which will be treated as presence/absence data.}
+
+ \item{null}{Null model used as the \code{method} in
+ \code{\link{oecosimu}}.}
+
+ \item{nsimul}{Number of null communities for assessing the
+ dissimilarity index.}
+
+ \item{chase}{Use the Chase et al. (2011) method of tie handling (not
+ recommended except for comparing the results against the Chase
+ script).}
+
+}
+
+\details{Raup-Crick index is the probability that compared sampling
+ units have non-identical species composition. This probability can
+ be regarded as a dissimilarity, although it is not metric: identical
+ sampling units can have dissimilarity slightly above \eqn{0}, the
+ dissimilarity can be nearly zero over a range of shared species, and
+ sampling units with no shared species can have dissimilarity
+ slightly below \eqn{1}. Moreover, communities sharing rare species
+ appear as more similar (lower probability of finding rare species
+ together), than communities sharing the same number of common
+ species.
+
+ The function will always treat the data as binary (presence/
+ absence).
+
+ The probability is assessed using simulation with
+ \code{\link{oecosimu}} where the test statistic is the observed
+ number of shared species between sampling units evaluated against a
+ community null model (see Examples). The default null model is
+ \code{"r1"} where the probability of selecting species is
+ proportional to the species frequencies.
+
+ The \code{\link{vegdist}} function implements a variant of the
+ Raup-Crick index with equal sampling probabilities for species using
+ exact analytic equations without simulation. This corresponds to
+ \code{null} model \code{"r0"} which also can be used with the
+ current function. All other null model methods of
+ \code{\link{oecosimu}} can be used with the current function, but
+ they are new unpublished methods. }
+
+\value{The function returns an object inheriting from
+ \code{\link{dist}} which can be interpreted as a dissimilarity
+ matrix.}
+
+\references{
+ Chase, J.M., Kraft, N.J.B., Smith, K.G., Vellend, M. and Inouye,
+ B.D. (2011). Using null models to disentangle variation in community
+ dissimilarity from variation in \eqn{\alpha}{alpha}-diversity.
+ \emph{Ecosphere} 2:art24 [doi:10.1890/ES10-00117.1]
+}
+
+\author{The function was developed after Brian Inouye contacted us and
+ informed us about the method in Chase et al. (2011), and the
+ function takes its idea from the code that was published with their
+ paper. The current function was written by Jari Oksanen. }
+
+\note{ The test statistic is the number of shared species, and this is
+ typically tied with a large number of simulation results. The tied
+ values are handled differently in the current function and in the
+ function published with Chase et al. (2011). In \pkg{vegan}, the
+ index is the number of simulated values that are smaller \emph{or
+ equal} than the observed value, but smaller than observed value is
+ used by Chase et al. (2011) with option \code{split = FALSE} in
+ their script; this can be achieved with \code{chase = TRUE} in
+ \pkg{vegan}. Chase et al. (2011) script with \code{split = TRUE}
+ uses half of tied simulation values to calculate a distance measure,
+ and that choice cannot be directly reproduced in vegan (it is the
+ average of \pkg{vegan} \code{raupcrick} results with \code{chase =
+ TRUE} and \code{chase = FALSE}).}
+
+\seealso{The function is based on \code{\link{oecosimu}}. Function
+ \code{\link{vegdist}} with {method = "raup"} implements a related
+ index but with equal sampling densities of species, and
+ \code{\link{designdist}} demonstrates its calculation.}
+
+\examples{
+## data set with variable species richness
+data(sipoo)
+## default raupcrick
+dr1 <- raupcrick(sipoo)
+## use null model "r0" of oecosimu
+dr0 <- raupcrick(sipoo, null = "r0")
+## vegdist(..., method = "raup") corresponds to 'null = "r0"'
+d <- vegdist(sipoo, "raup")
+op <- par(mfrow=c(2,1), mar=c(4,4,1,1)+.1)
+plot(dr1 ~ d, xlab = "Raup-Crick with Null R1", ylab="vegdist")
+plot(dr0 ~ d, xlab = "Raup-Crick with Null R0", ylab="vegdist")
+par(op)
+
+## The calculation is essentially as in the following oecosimu() call,
+## except that designdist() is replaced with faster code
+\dontrun{%
+oecosimu(sipoo, function(x) designdist(x, "J", "binary"), method = "r1")
+}
+}
+
+\keyword{ multivariate }
+
Modified: branches/2.0/man/vegdist.Rd
===================================================================
--- branches/2.0/man/vegdist.Rd 2011-09-22 14:39:47 UTC (rev 1872)
+++ branches/2.0/man/vegdist.Rd 2011-09-22 15:05:46 UTC (rev 1873)
@@ -163,10 +163,12 @@
parameter values. The index is nonmetric: two communities with no
shared species may have a dissimilarity slightly below one, and two
identical communities may have dissimilarity slightly above
- zero. Please note that this index does not implement the Raup--Crick
- dissimilarity as discussed by Chase et al. (2011): the current index
- uses equal probabilities for all species, but the probabilities
- should be inequal and based on species frequencies.
+ zero. The index uses equal occurrence probabilities for all species,
+ but Raup and Crick originally suggested that sampling probabilities
+ should be proportional to species frequencies (Chase et al. 2011). A
+ simulation approach with unequal species sampling probabilities is
+ implemented in \code{\link{raupcrick}} function following Chase et
+ al. (2011).
Chao index tries to take into account the number of unseen species
pairs, similarly as in \code{method = "chao"} in
More information about the Vegan-commits
mailing list