[Vegan-commits] r816 - in pkg/vegan: R inst man

Mon May 4 21:31:13 CEST 2009

Author: psolymos
Date: 2009-05-04 21:31:13 +0200 (Mon, 04 May 2009)
New Revision: 816

Added:
   pkg/vegan/R/twostagechao.R
   pkg/vegan/man/twostagechao.Rd
Modified:
   pkg/vegan/inst/ChangeLog
Log:
twostagechao function added, needs debugging

Added: pkg/vegan/R/twostagechao.R
===================================================================

--- pkg/vegan/R/twostagechao.R	                        (rev 0)
+++ pkg/vegan/R/twostagechao.R	2009-05-04 19:31:13 UTC (rev 816)
@@ -0,0 +1,47 @@
+## Chao et al. 2008. Biometrics 64, 1178-1186
+twostagechao <- function(x, order=2, N=nrow(x), m=1, nboot=200, subset)
+{
+    DNAME <- deparse(substitute(x))
+    if (N < 2)
+        stop("provide 'N' >= 2")
+    if (missing(subset))
+        subset <- 1:N
+    X <- x[subset,]
+    if (N != nrow(X) || N != length(subset))
+        stop("'N' and 'subset' must conform")
+    tc <- colSums(X)
+    x <- xx <- decostand(X, "total")
+    if (m > 1)
+        x <- 1 - (1 - x)^m
+    spp <- ncol(x)
+    id <- cbind(unlist(lapply(2:spp, function(z) z:spp)),
+        rep(1:(spp-1), (spp-1):1))
+
+    FUN <- function(y) {
+        a <- sapply(1:spp, function(z) sum(y[,z])^order)
+        b <- sapply(1:spp, function(z) sum(y[,z]^order))
+        out <- ((1/(N^order - N)) * (sum(a - b))) / ((1/N) * sum(b))
+        out
+    }
+
+    if (nboot) {
+        BOOT <- sapply(1:spp, function(z) rmultinom(nboot, tc[z], xx[,z]))
+        BOOT <- array(BOOT, c(N, nboot, spp))
+        BOOTC <- sapply(1:nboot, function(z) FUN(BOOT[,z,]))
+    } else BOOTC <- NA
+
+    ESTIMATE <- FUN(x)
+    SE <- sd(BOOTC) / sqrt(nboot)
+    STATISTIC <- c(ESTIMATE, SE)
+    names(STATISTIC) <- c("Estimate", "Std. Error")
+    PARAMETER <- c(order, N)
+    names(PARAMETER) <- c("q", "N")
+    SETEXT <- if (nboot)
+        paste("(standard error is based on", nboot, "bootstrap samples)")
+        else ""
+    METHOD <- paste("Chao's two stage similarity index", SETEXT)
+
+    structure(list(statistic = STATISTIC, parameter = PARAMETER, 
+        p.value = NULL, method = METHOD, data.name = DNAME, observed = X, 
+        expected = NULL, residuals = NULL), class = c("twostagechao", "htest"))
+}

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2009-04-27 07:46:39 UTC (rev 815)
+++ pkg/vegan/inst/ChangeLog	2009-05-04 19:31:13 UTC (rev 816)
@@ -4,6 +4,12 @@
 
 Version 1.16-18 (opened April 21, 2009)
 
+	* twostagechao: function that calculates multiple-community
+	similarity based on Chao et al. 2008 (Biometrics 64, 1178-86).
+	Some debugging is needed since it cannot reproduce the numbers
+	in Table 3 exactly (lower estimates and SEs). The Rd file
+	also needs more details.
+
 	* anova.cca: there was a name clash and failure in
 	anova.ccabymargin, anova.ccabyaxis and anova.ccabyterm if data
 	were indexed with 'i' in the fitted model. Now the internal

Added: pkg/vegan/man/twostagechao.Rd
===================================================================
--- pkg/vegan/man/twostagechao.Rd	                        (rev 0)
+++ pkg/vegan/man/twostagechao.Rd	2009-05-04 19:31:13 UTC (rev 816)
@@ -0,0 +1,107 @@
+\name{twostagechao}
+\Rdversion{1.1}
+\alias{twostagechao}
+\title{
+Multiple-Community Similarity Index
+}
+\description{
+The function implements the two-stage probabilistic approach to 
+multiple-community similarity indices proposed by Chao et al. (2008),
+extending the traditional pairwise comparisons of sites.
+}
+\usage{
+twostagechao(x, order = 2, N = nrow(x), m = 1, nboot = 200, subset)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{x}{
+A community matrix with rows as sites and columns as species.
+}
+  \item{order}{
+The order of the similarity, see details.
+}
+  \item{N}{
+The number of sites to use. By default it is the number of rows in \code{x}.
+If \code{N < ncol(x)}, then \code{N} and \code{subset} must conform 
+(\code{length(subset) == N}).
+}
+  \item{m}{
+The exponent used for calculating the NESS index variant, see details.
+}
+  \item{nboot}{
+Number of bootstrap samples to use for calculating standard error of the estimate.
+If \code{nboot = 0}, standard error is not computed.
+}
+  \item{subset}{
+Optional vactor defining the subset of sites to use in the calculations.
+If this argument is provided, its length should equal \code{N}.
+}
+}
+\details{
+
+PUT DETAILS HERE
+
+}
+\value{
+The result is an object of class 'twostagechao' inheriting its structure from class 'htest'.
+}
+\references{
+Chao, A., Jost, L., Chiang, S. C., Jiang, Y.-H., Chazdon, R. L. (2008)
+A two-stage probabilistic approach to multiple-community similarity indices.
+\emph{Biometrics} \bold{64}, 1178--1186.
+
+Supplementary material at:
+\url{http://www.biometrics.tibs.org/datasets/070539.pdf}
+}
+\author{
+\enc{P\'eter S\'olymos}{Peter Solymos}, \email{solymos at ualberta.ca}
+}
+\seealso{
+\code{\link{dist}}, \code{\link{vegdist}}
+}
+\examples{
+data(BCI)
+twostagechao(BCI)
+res1 <- t(sapply(2:5, function(z) twostagechao(BCI, order=z)$statistic))
+rownames(res1) <- paste("Order =", 2:5)
+## similarity decreases as rare species get more weights
+res1
+## original example from Chao et al. 2008
+## supplementary material, LSUR Primary Plot
+lep1 <- structure(c(7L, 48L, 17L, 0L, 38L, 14L, 31L, 37L, 121L, 6L, 30L, 
+16L, 0L, 27L, 10L, 0L, 24L, 5L, 2L, 23L, 6L, 3L, 21L, 6L, 1L, 
+19L, 1L, 0L, 19L, 73L, 0L, 19L, 2L, 2L, 17L, 4L, 3L, 17L, 17L, 
+2L, 16L, 5L, 0L, 16L, 11L, 0L, 16L, 4L, 2L, 15L, 1L, 0L, 15L, 
+9L, 4L, 14L, 7L, 0L, 14L, 2L, 0L, 13L, 4L, 5L, 12L, 0L, 1L, 11L, 
+7L, 2L, 11L, 20L, 1L, 9L, 0L, 2L, 9L, 5L, 0L, 9L, 0L, 1L, 8L, 
+0L, 0L, 8L, 6L, 0L, 8L, 0L, 0L, 8L, 0L, 0L, 8L, 0L, 3L, 7L, 3L, 
+0L, 7L, 17L, 0L, 7L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 
+1L, 0L, 0L, 1L, 0L, 0L, 2L, 6L, 1L, 0L, 6L, 6L, 0L, 6L, 2L, 1L, 
+5L, 0L, 2L, 5L, 1L, 1L, 5L, 3L, 0L, 5L, 2L, 0L, 5L, 1L, 0L, 5L, 
+1L, 1L, 4L, 1L, 1L, 4L, 2L, 0L, 4L, 29L, 0L, 4L, 3L, 0L, 4L, 
+1L, 0L, 4L, 0L, 0L, 3L, 2L, 0L, 3L, 1L, 0L, 3L, 0L, 3L, 2L, 0L, 
+1L, 2L, 0L, 1L, 2L, 0L, 1L, 2L, 3L, 0L, 2L, 7L, 0L, 2L, 5L, 0L, 
+2L, 1L, 0L, 2L, 1L, 0L, 2L, 1L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 2L, 
+0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 2L, 0L, 
+3L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 3L, 0L, 0L, 54L, 0L, 0L, 3L, 
+0L, 2L, 0L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 
+1L, 2L, 0L, 1L, 5L, 0L, 1L, 3L, 0L, 1L, 3L, 0L, 1L, 1L, 0L, 1L, 
+1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 
+0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 
+1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 
+0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 
+2L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 2L, 0L, 
+0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L), .Dim = c(3L, 120L), .Dimnames = list(
+    c("Tree", "Sapling", "Seedling"), NULL))
+## compare walues in Table 3 of Chao et al. 2008
+## (4nd column, last 6 rows)
+## results are different!
+Order <- rep(2:3, 3)
+m <- c(1, 1, 5, 5, 10, 10)
+res2 <- t(sapply(1:6, function(z)
+    twostagechao(lep1, order=Order[z], m=m[z])$statistic))
+rownames(res2) <- c("Morisita C23", "Morisita C33", "NESS23(5)",
+    "NESS33(5)", "NESS23(10)", "NESS33(10)")
+res2
+}
+\keyword{ multivariate }