[Vegan-commits] r1902 - in branches/2.0: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Sep 27 15:22:43 CEST 2011
Author: jarioksa
Date: 2011-09-27 15:22:42 +0200 (Tue, 27 Sep 2011)
New Revision: 1902
Added:
branches/2.0/R/clamtest.R
branches/2.0/R/plot.clamtest.R
branches/2.0/R/print.summary.clamtest.R
branches/2.0/R/summary.clamtest.R
branches/2.0/man/clamtest.Rd
Modified:
branches/2.0/NAMESPACE
branches/2.0/inst/ChangeLog
Log:
merge clamtest to branches/2.0
Modified: branches/2.0/NAMESPACE
===================================================================
--- branches/2.0/NAMESPACE 2011-09-27 12:50:02 UTC (rev 1901)
+++ branches/2.0/NAMESPACE 2011-09-27 13:22:42 UTC (rev 1902)
@@ -6,7 +6,7 @@
export(CCorA, MOStest, RsquareAdj, SSarrhenius, SSgitay, SSgleason,
SSlomolino, adipart, adonis, anosim, beals, betadisper, betadiver,
bgdispersal, bioenv, bstick, cIndexKM, calibrate, capscale, cascadeKM,
-cca, commsimulator, contribdiv, decorana, decostand, designdist,
+cca, commsimulator, contribdiv, clamtest, decorana, decostand, designdist,
coverscale, dispindmorisita, distconnected, diversity, downweight,
drarefy, eigengrad, eigenvals, envfit, estaccumR, estimateR, eventstar,
factorfit, fisherfit, fitspecaccum, goodness, hiersimu, humpfit,
@@ -210,6 +210,7 @@
S3method(plot, cascadeKM)
S3method(plot, cca)
S3method(plot, contribdiv)
+S3method(plot, clamtest)
S3method(plot, decorana)
S3method(plot, envfit)
S3method(plot, fisherfit)
@@ -301,6 +302,7 @@
S3method(print, specaccum)
S3method(print, summary.bioenv)
S3method(print, summary.cca)
+S3method(print, summary.clamtest)
S3method(print, summary.decorana)
S3method(print, summary.humpfit)
S3method(print, summary.isomap)
@@ -359,6 +361,7 @@
S3method(summary, anosim)
S3method(summary, bioenv)
S3method(summary, cca)
+S3method(summary, clamtest)
S3method(summary, decorana)
S3method(summary, eigenvals)
S3method(summary, humpfit)
Copied: branches/2.0/R/clamtest.R (from rev 1901, pkg/vegan/R/clamtest.R)
===================================================================
--- branches/2.0/R/clamtest.R (rev 0)
+++ branches/2.0/R/clamtest.R 2011-09-27 13:22:42 UTC (rev 1902)
@@ -0,0 +1,112 @@
+## CLAM, reproduction of software described in Chazdon et al. 2011
+## Ecology, 92, 1332--1343
+clamtest <-
+function(comm, groups, coverage.limit = 10,
+specialization = 2/3, npoints = 20, alpha = 0.05/20)
+{
+ ## inital checks
+ comm <- as.matrix(comm)
+ if (NROW(comm) < 2)
+ stop("'comm' must have at least 2 rows")
+ if (nrow(comm) > 2 && missing(groups))
+ stop("'groups' is missing")
+ if (nrow(comm) == 2 && missing(groups))
+ groups <- if (is.null(rownames(comm)))
+ c("Group.1", "Group.2") else rownames(comm)
+ if (length(groups) != nrow(comm))
+ stop("length of 'groups' must equal 'nrow(comm)'")
+ if (length(unique(groups)) != 2)
+ stop("number of groups must be 2")
+ glabel <- as.character(unique(groups))
+ if (is.null(colnames(comm)))
+ colnames(comm) <- paste("Species", 1:nrow(comm), sep=".")
+ if (any(colSums(comm) <= 0))
+ stop("'comm' contains zero sum columns")
+ spp <- colnames(comm)
+ ## reproduced from Chazdon et al. 2011, Ecology 92, 1332--1343
+ S <- ncol(comm)
+ if (nrow(comm) == 2) {
+ Y <- comm[glabel[1],]
+ X <- comm[glabel[2],]
+ } else {
+ Y <- colSums(comm[which(groups==glabel[1]),])
+ X <- colSums(comm[which(groups==glabel[2]),])
+ }
+ names(X) <- names(Y) <- NULL
+ #all(ct$Total_SG == Y)
+ #all(ct$Total_OG == X)
+ m <- sum(Y)
+ n <- sum(X)
+ if (sum(Y) <= 0 || sum(X) <= 0)
+ stop("zero group totals not allowed")
+ ## check if comm contains integer, especially for singletons
+ if (any(X[X>0] < 1) || any(Y[Y>0] < 1))
+ warning("<1 non integer values detected: analysis might not be meaningful")
+ if (abs(sum(X,Y) - sum(as.integer(X), as.integer(Y))) > 10^-6)
+ warning("non integer values detected")
+ C1 <- 1 - sum(X==1)/n
+ C2 <- 1 - sum(Y==1)/m
+ ## this stands for other than 2/3 cases
+ uu <- specialization/(1-specialization)
+ ## critical level
+ Zp <- qnorm(alpha, lower.tail=FALSE)
+ #p_i=a
+ #pi_i=b
+ ## function to calculate test statistic from Appendix D
+ ## (Ecological Archives E092-112-A4)
+ testfun <- function(p_i, pi_i, C1, C2, n, m) {
+ C1 <- ifelse(p_i < coverage.limit, C1, 1)
+ C2 <- ifelse(pi_i < coverage.limit, C2, 1)
+ Var <- C1^2*(p_i*(1-p_i)/n) + uu^2*C2^2*(pi_i*(1-pi_i)/m)
+ C1*p_i - C2*pi_i*uu - Zp*sqrt(Var)
+ }
+ ## root finding for iso-lines (instead of itarative search)
+ rootfun <- function(pi_i, C1, C2, n, m, upper) {
+ f <- function(p_i) testfun(p_i/n, pi_i/m, C1, C2, n, m)
+ if (length(unique(sign(c(f(1), f(upper))))) > 1)
+ ceiling(uniroot(f, lower=1, upper=upper)$root) else NA
+ }
+ ## sequences for finding Xmin and Ymin values
+ Xseq <- as.integer(trunc(seq(1, max(X), len=npoints)))
+ Yseq <- as.integer(trunc(seq(1, max(Y), len=npoints)))
+ ## finding Xmin and Ymin values for Xseq and Yseq
+ Xmins <- sapply(Yseq, function(z) rootfun(z, C1, C2, n, m, upper=max(X)))
+ Ymins <- sapply(Xseq, function(z) rootfun(z, C2, C1, m, n, upper=max(Y)))
+ minval <- list(data.frame(x=Xseq[!is.na(Ymins)], y=Ymins[!is.na(Ymins)]),
+ data.frame(x=Xmins[!is.na(Xmins)], y=Yseq[!is.na(Xmins)]))
+ ## shared but too rare
+ Ymin <- Ymins[1]
+ Xmin <- Xmins[1]
+ sr <- X < Xmin & Y < Ymin
+ tmp1 <- approx(c(Xmin, 1), c(1, Ymin), xout=1:Xmin)
+ tmp2 <- approx(c(1, Ymin), c(Xmin, 1), xout=1:Ymin)
+ for (i in 1:S) {
+ if (X[i] %in% tmp1$x)
+ sr[i] <- Y[i] < tmp1$y[which(X[i]==tmp1$x)]
+ if (Y[i] %in% tmp2$x)
+ sr[i] <- X[i] < tmp2$y[which(Y[i]==tmp2$x)]
+ }
+ ## classification
+ a <- ifelse(X==0, 1, X)/n # \hat{p_i}
+ b <- ifelse(Y==0, 1, Y)/m # \hat{\pi_i}
+ specX <- !sr & testfun(a, b, C1, C2, n, m) > 0
+ specY <- !sr & testfun(b, a, C2, C1, m, n) > 0
+ gen <- !sr & !specX & !specY
+ ## crosstable
+ tmp <- ifelse(cbind(gen, specY, specX, sr), 1, 0)
+ types <- c("Generalist", paste("Specialist", glabel[1], sep="_"),
+ paste("Specialist", glabel[2], sep="_"), "Too_rare")
+ classes <- as.factor((1:4)[rowSums(tmp*col(tmp))])
+ levels(classes) <- c("Generalist", paste("Specialist", glabel[1], sep="_"),
+ paste("Specialist", glabel[2], sep="_"), "Too_rare")
+ tab <- data.frame(Species=spp, y=Y, x=X, Classes=classes)
+ colnames(tab)[2:3] <- paste("Total", glabel, sep="_")
+ rownames(tab) <- NULL
+ class(tab) <- c("clamtest","data.frame")
+ attr(tab, "settings") <- list(labels = glabel,
+ coverage.limit = coverage.limit, specialization = specialization,
+ npoints = npoints, alpha = alpha)
+ attr(tab, "minv") <- minval
+ attr(tab, "coverage") <- structure(c(C2, C1), .Names=glabel)
+ tab
+}
Copied: branches/2.0/R/plot.clamtest.R (from rev 1901, pkg/vegan/R/plot.clamtest.R)
===================================================================
--- branches/2.0/R/plot.clamtest.R (rev 0)
+++ branches/2.0/R/plot.clamtest.R 2011-09-27 13:22:42 UTC (rev 1902)
@@ -0,0 +1,39 @@
+plot.clamtest <- function(x, xlab, ylab, main,
+ pch=21:24, col.points=1:4, col.lines=2:4, lty=1:3,
+ position="bottomright", ...) {
+ summ <- summary(x)
+ glabel <- summ$labels
+ if (missing(main))
+ main <- "Species Classification"
+ if (missing(xlab))
+ xlab <- paste(glabel[2], "(abundance + 1)")
+ if (missing(ylab))
+ ylab <- paste(glabel[1], "(abundance + 1)")
+ Y <- x[,2]
+ X <- x[,3]
+ minval <- summ$minv
+ ## plot the dots
+ rr <- range(X+1,Y+1)
+ plot(X+1, Y+1, log = "xy", xaxt = "n", yaxt = "n",
+ col=col.points[as.integer(x$Classes)],
+ pch=pch[as.integer(x$Classes)],
+ xlab=xlab, ylab=ylab, main=main,
+ xlim=rr, ylim=rr, ...)
+ axis(1, c(1,10,100,1000,10000))
+ axis(2, c(1,10,100,1000,10000))
+ ## too rare threshold
+ Ymin <- minval[[1]][1,2]
+ Xmin <- minval[[2]][1,1]
+ lines(rep(Xmin, 2)+1, c(0, 1)+1, col=col.lines[1], lty=lty[1])
+ lines(c(0, 1)+1, rep(Ymin, 2)+1, col=col.lines[1], lty=lty[1])
+ tmp <- approx(c(Xmin, 1), c(1, Ymin))
+ lines(tmp$x+1, tmp$y+1, col=col.lines[1], lty=lty[1])
+ ## Y vs. gen threshold
+ lines(minval[[1]]+1, col=col.lines[2], lty=lty[2])
+ ## X vs. gen threshold
+ lines(minval[[2]]+1, col=col.lines[3], lty=lty[3])
+ if (!is.null(position))
+ legend(position, col=col.points, pch=pch,
+ legend=rownames(summ$summary))
+ invisible(x)
+}
Copied: branches/2.0/R/print.summary.clamtest.R (from rev 1901, pkg/vegan/R/print.summary.clamtest.R)
===================================================================
--- branches/2.0/R/print.summary.clamtest.R (rev 0)
+++ branches/2.0/R/print.summary.clamtest.R 2011-09-27 13:22:42 UTC (rev 1902)
@@ -0,0 +1,13 @@
+print.summary.clamtest <- function(x, digits=max(3, getOption("digits") - 3), ...) {
+ cat("Two Groups Species Classification Method (CLAM)\n\n")
+ cat("Specialization threshold =", x$specialization)
+ cat("\nAlpha level =", x$alpha)
+ cat("\n\nEstimated sample coverage:\n")
+ print(x$coverage, digits=digits)
+ cat("\nMinimum abundance for classification:\n")
+ print(structure(c(x$minv[[1]][1,2], x$minv[[2]][1,1]),
+ .Names=x$labels))
+ cat("\n")
+ printCoefmat(x$summary, digits=digits, ...)
+}
+
Copied: branches/2.0/R/summary.clamtest.R (from rev 1901, pkg/vegan/R/summary.clamtest.R)
===================================================================
--- branches/2.0/R/summary.clamtest.R (rev 0)
+++ branches/2.0/R/summary.clamtest.R 2011-09-27 13:22:42 UTC (rev 1902)
@@ -0,0 +1,7 @@
+summary.clamtest <- function(object, ...) {
+ structure(c(attr(object, "settings"),
+ list(summary=cbind(Species=table(object$Classes),
+ Proportion=table(object$Classes)/nrow(object)),
+ minv=attr(object, "minv"),
+ coverage=attr(object, "coverage"))), class="summary.clamtest")
+}
Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog 2011-09-27 12:50:02 UTC (rev 1901)
+++ branches/2.0/inst/ChangeLog 2011-09-27 13:22:42 UTC (rev 1902)
@@ -4,6 +4,9 @@
Version 2.0-1 (opened September 8, 2011)
+ * r1897, 1840, 1825, 1823: copy clamtest (at r1897),
+ summary.clamtest, print.summary.clamtest, plot.clamtest (all at
+ r1823) and clamtest.Rd (at r1897)
* merge r1872: raupcrick doc fixes.
* merge r1869: meandist re-ordering bug fix.
* merge r1846: tweak speed (a bit).
Copied: branches/2.0/man/clamtest.Rd (from rev 1901, pkg/vegan/man/clamtest.Rd)
===================================================================
--- branches/2.0/man/clamtest.Rd (rev 0)
+++ branches/2.0/man/clamtest.Rd 2011-09-27 13:22:42 UTC (rev 1902)
@@ -0,0 +1,149 @@
+\name{clamtest}
+\alias{clamtest}
+\alias{summary.clamtest}
+\alias{plot.clamtest}
+
+\title{
+Multinomial Species Classification Method (CLAM)
+}
+
+\description{
+The CLAM statistical approach for classifying generalists and
+specialists in two distinct habitats is described in Chazdon et al. (2011).
+}
+\usage{
+clamtest(comm, groups, coverage.limit = 10, specialization = 2/3,
+ npoints = 20, alpha = 0.05/20)
+\method{summary}{clamtest}(object, ...)
+\method{plot}{clamtest}(x, xlab, ylab, main, pch = 21:24, col.points = 1:4,
+ col.lines = 2:4, lty = 1:3, position = "bottomright", ...)
+}
+\arguments{
+ \item{comm}{
+Community matrix, consisting of counts.
+}
+ \item{groups}{
+A vector identifying the two habitats. Must have exactly
+two unique values or levels.
+}
+ \item{coverage.limit}{
+Integer, below this limit the sample coverage based correction
+is applied to rare species. Sample coverage is calculated separately
+for the two habitats. Sample relative abundances are used for species
+with higher than or equal to \code{coverage.limit} total counts per habitat.
+}
+ \item{specialization}{
+Numeric, specialization threshold value between 0 and 1.
+The value of \eqn{2/3} represents \sQuote{supermajority} rule,
+while a value of \eqn{1/2} represents a \sQuote{simple majority} rule
+to assign shared species as habitat specialists.
+}
+ \item{npoints}{
+Integer, number of points used to determine the boundary lines
+in the plots.
+}
+\item{alpha}{ Numeric, nominal significance level for individual
+ tests. The default value reduces the conventional limit of
+ \eqn{0.05} to account for overdispersion and multiple testing for
+ several species simultaneously. However, the is no firm reason for
+ exactly this limit. }
+ \item{x, object}{
+Fitted model object of class \code{"clamtest"}.
+}
+ \item{xlab, ylab}{
+Labels for the plot axes.
+}
+ \item{main}{
+Main title of the plot.
+}
+ \item{pch, col.points}{
+Symbols and colors used in plotting species groups.
+}
+ \item{lty, col.lines}{
+Line types and colors for boundary lines in plot to separate species groups.
+}
+ \item{position}{
+Position of figure legend, see \code{\link{legend}} for specification details.
+Legend not shown if \code{position = NULL}.
+}
+ \item{\dots}{
+Additional arguments passed to methods.
+}
+}
+
+\details{ The method uses a multinomial model based on estimated
+ species relative abundance in two habitats (A, B). It minimizes bias
+ due to differences in sampling intensities between two habitat types
+ as well as bias due to insufficient sampling within each
+ habitat. The method permits a robust statistical classification of
+ habitat specialists and generalists, without excluding rare species
+ \emph{a priori} (Chazdon et al. 2011). Based on a user-defined
+ \code{specialization} threshold, the model classifies species into
+ one of four groups: (1) generalists; (2) habitat A specialists; (3)
+ habitat B specialists; and (4) too rare to classify with confidence.
+ }
+
+\value{ A data frame (with class attribute \code{"clamtest"}),
+ with columns:
+ \itemize{
+ \item{\code{Species}:}{ species name (column names from \code{comm}),}
+ \item{\code{Total_*A*}:}{ total count in habitat A,}
+ \item{\code{Total_*B*}:}{ total count in habitat B,}
+ \item{\code{Classes}:}{ species classification, a factor with
+ levels \code{Generalist}, \code{Specialist_*A*},
+ \code{Specialist_*B*}, and \code{Too_rare}.}
+}
+ \code{*A*} and \code{*B*} are placeholders for habitat names/labels found in the
+ data.
+
+The \code{summary} method returns descriptive statistics of the results.
+The \code{plot} method returns values invisibly and produces a bivariate
+scatterplot of species total abundances in the two habitats. Symbols and
+boundary lines are shown for species groups.
+}
+\references{
+Chazdon, R. L., Chao, A., Colwell, R. K., Lin, S.-Y., Norden, N.,
+Letcher, S. G., Clark, D. B., Finegan, B. and Arroyo J. P.(2011).
+A novel statistical method for classifying habitat
+generalists and specialists. \emph{Ecology} \bold{92}, 1332--1343.
+}
+\author{
+Peter Solymos \email{solymos at ualberta.ca}
+}
+\note{
+The code was tested against standalone CLAM software provided
+on the website of Anne Chao (\url{http://chao.stat.nthu.edu.tw/softwarece.html});
+minor inconsistencies were found, especially for finding the
+threshold for 'too rare' species.
+These inconsistencies are probably due to numerical differences between the
+two implementation. The current \R implementation uses
+root finding for iso-lines instead of iterative search.
+
+The original method (Chazdon et al. 2011) has two major problems:
+\enumerate{
+
+ \item It assumes that the error distribution is multinomial. This is
+ a justified choice if individuals are freely distributed, and
+ there is no over-dispersion or clustering of individuals. In most
+ ecological data, the variance is much higher than multinomial
+ assumption, and therefore test statistic are too optimistic.
+
+ \item The original authors suggest that multiple testing adjustment
+ for multiple testing should be based on the number of points
+ (\code{npoints}) used to draw the critical lines on the plot,
+ whereas the adjustment should be based on the number tests (i.e,
+ tested species). The function uses the same numerical values as
+ the original paper, but there is no automatic connection between
+ \code{npoints} and \code{alpha} arguments, but you must work out
+ the adjustment yourself.
+}
+}
+\examples{
+data(mite)
+data(mite.env)
+x <- clamtest(mite, mite.env$Shrub=="None", alpha=0.005)
+summary(x)
+head(x)
+plot(x)
+}
+\keyword{ htest }
More information about the Vegan-commits
mailing list