[Vegan-commits] r2104 - in pkg/vegan: R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Feb 23 15:37:52 CET 2012
Author: jarioksa
Date: 2012-02-23 15:37:51 +0100 (Thu, 23 Feb 2012)
New Revision: 2104
Modified:
pkg/vegan/R/vegdist.R
pkg/vegan/man/vegdist.Rd
pkg/vegan/src/vegdist.c
Log:
merge Cao index (CYd) in vegdist
Modified: pkg/vegan/R/vegdist.R
===================================================================
--- pkg/vegan/R/vegdist.R 2012-02-22 08:05:56 UTC (rev 2103)
+++ pkg/vegan/R/vegdist.R 2012-02-23 14:37:51 UTC (rev 2104)
@@ -7,7 +7,7 @@
method <- "euclidean"
METHODS <- c("manhattan", "euclidean", "canberra", "bray",
"kulczynski", "gower", "morisita", "horn", "mountford",
- "jaccard", "raup", "binomial", "chao", "altGower")
+ "jaccard", "raup", "binomial", "chao", "altGower", "cao")
method <- pmatch(method, METHODS)
inm <- METHODS[method]
if (is.na(method))
@@ -15,19 +15,23 @@
if (method == -1)
stop("ambiguous distance method")
if (method > 2 && any(rowSums(x, na.rm = TRUE) == 0))
- warning("you have empty rows: their dissimilarities may be meaningless in method ", inm,"\n")
+ warning("you have empty rows: their dissimilarities may be meaningless in method ",
+ dQuote(inm))
if (method > 2 && any(x < 0, na.rm = TRUE))
- warning("results may be meaningless because data have negative entries in method ", inm,"\n")
+ warning("results may be meaningless because data have negative entries in method ",
+ dQuote(inm))
if (method == 11 && any(colSums(x) == 0))
- warning("data have empty species which influence the results im method ", inm, "\n")
+ warning("data have empty species which influence the results im method ",
+ dQuote(inm))
if (method == 6) # gower, but no altGower
x <- decostand(x, "range", 2, na.rm = TRUE, ...)
if (binary)
x <- decostand(x, "pa")
N <- nrow(x <- as.matrix(x))
- if (method %in% c(7, 13) && !identical(all.equal(as.integer(x),
+ if (method %in% c(7, 13, 15) && !identical(all.equal(as.integer(x),
as.vector(x)), TRUE))
- warning("results may be meaningless with non-integer data in method ", inm, "\n")
+ warning("results may be meaningless with non-integer data in method ",
+ dQuote(inm))
d <- .C("veg_distance", x = as.double(x), nr = N, nc = ncol(x),
d = double(N * (N - 1)/2), diag = as.integer(FALSE),
method = as.integer(method), NAOK = na.rm, PACKAGE = "vegan")$d
Modified: pkg/vegan/man/vegdist.Rd
===================================================================
--- pkg/vegan/man/vegdist.Rd 2012-02-22 08:05:56 UTC (rev 2103)
+++ pkg/vegan/man/vegdist.Rd 2012-02-23 14:37:51 UTC (rev 2104)
@@ -13,7 +13,7 @@
Gower, Bray--Curtis, Jaccard and
Kulczynski indices are good in detecting underlying
ecological gradients (Faith et al. 1987). Morisita, Horn--Morisita,
- Binomial and Chao
+ Binomial, Cao and Chao
indices should be able to handle different sample sizes (Wolda 1981,
Krebs 1999, Anderson & Millar 2004),
and Mountford (1962) and Raup-Crick indices for presence--absence data should
@@ -27,8 +27,8 @@
\item{method}{Dissimilarity index, partial match to \code{"manhattan"},
\code{"euclidean"}, \code{"canberra"}, \code{"bray"}, \code{"kulczynski"},
\code{"jaccard"}, \code{"gower"}, \code{"altGower"}, \code{"morisita"},
- \code{"horn"}, \code{"mountford"}, \code{"raup"} , \code{"binomial"} or
- \code{"chao"}.}
+ \code{"horn"}, \code{"mountford"}, \code{"raup"} , \code{"binomial"},
+ \code{"chao"} or \code{"cao"}.}
\item{binary}{Perform presence/absence standardization before analysis
using \code{\link{decostand}}.}
\item{diag}{Compute diagonals. }
@@ -116,6 +116,15 @@
\cr
\tab where \eqn{n_i = x_{ij} + x_{ik}}{n[i] = x[ij] + x[ik]}
\cr \tab binary: \eqn{\log(2) \times (A+B-2J)}{log(2)*(A+B-2*J)}
+ \cr
+ \code{cao}
+ \tab \eqn{d_{jk} = \frac{1}{S} \sum_i \left[ \log
+ \left(\frac{n_i}{2}\right) - (x_{ij} \log(x_{ik}) + x_{ik}
+ \log(x_{ij}))/n_i \right]}{d[jk] = (1/S) * sum(log(n[i]/2) -
+ (x[ij]*log(x[ik]) + x[ik]*log(x[ij]))/n[i])},
+ \cr
+ \tab where \eqn{S} is the number of species in compared sites and
+ \eqn{n_i = x_{ij}+x_{ik}}{n[i] = x[ij] + x[ik]}
}
Jaccard index is computed as \eqn{2B/(1+B)}, where \eqn{B} is
@@ -126,6 +135,18 @@
handle variable sample sizes. The index does not have a fixed upper
limit, but can vary among sites with no shared species. For further
discussion, see Anderson & Millar (2004).
+
+ Cao index or CYd index (Cao et al. 1997) was suggested as a minimally
+ biased index for high beta diversity and variable sampling intensity.
+ Cao index does not have a fixed upper limit, but can vary among sites
+ with no shared species. The index is intended for count (integer)
+ data, and it is undefined for zero abundances; these are replaced with
+ an arbitrary value \eqn{0.1} following Cao et al. (1997). Cao et
+ al. (1997) used \eqn{\log_{10}}{log10}, but the current function uses
+ natural logarithms so that the values are approximately \eqn{2.30}
+ times higher than with 10-based logarithms. Anderson & Thompson (2004)
+ give an alternative formulation of Cao index to highlight its
+ relationship with Binomial index (above).
Mountford index is defined as \eqn{M = 1/\alpha} where \eqn{\alpha}
is the parameter of Fisher's logseries assuming that the compared
@@ -223,10 +244,18 @@
Zealand. \emph{Journal of Experimental Marine Biology and Ecology}
305, 191--221.
- Anderson, M.J., Ellingsen, K.E. & McArdle, B.H. (2006) Multivariate
+ Anderson, M.J., Ellingsen, K.E. & McArdle, B.H. (2006). Multivariate
dispersion as a measure of beta diversity. \emph{Ecology Letters}
9, 683--693.
+ Anderson, M.J. & Thompson, A.A. (2004). Multivariate control charts for
+ ecological and environmental monitoring. \emph{Ecological
+ Applications} 14, 1921--1935.
+
+ Cao, Y., Williams, W.P. & Bark, A.W. (1997). Similarity measure bias
+ in river benthic Aufwuchs community analysis. \emph{Water
+ Environment Research} 69, 95--106.
+
Chao, A., Chazdon, R. L., Colwell, R. K. and Shen, T. (2005). A new
statistical approach for assessing similarity of species composition
with incidence and abundance data. \emph{Ecology Letters} 8, 148--159.
Modified: pkg/vegan/src/vegdist.c
===================================================================
--- pkg/vegan/src/vegdist.c 2012-02-22 08:05:56 UTC (rev 2103)
+++ pkg/vegan/src/vegdist.c 2012-02-23 14:37:51 UTC (rev 2104)
@@ -43,6 +43,7 @@
#define MILLAR 12
#define CHAO 13
#define GOWERDZ 14
+#define CAO 15
#define MATCHING 50
#define NOSHARED 99
@@ -526,6 +527,46 @@
return dist;
}
+/* veg_cao implements Cao index (CYd) of Cao Y, Williams WP, Bark AW:
+ * Water Envir Res 69, 95-106; 1997. Anderson MJ & Thompson AA: Ecol
+ * Appl 14, 1921-1935; 2004 use a different but equivalent formulation.
+ */
+
+double veg_cao(double *x, int nr, int nc, int i1, int i2)
+{
+ double dist, x1, x2, t1, t2, t3, tlog;
+ int count, j;
+
+ count = 0;
+ dist = 0;
+ for (j=0; j<nc; j++, i1 += nr, i2 += nr) {
+ if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
+ /* skip the rest of the loop if both species are
+ absent */
+ if (x[i1] == 0 && x[i2] == 0) continue;
+ /* Cao uses arbitrary value of 0.1 for zeros to avoid
+ log(0). Obviously this indicates the use of counts
+ (integer), but we accept non-integer data (with a
+ warning in R) and put the truncation to the same 0.1
+ to avoid discontinuities with non-integer data */
+ x1 = (x[i1] < 0.1) ? 0.1 : x[i1];
+ x2 = (x[i2] < 0.1) ? 0.1 : x[i2];
+ t1 = x1 + x2;
+ /* Cao et al. used log10, but we do not and so our
+ results are log(10) = 2.302585 times higher */
+ t2 = x1 * log(x2) + x2 * log(x1);
+ dist += log(t1) - M_LN2 - t2/t1;
+ count++;
+ }
+ }
+ if (count==0) return NA_REAL;
+ if (dist < 0)
+ dist = 0;
+ dist /= (double)count;
+ return dist;
+}
+
+
/* veg_noshared is not a proper dissimilarity index, but a pretty
* useless helper function. It returns 1 when there are no shared
* species, and 0 if two sites have at least one shared species, and
@@ -626,6 +667,9 @@
case GOWERDZ:
distfun = veg_gowerDZ;
break;
+ case CAO:
+ distfun = veg_cao;
+ break;
case MATCHING:
distfun = veg_matching;
break;
More information about the Vegan-commits
mailing list