[Vegan-commits] r2108 - in branches/2.0: R inst man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Feb 26 16:07:30 CET 2012
Author: jarioksa
Date: 2012-02-26 16:07:29 +0100 (Sun, 26 Feb 2012)
New Revision: 2108
Modified:
branches/2.0/R/print.monoMDS.R
branches/2.0/R/simper.R
branches/2.0/R/vegdist.R
branches/2.0/inst/ChangeLog
branches/2.0/man/monoMDS.Rd
branches/2.0/man/scores.Rd
branches/2.0/man/vegdist.Rd
branches/2.0/src/vegdist.c
Log:
merge r2100 (print.summary.simper), r2101 (monoMDS.Rd), r2103 (simper permutation), r2104 (add Cao dissimilarity), r2105 (-pedantic fix in Cao index), r2106 (scores and vegdist doc updates and fixes)
Modified: branches/2.0/R/print.monoMDS.R
===================================================================
--- branches/2.0/R/print.monoMDS.R 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/R/print.monoMDS.R 2012-02-26 15:07:29 UTC (rev 2108)
@@ -40,9 +40,12 @@
}
`scores.monoMDS` <-
- function(x, ...)
+ function(x, choices = NA, ...)
{
- x$points
+ if (any(is.na(choices)))
+ x$points
+ else
+ x$points[, choices, drop = FALSE]
}
`plot.monoMDS` <-
Modified: branches/2.0/R/simper.R
===================================================================
--- branches/2.0/R/simper.R 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/R/simper.R 2012-02-26 15:07:29 UTC (rev 2108)
@@ -53,7 +53,7 @@
}
perm.contr[ ,p] <- colMeans(contrp) * 100
}
- p <- (apply(apply(perm.contr, 2, function(x) x >= average), 1, sum) + 1) / (permutations + 1)
+ p <- (apply(apply(perm.contr, 2, function(x) x >= average), 1, sum) + 1) / (nperm + 1)
}
else {
p <- NULL
@@ -117,7 +117,7 @@
function(x, digits = attr(x, "digits"), ...)
{
signif.stars <- getOption("show.signif.stars") && attr(x, "permutations") > 0
- starprint <- function(z, ...) {
+ starprint <- function(z) {
if (signif.stars && any(z$p < 0.1)) {
stars <- symnum(z$p, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " "))
@@ -125,8 +125,11 @@
}
z
}
- out <- lapply(x, starprint, digits = digits, ...)
- print(out)
+ out <- lapply(x, starprint)
+ for (nm in names(out)) {
+ cat("\nContrast:", nm, "\n\n")
+ print(out[[nm]], digits = digits, ...)
+ }
if (signif.stars && any(sapply(x, function(z) z$p) < 0.1)) {
leg <- attr(symnum(1, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " ")), "legend")
@@ -136,4 +139,3 @@
cat("P-values based on", np, "permutations\n")
invisible(x)
}
-
Modified: branches/2.0/R/vegdist.R
===================================================================
--- branches/2.0/R/vegdist.R 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/R/vegdist.R 2012-02-26 15:07:29 UTC (rev 2108)
@@ -7,7 +7,7 @@
method <- "euclidean"
METHODS <- c("manhattan", "euclidean", "canberra", "bray",
"kulczynski", "gower", "morisita", "horn", "mountford",
- "jaccard", "raup", "binomial", "chao", "altGower")
+ "jaccard", "raup", "binomial", "chao", "altGower", "cao")
method <- pmatch(method, METHODS)
inm <- METHODS[method]
if (is.na(method))
@@ -15,19 +15,23 @@
if (method == -1)
stop("ambiguous distance method")
if (method > 2 && any(rowSums(x, na.rm = TRUE) == 0))
- warning("you have empty rows: their dissimilarities may be meaningless in method ", inm,"\n")
+ warning("you have empty rows: their dissimilarities may be meaningless in method ",
+ dQuote(inm))
if (method > 2 && any(x < 0, na.rm = TRUE))
- warning("results may be meaningless because data have negative entries in method ", inm,"\n")
+ warning("results may be meaningless because data have negative entries in method ",
+ dQuote(inm))
if (method == 11 && any(colSums(x) == 0))
- warning("data have empty species which influence the results im method ", inm, "\n")
+ warning("data have empty species which influence the results im method ",
+ dQuote(inm))
if (method == 6) # gower, but no altGower
x <- decostand(x, "range", 2, na.rm = TRUE, ...)
if (binary)
x <- decostand(x, "pa")
N <- nrow(x <- as.matrix(x))
- if (method %in% c(7, 13) && !identical(all.equal(as.integer(x),
+ if (method %in% c(7, 13, 15) && !identical(all.equal(as.integer(x),
as.vector(x)), TRUE))
- warning("results may be meaningless with non-integer data in method ", inm, "\n")
+ warning("results may be meaningless with non-integer data in method ",
+ dQuote(inm))
d <- .C("veg_distance", x = as.double(x), nr = N, nc = ncol(x),
d = double(N * (N - 1)/2), diag = as.integer(FALSE),
method = as.integer(method), NAOK = na.rm, PACKAGE = "vegan")$d
Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/inst/ChangeLog 2012-02-26 15:07:29 UTC (rev 2108)
@@ -5,6 +5,13 @@
Version 2.0-3 (opened November 13, 2011)
* copy simper.R & simper.Rd at r2092.
+ * merge r2106: merge scores and vegdist doc updates from github.
+ * merge r2105: pacify -pedantic C compiler in vegdist.
+ * merge r2104: add Cao index in vegdist.
+ * merge r2103: fix bug in permutation p-values in simper.
+ * merge r2101: fix stress description and remove refs to
+ ecodist::nmds in monoMDS.Rd.
+ * merge r2100: fix print.summary.simper.
* merge r2098: fix summary.simper(..., order=FALSE) when
permutations = 0.
* merge r2097: add print.summary.simper.
Modified: branches/2.0/man/monoMDS.Rd
===================================================================
--- branches/2.0/man/monoMDS.Rd 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/man/monoMDS.Rd 2012-02-26 15:07:29 UTC (rev 2108)
@@ -18,7 +18,7 @@
threshold = 0.8, maxit = 200, weakties = TRUE, stress = 1,
scaling = TRUE, pc = TRUE, smin = 0.00001, sfgrmin = 0.00001,
sratmax=0.99999, ...)
-\method{scores}{monoMDS}(x, ...)
+\method{scores}{monoMDS}(x, choices = NA, ...)
\method{plot}{monoMDS}(x, choices = c(1,2), type = "t", ...)
}
@@ -57,8 +57,10 @@
\code{sratmax} (but is still \eqn{< 1}).}
\item{x}{A \code{monoMDS} result.}
- \item{choices}{Dimensions plotted.}
+ \item{choices}{Dimensions returned or plotted. The default \code{NA}
+ returns all dimensions. }
+
\item{type}{The type of the plot: \code{"t"} for text, \code{"p"}
for points, and \code{"n"} for none.}
@@ -77,9 +79,8 @@
species and dissimilarities are tied to their maximum value of
one. Breaking ties allows these points to be at different
distances and can help in recovering very long coenoclines
- (gradients). Functions \code{\link[smacof]{smacofSym}}
- (\pkg{smacof} package) and \code{\link[ecodist]{nmds}}
- (\pkg{ecodist} package) also have adequate tie treatment.
+ (gradients). Function \code{\link[smacof]{smacofSym}}
+ (\pkg{smacof} package) also has adequate tie treatment.
\item Handles missing values in a meaningful way.
@@ -99,22 +100,21 @@
\deqn{s^2 = \frac{\sum (d - \hat d)^2}{\sum(d - d_0)^2}}{stress^2 = sum (d-dhat)^2/ sum (d-dnull)^2}
- where \eqn{d} are the observed dissimilarities, \eqn{\hat d}{dhat}
- are the fitted ordination distances, and \eqn{d_0}{dnull} are the
- ordination distances under null model. For \dQuote{stress 1}
- \eqn{d_0 = 0}{dnull = 0}, and for \dQuote{stress 2} \eqn{d_0 =
- \bar{d}}{dnull = dbar} or mean dissimilarities. \dQuote{Stress 2}
- can be expressed as \eqn{s^2 = 1 - R^2}{stress^2 = 1 - R2}, where
- \eqn{R^2}{R2} is squared correlation between fitted values and
+ where \eqn{d} are distances among points in ordination configuration,
+ \eqn{\hat d}{dhat} are the fitted ordination distances, and
+ \eqn{d_0}{dnull} are the ordination distances under null model. For
+ \dQuote{stress 1} \eqn{d_0 = 0}{dnull = 0}, and for \dQuote{stress 2}
+ \eqn{d_0 = \bar{d}}{dnull = dbar} or mean distances. \dQuote{Stress 2}
+ can be expressed as \eqn{s^2 = 1 - R^2}{stress^2 = 1 - R2},
+ where\eqn{R^2}{R2} is squared correlation between fitted values and
ordination distances, and so related to the \dQuote{linear fit} of
\code{\link{stressplot}}.
Function \code{monoMDS} can fit several alternative NMDS variants
that can be selected with argument \code{model}. The default
\code{model = "global"} fits global NMDS, or Kruskal's (1964a,b)
- original NMDS similar to \code{\link[MASS]{isoMDS}} (\pkg{MASS}),
- \code{\link[smacof]{smacofSym}} (\pkg{smacof}) or
- \code{\link[ecodist]{nmds}} (\pkg{ecodist}). Alternative
+ original NMDS similar to \code{\link[MASS]{isoMDS}} (\pkg{MASS})
+ or \code{\link[smacof]{smacofSym}} (\pkg{smacof}). Alternative
\code{model = "local"} fits local NMDS where independent monotone
regression is used for each point (Sibson 1972). Alternative
\code{model = "linear"} fits a linear MDS. This fits a linear
@@ -183,9 +183,9 @@
}
\seealso{ \code{\link[vegan]{metaMDS}} for the \pkg{vegan} way of
- running NMDS, and \code{\link[MASS]{isoMDS}},
- \code{\link[smacof]{smacofSym}}, \code{\link[ecodist]{nmds}} for
- some alternative implementations of NMDS. }
+ running NMDS, and \code{\link[MASS]{isoMDS}} and
+ \code{\link[smacof]{smacofSym}} for some alternative implementations
+ of NMDS. }
\examples{
data(dune)
Modified: branches/2.0/man/scores.Rd
===================================================================
--- branches/2.0/man/scores.Rd 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/man/scores.Rd 2012-02-26 15:07:29 UTC (rev 2108)
@@ -4,10 +4,18 @@
\alias{scores.lda}
\title{ Get Species or Site Scores from an Ordination }
+
\description{
- Function to access either species or site scores for specified axes in
- some ordination methods.
+ Function to access either species or site scores for specified axes
+ in some ordination methods. The \code{scores} function is generic in
+ \pkg{vegan}, and \pkg{vegan} ordination functions have their own
+ \code{scores} functions that are documented separately with the
+ method (see e.g. \code{\link{scores.cca}},
+ \code{\link{scores.metaMDS}}, \code{\link{scores.decorana}}). This
+ help file documents the default \code{scores} method that is only
+ used for non-\pkg{vegan} ordination objects.
}
+
\usage{
\method{scores}{default}(x, choices, display=c("sites", "species"), ...)
}
@@ -37,12 +45,11 @@
analysis.
The \code{scores.default} function is used to extract scores from
- non-\pkg{vegan} ordination results. Most standard ordination
- methods of libraries \pkg{mva}, \pkg{multiv} and \pkg{MASS} do not
- have a specific \code{class}, and no specific method can be written
- for them. However, \code{scores.default} guesses where some
- commonly used functions keep their site scores and possible species
- scores.
+ non-\pkg{vegan} ordination results. Many standard ordination
+ methods of libraries do not have a specific \code{class}, and no
+ specific method can be written for them. However,
+ \code{scores.default} guesses where some commonly used functions
+ keep their site scores and possible species scores.
If \code{x} is a matrix, \code{scores.default} returns the chosen
columns of that matrix, ignoring whether species or sites were
@@ -57,12 +64,15 @@
\author{Jari Oksanen }
\seealso{
- \code{\link{scores.cca}}, \code{\link{scores.decorana}}. These have
- somewhat different interface -- \code{\link{scores.cca}} in
- particular -- but all work with keywords \code{display="sites"} and
- return a matrix. However, they may also return a list of matrices,
- and some other \code{scores} methods will have quite different
- arguments.
+ Specific \code{scores} functions include (but are not limited to)
+ \code{\link{scores.cca}}, \code{\link{scores.rda}},
+ \code{\link{scores.decorana}}, \code{\link{scores.envfit}},
+ \code{\link{scores.metaMDS}}, \code{\link{scores.monoMDS}} and
+ \code{\link{scores.pcnm}}. These have somewhat different interface
+ -- \code{\link{scores.cca}} in particular -- but all work with
+ keywords \code{display="sites"} and return a matrix. However, they
+ may also return a list of matrices, and some other \code{scores}
+ methods will have quite different arguments.
}
\examples{
Modified: branches/2.0/man/vegdist.Rd
===================================================================
--- branches/2.0/man/vegdist.Rd 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/man/vegdist.Rd 2012-02-26 15:07:29 UTC (rev 2108)
@@ -13,7 +13,7 @@
Gower, Bray--Curtis, Jaccard and
Kulczynski indices are good in detecting underlying
ecological gradients (Faith et al. 1987). Morisita, Horn--Morisita,
- Binomial and Chao
+ Binomial, Cao and Chao
indices should be able to handle different sample sizes (Wolda 1981,
Krebs 1999, Anderson & Millar 2004),
and Mountford (1962) and Raup-Crick indices for presence--absence data should
@@ -27,8 +27,8 @@
\item{method}{Dissimilarity index, partial match to \code{"manhattan"},
\code{"euclidean"}, \code{"canberra"}, \code{"bray"}, \code{"kulczynski"},
\code{"jaccard"}, \code{"gower"}, \code{"altGower"}, \code{"morisita"},
- \code{"horn"}, \code{"mountford"}, \code{"raup"} , \code{"binomial"} or
- \code{"chao"}.}
+ \code{"horn"}, \code{"mountford"}, \code{"raup"} , \code{"binomial"},
+ \code{"chao"} or \code{"cao"}.}
\item{binary}{Perform presence/absence standardization before analysis
using \code{\link{decostand}}.}
\item{diag}{Compute diagonals. }
@@ -116,6 +116,15 @@
\cr
\tab where \eqn{n_i = x_{ij} + x_{ik}}{n[i] = x[ij] + x[ik]}
\cr \tab binary: \eqn{\log(2) \times (A+B-2J)}{log(2)*(A+B-2*J)}
+ \cr
+ \code{cao}
+ \tab \eqn{d_{jk} = \frac{1}{S} \sum_i \log
+ \left(\frac{n_i}{2}\right) - (x_{ij} \log(x_{ik}) + x_{ik}
+ \log(x_{ij}))/n_i}{d[jk] = (1/S) * sum(log(n[i]/2) -
+ (x[ij]*log(x[ik]) + x[ik]*log(x[ij]))/n[i])},
+ \cr
+ \tab where \eqn{S} is the number of species in compared sites and
+ \eqn{n_i = x_{ij}+x_{ik}}{n[i] = x[ij] + x[ik]}
}
Jaccard index is computed as \eqn{2B/(1+B)}, where \eqn{B} is
@@ -126,6 +135,18 @@
handle variable sample sizes. The index does not have a fixed upper
limit, but can vary among sites with no shared species. For further
discussion, see Anderson & Millar (2004).
+
+ Cao index or CYd index (Cao et al. 1997) was suggested as a minimally
+ biased index for high beta diversity and variable sampling intensity.
+ Cao index does not have a fixed upper limit, but can vary among sites
+ with no shared species. The index is intended for count (integer)
+ data, and it is undefined for zero abundances; these are replaced with
+ arbitrary value \eqn{0.1} following Cao et al. (1997). Cao et
+ al. (1997) used \eqn{\log_{10}}{log10}, but the current function uses
+ natural logarithms so that the values are approximately \eqn{2.30}
+ times higher than with 10-based logarithms. Anderson & Thompson (2004)
+ give an alternative formulation of Cao index to highlight its
+ relationship with Binomial index (above).
Mountford index is defined as \eqn{M = 1/\alpha} where \eqn{\alpha}
is the parameter of Fisher's logseries assuming that the compared
@@ -223,10 +244,18 @@
Zealand. \emph{Journal of Experimental Marine Biology and Ecology}
305, 191--221.
- Anderson, M.J., Ellingsen, K.E. & McArdle, B.H. (2006) Multivariate
+ Anderson, M.J., Ellingsen, K.E. & McArdle, B.H. (2006). Multivariate
dispersion as a measure of beta diversity. \emph{Ecology Letters}
9, 683--693.
+ Anderson, M.J & Thompson, A.A. (2004). Multivariate control charts for
+ ecological and environmental monitoring. \emph{Ecological
+ Applications} 14, 1921--1935.
+
+ Cao, Y., Williams, W.P. & Bark, A.W. (1997). Similarity measure bias
+ in river benthic Auswuchs community analysis. \emph{Water
+ Environment Research} 69, 95--106.
+
Chao, A., Chazdon, R. L., Colwell, R. K. and Shen, T. (2005). A new
statistical approach for assessing similarity of species composition
with incidence and abundance data. \emph{Ecology Letters} 8, 148--159.
Modified: branches/2.0/src/vegdist.c
===================================================================
--- branches/2.0/src/vegdist.c 2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/src/vegdist.c 2012-02-26 15:07:29 UTC (rev 2108)
@@ -43,6 +43,7 @@
#define MILLAR 12
#define CHAO 13
#define GOWERDZ 14
+#define CAO 15
#define MATCHING 50
#define NOSHARED 99
@@ -526,6 +527,46 @@
return dist;
}
+/* veg_cao implements Cao index (CYd) of Cao Y, Williams WP, Bark AW:
+ * Water Envir Res 69, 95-106; 1997. Anderson MJ & Thompson AA: Ecol
+ * Appl 14, 1921-1935; 2004 use different but equal formulation.
+ */
+
+double veg_cao(double *x, int nr, int nc, int i1, int i2)
+{
+ double dist, x1, x2, t1, t2;
+ int count, j;
+
+ count = 0;
+ dist = 0;
+ for (j=0; j<nc; j++, i1 += nr, i2 += nr) {
+ if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
+ /* skip the rest of the loop if both species are
+ absent */
+ if (x[i1] == 0 && x[i2] == 0) continue;
+ /* Cao uses arbitrary value of 0.1 for zeros to avoid
+ log(0). Obviously this indicates the use of counts
+ (integer), but we accept non-integer data (with a
+ warning in R) and put the truncation to the same 0.1
+ to avoid discontinuities with non-integer data */
+ x1 = (x[i1] < 0.1) ? 0.1 : x[i1];
+ x2 = (x[i2] < 0.1) ? 0.1 : x[i2];
+ t1 = x1 + x2;
+ /* Cao et al. used log10, but we do not and so our
+ results are log(10) = 2.302585 times higher */
+ t2 = x1 * log(x2) + x2 * log(x1);
+ dist += log(t1) - M_LN2 - t2/t1;
+ count++;
+ }
+ }
+ if (count==0) return NA_REAL;
+ if (dist < 0)
+ dist = 0;
+ dist /= (double)count;
+ return dist;
+}
+
+
/* veg_noshared is not a proper dissimilarity index, but a pretty
* useless helper function. It returns 1 when there are no shared
* species, and 0 if two sites have at least one shared species, and
@@ -626,6 +667,9 @@
case GOWERDZ:
distfun = veg_gowerDZ;
break;
+ case CAO:
+ distfun = veg_cao;
+ break;
case MATCHING:
distfun = veg_matching;
break;
More information about the Vegan-commits
mailing list