[Vegan-commits] r2108 - in branches/2.0: R inst man src

Sun Feb 26 16:07:30 CET 2012

Author: jarioksa
Date: 2012-02-26 16:07:29 +0100 (Sun, 26 Feb 2012)
New Revision: 2108

Modified:
   branches/2.0/R/print.monoMDS.R
   branches/2.0/R/simper.R
   branches/2.0/R/vegdist.R
   branches/2.0/inst/ChangeLog
   branches/2.0/man/monoMDS.Rd
   branches/2.0/man/scores.Rd
   branches/2.0/man/vegdist.Rd
   branches/2.0/src/vegdist.c
Log:
merge r2100 (print.summary.simper), r2101 (monoMDS.Rd), r2103 (simper permutation), r2104 (add Cao dissimilarity), r2105 (-pedantic fix in Cao index), r2106 (scores and vegdist doc updates and fixes)

Modified: branches/2.0/R/print.monoMDS.R
===================================================================

--- branches/2.0/R/print.monoMDS.R	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/R/print.monoMDS.R	2012-02-26 15:07:29 UTC (rev 2108)
@@ -40,9 +40,12 @@
 }
 
 `scores.monoMDS` <-
-    function(x, ...)
+    function(x, choices = NA, ...)
 {
-    x$points
+    if (any(is.na(choices)))
+        x$points
+    else
+        x$points[, choices, drop = FALSE]
 }
 
 `plot.monoMDS` <-

Modified: branches/2.0/R/simper.R
===================================================================
--- branches/2.0/R/simper.R	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/R/simper.R	2012-02-26 15:07:29 UTC (rev 2108)
@@ -53,7 +53,7 @@
                 }
                 perm.contr[ ,p] <- colMeans(contrp) * 100
             }
-        p <- (apply(apply(perm.contr, 2, function(x) x >= average), 1, sum) + 1) / (permutations + 1)
+        p <- (apply(apply(perm.contr, 2, function(x) x >= average), 1, sum) + 1) / (nperm + 1)
         } 
         else {
           p <- NULL
@@ -117,7 +117,7 @@
     function(x, digits = attr(x, "digits"), ...)
 {
     signif.stars <- getOption("show.signif.stars") && attr(x, "permutations") > 0
-    starprint <- function(z, ...) {
+    starprint <- function(z) {
         if (signif.stars && any(z$p < 0.1)) {
             stars <- symnum(z$p, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                             symbols = c("***", "**", "*", ".", " "))
@@ -125,8 +125,11 @@
         }
         z
     }
-    out <- lapply(x, starprint, digits = digits, ...)
-    print(out)
+    out <- lapply(x, starprint)
+    for (nm in names(out)) {
+        cat("\nContrast:", nm, "\n\n")
+        print(out[[nm]], digits = digits, ...)
+    }
     if (signif.stars && any(sapply(x, function(z) z$p) < 0.1)) {
         leg <- attr(symnum(1, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                             symbols = c("***", "**", "*", ".", " ")), "legend")
@@ -136,4 +139,3 @@
         cat("P-values based on", np, "permutations\n")
     invisible(x)
 }
-

Modified: branches/2.0/R/vegdist.R
===================================================================
--- branches/2.0/R/vegdist.R	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/R/vegdist.R	2012-02-26 15:07:29 UTC (rev 2108)
@@ -7,7 +7,7 @@
         method <- "euclidean"
     METHODS <- c("manhattan", "euclidean", "canberra", "bray", 
                  "kulczynski", "gower", "morisita", "horn", "mountford", 
-                 "jaccard", "raup", "binomial", "chao", "altGower")
+                 "jaccard", "raup", "binomial", "chao", "altGower", "cao")
     method <- pmatch(method, METHODS)
     inm <- METHODS[method]
     if (is.na(method)) 
@@ -15,19 +15,23 @@
     if (method == -1) 
         stop("ambiguous distance method")
     if (method > 2 && any(rowSums(x, na.rm = TRUE) == 0)) 
-        warning("you have empty rows: their dissimilarities may be meaningless in method ", inm,"\n")
+        warning("you have empty rows: their dissimilarities may be meaningless in method ",
+                dQuote(inm))
     if (method > 2 && any(x < 0, na.rm = TRUE)) 
-        warning("results may be meaningless because data have negative entries in method ", inm,"\n")
+        warning("results may be meaningless because data have negative entries in method ",
+                dQuote(inm))
     if (method == 11 && any(colSums(x) == 0)) 
-        warning("data have empty species which influence the results im method ", inm, "\n")
+        warning("data have empty species which influence the results im method ",
+                dQuote(inm))
     if (method == 6) # gower, but no altGower
         x <- decostand(x, "range", 2, na.rm = TRUE, ...)
     if (binary) 
         x <- decostand(x, "pa")
     N <- nrow(x <- as.matrix(x))
-    if (method %in% c(7, 13) && !identical(all.equal(as.integer(x), 
+    if (method %in% c(7, 13, 15) && !identical(all.equal(as.integer(x), 
                                                      as.vector(x)), TRUE)) 
-        warning("results may be meaningless with non-integer data in method ", inm, "\n")
+        warning("results may be meaningless with non-integer data in method ",
+                dQuote(inm))
     d <- .C("veg_distance", x = as.double(x), nr = N, nc = ncol(x), 
             d = double(N * (N - 1)/2), diag = as.integer(FALSE), 
             method = as.integer(method), NAOK = na.rm, PACKAGE = "vegan")$d

Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/inst/ChangeLog	2012-02-26 15:07:29 UTC (rev 2108)
@@ -5,6 +5,13 @@
 Version 2.0-3 (opened November 13, 2011)
 
 	* copy simper.R & simper.Rd at r2092.
+	* merge r2106: merge scores and vegdist doc updates from github.
+	* merge r2105: pacify -pedantic C compiler in vegdist.
+	* merge r2104: add Cao index in vegdist.
+	* merge r2103: fix bug in permutation p-values in simper.
+	* merge r2101: fix stress description and remove refs to
+	ecodist::nmds in monoMDS.Rd.
+	* merge r2100: fix print.summary.simper.
 	* merge r2098: fix summary.simper(..., order=FALSE) when
 	permutations = 0.
 	* merge r2097: add print.summary.simper.

Modified: branches/2.0/man/monoMDS.Rd
===================================================================
--- branches/2.0/man/monoMDS.Rd	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/man/monoMDS.Rd	2012-02-26 15:07:29 UTC (rev 2108)
@@ -18,7 +18,7 @@
     threshold = 0.8, maxit = 200, weakties = TRUE, stress = 1,
     scaling = TRUE, pc = TRUE, smin = 0.00001, sfgrmin = 0.00001, 
     sratmax=0.99999, ...) 
-\method{scores}{monoMDS}(x, ...)
+\method{scores}{monoMDS}(x, choices = NA, ...)
 \method{plot}{monoMDS}(x, choices = c(1,2), type = "t", ...)
 }
 
@@ -57,8 +57,10 @@
     \code{sratmax} (but is still \eqn{< 1}).}
 
   \item{x}{A \code{monoMDS} result.}
-  \item{choices}{Dimensions plotted.}
 
+  \item{choices}{Dimensions returned or plotted. The default \code{NA}
+    returns all dimensions. }
+
   \item{type}{The type of the plot: \code{"t"} for text, \code{"p"}
     for points, and \code{"n"} for none.}
 
@@ -77,9 +79,8 @@
     species and dissimilarities are tied to their maximum value of
     one. Breaking ties allows these points to be at different
     distances and can help in recovering very long coenoclines
-    (gradients).  Functions \code{\link[smacof]{smacofSym}}
-    (\pkg{smacof} package) and \code{\link[ecodist]{nmds}}
-    (\pkg{ecodist} package) also have adequate tie treatment.
+    (gradients).  Function \code{\link[smacof]{smacofSym}}
+    (\pkg{smacof} package) also has adequate tie treatment.
 
     \item Handles missing values in a meaningful way.
   
@@ -99,22 +100,21 @@
 
 \deqn{s^2 = \frac{\sum (d - \hat d)^2}{\sum(d - d_0)^2}}{stress^2 = sum (d-dhat)^2/ sum (d-dnull)^2}
 
-  where \eqn{d} are the observed dissimilarities, \eqn{\hat d}{dhat}
-  are the fitted ordination distances, and \eqn{d_0}{dnull} are the
-  ordination distances under null model. For \dQuote{stress 1}
-  \eqn{d_0 = 0}{dnull = 0}, and for \dQuote{stress 2} \eqn{d_0 =
-  \bar{d}}{dnull = dbar} or mean dissimilarities.  \dQuote{Stress 2}
-  can be expressed as \eqn{s^2 = 1 - R^2}{stress^2 = 1 - R2}, where
-  \eqn{R^2}{R2} is squared correlation between fitted values and
+  where \eqn{d} are distances among points in ordination configuration,
+  \eqn{\hat d}{dhat} are the fitted ordination distances, and
+  \eqn{d_0}{dnull} are the ordination distances under null model.  For
+  \dQuote{stress 1} \eqn{d_0 = 0}{dnull = 0}, and for \dQuote{stress 2}
+  \eqn{d_0 = \bar{d}}{dnull = dbar} or mean distances. \dQuote{Stress 2}
+  can be expressed as \eqn{s^2 = 1 - R^2}{stress^2 = 1 - R2},
+  where \eqn{R^2}{R2} is squared correlation between fitted values and
   ordination distances, and so related to the \dQuote{linear fit} of
   \code{\link{stressplot}}.
 
   Function \code{monoMDS} can fit several alternative NMDS variants
   that can be selected with argument \code{model}.  The default
   \code{model = "global"} fits global NMDS, or Kruskal's (1964a,b)
-  original NMDS similar to \code{\link[MASS]{isoMDS}} (\pkg{MASS}),
-  \code{\link[smacof]{smacofSym}} (\pkg{smacof}) or
-  \code{\link[ecodist]{nmds}} (\pkg{ecodist}).  Alternative
+  original NMDS similar to \code{\link[MASS]{isoMDS}} (\pkg{MASS})
+  or \code{\link[smacof]{smacofSym}} (\pkg{smacof}).  Alternative
   \code{model = "local"} fits local NMDS where independent monotone
   regression is used for each point (Sibson 1972).  Alternative
   \code{model = "linear"} fits a linear MDS. This fits a linear
@@ -183,9 +183,9 @@
 }
 
 \seealso{ \code{\link[vegan]{metaMDS}} for the \pkg{vegan} way of
-  running NMDS, and \code{\link[MASS]{isoMDS}},
-  \code{\link[smacof]{smacofSym}}, \code{\link[ecodist]{nmds}} for
-  some alternative implementations of NMDS. }
+  running NMDS, and \code{\link[MASS]{isoMDS}} and
+  \code{\link[smacof]{smacofSym}} for some alternative implementations
+  of NMDS. }
 
 \examples{
 data(dune)

Modified: branches/2.0/man/scores.Rd
===================================================================
--- branches/2.0/man/scores.Rd	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/man/scores.Rd	2012-02-26 15:07:29 UTC (rev 2108)
@@ -4,10 +4,18 @@
 \alias{scores.lda}
 
 \title{ Get Species or Site Scores from an Ordination }
+
 \description{
-  Function to access either species or site scores for specified axes in
-  some ordination methods.
+  Function to access either species or site scores for specified axes
+  in some ordination methods. The \code{scores} function is generic in
+  \pkg{vegan}, and \pkg{vegan} ordination functions have their own
+  \code{scores} functions that are documented separately with the
+  method (see e.g. \code{\link{scores.cca}},
+  \code{\link{scores.metaMDS}}, \code{\link{scores.decorana}}). This
+  help file documents the default \code{scores} method that is only
+  used for non-\pkg{vegan} ordination objects.
 }
+
 \usage{
 \method{scores}{default}(x, choices, display=c("sites", "species"), ...)
 }
@@ -37,12 +45,11 @@
   analysis.
   
   The \code{scores.default} function is used to extract scores from
-  non-\pkg{vegan} ordination results.  Most standard ordination
-  methods of libraries \pkg{mva}, \pkg{multiv} and \pkg{MASS} do not
-  have a specific \code{class}, and no specific method can be written
-  for them.  However, \code{scores.default} guesses where some
-  commonly used functions keep their site scores and possible species
-  scores.
+  non-\pkg{vegan} ordination results.  Many standard ordination
+  methods in other packages do not have a specific \code{class}, and no
+  specific method can be written for them.  However,
+  \code{scores.default} guesses where some commonly used functions
+  keep their site scores and possible species scores.
 
   If \code{x} is a matrix, \code{scores.default} returns the chosen
   columns of that matrix, ignoring whether species or sites were
@@ -57,12 +64,15 @@
 \author{Jari Oksanen }
 
 \seealso{
-  \code{\link{scores.cca}}, \code{\link{scores.decorana}}.  These have
-  somewhat different interface -- \code{\link{scores.cca}} in
-  particular -- but all work with keywords \code{display="sites"} and
-  return a matrix. However, they may also return a list of matrices,
-  and some other \code{scores} methods will have quite different
-  arguments.  
+  Specific \code{scores} functions include (but are not limited to)
+  \code{\link{scores.cca}}, \code{\link{scores.rda}},
+  \code{\link{scores.decorana}}, \code{\link{scores.envfit}},
+  \code{\link{scores.metaMDS}}, \code{\link{scores.monoMDS}} and
+  \code{\link{scores.pcnm}}.  These have somewhat different interface
+  -- \code{\link{scores.cca}} in particular -- but all work with
+  keywords \code{display="sites"} and return a matrix. However, they
+  may also return a list of matrices, and some other \code{scores}
+  methods will have quite different arguments.
 }
 
 \examples{

Modified: branches/2.0/man/vegdist.Rd
===================================================================
--- branches/2.0/man/vegdist.Rd	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/man/vegdist.Rd	2012-02-26 15:07:29 UTC (rev 2108)
@@ -13,7 +13,7 @@
   Gower, Bray--Curtis, Jaccard and
   Kulczynski indices are good in detecting underlying
   ecological gradients (Faith et al. 1987). Morisita, Horn--Morisita,
-  Binomial and Chao
+  Binomial, Cao and Chao
   indices should be able to handle different sample sizes (Wolda 1981,
   Krebs 1999, Anderson & Millar 2004),
   and Mountford (1962) and Raup-Crick indices for presence--absence data should
@@ -27,8 +27,8 @@
   \item{method}{Dissimilarity index, partial match to  \code{"manhattan"},
     \code{"euclidean"}, \code{"canberra"}, \code{"bray"}, \code{"kulczynski"},
      \code{"jaccard"}, \code{"gower"}, \code{"altGower"}, \code{"morisita"}, 
-     \code{"horn"}, \code{"mountford"}, \code{"raup"} , \code{"binomial"} or 
-     \code{"chao"}.}
+     \code{"horn"}, \code{"mountford"}, \code{"raup"} , \code{"binomial"}, 
+     \code{"chao"} or \code{"cao"}.}
   \item{binary}{Perform presence/absence standardization before analysis
     using \code{\link{decostand}}.}
   \item{diag}{Compute diagonals. }
@@ -116,6 +116,15 @@
     \cr
     \tab where \eqn{n_i = x_{ij} + x_{ik}}{n[i] = x[ij] + x[ik]}
     \cr \tab binary: \eqn{\log(2) \times (A+B-2J)}{log(2)*(A+B-2*J)}
+    \cr
+    \code{cao}
+    \tab \eqn{d_{jk} = \frac{1}{S} \sum_i \left[ \log
+    \left(\frac{n_i}{2}\right) - (x_{ij} \log(x_{ik}) + x_{ik}
+    \log(x_{ij}))/n_i \right]}{d[jk] = (1/S) * sum(log(n[i]/2) -
+    (x[ij]*log(x[ik]) + x[ik]*log(x[ij]))/n[i])},
+  \cr
+  \tab where \eqn{S} is the number of species in compared sites and
+    \eqn{n_i = x_{ij}+x_{ik}}{n[i] = x[ij] + x[ik]}
   }
 
   Jaccard index is computed as \eqn{2B/(1+B)}, where \eqn{B} is
@@ -126,6 +135,18 @@
   handle variable sample sizes. The index does not have a fixed upper
   limit, but can vary among sites with no shared species. For further
   discussion, see Anderson & Millar (2004).
+
+  Cao index or CYd index (Cao et al. 1997) was suggested as a minimally
+  biased index for high beta diversity and variable sampling intensity.
+  Cao index does not have a fixed upper limit, but can vary among sites
+  with no shared species.  The index is intended for count (integer)
+  data, and it is undefined for zero abundances; these are replaced with
+  arbitrary value \eqn{0.1} following Cao et al. (1997).  Cao et
+  al. (1997) used \eqn{\log_{10}}{log10}, but the current function uses
+  natural logarithms so that the values are approximately \eqn{2.30}
+  times higher than with 10-based logarithms. Anderson & Thompson (2004)
+  give an alternative formulation of Cao index to highlight its
+  relationship with Binomial index (above).
   
   Mountford index is defined as \eqn{M = 1/\alpha} where \eqn{\alpha}
   is the parameter of Fisher's logseries assuming that the compared
@@ -223,10 +244,18 @@
   Zealand.  \emph{Journal of Experimental Marine Biology and Ecology}
   305, 191--221.
 
-  Anderson, M.J., Ellingsen, K.E. & McArdle, B.H. (2006) Multivariate
+  Anderson, M.J., Ellingsen, K.E. & McArdle, B.H. (2006). Multivariate
   dispersion as a measure of beta diversity. \emph{Ecology Letters} 
   9, 683--693.
 
+  Anderson, M.J. & Thompson, A.A. (2004). Multivariate control charts for
+  ecological and environmental monitoring. \emph{Ecological
+    Applications} 14, 1921--1935.
+
+  Cao, Y., Williams, W.P. & Bark, A.W. (1997). Similarity measure bias
+  in river benthic Aufwuchs community analysis. \emph{Water
+  Environment Research} 69, 95--106.
+
   Chao, A., Chazdon, R. L., Colwell, R. K. and Shen, T. (2005). A new
   statistical approach for assessing similarity of species composition
   with incidence and abundance data. \emph{Ecology Letters} 8, 148--159.

Modified: branches/2.0/src/vegdist.c
===================================================================
--- branches/2.0/src/vegdist.c	2012-02-26 14:55:25 UTC (rev 2107)
+++ branches/2.0/src/vegdist.c	2012-02-26 15:07:29 UTC (rev 2108)
@@ -43,6 +43,7 @@
 #define MILLAR 12
 #define CHAO 13
 #define GOWERDZ 14
+#define CAO 15
 #define MATCHING 50
 #define NOSHARED 99
 
@@ -526,6 +527,46 @@
     return dist;
 }
 
+/* veg_cao implements Cao index (CYd) of Cao Y, Williams WP, Bark AW:
+ *   Water Envir Res 69, 95-106; 1997. Anderson MJ & Thompson AA: Ecol
+ *   Appl 14, 1921-1935; 2004 use different but equal formulation.
+ */
+
+double veg_cao(double *x, int nr, int nc, int i1, int i2)
+{
+     double dist, x1, x2, t1, t2;
+     int count, j;
+  
+     count = 0;
+     dist = 0;
+     for (j=0; j<nc; j++, i1 += nr, i2 += nr) {
+	  if (R_FINITE(x[i1]) && R_FINITE(x[i2])) {
+	       /* skip the rest of the loop if both species are
+		  absent */
+	       if (x[i1] == 0 && x[i2] == 0) continue;
+	       /* Cao uses arbitrary value of 0.1 for zeros to avoid
+		  log(0). Obviously this indicates the use of counts
+		  (integer), but we accept non-integer data (with a
+		  warning in R) and put the truncation to the same 0.1
+		  to avoid discontinuities with non-integer data */
+	       x1 = (x[i1] < 0.1) ? 0.1 : x[i1];
+	       x2 = (x[i2] < 0.1) ? 0.1 : x[i2];
+	       t1 = x1 + x2;
+	       /* Cao et al. used log10, but we do not and so our
+		  results are log(10) = 2.302585 times higher */
+	       t2 = x1 * log(x2) + x2 * log(x1);
+	       dist += log(t1) - M_LN2 - t2/t1;
+	       count++;
+	  }
+     }
+     if (count==0) return NA_REAL;
+     if (dist < 0)
+	 dist = 0;
+     dist /= (double)count;
+     return dist;
+}
+
+
 /* veg_noshared is not a proper dissimilarity index, but a pretty
  * useless helper function. It returns 1 when there are no shared
  * species, and 0 if two sites have at least one shared species, and
@@ -626,6 +667,9 @@
     case GOWERDZ:
 	distfun = veg_gowerDZ;
 	break;
+    case CAO:
+        distfun = veg_cao;
+        break;
     case MATCHING:
 	distfun = veg_matching;
 	break;