[Vegan-commits] r864 - in branches/1.15: R inst man

Wed Jun 10 22:12:08 CEST 2009

Author: jarioksa
Date: 2009-06-10 22:12:07 +0200 (Wed, 10 Jun 2009)
New Revision: 864

Modified:
   branches/1.15/R/capscale.R
   branches/1.15/R/ordiGetData.R
   branches/1.15/inst/ChangeLog
   branches/1.15/man/capscale.Rd
Log:
merged fixes on capscale handling of negative eigenvalues to branches/1.15

Modified: branches/1.15/R/capscale.R
===================================================================

--- branches/1.15/R/capscale.R	2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/R/capscale.R	2009-06-10 20:12:07 UTC (rev 864)
@@ -1,7 +1,9 @@
 `capscale` <-
-    function (formula, data, distance = "euclidean", comm = NULL, 
-              add = FALSE, dfun = vegdist, metaMDSdist = FALSE, ...) 
+    function (formula, data, distance = "euclidean", sqrt.dist = FALSE,
+              comm = NULL, add = FALSE, dfun = vegdist,
+              metaMDSdist = FALSE, ...) 
 {
+    EPS <- sqrt(.Machine$double.eps)
     if (!inherits(formula, "formula")) 
         stop("Needs a model formula")
     if (missing(data)) {
@@ -26,15 +28,19 @@
             X <- dfun(X, distance)
         }
     }
+    if (sqrt.dist)
+        X <- sqrt(X)
     inertia <- attr(X, "method")
     if (is.null(inertia))
         inertia <- "unknown"
     inertia <- paste(toupper(substr(inertia, 1, 1)), substr(inertia, 
                                                             2, 256), sep = "")
-    inertia <- paste("squared", inertia, "distance")
+    inertia <- paste(inertia, "distance")
+    if (!sqrt.dist)
+        inertia <- paste("squared", inertia)
     if (add) 
         inertia <- paste(inertia, "(euclidified)")
-    k <- attr(X, "Size") - 1
+    k <- attr(X, "Size") - 1 
     if (max(X) >= 4 + .Machine$double.eps) {
         inertia <- paste("mean", inertia)
         adjust <- 1
@@ -43,17 +49,26 @@
         adjust <- k
     }
     nm <- attr(X, "Labels")
-    X <- cmdscale(X, k = k, eig = TRUE, add = add)
+    ## cmdscale is only used if 'add = TRUE': it cannot properly
+    ## handle negative eigenvalues and therefore we normally use
+    ## wcmdscale. If we have 'add = TRUE' there will be no negative
+    ## eigenvalues and this is not a problem.
+    if (add)
+        X <- cmdscale(X, k = k, eig = TRUE, add = add)
+    else
+        X <- wcmdscale(X, eig = TRUE)
     if (is.null(rownames(X$points))) 
         rownames(X$points) <- nm
     X$points <- adjust * X$points
-    neig <- min(which(X$eig < 0) - 1, k)
+    X$eig <- adjust * X$eig
+    tot.chi <- sum(X$eig) 
+    neig <- min(which(X$eig < 0) - 1, sum(X$eig > EPS))
     sol <- X$points[, 1:neig]
     fla <- update(formula, sol ~ .)
     environment(fla) <- environment()
     d <- ordiParseFormula(fla, data, envdepth = 1)
     sol <- rda.default(d$X, d$Y, d$Z, ...)
-    sol$tot.chi <- sol$tot.chi
+    sol$tot.chi <- tot.chi
     if (!is.null(sol$CCA)) {
         colnames(sol$CCA$u) <- colnames(sol$CCA$biplot) <- names(sol$CCA$eig) <-
             colnames(sol$CCA$wa) <- colnames(sol$CCA$v) <-
@@ -62,6 +77,15 @@
     if (!is.null(sol$CA)) {
         colnames(sol$CA$u) <- names(sol$CA$eig) <- colnames(sol$CA$v) <-
             paste("MDS", 1:ncol(sol$CA$u), sep = "")
+        ## Add negative eigenvalues to the list and update tot.chi
+        poseig <- length(sol$CA$eig)
+        if (any(X$eig < 0)) {
+            negax <- X$eig[X$eig < 0]
+            names(negax) <- paste("NEG", seq_along(negax), sep="")
+            sol$CA$eig <- c(sol$CA$eig, negax)
+            sol$CA$tot.chi <- abs(sum(sol$CA$eig))
+            sol$CA$rank <- length(sol$CA$eig)
+        }
     }
     if (!is.null(comm)) {
         comm <- scale(comm, center = TRUE, scale = FALSE)
@@ -76,7 +100,7 @@
         }
         if (!is.null(sol$CA)) {
             sol$CA$v.eig <- t(comm) %*% sol$CA$u/sqrt(k)
-            sol$CA$v <- sweep(sol$CA$v.eig, 2, sqrt(sol$CA$eig), 
+            sol$CA$v <- sweep(sol$CA$v.eig, 2, sqrt(sol$CA$eig[1:poseig]), 
                               "/")
         }
     }

Modified: branches/1.15/R/ordiGetData.R
===================================================================
--- branches/1.15/R/ordiGetData.R	2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/R/ordiGetData.R	2009-06-10 20:12:07 UTC (rev 864)
@@ -1,10 +1,10 @@
 `ordiGetData` <-
 function (call, env) 
 {
-    call$scale <- call$distance <- call$comm <- call$add <- NULL
+    call$scale <- call$distance <- call$comm <- call$add <-
+        call$sqrt.dist <- NULL
     call$na.action <- na.pass
     call[[2]] <- NULL
     call[[1]] <- as.name("model.frame")
     eval(call, env)
 }
-

Modified: branches/1.15/inst/ChangeLog
===================================================================
--- branches/1.15/inst/ChangeLog	2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/inst/ChangeLog	2009-06-10 20:12:07 UTC (rev 864)
@@ -5,6 +5,9 @@
 
 Version 1.15-3 (opened 28 May, 2009)
 
+	* merged fixes for capscale handling of negative eigenvalues: revs
+	851, 858, 859, 861, 863.
+
 	* merged r850: wcmdscale removes zero eigenvalues, but may keep
 	the last (unlike cmdscale).
 

Modified: branches/1.15/man/capscale.Rd
===================================================================
--- branches/1.15/man/capscale.Rd	2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/man/capscale.Rd	2009-06-10 20:12:07 UTC (rev 864)
@@ -19,8 +19,9 @@
   optionally using extended dissimilarities.
 }
 \usage{
-capscale(formula, data, distance = "euclidean", comm = NULL,
-         add = FALSE,  dfun = vegdist, metaMDSdist = FALSE, ...)
+capscale(formula, data, distance = "euclidean", sqrt.dist = FALSE,
+    comm = NULL, add = FALSE,  dfun = vegdist, metaMDSdist = FALSE,
+    ...)
 }
 
 \arguments{
@@ -40,9 +41,11 @@
     or \code{\link{cca}}. This allows the use of partial CAP.}
   \item{data}{ Data frame containing the variables on the right hand side of
     the model formula. }
-  \item{distance}{Dissimilarity (or distance) index  in
-    \code{\link{vegdist}} used if the LHS of the \code{formula} is a
-    data frame instead of dissimilarity matrix. }
+  \item{distance}{The name of the dissimilarity (or distance) index if
+    the LHS of the \code{formula} is a data frame instead of
+    dissimilarity matrix.}
+  \item{sqrt.dist}{Take square roots of dissimilarities. See section
+  \code{Notes} below.}
   \item{comm}{ Community data frame which will be used for finding
     species scores when the LHS of the \code{formula} was a
     dissimilarity matrix. This is not used if the LHS is a data
@@ -141,6 +144,9 @@
   coordinates: a useful method of constrained ordination for
   ecology. \emph{Ecology} 84, 511--525.
 
+  Gower, J.C. (1985). Properties of Euclidean and non-Euclidean
+  distance matrices. \emph{Linear Algebra and its Applications} 67, 81--97.
+
   Legendre, P. & Anderson, M. J. (1999). Distance-based redundancy
   analysis: testing multispecies responses in multifactorial ecological
   experiments. \emph{Ecological Monographs} 69, 1--24.
@@ -149,27 +155,36 @@
   Edition. Elsevier
 }
 \author{ Jari Oksanen }
-\note{
-  Warnings of negative eigenvalues are issued with most dissimilarity
-  indices.  These are harmless, and negative eigenvalues will be ignored
-  in the analysis.  If the warnings are disturbing, you can use argument
-  \code{add = TRUE} passed to \code{\link{cmdscale}}, or, preferably, a
-  distance measure that does not cause these warnings.  In
-  \code{\link{vegdist}}, \code{method = "jaccard"} gives such an index.
-  Alternatively, after square root transformation many indices do not
-  cause warnings.
-  
-  Function \code{\link{rda}} usually divides the ordination scores by
-  number of sites minus one. In this way, the inertia is variance
-  instead of sum of squares, and the eigenvalues sum up to
-  variance. Many dissimilarity measures are in the range 0 to 1, so they
-  have already made a similar division. If the largest original
+
+\note{ The function produces negative eigenvalues with many
+  dissimilarity indices. The negative eigenvalues are listed after
+  positive unconstrained eigenvalues with prefix \code{NEG}.
+  The total inertia and total unconstrained inertia are sums of all
+  eigenvalues, including negative ones, and the rank is the number of 
+  all nonzero eigenvalues (Gower 1985). No ordination scores are given
+  for negative eigenvalues.  If the negative eigenvalues are
+  disturbing, you can use argument \code{add = TRUE} passed to
+  \code{\link{cmdscale}}, or, preferably, a distance measure that does
+  not cause these warnings.  Alternatively, after square root
+  transformation of distances (argument \code{sqrt.dist = TRUE}) many
+  indices do not produce negative eigenvalues.
+
+  The inertia is named after the dissimilarity index as defined in the
+  dissimilarity data, or as \code{unknown distance} if such an
+  information is missing.  Function \code{\link{rda}} usually divides
+  the ordination scores by number of sites minus one. In this way, the
+  inertia is variance instead of sum of squares, and the eigenvalues sum
+  up to variance. Many dissimilarity measures are in the range 0 to 1,
+  so they have already made a similar division. If the largest original
   dissimilarity is less than or equal to 4 (allowing for
   \code{\link{stepacross}}), this division is undone in \code{capscale}
-  and original dissimilarities are used. The inertia is named
-  \code{squared dissimilarity} (as defined in the dissimilarity matrix),
-  but keyword \code{mean} is added to the inertia in cases where
-  division was made, e.g. in Euclidean and Manhattan distances.
+  and original dissimilarities are used. Keyword \code{mean} is added to
+  the inertia in cases where division was made, e.g. in Euclidean and
+  Manhattan distances.  Inertia is based on squared index, and keyword
+  \code{squared} is added to the name of distance, unless data were
+  square root transformed (argument \code{sqrt.dist = TRUE}). If an
+  additive constant was used, keyword \code{euclidified} is added to the
+  the name of inertia (argument \code{add = TRUE}).
 }
 
 
@@ -179,11 +194,18 @@
 \examples{
 data(varespec)
 data(varechem)
+## Basic Analysis
 vare.cap <- capscale(varespec ~ N + P + K + Condition(Al), varechem,
                      dist="bray")
 vare.cap
 plot(vare.cap)
 anova(vare.cap)
+## Avoid negative eigenvalues with additive constant
+capscale(varespec ~ N + P + K + Condition(Al), varechem,
+                     dist="bray", add =TRUE)
+## Avoid negative eigenvalues by taking square roots of dissimilarities
+capscale(varespec ~ N + P + K + Condition(Al), varechem,
+                     dist = "bray", sqrt.dist= TRUE)
 ## Principal coordinates analysis with extended dissimilarities
 capscale(varespec ~ 1, dist="bray", metaMDS = TRUE)
 }