[Vegan-commits] r864 - in branches/1.15: R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Wed Jun 10 22:12:08 CEST 2009
Author: jarioksa
Date: 2009-06-10 22:12:07 +0200 (Wed, 10 Jun 2009)
New Revision: 864
Modified:
branches/1.15/R/capscale.R
branches/1.15/R/ordiGetData.R
branches/1.15/inst/ChangeLog
branches/1.15/man/capscale.Rd
Log:
merged fixes on capscale handling of negative eigenvalues to branches/1.15
Modified: branches/1.15/R/capscale.R
===================================================================
--- branches/1.15/R/capscale.R 2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/R/capscale.R 2009-06-10 20:12:07 UTC (rev 864)
@@ -1,7 +1,9 @@
`capscale` <-
- function (formula, data, distance = "euclidean", comm = NULL,
- add = FALSE, dfun = vegdist, metaMDSdist = FALSE, ...)
+ function (formula, data, distance = "euclidean", sqrt.dist = FALSE,
+ comm = NULL, add = FALSE, dfun = vegdist,
+ metaMDSdist = FALSE, ...)
{
+ EPS <- sqrt(.Machine$double.eps)
if (!inherits(formula, "formula"))
stop("Needs a model formula")
if (missing(data)) {
@@ -26,15 +28,19 @@
X <- dfun(X, distance)
}
}
+ if (sqrt.dist)
+ X <- sqrt(X)
inertia <- attr(X, "method")
if (is.null(inertia))
inertia <- "unknown"
inertia <- paste(toupper(substr(inertia, 1, 1)), substr(inertia,
2, 256), sep = "")
- inertia <- paste("squared", inertia, "distance")
+ inertia <- paste(inertia, "distance")
+ if (!sqrt.dist)
+ inertia <- paste("squared", inertia)
if (add)
inertia <- paste(inertia, "(euclidified)")
- k <- attr(X, "Size") - 1
+ k <- attr(X, "Size") - 1
if (max(X) >= 4 + .Machine$double.eps) {
inertia <- paste("mean", inertia)
adjust <- 1
@@ -43,17 +49,26 @@
adjust <- k
}
nm <- attr(X, "Labels")
- X <- cmdscale(X, k = k, eig = TRUE, add = add)
+ ## cmdscale is only used if 'add = TRUE': it cannot properly
+ ## handle negative eigenvalues and therefore we normally use
+ ## wcmdscale. If we have 'add = TRUE' there will be no negative
+ ## eigenvalues and this is not a problem.
+ if (add)
+ X <- cmdscale(X, k = k, eig = TRUE, add = add)
+ else
+ X <- wcmdscale(X, eig = TRUE)
if (is.null(rownames(X$points)))
rownames(X$points) <- nm
X$points <- adjust * X$points
- neig <- min(which(X$eig < 0) - 1, k)
+ X$eig <- adjust * X$eig
+ tot.chi <- sum(X$eig)
+ neig <- min(which(X$eig < 0) - 1, sum(X$eig > EPS))
sol <- X$points[, 1:neig]
fla <- update(formula, sol ~ .)
environment(fla) <- environment()
d <- ordiParseFormula(fla, data, envdepth = 1)
sol <- rda.default(d$X, d$Y, d$Z, ...)
- sol$tot.chi <- sol$tot.chi
+ sol$tot.chi <- tot.chi
if (!is.null(sol$CCA)) {
colnames(sol$CCA$u) <- colnames(sol$CCA$biplot) <- names(sol$CCA$eig) <-
colnames(sol$CCA$wa) <- colnames(sol$CCA$v) <-
@@ -62,6 +77,15 @@
if (!is.null(sol$CA)) {
colnames(sol$CA$u) <- names(sol$CA$eig) <- colnames(sol$CA$v) <-
paste("MDS", 1:ncol(sol$CA$u), sep = "")
+ ## Add negative eigenvalues to the list and update tot.chi
+ poseig <- length(sol$CA$eig)
+ if (any(X$eig < 0)) {
+ negax <- X$eig[X$eig < 0]
+ names(negax) <- paste("NEG", seq_along(negax), sep="")
+ sol$CA$eig <- c(sol$CA$eig, negax)
+ sol$CA$tot.chi <- abs(sum(sol$CA$eig))
+ sol$CA$rank <- length(sol$CA$eig)
+ }
}
if (!is.null(comm)) {
comm <- scale(comm, center = TRUE, scale = FALSE)
@@ -76,7 +100,7 @@
}
if (!is.null(sol$CA)) {
sol$CA$v.eig <- t(comm) %*% sol$CA$u/sqrt(k)
- sol$CA$v <- sweep(sol$CA$v.eig, 2, sqrt(sol$CA$eig),
+ sol$CA$v <- sweep(sol$CA$v.eig, 2, sqrt(sol$CA$eig[1:poseig]),
"/")
}
}
Modified: branches/1.15/R/ordiGetData.R
===================================================================
--- branches/1.15/R/ordiGetData.R 2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/R/ordiGetData.R 2009-06-10 20:12:07 UTC (rev 864)
@@ -1,10 +1,10 @@
`ordiGetData` <-
function (call, env)
{
- call$scale <- call$distance <- call$comm <- call$add <- NULL
+ call$scale <- call$distance <- call$comm <- call$add <-
+ call$sqrt.dist <- NULL
call$na.action <- na.pass
call[[2]] <- NULL
call[[1]] <- as.name("model.frame")
eval(call, env)
}
-
Modified: branches/1.15/inst/ChangeLog
===================================================================
--- branches/1.15/inst/ChangeLog 2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/inst/ChangeLog 2009-06-10 20:12:07 UTC (rev 864)
@@ -5,6 +5,9 @@
Version 1.15-3 (opened 28 May, 2009)
+ * merged fixes for capscale handling of negative eigenvalues: revs
+ 851, 858, 859, 861, 863.
+
* merged r850: wcmdscale removes zero eigenvalues, but may keep
the last (unlike cmdscale).
Modified: branches/1.15/man/capscale.Rd
===================================================================
--- branches/1.15/man/capscale.Rd 2009-06-10 20:01:29 UTC (rev 863)
+++ branches/1.15/man/capscale.Rd 2009-06-10 20:12:07 UTC (rev 864)
@@ -19,8 +19,9 @@
optionally using extended dissimilarities.
}
\usage{
-capscale(formula, data, distance = "euclidean", comm = NULL,
- add = FALSE, dfun = vegdist, metaMDSdist = FALSE, ...)
+capscale(formula, data, distance = "euclidean", sqrt.dist = FALSE,
+ comm = NULL, add = FALSE, dfun = vegdist, metaMDSdist = FALSE,
+ ...)
}
\arguments{
@@ -40,9 +41,11 @@
or \code{\link{cca}}. This allows the use of partial CAP.}
\item{data}{ Data frame containing the variables on the right hand side of
the model formula. }
- \item{distance}{Dissimilarity (or distance) index in
- \code{\link{vegdist}} used if the LHS of the \code{formula} is a
- data frame instead of dissimilarity matrix. }
+ \item{distance}{The name of the dissimilarity (or distance) index if
+ the LHS of the \code{formula} is a data frame instead of
+ dissimilarity matrix.}
+ \item{sqrt.dist}{Take square roots of dissimilarities. See section
+ \code{Notes} below.}
\item{comm}{ Community data frame which will be used for finding
species scores when the LHS of the \code{formula} was a
dissimilarity matrix. This is not used if the LHS is a data
@@ -141,6 +144,9 @@
coordinates: a useful method of constrained ordination for
ecology. \emph{Ecology} 84, 511--525.
+ Gower, J.C. (1985). Properties of Euclidean and non-Euclidean
+ distance matrices. \emph{Linear Algebra and its Applications} 67, 81--97.
+
Legendre, P. & Anderson, M. J. (1999). Distance-based redundancy
analysis: testing multispecies responses in multifactorial ecological
experiments. \emph{Ecological Monographs} 69, 1--24.
@@ -149,27 +155,36 @@
Edition. Elsevier
}
\author{ Jari Oksanen }
-\note{
- Warnings of negative eigenvalues are issued with most dissimilarity
- indices. These are harmless, and negative eigenvalues will be ignored
- in the analysis. If the warnings are disturbing, you can use argument
- \code{add = TRUE} passed to \code{\link{cmdscale}}, or, preferably, a
- distance measure that does not cause these warnings. In
- \code{\link{vegdist}}, \code{method = "jaccard"} gives such an index.
- Alternatively, after square root transformation many indices do not
- cause warnings.
-
- Function \code{\link{rda}} usually divides the ordination scores by
- number of sites minus one. In this way, the inertia is variance
- instead of sum of squares, and the eigenvalues sum up to
- variance. Many dissimilarity measures are in the range 0 to 1, so they
- have already made a similar division. If the largest original
+
+\note{ The function produces negative eigenvalues with many
+ dissimilarity indices. The negative eigenvalues are listed after
+ positive unconstrained eigenvalues with prefix \code{NEG}.
+ The total inertia and total unconstrained inertia are sums of all
+ eigenvalues, including negative ones, and the rank is the number of
+ all nonzero eigenvalues (Gower 1985). No ordination scores are given
+ for negative eigenvalues. If the negative eigenvalues are
+ disturbing, you can use argument \code{add = TRUE} passed to
+ \code{\link{cmdscale}}, or, preferably, a distance measure that does
+ not cause these warnings. Alternatively, after square root
+ transformation of distances (argument \code{sqrt.dist = TRUE}) many
+ indices do not produce negative eigenvalues.
+
+ The inertia is named after the dissimilarity index as defined in the
+ dissimilarity data, or as \code{unknown distance} if such an
+ information is missing. Function \code{\link{rda}} usually divides
+ the ordination scores by number of sites minus one. In this way, the
+ inertia is variance instead of sum of squares, and the eigenvalues sum
+ up to variance. Many dissimilarity measures are in the range 0 to 1,
+ so they have already made a similar division. If the largest original
dissimilarity is less than or equal to 4 (allowing for
\code{\link{stepacross}}), this division is undone in \code{capscale}
- and original dissimilarities are used. The inertia is named
- \code{squared dissimilarity} (as defined in the dissimilarity matrix),
- but keyword \code{mean} is added to the inertia in cases where
- division was made, e.g. in Euclidean and Manhattan distances.
+ and original dissimilarities are used. Keyword \code{mean} is added to
+ the inertia in cases where division was made, e.g. in Euclidean and
+ Manhattan distances. Inertia is based on squared index, and keyword
+ \code{squared} is added to the name of distance, unless data were
+ square root transformed (argument \code{sqrt.dist = TRUE}). If an
+ additive constant was used, keyword \code{euclidified} is added to the
+ the name of inertia (argument \code{add = TRUE}).
}
@@ -179,11 +194,18 @@
\examples{
data(varespec)
data(varechem)
+## Basic Analysis
vare.cap <- capscale(varespec ~ N + P + K + Condition(Al), varechem,
dist="bray")
vare.cap
plot(vare.cap)
anova(vare.cap)
+## Avoid negative eigenvalues with additive constant
+capscale(varespec ~ N + P + K + Condition(Al), varechem,
+ dist="bray", add =TRUE)
+## Avoid negative eigenvalues by taking square roots of dissimilarities
+capscale(varespec ~ N + P + K + Condition(Al), varechem,
+ dist = "bray", sqrt.dist= TRUE)
## Principal coordinates analysis with extended dissimilarities
capscale(varespec ~ 1, dist="bray", metaMDS = TRUE)
}
More information about the Vegan-commits
mailing list