[Vegan-commits] r2499 - in pkg/vegan: . R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun May 5 17:58:34 CEST 2013
Author: jarioksa
Date: 2013-05-05 17:58:34 +0200 (Sun, 05 May 2013)
New Revision: 2499
Added:
pkg/vegan/R/veganMahatrans.R
Modified:
pkg/vegan/DESCRIPTION
pkg/vegan/NAMESPACE
pkg/vegan/R/bioenv.default.R
pkg/vegan/R/bioenv.formula.R
pkg/vegan/R/print.bioenv.R
pkg/vegan/inst/ChangeLog
pkg/vegan/man/bioenv.Rd
pkg/vegan/man/vegan-internal.Rd
Log:
implement Mahalanobis, Manhattan and Gower metrics for bioenv
Modified: pkg/vegan/DESCRIPTION
===================================================================
--- pkg/vegan/DESCRIPTION 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/DESCRIPTION 2013-05-05 15:58:34 UTC (rev 2499)
@@ -1,7 +1,7 @@
Package: vegan
Title: Community Ecology Package
-Version: 2.1-29
-Date: April 19, 2013
+Version: 2.1-30
+Date: May 5, 2013
Author: Jari Oksanen, F. Guillaume Blanchet, Roeland Kindt, Pierre Legendre,
Peter R. Minchin, R. B. O'Hara, Gavin L. Simpson, Peter Solymos,
M. Henry H. Stevens, Helene Wagner
Modified: pkg/vegan/NAMESPACE
===================================================================
--- pkg/vegan/NAMESPACE 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/NAMESPACE 2013-05-05 15:58:34 UTC (rev 2499)
@@ -58,7 +58,7 @@
## export(ade2vegancca, orderingKM, ordiArgAbsorber, ordiArrowMul,
## ordiGetData, ordimedian, ordiNAexclude, ordiNApredict,
## ordiParseFormula, ordiTerminfo, pregraphKM, simpleRDA2, varpart2,
-## varpart3, varpart4, veganCovEllipse)
+## varpart3, varpart4, veganCovEllipse, veganMahatrans)
## Registration of S3 methods
import(stats)
Modified: pkg/vegan/R/bioenv.default.R
===================================================================
--- pkg/vegan/R/bioenv.default.R 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/R/bioenv.default.R 2013-05-05 15:58:34 UTC (rev 2499)
@@ -1,8 +1,14 @@
`bioenv.default` <-
function (comm, env, method = "spearman", index = "bray", upto = ncol(env),
- trace = FALSE, partial = NULL, parallel = getOption("mc.cores"),
+ trace = FALSE, partial = NULL,
+ metric = c("euclidean", "mahalanobis", "manhattan", "gower"),
+ parallel = getOption("mc.cores"),
...)
{
+ metric <- match.arg(metric)
+ method <- match.arg(method, eval(formals(cor)$method))
+ if (any(sapply(env, is.factor)) && metric != "gower")
+ stop("you have factors in 'env': only 'metric = \"gower\"' is allowed")
if (is.null(partial)) {
corfun <- function(dx, dy, dz, method) {
cor(dx, dy, method=method)
@@ -26,13 +32,31 @@
n <- ncol(env)
ntake <- 2^n - 1
ndone <- 0
+ upto <- min(upto, n)
if (n > 8 || trace) {
if (upto < n)
cat("Studying", nall <- sum(choose(n, 1:upto)), "of ")
cat(ntake, "possible subsets (this may take time...)\n")
flush.console()
}
- x <- scale(env)
+ ## Check metric and adapt data and distance function
+ if (metric == "euclidean") {
+ x <- scale(env, scale = TRUE)
+ distfun <- function(x) dist(x)
+ } else if (metric == "mahalanobis") {
+ x <- as.matrix(scale(env, scale = FALSE))
+ distfun <- function(x) dist(veganMahatrans(x))
+ } else if (metric == "gower") {
+ require(cluster) ||
+ stop("package 'cluster' needed for factor variables in 'env'")
+ x <- env
+ distfun <- function(x) daisy(x, metric = "gower")
+ } else if (metric == "manhattan") {
+ x <- decostand(env, "range")
+ distfun <- function(x) dist(x, "manhattan")
+ } else {
+ stop("unknown metric")
+ }
best <- list()
if (inherits(comm, "dist")) {
comdis <- comm
@@ -77,18 +101,19 @@
if (isParal && nrow(sets) >= CLUSLIM*nclus) {
if (isMulticore) {
est <- unlist(mclapply(1:nrow(sets), function(j)
- corfun(comdis, dist(x[,sets[j, ]]), partial,
- method = method),
+ corfun(comdis,
+ distfun(x[,sets[j,],drop = FALSE]),
+ partial, method = method),
mc.cores = parallel))
} else {
est <- parSapply(parallel, 1:nrow(sets), function(j)
- corfun(comdis, dist(x[,sets[j, ]]), partial,
- method = method))
+ corfun(comdis, distfun(x[,sets[j,],drop = FALSE]),
+ partial, method = method))
}
} else {
est <- sapply(1:nrow(sets), function(j)
- corfun(comdis, dist(x[,sets[j, ]]), partial,
- method = method))
+ corfun(comdis, distfun(x[,sets[j,], drop=FALSE ]),
+ partial, method = method))
}
best[[i]] <- list(best = sets[which.max(est), ], est = max(est))
if (trace) {
@@ -98,8 +123,9 @@
flush.console()
}
}
- out <- list(names = colnames(env), method = method, index = index,
- upto = upto, models = best, partial = partpart)
+ out <- list(names = colnames(env), method = method, index = index,
+ metric = metric, upto = upto, models = best,
+ partial = partpart)
out$call <- match.call()
out$call[[1]] <- as.name("bioenv")
class(out) <- "bioenv"
Modified: pkg/vegan/R/bioenv.formula.R
===================================================================
--- pkg/vegan/R/bioenv.formula.R 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/R/bioenv.formula.R 2013-05-05 15:58:34 UTC (rev 2499)
@@ -7,12 +7,7 @@
comm <- formula[[2]]
comm <- eval(comm, data, parent.frame())
formula[[2]] <- NULL
- mf <- model.frame(formula, data, na.action = NULL)
- if (any(sapply(mf, function(x) is.factor(x) || !is.numeric(x))))
- stop("bioenv applies only to numeric variables")
- env <- attr(mf, "terms")
- attr(env, "intercept") <- 0
- env <- model.matrix(env, mf)
+ env <- model.frame(formula, data, na.action = NULL)
out <- bioenv(comm, env, ...)
out$formula <- fla
out$call <- match.call()
Modified: pkg/vegan/R/print.bioenv.R
===================================================================
--- pkg/vegan/R/print.bioenv.R 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/R/print.bioenv.R 2013-05-05 15:58:34 UTC (rev 2499)
@@ -1,11 +1,12 @@
-"print.bioenv" <-
+`print.bioenv` <-
function (x, ...)
{
cat("\nCall:\n")
cat(deparse(x$call), "\n")
cat("\nSubset of environmental variables with best correlation to community data.\n\n")
- cat("Correlations: ", x$method, "\n")
- cat("Dissimilarities: ", x$index, "\n\n")
+ cat("Correlations: ", x$method, "\n")
+ cat("Dissimilarities:", x$index, "\n")
+ cat("Metric: ", x$metric, "\n\n")
i <- which.max(lapply(x$models, function(tmp) tmp$est))
cat("Best model has", i, "parameters (max.", x$upto, "allowed):\n")
cat(paste(x$names[x$models[[i]]$best], collapse = " "))
Added: pkg/vegan/R/veganMahatrans.R
===================================================================
--- pkg/vegan/R/veganMahatrans.R (rev 0)
+++ pkg/vegan/R/veganMahatrans.R 2013-05-05 15:58:34 UTC (rev 2499)
@@ -0,0 +1,20 @@
+### Internal function for Mahalanobis transformation of the matrix.
+### Mahalanobis transformation of matrix X is M = X S^(-1/2) where S
+### is the covariance matrix. The inverse square root of S is found
+### via eigen decomposition S = G L G^T, where G is the matrix of
+### eigenvectors, and L is the diagonal matrix of eigenvalues. Thus
+### S^(-1/2) = G L^(-1/2) G^T. This is an internal function so that
+### input must be correct: 'x' must be a centred matrix (not a
+### data.frame, not raw data).
+`veganMahatrans` <-
+ function (x, s2, tol = 1e-8)
+{
+ n <- nrow(x)
+ if (missing(s2))
+ s2 <- cov(x)
+ e <- eigen(s2)
+ k <- e$values > tol
+ sisqr <- e$vectors[,k, drop=FALSE] %*%
+ (sqrt(1/e$values[k]) * t(e$vectors[,k, drop = FALSE]))
+ x %*% sisqr
+}
Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/inst/ChangeLog 2013-05-05 15:58:34 UTC (rev 2499)
@@ -2,8 +2,23 @@
VEGAN DEVEL VERSIONS at http://r-forge.r-project.org/
-Version 2.1-29 (opened April 19, 2013)
+Version 2.1-30 (opened May 5, 2013)
+ * bioenv: can now use Mahalanobis, Manhattan and Gower distances
+ for environmental variables. The Mahalanobis distances are based
+ on orthogonalized data, Manhattan distances give the direct sum of
+ differences of environmental variables, and Gower distances can
+ also handle factor variables. This involves adding internal
+ function veganMahatrans() for Mahalanobis transformation. The
+ change was triggered by a recent email by Lydia Beaudrot (UC
+ Davis) to implement Mahalanobis distances, and in the same I
+ also implemented Robby Marotte's suggestion of using Gower
+ distances (vegan Forum item in R-Forge in July 2012). The output
+ is changed to show the 'metric' and the name of the 'method' is
+ fully expanded. No more fails if 'upto' is too large.
+
+Version 2.1-29 (closed April 19, 2013)
+
* ordisurf: significant changes were made to this function:
- The default for `method` and `select` were changed to `"REML"`
Modified: pkg/vegan/man/bioenv.Rd
===================================================================
--- pkg/vegan/man/bioenv.Rd 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/man/bioenv.Rd 2013-05-05 15:58:34 UTC (rev 2499)
@@ -14,6 +14,7 @@
\usage{
\method{bioenv}{default}(comm, env, method = "spearman", index = "bray",
upto = ncol(env), trace = FALSE, partial = NULL,
+ metric = c("euclidean", "mahalanobis", "manhattan", "gower"),
parallel = getOption("mc.cores"), ...)
\method{bioenv}{formula}(formula, data, ...)
}
@@ -30,6 +31,8 @@
\item{trace}{Trace the calculations }
\item{partial}{Dissimilarities partialled out when inspecting
variables in \code{env}.}
+ \item{metric}{Metric used for distances of environmental distances. See
+ Details.}
\item{parallel}{Number of parallel processes or a predefined socket
cluster. With \code{parallel = 1} uses ordinary, non-parallel
processing. The parallel processing is done with \pkg{parallel}
@@ -37,17 +40,33 @@
\item{...}{Other arguments passed to \code{\link{cor}}.}
}
\details{
+
The function calculates a community dissimilarity matrix using
\code{\link{vegdist}}. Then it selects all possible subsets of
environmental variables, \code{\link{scale}}s the variables, and
calculates Euclidean distances for this subset using
- \code{\link{dist}}. Then it finds the correlation between community
- dissimilarities and environmental distances, and for each size of
- subsets, saves the best result.
- There are \eqn{2^p-1} subsets of \eqn{p} variables, and an exhaustive
- search may take a very, very, very long time (parameter \code{upto} offers a
- partial relief).
+ \code{\link{dist}}. The function finds the correlation between
+ community dissimilarities and environmental distances, and for each
+ size of subsets, saves the best result. There are \eqn{2^p-1}
+ subsets of \eqn{p} variables, and an exhaustive search may take a
+ very, very, very long time (parameter \code{upto} offers a partial
+ relief).
+ The argument \code{metric} defines distances in the given set of
+ environmental variables. With \code{metric = "euclidean"}, the
+ variables are scaled to unit variance and Euclidean distances are
+ calculated. With \code{metric = "mahalanobis"}, the Mahalanobis
+ distances are calculated: in addition to scaling to unit variance,
+ the matrix of the current set of environmental variables is also
+ made orthogonal (uncorrelated). With \code{metric = "manhanttan"},
+ the variables are scaled to unit range and Manhattan distances are
+ calculated, so that the distances are sums of differences of
+ environmental variables. With \code{metric = "gower"}, the Gower
+ distances are calculated using function
+ \code{\link[cluster]{daisy}}. This allows also using factor
+ variables, but with continuous variables the results are equal to
+ \code{metric = "manhattan"}.
+
The function can be called with a model \code{\link{formula}} where
the LHS is the data matrix and RHS lists the environmental variables.
The formula interface is practical in selecting or transforming
Modified: pkg/vegan/man/vegan-internal.Rd
===================================================================
--- pkg/vegan/man/vegan-internal.Rd 2013-04-29 08:00:00 UTC (rev 2498)
+++ pkg/vegan/man/vegan-internal.Rd 2013-05-05 15:58:34 UTC (rev 2499)
@@ -11,6 +11,7 @@
\alias{ordiArgAbsorber}
\alias{veganCovEllipse}
\alias{hierParseFormula}
+\alias{veganMahatrans}
\title{Internal vegan functions}
@@ -32,6 +33,7 @@
permuted.index(n, strata)
pasteCall(call, prefix = "Call:")
veganCovEllipse(cov, center = c(0, 0), scale = 1, npoints = 100)
+veganMahatrans(x, s2, tol = 1e-8)
hierParseFormula(formula, data)
}
@@ -82,6 +84,12 @@
\code{veganCovEllipse} finds the coordinates for drawing a
covariance ellipse.
+ \code{veganMahatrans} transforms data matrix so that its Euclidean
+ distances are Mahalanobis distances. The input data \code{x} must be
+ a matrix centred by columns, and \code{s2} its covariance matrix. If
+ \code{s2} is not given, covariance matrix is found from \code{x}
+ within the function.
+
\code{hierParseFormula} returns a list of one matrix (left hand side)
and a model frame with factors representing hierarchy levels
(right hand side) to be used in \code{\link{adipart}},
More information about the Vegan-commits
mailing list