[Vegan-commits] r2130 - in branches/2.0: R inst man

Sun Apr 15 12:32:03 CEST 2012

Author: jarioksa
Date: 2012-04-15 12:32:03 +0200 (Sun, 15 Apr 2012)
New Revision: 2130

Modified:
   branches/2.0/R/anova.prc.R
   branches/2.0/R/centroids.cca.R
   branches/2.0/R/msoplot.R
   branches/2.0/inst/ChangeLog
   branches/2.0/inst/NEWS.Rd
   branches/2.0/man/adonis.Rd
   branches/2.0/man/anosim.Rd
   branches/2.0/man/anova.cca.Rd
   branches/2.0/man/mrpp.Rd
   branches/2.0/man/mso.Rd
   branches/2.0/man/simper.Rd
Log:
Merge r2121 thru 2129: doc fixes (r2121,3,5), more configurable msoplot (r2127),
anova.prc(..., by=...) fix (r2128), envfit fix for unused factor levels (r2129)


Modified: branches/2.0/R/anova.prc.R
===================================================================

--- branches/2.0/R/anova.prc.R	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/R/anova.prc.R	2012-04-15 10:32:03 UTC (rev 2130)
@@ -15,7 +15,7 @@
                    names(object$call), 0)
         call <- object$call[c(1,m)]
         call$formula <- fla
-        call[[1]] <- as.name("rda.formula")
+        call[[1]] <- as.name("rda")
         object <- eval(call, parent.frame())
         anova(object, ...)
     } else {

Modified: branches/2.0/R/centroids.cca.R
===================================================================
--- branches/2.0/R/centroids.cca.R	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/R/centroids.cca.R	2012-04-15 10:32:03 UTC (rev 2130)
@@ -5,6 +5,7 @@
     if (!any(facts))
         return(NULL)
     mf <- mf[, facts, drop = FALSE]
+    mf <- droplevels(mf)
     if (missing(wt)) 
         wt <- rep(1, nrow(mf))
     ind <- seq_len(nrow(mf))

Modified: branches/2.0/R/msoplot.R
===================================================================
--- branches/2.0/R/msoplot.R	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/R/msoplot.R	2012-04-15 10:32:03 UTC (rev 2130)
@@ -1,5 +1,5 @@
 `msoplot` <-
-    function (x, alpha = 0.05, explained = FALSE, ...) 
+    function (x, alpha = 0.05, explained = FALSE, ylim = NULL, ...) 
 {
     object.cca <- x
     if (is.data.frame(object.cca$vario)) {
@@ -17,19 +17,18 @@
                    "Residual variance", "Explained variance", "Conditioned variance")
         ci.lab <- "C.I. for total variance"
         sign.lab <- if(hasSig) "Sign. autocorrelation" else NULL
-        ## You should not change par, or at least you must put
-        ## back the old values when exiting:
-        ## op <- par(omi = c(0.5, 0.5, 0, 0))
-        ## on.exit(par(op))
-        ##par(omi = c(0.5, 0.5, 0, 0))
         if (is.numeric(object$CCA$rank)) {
             if (!explained) 
                 b <- b - 1
             if (is.numeric(object$vario$se)) 
                 b <- b - 1
-            plot(vario$Dist, vario$All, type = "n", lty = 1, 
-                 pch = 3, xlab = "Distance", ylab = "Variance", 
-                 ylim = c(0, ymax), cex.lab = 1.2, ...)
+            figmat <- cbind(vario$All + z * vario$se,
+                            vario$All - z * vario$se,
+                            vario$Sum,
+                            vario[, 6:(b + 3)])
+            matplot(vario$Dist, cbind(0,figmat), type = "n",
+                    xlab = "Distance", ylab = "Variance",
+                    ylim = ylim, ...)
             lines(vario$Dist, vario$All + z * vario$se, lty = 1, ...)
             lines(vario$Dist, vario$All - z * vario$se, lty = 1, ...)
             lines(vario$Dist, vario$Sum, type = "b", lty = 2, 
@@ -39,24 +38,22 @@
                    lty=c(c(1,2,1,1,1)[2:b], 1, if(hasSig) NA),
                    pch=c(3, (6:(b+3))-6, NA, if(hasSig) 15)
                    )
-            for (i in 6:(b + 3)) {
-                lines(vario$Dist, vario[, i], type = "b", lty = 1, 
-                      pch = i - 6, ...)
-            }
-            text(x = c(vario$Dist), y = rep(0, length(vario$Dist)), 
+            matlines(vario$Dist, figmat[,-c(1:3)], type = "b", lty = 1,
+                     pch = 6:(b+3)-6, ...)
+            text(x = c(vario$Dist), y = par("usr")[3], pos = 3, 
                  label = c(vario$n), cex = 0.8, ...)
-            lines(x = rep(max(object$H)/2, 2), y = c(-10, ymax + 
-                                               10), lty = 3, ...)
+            abline(v = max(object$H/2), lty = 3, ...)
         }
         else {
+            if (is.null(ylim))
+                ylim <- c(0, ymax)
             plot(vario$Dist, vario$All, type = "b", lty = 1, 
-                 pch = 0, xlab = "Distance", ylab = "Variance", 
-                 ylim = c(0, ymax), cex.lab = 1.2, ...)
-            lines(c(0, 10), rep(object$tot.chi, 2), lty = 5, ...)
-            text(x = c(vario$Dist), y = rep(0, length(vario$Dist)), 
+                 pch = 0, xlab = "Distance", ylab = "Variance",
+                 ylim = ylim, ...)
+            abline(h = object$tot.chi, lty = 5, ...)
+            text(x = c(vario$Dist), y = par("usr")[3], pos = 3, 
                  label = c(vario$n), cex = 0.8)
-            lines(x = rep(max(object$H)/2, 2), y = c(-10, ymax + 
-                                               10), lty = 3, ...)
+            abline(v = max(object$H)/2, lty = 3, ...)
             legend("topleft",
                    c("Total variance","Global variance estimate",
                      if(hasSig) "Sign. autocorrelation"),

Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/inst/ChangeLog	2012-04-15 10:32:03 UTC (rev 2130)
@@ -3,6 +3,14 @@
 VEGAN RELEASE VERSIONS at http://cran.r-project.org/
 
 Version 2.0-4 (opened March 9, 2012)
+
+	* merge r2129: envfit failed with empty factor levels.
+	* merge r2128: anova(<prc-object>, by = ...) failed.
+	* merge r2127: more configurable msoplot.
+	* merge r2125: typo in anova.cca.Rd.
+	* merge r2123: r2121 for adonis.
+	* merge r2121: doc location/dispersion mix-up in simper, mrpp &
+	anosim. 
 	
 Version 2.0-3 (released March 3, 2012)
 
@@ -29,7 +37,7 @@
 	* merge r2078, 2084: simper NAMESPACE.
 	* merge r2071,2: dimnames fix in indopower & expand example.
 	* merge r2068: broken url in renyi.Rd.
-	* merge r2065: number of iterations is an arguent in nesteddisc.
+	* merge r2065: number of iterations is an argument in nesteddisc.
 	* merge r2060: adonis tells terms were added sequentially.
 	* mrege r2057: add .Rinstignore to silense R 2.15.0 checks.
 	* merge r2056: use inconsolata fonts in vignettes.

Modified: branches/2.0/inst/NEWS.Rd
===================================================================
--- branches/2.0/inst/NEWS.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/inst/NEWS.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -2,6 +2,30 @@
 \title{vegan News}
 \encoding{UTF-8}
 
+\section{Changes in version 2.0-4}{
+
+  \subsection{BUG FIXES}{
+    \itemize{
+    
+      \item \code{anova(<prc-object>, by = "axis")} and other
+      \code{by} cases failed due to \file{NAMESPACE} issues.
+
+      \item \code{envfit} failed with unused factor levels.
+    
+    }
+  }% end bug fixes
+
+  \subsection{NEW FEATURES}{
+    \itemize{
+
+      \item \code{msoplot} is more configurable, and allows, for
+      instance, setting y-axis limits.
+
+    }
+  } % end new features 	 
+
+}%end version 2.0-4
+
 \section{Changes in version 2.0-3}{
 
   \subsection{NEW FUNCTIONS}{

Modified: branches/2.0/man/adonis.Rd
===================================================================
--- branches/2.0/man/adonis.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/man/adonis.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -137,6 +137,18 @@
     side of the formula.}
   \item{terms}{The \code{\link{terms}} component of the model.}
 }
+
+\note{Anderson (2001, Fig. 4) warns that the method may confound
+  location and dispersion effects: significant differences may be caused
+  by different within-group variation (dispersion) instead of different
+  mean values of the groups (see Warton et al. 2012 for a general
+  analysis). However, it seems that \code{adonis} is less sensitive to
+  dispersion effects than some of its alternatives (\code{\link{anosim}},
+  \code{\link{mrpp}}). Function \code{\link{betadisper}} is a sister
+  function to \code{adonis} to study the differences in dispersion
+  within the same geometric framework.
+}
+
 \references{
 Anderson, M.J. 2001. A new method for non-parametric multivariate
 analysis of variance. \emph{Austral Ecology}, \strong{26}: 32--46.
@@ -157,6 +169,10 @@
 community data: A comment on distance-based redundancy
 analysis. \emph{Ecology}, \strong{82}: 290--297.
 
+Warton, D.I., Wright, T.W., Wang, Y. 2012. Distance-based multivariate
+analyses confound location and dispersion effects. \emph{Methods in
+Ecology and Evolution}, 3, 89--101.
+
 Zapala, M.A. and N.J. Schork. 2006. Multivariate regression analysis of
 distance matrices for testing associations between gene expression
 patterns and related variables. \emph{Proceedings of the National Academy of

Modified: branches/2.0/man/anosim.Rd
===================================================================
--- branches/2.0/man/anosim.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/man/anosim.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -79,15 +79,24 @@
 }
 \references{
   Clarke, K. R. (1993). Non-parametric multivariate analysis of changes
-  in community structure. \emph{Australian Journal of Ecology} 18, 117-143.
+  in community structure. \emph{Australian Journal of Ecology} 18,
+  117--143.
+  
+  Warton, D.I., Wright, T.W., Wang, Y. 2012. Distance-based multivariate
+  analyses confound location and dispersion effects. \emph{Methods in
+  Ecology and Evolution}, 3, 89--101.
+  
 }
 \author{Jari Oksanen, with a help from Peter R. Minchin.}
 \note{
-  I don't quite trust this method.  Somebody should study its
-  performance carefully.  The function returns a lot of information 
-  to ease further scrutiny. Most \code{anosim} models could be analysed
-  with \code{\link{adonis}} which seems to be a more robust alternative.
 
+  The \code{anosim} function can confound the differences between groups
+  and dispersion within groups and the results can be difficult to
+  interpret (cf. Warton et al. 2012).  The function returns a lot of
+  information to ease studying its performance. Most \code{anosim}
+  models could be analysed with \code{\link{adonis}} which seems to be a
+  more robust alternative.
+
 }
 
 \seealso{\code{\link{mrpp}} for a similar function using original

Modified: branches/2.0/man/anova.cca.Rd
===================================================================
--- branches/2.0/man/anova.cca.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/man/anova.cca.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -91,7 +91,7 @@
   respective ranks.  If there are no conditions (\dQuote{partial} terms), the
   sum of all eigenvalues remains constant, so that pseudo-\eqn{F} and
   eigenvalues would give equal results.  In partial CCA/RDA/CAP, the
-  effect of conditioning variables (\dQuote{covariables} is removed before
+  effect of conditioning variables (\dQuote{covariables}) is removed before
   permutation, and these residuals are added to the non-permuted fitted
   values of partial CCA (fitted values of \code{X ~ Z}).  Consequently,
   the total Chi-square is not fixed, and test based on pseudo-\eqn{F}

Modified: branches/2.0/man/mrpp.Rd
===================================================================
--- branches/2.0/man/mrpp.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/man/mrpp.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -49,72 +49,75 @@
   \item{\dots}{Further arguments passed to functions.}
 }
 
-\details{ Multiple Response Permutation Procedure (MRPP) provides a test
-of whether there is a significant difference between two or more groups
-of sampling units. This difference may be one of location (differences
-in mean) or one of spread (differences in within-group
-distance). Function \code{mrpp} operates on a \code{data.frame} matrix
-where rows are observations and responses data matrix. The response(s)
-may be uni- or multivariate. The method is philosophically and
-mathematically allied with analysis of variance, in that it compares
-dissimilarities within and among groups. If two groups of sampling units
-are really different (e.g. in their species composition), then average
-of the within-group compositional dissimilarities ought to be less than
-the average of the dissimilarities between two random collection of
-sampling units drawn from the entire population. 
+\details{
 
-The mrpp statistic \eqn{\delta} is the overall weighted mean of
-within-group means of the pairwise dissimilarities among sampling
-units. The choice of group weights is currently not clear. The
-\code{mrpp} function offers three choices: (1) group size (\eqn{n}), (2) a
-degrees-of-freedom analogue (\eqn{n-1}), and (3) a weight that is the number
-of unique distances calculated among \eqn{n} sampling units (\eqn{n(n-1)/2}).
+  Multiple Response Permutation Procedure (MRPP) provides a test of
+  whether there is a significant difference between two or more groups
+  of sampling units. This difference may be one of location (differences
+  in mean) or one of spread (differences in within-group distance;
+  cf. Warton et al. 2012). Function \code{mrpp} operates on a
+  \code{data.frame} or matrix where rows are observations and columns
+  are responses. The response(s) may be uni- or multivariate. The method
+  is philosophically and mathematically allied with analysis of
+  variance, in that it compares dissimilarities within and among
+  groups. If two groups of sampling units are really different (e.g. in
+  their species composition), then average of the within-group
+  compositional dissimilarities ought to be less than the average of the
+  dissimilarities between two random collection of sampling units drawn
+  from the entire population.
 
-The \code{mrpp} algorithm first calculates all pairwise distances in the
-entire dataset, then calculates \eqn{\delta}. It then permutes the
-sampling units and their associated pairwise distances, and recalculates
-\eqn{\delta} based on the permuted data. It repeats the permutation
-step \code{permutations} times. The significance test is the
-fraction of permuted deltas that are less than the observed delta, with
-a small sample correction. The function also calculates the
-change-corrected within-group agreement
-\eqn{A = 1 -\delta/E(\delta)}, where \eqn{E(\delta)} is the expected
-\eqn{\delta} assessed as the average of dissimilarities.
+  The mrpp statistic \eqn{\delta} is the overall weighted mean of
+  within-group means of the pairwise dissimilarities among sampling
+  units. The choice of group weights is currently not clear. The
+  \code{mrpp} function offers three choices: (1) group size (\eqn{n}),
+  (2) a degrees-of-freedom analogue (\eqn{n-1}), and (3) a weight that
+  is the number of unique distances calculated among \eqn{n} sampling
+  units (\eqn{n(n-1)/2}).
 
-If the first argument \code{dat} can be interpreted as dissimilarities,
-they will be used directly. In other cases the function treats
-\code{dat} as observations, and uses \code{\link{vegdist}} to find the
-dissimilarities.  The default \code{distance} is Euclidean as in the
-traditional use of the method, but other dissimilarities in
-\code{\link{vegdist}} also are available.
+  The \code{mrpp} algorithm first calculates all pairwise distances in
+  the entire dataset, then calculates \eqn{\delta}. It then permutes the
+  sampling units and their associated pairwise distances, and
+  recalculates \eqn{\delta} based on the permuted data. It repeats the
+  permutation step \code{permutations} times. The significance test is
+  the fraction of permuted deltas that are less than the observed delta,
+  with a small sample correction. The function also calculates the
+  chance-corrected within-group agreement \eqn{A = 1 -\delta/E(\delta)},
+  where \eqn{E(\delta)} is the expected \eqn{\delta} assessed as the
+  average of dissimilarities.
 
-Function \code{meandist} calculates a matrix of mean within-cluster
-dissimilarities (diagonal) and between-cluster dissimilarities
-(off-diagonal elements), and an attribute \code{n} of \code{grouping}
-counts. Function \code{summary} finds the within-class, between-class
-and overall means of these dissimilarities, and the MRPP statistics with
-all \code{weight.type} options and the Classification Strength, CS (Van
-Sickle and Hughes, 2000). CS is defined for dissimiliraties as
-\eqn{\bar{B} - \bar{W}}{Bbar-Wbar}, where \eqn{\bar{B}}{Bbar} is the
-mean between cluster dissimilarity and \eqn{\bar{W}}{Wbar} is the mean
-within cluster dissimilarity with \code{weight.type = 1}. The function
-does not perform significance tests for these statistics, but you must
-use \code{mrpp} with appropriate \code{weight.type}. There is currently
-no significance test for CS, but \code{mrpp} with \code{weight.type = 1}
-gives the correct test for \eqn{\bar{W}}{Wbar} and a good approximation
-for CS.  Function \code{plot} draws a dendrogram or a histogram of the
-result matrix based on the within-group and between group
-dissimilarities. The dendrogram is found with the method given in the
-\code{cluster} argument using function \code{\link{hclust}}. The
-terminal segments hang to within-cluster dissimilarity. If some of the
-clusters are more heterogeneous than the combined class, the leaf
-segment are reversed.  The histograms are based on dissimilarites, but
-ore otherwise similar to those of Van Sickle and Hughes (2000):
-horizontal line is drawn at the level of mean between-cluster
-dissimilarity and vertical lines connect within-cluster dissimilarities
-to this line.
-}
+  If the first argument \code{dat} can be interpreted as
+  dissimilarities, they will be used directly. In other cases the
+  function treats \code{dat} as observations, and uses
+  \code{\link{vegdist}} to find the dissimilarities.  The default
+  \code{distance} is Euclidean as in the traditional use of the method,
+  but other dissimilarities in \code{\link{vegdist}} also are available.
 
+  Function \code{meandist} calculates a matrix of mean within-cluster
+  dissimilarities (diagonal) and between-cluster dissimilarities
+  (off-diagonal elements), and an attribute \code{n} of \code{grouping}
+  counts. Function \code{summary} finds the within-class, between-class
+  and overall means of these dissimilarities, and the MRPP statistics
+  with all \code{weight.type} options and the Classification Strength,
+  CS (Van Sickle and Hughes, 2000). CS is defined for dissimilarities as
+  \eqn{\bar{B} - \bar{W}}{Bbar-Wbar}, where \eqn{\bar{B}}{Bbar} is the
+  mean between cluster dissimilarity and \eqn{\bar{W}}{Wbar} is the mean
+  within cluster dissimilarity with \code{weight.type = 1}. The function
+  does not perform significance tests for these statistics, but you must
+  use \code{mrpp} with appropriate \code{weight.type}. There is
+  currently no significance test for CS, but \code{mrpp} with
+  \code{weight.type = 1} gives the correct test for \eqn{\bar{W}}{Wbar}
+  and a good approximation for CS.  Function \code{plot} draws a
+  dendrogram or a histogram of the result matrix based on the
+  within-group and between group dissimilarities. The dendrogram is
+  found with the method given in the \code{cluster} argument using
+  function \code{\link{hclust}}. The terminal segments hang to
+  within-cluster dissimilarity. If some of the clusters are more
+  heterogeneous than the combined class, the leaf segment are reversed.
+  The histograms are based on dissimilarities, but are otherwise similar
+  to those of Van Sickle and Hughes (2000): horizontal line is drawn at
+  the level of mean between-cluster dissimilarity and vertical lines
+  connect within-cluster dissimilarities to this line.  }
+
 \value{
 The function returns a list of class mrpp with following items:
   \item{call }{	Function call.}
@@ -142,7 +145,6 @@
   B. McCune and J. B. Grace. 2002. \emph{Analysis of Ecological
   Communities.} MjM  Software Design, Gleneden Beach, Oregon, USA.
 
-
   P. W. Mielke and K. J. Berry. 2001. \emph{Permutation Methods: A
   Distance  Function Approach.} Springer Series in
   Statistics. Springer.  
@@ -151,6 +153,9 @@
   ecoregions, catchments, and geographic clusters of aquatic vertebrates
   in Oregon. \emph{J. N. Am. Benthol. Soc.} 19:370--384.
 
+  Warton, D.I., Wright, T.W., Wang, Y. 2012. Distance-based multivariate
+  analyses confound location and dispersion effects. \emph{Methods in
+  Ecology and Evolution}, 3, 89--101.
 
 }
 \author{

Modified: branches/2.0/man/mso.Rd
===================================================================
--- branches/2.0/man/mso.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/man/mso.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -14,7 +14,7 @@
 
 \usage{
 mso(object.cca, object.xy, grain = 1, round.up = FALSE, permutations = FALSE)
-msoplot(x, alpha = 0.05, explained = FALSE, ...)
+msoplot(x, alpha = 0.05, explained = FALSE, ylim = NULL, ...)
 }
 \arguments{
   \item{object.cca}{ An object of class cca, created by the \code{\link{cca}} or
@@ -39,6 +39,7 @@
     classes.} 
   \item{explained}{ If false, suppresses the plotting of the variogram
     of explained variance.}
+  \item{ylim}{Limits for y-axis.}
   \item{\dots}{Other arguments passed to functions.}
 }
 \details{

Modified: branches/2.0/man/simper.Rd
===================================================================
--- branches/2.0/man/simper.Rd	2012-04-15 06:04:31 UTC (rev 2129)
+++ branches/2.0/man/simper.Rd	2012-04-15 10:32:03 UTC (rev 2130)
@@ -52,6 +52,15 @@
   the data frames also include the cumulative contributions and
   are ordered by species contribution.
 
+  The results of \code{simper} can be very difficult to interpret. The
+  method very badly confounds the mean between group differences and
+  within group variation, and seems to single out variable species
+  instead of distinctive species (Warton et al. 2012). Even if you make
+  groups that are copies of each other, the method will single out
+  species with high contribution, but these are not contributions
+  to non-existing between-group differences but to within-group
+  variation in species abundance.
+
 }
 
 \value{
@@ -81,6 +90,10 @@
   Clarke, K.R. 1993. Non-parametric multivariate analyses of changes
     in community structure. \emph{Australian Journal of Ecology}, 18,
     117–143.
+
+  Warton, D.I., Wright, T.W., Wang, Y. 2012. Distance-based multivariate
+    analyses confound location and dispersion effects. \emph{Methods in
+    Ecology and Evolution}, 3, 89--101.
 }
 \keyword{multivariate}