[Vegan-commits] r2093 - in branches/2.0: R inst man

Sat Feb 18 11:19:16 CET 2012

Author: jarioksa
Date: 2012-02-18 11:19:16 +0100 (Sat, 18 Feb 2012)
New Revision: 2093

Added:
   branches/2.0/R/simper.R
   branches/2.0/man/simper.Rd
Modified:
   branches/2.0/R/betadisper.R
   branches/2.0/R/scores.default.R
   branches/2.0/inst/ChangeLog
Log:
add simper, merge r2080 (droplevels in betadisper), r2089 (scores.default fix)

Modified: branches/2.0/R/betadisper.R
===================================================================

--- branches/2.0/R/betadisper.R	2012-02-17 13:47:58 UTC (rev 2092)
+++ branches/2.0/R/betadisper.R	2012-02-18 10:19:16 UTC (rev 2093)
@@ -28,8 +28,11 @@
         type <- "median"
     type <- match.arg(type)
     ## checks for groups - need to be a factor for later
-    if(!is.factor(group))
+    if(!is.factor(group)) {
         group <- as.factor(group)
+    } else { ## if already a factor, drop empty levels
+        group <- droplevels(group)
+    }
     n <- attr(d, "Size")
     x <- matrix(0, ncol = n, nrow = n)
     x[row(x) > col(x)] <- d^2

Modified: branches/2.0/R/scores.default.R
===================================================================
--- branches/2.0/R/scores.default.R	2012-02-17 13:47:58 UTC (rev 2092)
+++ branches/2.0/R/scores.default.R	2012-02-18 10:19:16 UTC (rev 2093)
@@ -51,8 +51,10 @@
     }
     if (is.null(colnames(X))) 
         colnames(X) <- paste("Dim", 1:ncol(X), sep = "")
-    if (!missing(choices)) 
+    if (!missing(choices)) {
+        choices <- choices[choices <= ncol(X)]
         X <- X[, choices, drop = FALSE]
+    }
     X <- as.matrix(X)
     X
 }

Copied: branches/2.0/R/simper.R (from rev 2092, pkg/vegan/R/simper.R)
===================================================================
--- branches/2.0/R/simper.R	                        (rev 0)
+++ branches/2.0/R/simper.R	2012-02-18 10:19:16 UTC (rev 2093)
@@ -0,0 +1,102 @@
+`simper` <-
+    function(comm, group, permutations = 0, trace = FALSE,  ...)
+{
+    comm <- as.matrix(comm)
+    comp <- t(combn(unique(as.character(group)), 2))
+    outlist <- NULL
+    ## data parameters
+    P <- ncol(comm)
+    nobs <- nrow(comm)
+    ## Make permutation matrix
+    if (length(permutations) == 1) {
+        perm <- shuffleSet(nobs, permutations, ...)
+    }
+    nperm <- nrow(perm)
+    if (nperm > 0)
+        perm.contr <- matrix(nrow=P, ncol=nperm)
+    for (i in 1:nrow(comp)) {
+        group.a <- comm[group == comp[i, 1], ]
+        group.b <- comm[group == comp[i, 2], ]
+        n.a <- nrow(group.a)
+        n.b <- nrow(group.b)
+        contr <- matrix(ncol = P, nrow = n.a * n.b)
+        for (j in 1:n.b) {
+            for (k in 1:n.a) {
+                md <- abs(group.a[k, ] - group.b[j, ])
+                me <- group.a[k, ] + group.b[j, ]
+                contr[(j-1)*n.a+k, ] <- md / sum(me)	
+            }
+        }
+        average <- colMeans(contr) * 100
+        
+        if(nperm > 0){
+            if (trace)
+                cat("Permuting", paste(comp[i,1], comp[i,2], sep = "_"), "\n")
+            contrp <- matrix(ncol = P, nrow = n.a * n.b)
+            for(p in 1:nperm){
+                groupp <- group[perm[p,]]
+                ga <- comm[groupp == comp[i, 1], ] 
+                gb <- comm[groupp == comp[i, 2], ]
+                for(j in 1:n.b) {
+                    for(k in 1:n.a) {
+                        mdp <- abs(ga[k, ] - gb[j, ])
+                        mep <- ga[k, ] + gb[j, ]
+                        contrp[(j-1)*n.a+k, ] <- mdp / sum(mep)  
+                    }
+                }
+                perm.contr[ ,p] <- colMeans(contrp) * 100
+            }
+        p <- (apply(apply(perm.contr, 2, function(x) x >= average), 1, sum) + 1) / (permutations + 1)
+        } 
+        else {
+          p <- NULL
+        }
+        
+        overall <- sum(average)
+        sdi <- apply(contr, 2, sd)
+        ratio <- average / sdi
+        av.a <- colMeans(group.a)
+        av.b <- colMeans(group.b) 
+        ord <- order(average, decreasing = TRUE)
+        cusum <- cumsum(average[ord] / overall * 100)
+        out <-  list(species = colnames(comm), average = average, overall = overall, sd = sdi, ratio = ratio, ava = av.a, avb = av.b, ord = ord, cusum = cusum, p = p)
+        outlist[[paste(comp[i,1], "_", comp[i,2], sep = "")]] <- out
+    }
+    class(outlist) <- "simper"
+    outlist
+}
+
+`print.simper` <-
+    function(x, ...)
+{
+    cat("cumulative contributions of most influential species:\n\n")
+    cusum <- lapply(x, function(z) z$cusum)
+    spec <- lapply(x, function(z) z$species[z$ord])
+    for (i in 1:length(cusum)) {
+        names(cusum[[i]]) <- spec[[i]]
+    }
+    ## this probably fails with empty or identical groups that have 0/0 = NaN
+    out <- lapply(cusum, function(z) z[seq_len(min(which(z >= 70)))])
+    print(out)
+    invisible(x)
+}
+
+`summary.simper` <-
+    function(object, ordered = TRUE, ...)
+{
+    if (ordered) {
+        out <- lapply(object, function(z) data.frame(contr = z$average, sd = z$sd, ratio = z$ratio, av.a = z$ava, av.b = z$avb)[z$ord, ])
+        cusum <- lapply(object, function(z) z$cusum)
+        for(i in 1:length(out)) {
+            out[[i]]$cumsum <- cusum[[i]]
+            if(!is.null(object[[i]]$p)) {
+                out[[i]]$p <- object[[i]]$p[object[[i]]$ord]
+            }
+        } 
+    } 
+    else {
+        out <- lapply(object, function(z) data.frame(contr = z$average, sd = z$sd, 'contr/sd' = z$ratio, av.a = z$ava, av.b = z$avb, p = z$p))
+    }
+    class(out) <- "summary.simper"
+    out
+}

Modified: branches/2.0/inst/ChangeLog
===================================================================
--- branches/2.0/inst/ChangeLog	2012-02-17 13:47:58 UTC (rev 2092)
+++ branches/2.0/inst/ChangeLog	2012-02-18 10:19:16 UTC (rev 2093)
@@ -4,6 +4,9 @@
 
 Version 2.0-3 (opened November 13, 2011)
 
+	* copy simper.R & simper.Rd at r2092.
+	* merge r2089: scores.default fixed with non-existing scores.
+	* merge r2080: use droplevels in betadisper.
 	* merge r2079: do not use .Internal().
 	* merge r2071,2: dimnames fix in indopower & expand example.
 	* merge r2068: broken url in renyi.Rd.

Copied: branches/2.0/man/simper.Rd (from rev 2092, pkg/vegan/man/simper.Rd)
===================================================================
--- branches/2.0/man/simper.Rd	                        (rev 0)
+++ branches/2.0/man/simper.Rd	2012-02-18 10:19:16 UTC (rev 2093)
@@ -0,0 +1,96 @@
+\encoding{UTF-8}
+\name{simper}
+\alias{simper}
+\alias{summary.simper}
+\title{Similarity Percentages}
+
+\description{
+  Discriminating species between two groups using
+  Bray-Curtis dissimilarities
+}
+
+\usage{
+  simper(comm, group, permutations = 0, trace = FALSE, ...)
+  
+  \method{summary}{simper}(object, ordered = TRUE, ...)
+}
+
+\arguments{
+  \item{comm}{Community data matrix.}
+  \item{group}{Factor describing the group structure. Must have at
+    least 2 levels.}
+  \item{permutations}{Number of permutations or a permutation matrix
+    such as produced by \code{\link[permute]{shuffleSet}} for assessing
+    the \dQuote{significance} of the average contribution. Default is
+    set to 0 (no permutation test), since computations may take long
+    time.}
+  \item{trace}{Trace permutations.}
+  \item{object}{an object returned by \code{simper}.}
+  \item{ordered}{Logical; Should the species be ordered by their
+    average contribution?}
+  \item{...}{Parameters passed to other functions. In \code{simper} the
+    extra parameters are passed to \code{\link[permute]{shuffleSet}} if
+    permutations are used.}
+}
+
+\details{ Similarity percentage, \code{simper} (Clarke 1993) is based
+  on the decomposition of Bray-Curtis dissimilarity index (see
+  \code{\link{vegdist}}, \code{\link{designdist}}). The contribution
+  of individual species \eqn{i} to the overall Bray-Curtis dissimilarity
+  \eqn{d_{jk}}{d[jk]} is given by
+
+  \deqn{d_{ijk} = \frac{|x_{ij}-x_{ik}|}{\sum_{i=1}^S (x_{ij}+x_{ik})}}{d[ijk] = abs(x[ij]-x[ik])/sum(x[ij]+x[ik])}
+  
+  where \eqn{x} is the abundance of species \eqn{i} in sampling units
+  \eqn{j} and \eqn{k}. The overall index is the sum of the individual
+  contributions over all \eqn{S} species 
+  \eqn{d_{jk}=\sum_{i=1}^S d_{ijk}}{d[jk] = sum(i=1..S) d[ijk]}. 
+  
+  The \code{simper} functions performs pairwise comparisons of groups
+  of sampling units and finds the average percentage contributions
+  of each species to the average overall Bray-Curtis dissimilarity.
+
+  The function displays most important species for each pair of
+  \code{groups}.  These species contribute at least to 70 \% of the
+  differences between groups.  The function returns much more
+  extensive results which can be accessed directly from the result
+  object (see section Value). Function \code{summary} transforms the
+  result to a list of data frames. With argument \code{ordered =
+  TRUE} the data frames also include the cumulative contributions and
+  are ordered by species contribution.
+
+}
+
+\value{
+  A list of class \code{"simper"} with following items:
+  \item{species}{The species names.}
+  \item{average}{average contribution to overall dissimilarity.}
+  \item{overall}{The overall between-group dissimilarity (times 100).} 
+  \item{sd}{standard deviation of contribution.} 
+  \item{ratio}{mean to sd ratio.}
+  \item{ava, avb}{average abundances per group.}
+  \item{ord}{An index vector to order vectors by their contribution or
+    order \code{cusum} back to the original data order.}
+  \item{cusum}{Ordered cumulative per cent contribution.}
+  \item{p}{permutation \eqn{p}-value. Probability of getting a larger
+    or equal average contribution in random permutation of the group
+    factor.}
+}
+
+\examples{
+data(dune)
+data(dune.env)
+(sim <- with(dune.env, simper(dune, Management)))
+summary(sim)
+}
+\author{
+  Eduard Szöcs \email{szoe8822 at uni-landau.de}
+}
+
+\references{
+  Clarke, K.R. 1993. Non-parametric multivariate analyses of changes
+    in community structure. \emph{Australian Journal of Ecology}, 18,
+    117–143.
+}
+\keyword{multivariate}
+