[Vegan-commits] r1132 - in branches/1.17: R inst man

Wed Feb 17 14:32:07 CET 2010

Author: jarioksa
Date: 2010-02-17 14:32:07 +0100 (Wed, 17 Feb 2010)
New Revision: 1132

Added:
   branches/1.17/R/multipart.R
   branches/1.17/R/print.multipart.R
   branches/1.17/man/multipart.Rd
Modified:
   branches/1.17/inst/ChangeLog
   branches/1.17/inst/NEWS
Log:
copied multipart() to release branches/1.17

Copied: branches/1.17/R/multipart.R (from rev 1131, pkg/vegan/R/multipart.R)
===================================================================

--- branches/1.17/R/multipart.R	                        (rev 0)
+++ branches/1.17/R/multipart.R	2010-02-17 13:32:07 UTC (rev 1132)
@@ -0,0 +1,139 @@
+multipart <-
+function(formula, data, index=c("renyi", "tsallis"), scales = 1,
+    global = FALSE, relative = FALSE, nsimul=99, ...)
+{
+    if (length(scales) > 1)
+        stop("length of 'scales' must be 1")
+    ## evaluate formula
+    lhs <- formula[[2]]
+    if (missing(data))
+        data <- parent.frame()
+    lhs <- as.matrix(eval(lhs, data))
+    formula[[2]] <- NULL
+    rhs <- model.frame(formula, data, drop.unused.levels = TRUE)
+    tlab <- attr(attr(rhs, "terms"), "term.labels")
+    nlevs <- length(tlab)
+    if (nlevs < 2)
+        stop("provide at least two level hierarchy")
+    if (any(rowSums(lhs) == 0))
+        stop("data matrix contains empty rows")
+    if (any(lhs < 0))
+        stop("data matrix contains negative entries")
+
+    ## part check proper design of the model frame
+    noint <- attr(attr(attr(rhs, "terms"), "factors"), "dimnames")[[1]]
+    int <- attr(attr(attr(rhs, "terms"), "factors"), "dimnames")[[2]]
+    if (!identical(noint, int))
+        stop("interactions are not allowed in formula")
+    if (!all(attr(attr(rhs, "terms"), "dataClasses") == "factor"))
+        stop("all right hand side variables in formula must be factors")
+    l1 <- sapply(rhs, function(z) length(unique(z)))
+    if (!any(sapply(2:nlevs, function(z) l1[z] <= l1[z-1])))
+        stop("number of levels are inapropriate, check sequence")
+    rval <- list()
+    rval[[1]] <- as.factor(rhs[,nlevs])
+    rval[[1]] <- rval[[1]][drop = TRUE]
+    nCol <- nlevs - 1
+    for (i in 2:nlevs) {
+        rval[[i]] <- interaction(rhs[,nCol], rval[[(i-1)]], drop=TRUE)
+        nCol <- nCol - 1
+    }
+    rval <- as.data.frame(rval[rev(1:length(rval))])
+    l2 <- sapply(rval, function(z) length(unique(z)))
+    if (any(l1 != l2))
+        warning("levels are not perfectly nested")
+
+    ## aggregate response matrix
+    fullgamma <-if (nlevels(rhs[,nlevs]) == 1)
+        TRUE else FALSE
+#    if (!fullgamma && !global)
+#        warning("gamma diversity value might be meaningless")
+    ftmp <- vector("list", nlevs)
+    for (i in 1:nlevs) {
+        ftmp[[i]] <- as.formula(paste("~", tlab[i], "- 1"))
+    }
+
+    ## is there a method/burnin/thin in ... ?
+    method <- if (is.null(list(...)$method))
+        "r2dtable" else list(...)$method
+    burnin <- if (is.null(list(...)$burnin))
+        0 else list(...)$burnin
+    thin <- if (is.null(list(...)$thin))
+        1 else list(...)$thin
+
+    ## evaluate other arguments
+    index <- match.arg(index)
+    divfun <- switch(index,
+        "renyi" = function(x) renyi(x, scales=scales, hill = TRUE),
+        "tsallis" = function(x) tsallis(x, scales=scales, hill = TRUE))
+
+    ## cluster membership determination
+    nrhs <- rhs
+    nrhs <- sapply(nrhs, as.numeric)
+    idcl <- function(i) {
+        h <- nrhs[,i]
+        l <- nrhs[,(i-1)]
+        sapply(unique(l), function(i) h[l==i][1])
+    }
+    id <- lapply(2:nlevs, idcl)
+
+    ## this is the function passed to oecosimu
+    if (global) {
+        wdivfun <- function(x) {
+            if (fullgamma) {
+                tmp <- lapply(1:(nlevs-1), function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+                tmp[[nlevs]] <- matrix(colSums(lhs), nrow = 1, ncol = ncol(lhs))
+            } else {
+                tmp <- lapply(1:nlevs, function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+            }
+            raw <- sapply(1:nlevs, function(i) divfun(tmp[[i]]))
+            a <- sapply(raw, mean)
+            G <- a[nlevs]
+            b <- sapply(1:(nlevs-1), function(i) G / a[i])
+            if (relative)
+                b <- b / sapply(raw[1:(nlevs-1)], length)
+            c(a, b)
+        }
+    } else {
+        wdivfun <- function(x) {
+            if (fullgamma) {
+                tmp <- lapply(1:(nlevs-1), function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+                tmp[[nlevs]] <- matrix(colSums(lhs), nrow = 1, ncol = ncol(lhs))
+            } else {
+                tmp <- lapply(1:nlevs, function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+            }
+            a <- sapply(1:nlevs, function(i) divfun(tmp[[i]]))
+            am <- lapply(1:(nlevs-1), function(i) {
+                    sapply(1:length(unique(id[[i]])), function(ii) {
+                        mean(a[[i]][id[[i]]==ii])
+                    })
+                })
+            b <- lapply(1:(nlevs-1), function(i) a[[(i+1)]] / am[[i]])
+            bmax <- lapply(id, function(i) table(i))
+            if (relative)
+                b <- lapply(1:(nlevs-1), function(i) b[[i]] / bmax[[i]])
+            c(sapply(a, mean), sapply(b, mean))
+        }
+    }
+    if (nsimul > 0) {
+            sim <- oecosimu(lhs, wdivfun, method = method, nsimul=nsimul,
+                burnin=burnin, thin=thin)
+        } else {
+            sim <- wdivfun(lhs)
+            tmp <- rep(NA, length(sim))
+            sim <- list(statistic = sim,
+                oecosimu = list(z = tmp, pval = tmp, method = NA, statistic = sim))
+        }
+    nam <- c(paste("alpha", 1:(nlevs-1), sep="."), "gamma",
+        paste("beta", 1:(nlevs-1), sep="."))
+    names(sim$statistic) <- attr(sim$oecosimu$statistic, "names") <- nam
+    attr(sim, "call") <- match.call()
+    attr(sim, "index") <- index
+    attr(sim, "scales") <- scales
+    attr(sim, "global") <- TRUE
+    attr(sim, "n.levels") <- nlevs
+    attr(sim, "terms") <- tlab
+    attr(sim, "model") <- rhs
+    class(sim) <- c("multipart", "list")
+    sim
+}

Copied: branches/1.17/R/print.multipart.R (from rev 1131, pkg/vegan/R/print.multipart.R)
===================================================================
--- branches/1.17/R/print.multipart.R	                        (rev 0)
+++ branches/1.17/R/print.multipart.R	2010-02-17 13:32:07 UTC (rev 1132)
@@ -0,0 +1,27 @@
+print.multipart <-
+function(x, ...)
+{
+    n <- if (is.null(x$oecosimu$simulated))
+        0 else ncol(x$oecosimu$simulated)
+    cat("multipart with", n, "simulations\n")
+    att <- attributes(x)
+    att$names <- att$call <- att$class <- att$n.levels <- att$terms <- att$model <- NULL
+    cat("with", paste(names(att), att, collapse=", "))
+    cat("\n\n")
+    cl <- class(x)
+    if (length(cl) > 1 && cl[2] != "list") {
+        NextMethod("print", x)
+        cat("\n")
+    }
+    if (!is.null(x$oecosimu$simulated)) {
+        tmp <- x$oecosimu$simulated
+    } else {
+        tmp <- data.matrix(x$oecosimu$statistic)
+    }
+    qu <- apply(tmp, 1, quantile, probs=c(0.025, 0.5, 0.975))
+    m <- cbind("statistic" = x$oecosimu$statistic,
+               "z" = x$oecosimu$z, t(qu),
+               "Pr(sim.)"=x$oecosimu$pval)
+    printCoefmat(m, ...)
+    invisible(x)
+}

Modified: branches/1.17/inst/ChangeLog
===================================================================
--- branches/1.17/inst/ChangeLog	2010-02-16 09:03:55 UTC (rev 1131)
+++ branches/1.17/inst/ChangeLog	2010-02-17 13:32:07 UTC (rev 1132)
@@ -4,6 +4,8 @@
 
 Version 1.17-1 (opened February 16, 2010)
 
+	* copied multipart.R & print.multipart.R from the devel branch. 
+
 	* merged r1124: envfit() weights fixed for cca().
 
 	* merged r1123: CCorA() output fixes plus upgrades of

Modified: branches/1.17/inst/NEWS
===================================================================
--- branches/1.17/inst/NEWS	2010-02-16 09:03:55 UTC (rev 1131)
+++ branches/1.17/inst/NEWS	2010-02-17 13:32:07 UTC (rev 1132)
@@ -4,11 +4,15 @@
 		       	======================
 
 
-			FIXES IN VEGAN 1.17-1
+		NEW FEATURES AND FIXES IN VEGAN 1.17-1
 
+    - multipart: new functions for multiplicative partionining of
+      gamma diversity into alpha and beta diversity components. 
+
     - CCorA: Fixed choice of scores in biplots -- Used PC scores
       instead of observed scores in the right-hand-side biplot
-      panel. New choices of biplot types.
+      panel. The biplot function has several new and enhanched
+      alternatives of plots.
 
     - envfit: ignored weights in cca() results for factors or a single
       continuous variable.  The bug was introduced with NA handling

Copied: branches/1.17/man/multipart.Rd (from rev 1131, pkg/vegan/man/multipart.Rd)
===================================================================
--- branches/1.17/man/multipart.Rd	                        (rev 0)
+++ branches/1.17/man/multipart.Rd	2010-02-17 13:32:07 UTC (rev 1132)
@@ -0,0 +1,107 @@
+\encoding{UTF-8}
+\name{multipart}
+\alias{multipart}
+\alias{print.multipart}
+\title{Multiplicative Diversity Partitioning}
+\description{
+In multiplicative diversity partitioning, mean values of alpha diversity at lower levels of a sampling 
+hierarchy are compared to the total diversity in the entire data set or the pooled samples (gamma diversity). 
+}
+\usage{
+multipart(formula, data, index=c("renyi", "tsallis"), scales = 1,
+    global = FALSE, relative = FALSE, nsimul=99, ...)
+\method{print}{multipart}(x, ...)
+}
+\arguments{
+  \item{formula}{A two sided model formula in the form \code{y ~ x}, where \code{y} 
+    is the community data matrix with samples as rows and species as column. Right 
+    hand side (\code{x}) must contain factors referring to levels of sampling hierarchy, 
+    terms from right to left will be treated as nested (first column is the lowest, 
+    last is the highest level). These variables must be factors in order to unambiguous 
+    handling. Interaction terms are not allowed.}
+  \item{data}{A data frame where to look for variables defined in the right hand side 
+    of \code{formula}. If missing, variables are looked in the global environment.}
+  \item{index}{Character, the entropy index to be calculated (see Details).}
+  \item{relative}{Logical, if \code{TRUE} then beta diversity is
+    standardized by its maximum (see Details).}
+  \item{scales}{Numeric, of length 1, the order of the generalized diversity index 
+    to be used.}
+  \item{global}{Logical, indicates the calculation of beta diversity values, see Details.}
+  \item{nsimul}{Number of permutation to use if \code{matr} is not of class 'permat'.
+    If \code{nsimul = 0}, only the \code{FUN} argument is evaluated. It is thus possible
+    to reuse the statistic values without using a null model.}
+  \item{x}{An object to print.}
+  \item{\dots}{Other arguments passed to \code{\link{oecosimu}}, i.e. 
+    \code{method}, \code{thin} or \code{burnin}.}
+}
+\details{
+Multiplicative diversity partitioning is based on Whittaker's (1972) ideas, that has 
+recently been generalised to one parametric diversity families (i.e. \enc{R\'enyi}{Renyi} 
+and Tsallis) by Jost (2006, 2007). Jost recommends to use the numbers equivalents 
+(Hill numbers), instead of pure diversities, and proofs, that this satisfies the 
+multiplicative partitioning requirements.
+
+The current implementation of \code{multipart} calculates Hill numbers based on the 
+functions \code{\link{renyi}} and \code{\link{tsallis}} (provided as \code{index} argument). 
+If values for more than one \code{scales} are desired, it should be done in separate 
+runs, because it adds extra dimensionality to the implementation, which has not been resolved 
+efficiently.
+
+Alpha diversities are then the averages of these Hill numbers for each hierarchy levels, 
+the global gamma diversity is the alpha value calculated for the highest hierarchy level. 
+When \code{global = TRUE}, beta is calculated relative to the global gamma value:
+\deqn{\beta_i = \gamma / \alpha_{i}}{beta_i = gamma / alpha_i}
+when \code{global = FALSE}, beta is calculated relative to local gamma values (local gamma
+means the diversity calculated for a particular cluster based on the pooled abundance vector):
+\deqn{\beta_ij = \alpha_{(i+1)j} / mean(\alpha_{ij})}{beta_ij = alpha_(i+1)j / mean(alpha_i)}
+where \eqn{j} is a particular cluster at hierarchy level \eqn{i}. Then beta diversity value for
+level \eqn{i} is the mean of the beta values of the clusters at that level,
+\eqn{\beta_{i} = mean(\beta_{ij})}.
+
+If \code{relative = TRUE}, the respective beta diversity values are
+standardized by their maximum possible values (\eqn{mean(\beta_{ij}) / \beta_{max,ij}})
+given as \eqn{\beta_{max,ij} = n_{j}} (the number of lower level units in a given cluster \eqn{j}).
+
+The expected diversity components are calculated \code{nsimul} times by individual based 
+randomisation of the community data matrix. This is done by the \code{"r2dtable"} method
+in \code{\link{oecosimu}} by default.
+}
+\value{
+An object of class 'multipart' with same structure as 'oecosimu' objects.
+}
+\references{
+Jost, L. (2006). Entropy and diversity.
+\emph{Oikos}, \bold{113}, 363--375.
+
+Jost, L. (2007). Partitioning diversity into independent alpha and beta components.
+\emph{Ecology}, \bold{88}, 2427--2439.
+
+Whittaker, R. (1972). Evolution and measurement of species diversity.
+\emph{Taxon}, \bold{21}, 213--251.
+}
+\author{\enc{P\'eter S\'olymos}{Peter Solymos}, \email{solymos at ualberta.ca}}
+\seealso{See \code{\link{adipart}} for additive diversity partitioning,
+  \code{\link{hiersimu}} for hierarchical null model testing
+  and \code{\link{oecosimu}} for permutation settings and calculating \eqn{p}-values.}
+\examples{
+data(mite)
+data(mite.xy)
+data(mite.env)
+## Function to get equal area partitions of the mite data
+cutter <- function (x, cut = seq(0, 10, by = 2.5)) {
+    out <- rep(1, length(x))
+    for (i in 2:(length(cut) - 1))
+        out[which(x > cut[i] & x <= cut[(i + 1)])] <- i
+    return(as.factor(out))}
+## The hierarchy of sample aggregation
+levsm <- data.frame(
+    l1=as.factor(1:nrow(mite)),
+    l2=cutter(mite.xy$y, cut = seq(0, 10, by = 2.5)),
+    l3=cutter(mite.xy$y, cut = seq(0, 10, by = 5)),
+    l4=cutter(mite.xy$y, cut = seq(0, 10, by = 10)))
+## Multiplicative diversity partitioning
+multipart(mite ~ ., levsm, index="renyi", scales=1, nsimul=25)
+multipart(mite ~ ., levsm, index="renyi", scales=1, nsimul=25, relative=TRUE)
+multipart(mite ~ ., levsm, index="renyi", scales=1, nsimul=25, global=TRUE)
+}
+\keyword{multivariate}