[Vegan-commits] r627 - in pkg/vegan: R inst man

Tue Dec 9 03:42:56 CET 2008

Author: psolymos
Date: 2008-12-09 03:42:55 +0100 (Tue, 09 Dec 2008)
New Revision: 627

Modified:
   pkg/vegan/R/adipart.R
   pkg/vegan/R/print.adipart.R
   pkg/vegan/inst/ChangeLog
   pkg/vegan/man/adipart.Rd
Log:
adipart with formula, and 10 times faster


Modified: pkg/vegan/R/adipart.R
===================================================================

--- pkg/vegan/R/adipart.R	2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/R/adipart.R	2008-12-09 02:42:55 UTC (rev 627)
@@ -1,35 +1,49 @@
 adipart <-
-function(matr, strata, index=c("richness", "shannon", "simpson"),
+function(formula, data, index=c("richness", "shannon", "simpson"),
     weights=c("unif", "prop"), relative = FALSE, nsimul=99, control, ...)
 {
+    ## evaluate formula
+    lhs <- formula[[2]]
+    if (missing(data))
+        data <- parent.frame()
+    lhs <- as.matrix(eval(lhs, data))
+    formula[[2]] <- NULL
+    rhs <- model.frame(formula, data, drop.unused.levels = TRUE)
+    tlab <- attr(attr(rhs, "terms"), "term.labels")
+    nlevs <- length(tlab)
 
-    ## internal function, maybe later gets out
-    nested.factor <-
-    function(x) {
-        x <- data.frame(x)
-        nc <- NCOL(x)
-        if (nc < 2)
-            stop("number of columns must at least 2")
-        nr <- NROW(x)
-        l1 <- sapply(x, function(z) length(unique(z)))
-        if (!any(sapply(2:nc, function(z) l1[z] <= l1[z-1])))
-            stop("number of levels are inapropriate")
-        rval <- list()
-        rval[[1]] <- as.factor(x[,nc])
-        rval[[1]] <- rval[[1]][drop = TRUE]
-        ncol <- nc - 1
-        for (i in 2:nc) {
-            rval[[i]] <- interaction(x[,ncol], rval[[(i-1)]], drop=TRUE)
-            ncol <- ncol - 1
-        }
-        rval <- as.data.frame(rval[rev(1:length(rval))])
-        colnames(rval) <- paste("x", 1:nc, sep="")
-        l2 <- sapply(rval, function(z) length(unique(z)))
-        if (any(l1 != l2))
-            warning("levels are not perfectly nested")
-        rval
+    ## part check proper design of the model frame
+    noint <- attr(attr(attr(rhs, "terms"), "factors"), "dimnames")[[1]]
+    int <- attr(attr(attr(rhs, "terms"), "factors"), "dimnames")[[2]]
+    if (!identical(noint, int))
+        stop("interactions are not allowed in formula")
+    if (!all(attr(attr(rhs, "terms"), "dataClasses") == "factor"))
+        stop("all right hand side variables in formula must be factors")
+    l1 <- sapply(rhs, function(z) length(unique(z)))
+    if (!any(sapply(2:nlevs, function(z) l1[z] <= l1[z-1])))
+        stop("number of levels are inapropriate, check sequence")
+    rval <- list()
+    rval[[1]] <- as.factor(rhs[,nlevs])
+    rval[[1]] <- rval[[1]][drop = TRUE]
+    nCol <- nlevs - 1
+    for (i in 2:nlevs) {
+        rval[[i]] <- interaction(rhs[,nCol], rval[[(i-1)]], drop=TRUE)
+        nCol <- nCol - 1
     }
+    rval <- as.data.frame(rval[rev(1:length(rval))])
+    l2 <- sapply(rval, function(z) length(unique(z)))
+    if (any(l1 != l2))
+        warning("levels are not perfectly nested")
 
+    ## aggregate response matrix
+    fullgamma <-if (nlevels(rhs[,nlevs]) == 1)
+        TRUE else FALSE
+    ftmp <- vector("list", nlevs)
+    for (i in 1:nlevs) {
+        ftmp[[i]] <- as.formula(paste("~", tlab[i], "- 1"))
+    }
+
+    ## evaluate other arguments
     index <- match.arg(index)
     weights <- match.arg(weights)
     if (missing(control))
@@ -41,30 +55,36 @@
             divfun <- function(x) diversity(x, index = "shannon", MARGIN = 1, ...)},
         "simpson" = {
             divfun <- function(x) diversity(x, index = "simpson", MARGIN = 1)})
-    strata <- nested.factor(strata)
-    nl <- NCOL(strata)
-#    seed <- trunc(runif(1, 1000, 9999))
+    sumMatr <- sum(lhs)
+
     ## this is the function passed to oecosimu
     wdivfun <- function(x) {
-        tmp <- lapply(1:nl, function(i) aggregate(x, list(strata[,i]), sum)[,-1])
+        if (fullgamma) {
+            tmp <- lapply(1:(nlevs-1), function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+            tmp[[nlevs]] <- matrix(colSums(lhs), nrow = 1, ncol = ncol(lhs))
+        } else {
+            tmp <- lapply(1:nlevs, function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+        }
         ## weights will change in oecosimu thus need to be recalculated
         if (weights == "prop")
-            wt <- lapply(1:nl, function(i) apply(tmp[[i]], 1, function(z) sum(z) / sum(matr)))
-            else wt <- lapply(1:nl, function(i) rep(1 / NROW(tmp[[i]]), NROW(tmp[[i]])))
-        a <- sapply(1:nl, function(i) sum(divfun(tmp[[i]]) * wt[[i]]))
+            wt <- lapply(1:nlevs, function(i) apply(tmp[[i]], 1, function(z) sum(z) / sumMatr))
+            else wt <- lapply(1:nlevs, function(i) rep(1 / NROW(tmp[[i]]), NROW(tmp[[i]])))
+        a <- sapply(1:nlevs, function(i) sum(divfun(tmp[[i]]) * wt[[i]]))
         if (relative)
             a <- a / a[length(a)]
-        b <- sapply(2:nl, function(i) a[i] - a[(i-1)])
+        b <- sapply(2:nlevs, function(i) a[i] - a[(i-1)])
         c(a, b)
     }
-    sim <- oecosimu(matr, wdivfun, method = "permat", nsimul=nsimul,
+    sim <- oecosimu(lhs, wdivfun, method = "permat", nsimul=nsimul,
         burnin=control$burnin, thin=control$thin, control=control)
-    nam <- c(paste("alpha", 1:(nl-1), sep="."), "gamma",
-        paste("beta", 1:(nl-1), sep="."))
+    nam <- c(paste("alpha", 1:(nlevs-1), sep="."), "gamma",
+        paste("beta", 1:(nlevs-1), sep="."))
     names(sim$statistic) <- attr(sim$oecosimu$statistic, "names") <- nam
     attr(sim, "index") <- index
     attr(sim, "weights") <- weights
-    attr(sim, "n.levels") <- nl
+    attr(sim, "n.levels") <- nlevs
+    attr(sim, "terms") <- tlab
+    attr(sim, "model") <- rhs
     class(sim) <- c("adipart", "list")
     sim
 }

Modified: pkg/vegan/R/print.adipart.R
===================================================================
--- pkg/vegan/R/print.adipart.R	2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/R/print.adipart.R	2008-12-09 02:42:55 UTC (rev 627)
@@ -3,7 +3,7 @@
 {
     cat("adipart with", ncol(x$oecosimu$simulated), "simulations\n")
     att <- attributes(x)
-    att$names <- att$class <- att$n.levels <- NULL
+    att$names <- att$class <- att$n.levels <- att$terms <- att$model <- NULL
     cat(" with", paste(names(att), att, collapse=", "))
 
     cat("\n\n")

Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog	2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/inst/ChangeLog	2008-12-09 02:42:55 UTC (rev 627)
@@ -6,6 +6,11 @@
 
 Version 1.16-6 (closeed Dec 7, 2008)
 
+    * adipart: got a formula interface, and aggregate() was
+    replaced by matrix multiplication. Now it 10 times faster.
+    The formula interface has some consequences on the 
+    specification of the sampling design.
+
     * permat*: functions were rationalised, strata argument is
     used instead of reg and hab, and returned object got several
     new arguments. The tide of changes affected methods as well,

Modified: pkg/vegan/man/adipart.Rd
===================================================================
--- pkg/vegan/man/adipart.Rd	2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/man/adipart.Rd	2008-12-09 02:42:55 UTC (rev 627)
@@ -7,13 +7,13 @@
 In additive diversity partitioning, mean values of alpha diversity at lower levels of a sampling hierarchy are compared to the total number of species in the entire data set (gamma diversity) in the form: gamma = mean(alpha) + beta. Thus beta = gamma - mean(alpha).
 }
 \usage{
-adipart(matr, strata, index=c("richness", "shannon", "simpson"),
+adipart(formula, data, index=c("richness", "shannon", "simpson"),
 weights=c("unif", "prop"), relative = FALSE, nsimul=99, control, ...)
 \method{print}{adipart}(x, ...)
 }
 \arguments{
-  \item{matr}{A community data matrix with samples as rows and species as column.}
-  \item{strata}{A matrix or data frame containing levels of sampling hierarchy as columns, columns from right to left will be treated as nested (first column is the lowest, last is the highest level).}
+  \item{formula}{A two sided model formula in the form \code{y ~ x}, where \code{y} is the community data matrix with samples as rows and species as column. Right hand side (\code{x}) must contain factors referring to levels of sampling hierarchy, terms from right to left will be treated as nested (first column is the lowest, last is the highest level). These variables must be factors in order to unambiguous handling. Interaction terms are not allowed.}
+  \item{data}{A data frame where to look for variables defined in the right hand side of \code{formula}. If missing, variables are looked in the global environment.}
   \item{index}{Character, the diversity index to be calculated, one of  \code{c("richness", "shannon", "simpson")}.}
   \item{weights}{Character, \code{"unif"} for uniform weights, \code{"prop"} for weighting proportional to sample abundances to use in weighted averaging of individual alpha values within strata of a given level of the sampling hierarchy.}
   \item{relative}{Logical, if \code{TRUE} then alpha and beta diversity values are given relative to the value of gamma. This can be useful when comparing different indices.}
@@ -66,10 +66,10 @@
     out <- rep(1, length(x))
     for (i in 2:(length(cut) - 1))
         out[which(x > cut[i] & x <= cut[(i + 1)])] <- i
-    return(out)}
+    return(as.factor(out))}
 ## The hierarchy of sample aggregation
 levsm <- data.frame(
-    l1=1:nrow(mite),
+    l1=as.factor(1:nrow(mite)),
     l2=cutter(mite.xy$y, cut = seq(0, 10, by = 2.5)),
     l3=cutter(mite.xy$y, cut = seq(0, 10, by = 5)),
     l4=cutter(mite.xy$y, cut = seq(0, 10, by = 10)))
@@ -80,16 +80,16 @@
 plot(mite.xy, main="l3", col=levsm$l3+1)
 par(mfrow=c(1,1))
 ## Additive diversity partitioning
-adpMite <- adipart(mite, levsm, index="richness", nsimul=20)
+adpMite <- adipart(mite ~., levsm, index="richness", nsimul=20)
 adpMite
 ## Simple artificial example
 set.seed(4321)
 matr <- r2dtable(1, c(3,4,3,7,4,8,7,6), 3:9)[[1]]
-strata <- cbind(1:8, rep(1:4,each=2), rep(1,8))
+strata <- data.frame(letters[1:8], rep(letters[1:4],each=2), rep("a",8))
 ## restricted permutation within habitat classes
 contr <- permat.control(strata = c("a","a","a","b","a","b","b","b"))
 ## Additive diversity partitioning ('oneway')
-x1 <- adipart(matr, strata, index="shannon", nsimul=25, control=contr)
+x1 <- adipart(matr ~ ., strata, index="shannon", nsimul=25, control=contr)
 x1
 }
 \keyword{multivariate}