[Vegan-commits] r627 - in pkg/vegan: R inst man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Dec 9 03:42:56 CET 2008
Author: psolymos
Date: 2008-12-09 03:42:55 +0100 (Tue, 09 Dec 2008)
New Revision: 627
Modified:
pkg/vegan/R/adipart.R
pkg/vegan/R/print.adipart.R
pkg/vegan/inst/ChangeLog
pkg/vegan/man/adipart.Rd
Log:
adipart with formula, and 10 times faster
Modified: pkg/vegan/R/adipart.R
===================================================================
--- pkg/vegan/R/adipart.R 2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/R/adipart.R 2008-12-09 02:42:55 UTC (rev 627)
@@ -1,35 +1,49 @@
adipart <-
-function(matr, strata, index=c("richness", "shannon", "simpson"),
+function(formula, data, index=c("richness", "shannon", "simpson"),
weights=c("unif", "prop"), relative = FALSE, nsimul=99, control, ...)
{
+ ## evaluate formula
+ lhs <- formula[[2]]
+ if (missing(data))
+ data <- parent.frame()
+ lhs <- as.matrix(eval(lhs, data))
+ formula[[2]] <- NULL
+ rhs <- model.frame(formula, data, drop.unused.levels = TRUE)
+ tlab <- attr(attr(rhs, "terms"), "term.labels")
+ nlevs <- length(tlab)
- ## internal function, maybe later gets out
- nested.factor <-
- function(x) {
- x <- data.frame(x)
- nc <- NCOL(x)
- if (nc < 2)
- stop("number of columns must at least 2")
- nr <- NROW(x)
- l1 <- sapply(x, function(z) length(unique(z)))
- if (!any(sapply(2:nc, function(z) l1[z] <= l1[z-1])))
- stop("number of levels are inapropriate")
- rval <- list()
- rval[[1]] <- as.factor(x[,nc])
- rval[[1]] <- rval[[1]][drop = TRUE]
- ncol <- nc - 1
- for (i in 2:nc) {
- rval[[i]] <- interaction(x[,ncol], rval[[(i-1)]], drop=TRUE)
- ncol <- ncol - 1
- }
- rval <- as.data.frame(rval[rev(1:length(rval))])
- colnames(rval) <- paste("x", 1:nc, sep="")
- l2 <- sapply(rval, function(z) length(unique(z)))
- if (any(l1 != l2))
- warning("levels are not perfectly nested")
- rval
+ ## part check proper design of the model frame
+ noint <- attr(attr(attr(rhs, "terms"), "factors"), "dimnames")[[1]]
+ int <- attr(attr(attr(rhs, "terms"), "factors"), "dimnames")[[2]]
+ if (!identical(noint, int))
+ stop("interactions are not allowed in formula")
+ if (!all(attr(attr(rhs, "terms"), "dataClasses") == "factor"))
+ stop("all right hand side variables in formula must be factors")
+ l1 <- sapply(rhs, function(z) length(unique(z)))
+ if (!any(sapply(2:nlevs, function(z) l1[z] <= l1[z-1])))
+ stop("number of levels are inapropriate, check sequence")
+ rval <- list()
+ rval[[1]] <- as.factor(rhs[,nlevs])
+ rval[[1]] <- rval[[1]][drop = TRUE]
+ nCol <- nlevs - 1
+ for (i in 2:nlevs) {
+ rval[[i]] <- interaction(rhs[,nCol], rval[[(i-1)]], drop=TRUE)
+ nCol <- nCol - 1
}
+ rval <- as.data.frame(rval[rev(1:length(rval))])
+ l2 <- sapply(rval, function(z) length(unique(z)))
+ if (any(l1 != l2))
+ warning("levels are not perfectly nested")
+ ## aggregate response matrix
+ fullgamma <-if (nlevels(rhs[,nlevs]) == 1)
+ TRUE else FALSE
+ ftmp <- vector("list", nlevs)
+ for (i in 1:nlevs) {
+ ftmp[[i]] <- as.formula(paste("~", tlab[i], "- 1"))
+ }
+
+ ## evaluate other arguments
index <- match.arg(index)
weights <- match.arg(weights)
if (missing(control))
@@ -41,30 +55,36 @@
divfun <- function(x) diversity(x, index = "shannon", MARGIN = 1, ...)},
"simpson" = {
divfun <- function(x) diversity(x, index = "simpson", MARGIN = 1)})
- strata <- nested.factor(strata)
- nl <- NCOL(strata)
-# seed <- trunc(runif(1, 1000, 9999))
+ sumMatr <- sum(lhs)
+
## this is the function passed to oecosimu
wdivfun <- function(x) {
- tmp <- lapply(1:nl, function(i) aggregate(x, list(strata[,i]), sum)[,-1])
+ if (fullgamma) {
+ tmp <- lapply(1:(nlevs-1), function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+ tmp[[nlevs]] <- matrix(colSums(lhs), nrow = 1, ncol = ncol(lhs))
+ } else {
+ tmp <- lapply(1:nlevs, function(i) t(model.matrix(ftmp[[i]], rhs)) %*% x)
+ }
## weights will change in oecosimu thus need to be recalculated
if (weights == "prop")
- wt <- lapply(1:nl, function(i) apply(tmp[[i]], 1, function(z) sum(z) / sum(matr)))
- else wt <- lapply(1:nl, function(i) rep(1 / NROW(tmp[[i]]), NROW(tmp[[i]])))
- a <- sapply(1:nl, function(i) sum(divfun(tmp[[i]]) * wt[[i]]))
+ wt <- lapply(1:nlevs, function(i) apply(tmp[[i]], 1, function(z) sum(z) / sumMatr))
+ else wt <- lapply(1:nlevs, function(i) rep(1 / NROW(tmp[[i]]), NROW(tmp[[i]])))
+ a <- sapply(1:nlevs, function(i) sum(divfun(tmp[[i]]) * wt[[i]]))
if (relative)
a <- a / a[length(a)]
- b <- sapply(2:nl, function(i) a[i] - a[(i-1)])
+ b <- sapply(2:nlevs, function(i) a[i] - a[(i-1)])
c(a, b)
}
- sim <- oecosimu(matr, wdivfun, method = "permat", nsimul=nsimul,
+ sim <- oecosimu(lhs, wdivfun, method = "permat", nsimul=nsimul,
burnin=control$burnin, thin=control$thin, control=control)
- nam <- c(paste("alpha", 1:(nl-1), sep="."), "gamma",
- paste("beta", 1:(nl-1), sep="."))
+ nam <- c(paste("alpha", 1:(nlevs-1), sep="."), "gamma",
+ paste("beta", 1:(nlevs-1), sep="."))
names(sim$statistic) <- attr(sim$oecosimu$statistic, "names") <- nam
attr(sim, "index") <- index
attr(sim, "weights") <- weights
- attr(sim, "n.levels") <- nl
+ attr(sim, "n.levels") <- nlevs
+ attr(sim, "terms") <- tlab
+ attr(sim, "model") <- rhs
class(sim) <- c("adipart", "list")
sim
}
Modified: pkg/vegan/R/print.adipart.R
===================================================================
--- pkg/vegan/R/print.adipart.R 2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/R/print.adipart.R 2008-12-09 02:42:55 UTC (rev 627)
@@ -3,7 +3,7 @@
{
cat("adipart with", ncol(x$oecosimu$simulated), "simulations\n")
att <- attributes(x)
- att$names <- att$class <- att$n.levels <- NULL
+ att$names <- att$class <- att$n.levels <- att$terms <- att$model <- NULL
cat(" with", paste(names(att), att, collapse=", "))
cat("\n\n")
Modified: pkg/vegan/inst/ChangeLog
===================================================================
--- pkg/vegan/inst/ChangeLog 2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/inst/ChangeLog 2008-12-09 02:42:55 UTC (rev 627)
@@ -6,6 +6,11 @@
Version 1.16-6 (closeed Dec 7, 2008)
+ * adipart: got a formula interface, and aggregate() was
+ replaced by matrix multiplication. Now it 10 times faster.
+ The formula interface has some consequences on the
+ specification of the sampling design.
+
* permat*: functions were rationalised, strata argument is
used instead of reg and hab, and returned object got several
new arguments. The tide of changes affected methods as well,
Modified: pkg/vegan/man/adipart.Rd
===================================================================
--- pkg/vegan/man/adipart.Rd 2008-12-08 09:18:30 UTC (rev 626)
+++ pkg/vegan/man/adipart.Rd 2008-12-09 02:42:55 UTC (rev 627)
@@ -7,13 +7,13 @@
In additive diversity partitioning, mean values of alpha diversity at lower levels of a sampling hierarchy are compared to the total number of species in the entire data set (gamma diversity) in the form: gamma = mean(alpha) + beta. Thus beta = gamma - mean(alpha).
}
\usage{
-adipart(matr, strata, index=c("richness", "shannon", "simpson"),
+adipart(formula, data, index=c("richness", "shannon", "simpson"),
weights=c("unif", "prop"), relative = FALSE, nsimul=99, control, ...)
\method{print}{adipart}(x, ...)
}
\arguments{
- \item{matr}{A community data matrix with samples as rows and species as column.}
- \item{strata}{A matrix or data frame containing levels of sampling hierarchy as columns, columns from right to left will be treated as nested (first column is the lowest, last is the highest level).}
+ \item{formula}{A two sided model formula in the form \code{y ~ x}, where \code{y} is the community data matrix with samples as rows and species as column. Right hand side (\code{x}) must contain factors referring to levels of sampling hierarchy, terms from right to left will be treated as nested (first column is the lowest, last is the highest level). These variables must be factors in order to unambiguous handling. Interaction terms are not allowed.}
+ \item{data}{A data frame where to look for variables defined in the right hand side of \code{formula}. If missing, variables are looked in the global environment.}
\item{index}{Character, the diversity index to be calculated, one of \code{c("richness", "shannon", "simpson")}.}
\item{weights}{Character, \code{"unif"} for uniform weights, \code{"prop"} for weighting proportional to sample abundances to use in weighted averaging of individual alpha values within strata of a given level of the sampling hierarchy.}
\item{relative}{Logical, if \code{TRUE} then alpha and beta diversity values are given relative to the value of gamma. This can be useful when comparing different indices.}
@@ -66,10 +66,10 @@
out <- rep(1, length(x))
for (i in 2:(length(cut) - 1))
out[which(x > cut[i] & x <= cut[(i + 1)])] <- i
- return(out)}
+ return(as.factor(out))}
## The hierarchy of sample aggregation
levsm <- data.frame(
- l1=1:nrow(mite),
+ l1=as.factor(1:nrow(mite)),
l2=cutter(mite.xy$y, cut = seq(0, 10, by = 2.5)),
l3=cutter(mite.xy$y, cut = seq(0, 10, by = 5)),
l4=cutter(mite.xy$y, cut = seq(0, 10, by = 10)))
@@ -80,16 +80,16 @@
plot(mite.xy, main="l3", col=levsm$l3+1)
par(mfrow=c(1,1))
## Additive diversity partitioning
-adpMite <- adipart(mite, levsm, index="richness", nsimul=20)
+adpMite <- adipart(mite ~., levsm, index="richness", nsimul=20)
adpMite
## Simple artificial example
set.seed(4321)
matr <- r2dtable(1, c(3,4,3,7,4,8,7,6), 3:9)[[1]]
-strata <- cbind(1:8, rep(1:4,each=2), rep(1,8))
+strata <- data.frame(letters[1:8], rep(letters[1:4],each=2), rep("a",8))
## restricted permutation within habitat classes
contr <- permat.control(strata = c("a","a","a","b","a","b","b","b"))
## Additive diversity partitioning ('oneway')
-x1 <- adipart(matr, strata, index="shannon", nsimul=25, control=contr)
+x1 <- adipart(matr ~ ., strata, index="shannon", nsimul=25, control=contr)
x1
}
\keyword{multivariate}
More information about the Vegan-commits
mailing list