[Ecopd-commits] r55 - branches/single-tree/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Nov 4 20:24:47 CET 2009


Author: regetz
Date: 2009-11-04 20:24:47 +0100 (Wed, 04 Nov 2009)
New Revision: 55

Modified:
   branches/single-tree/R/ecophylo.R
Log:
redesigned phylo4com as a formal S4 class representing a single phylo4d
tree with all data, plus a slot containing community-specific subtrees;
added constructor methods for several kinds of input data


Modified: branches/single-tree/R/ecophylo.R
===================================================================
--- branches/single-tree/R/ecophylo.R	2009-11-02 19:48:55 UTC (rev 54)
+++ branches/single-tree/R/ecophylo.R	2009-11-04 19:24:47 UTC (rev 55)
@@ -1,32 +1,123 @@
-phylo4com <- function(communityID, species, abundance, tree,
-  missing=c("warn", "OK", "fail")) {
+setClass("phylo4com",
+    representation(subtrees="list"),
+    prototype = list(subtrees = list()),
+    validity = checkPhylo4,
+    contains="phylo4d")
 
-  ## species should only ever match against tip *labels* (not node IDs)
-  species <- as.character(species)
+## create single tree with all data in it, plus a list of subtrees (sans
+## data) for each unique species composition
+setGeneric("phylo4com", function(x, n, ...) {
+    standardGeneric("phylo4com")
+})
 
-  ## identify and exclude any species missing from tree
-#  tips <- getNode(tree, unique(species), type="tip", missing=missing)
-#  isInTree <- species %in% names(tips[!is.na(tips)])
-#  species <- species[isInTree]
-#  abundance <- abundance[isInTree]
-#  communityID <- communityID[isInTree]
+setMethod("phylo4com", c("phylo", "ANY"),
+  function(x, n, ..., check.node.labels="keep") {
+    x <- phylo4(x, check.node.labels)
+    phylo4com(x, n, ...)
+})
 
-  communities <- split(data.frame(species, abundance,
-    stringsAsFactors=FALSE), factor(communityID,
-    unique(as.character(communityID))))
-  
-  subtrees <- lapply(communities, function(community) {
-    subtree <- subset(tree, tips.include=community$species)
-    cdata <- with(community, data.frame(abundance, row.names=species))
-    tipData(subtree, extra.data="OK") <- cdata
-    subtree
-  })
+setMethod("phylo4com", c("phylo4", "numeric"),
+  function(x, n, communityID, species,
+    missing=c("warn", "OK", "fail")) {
 
-  return(subtrees)
+    ## create site-by-species abundance matrix
+    comm <- factor(communityID, levels=unique(communityID))
+    taxa <- factor(species)
+    dat <- matrix(0, nrow=nlevels(taxa), ncol=nlevels(comm),
+        dimnames=list(levels(taxa), unique(comm)))
+    dat[cbind(taxa, comm)] <- n
 
-  # alternative conceptualization...
-  #result <- list(tree=tree, community=subtrees)
-  #class(result) <- "phylo4com"
-  #return(result)
+    ## hand off to the phylo4com matrix method
+    phylo4com(x, dat, missing)
 
-}
+})
+
+setMethod("phylo4com", c("phylo4", "matrix"),
+  function(x, n, missing=c("warn", "OK", "fail")) {
+
+    taxa <- rownames(n)
+    if (any(duplicated(taxa))) {
+        stop("duplicated taxa are not permitted")
+    }
+    comm <- colnames(n)
+    if (any(duplicated(comm))) {
+        stop("duplicated community IDs are not permitted")
+    }
+
+    phy <- subset(x, tips.include=taxa)
+    phyd <- addData(phy, tip.data=n, extra.data="OK")
+    phylo4com(phyd, cols=comm)
+
+})
+
+setMethod("phylo4com", c("phylo4", "data.frame"),
+  function(x, n, missing=c("warn", "OK", "fail")) {
+    n <- as.matrix(n)
+    phylo4com(x, n, missing)
+})
+
+setMethod("phylo4com", c("phylo4d", "missing"),
+  function(x, n, cols) {
+
+    if (missing(cols)) {
+        cols <- names(tipData(x))
+    }
+    if (is.null(cols)) {
+        res <- as(x, "phylo4com")
+        res at metadata$comms <- NULL
+        return(res)
+    }
+    x at metadata$comms <- setNames(rep(NA, length(cols)),
+        make.names(cols))
+
+    # create trees for each unique community wrt composition only
+    P <- presence(x)
+    phy <- extractTree(x)
+    ids <- as.character(as.numeric(factor(sapply(P, paste,
+        collapse=""))))
+    subtrees <- lapply(P[!duplicated(ids)],
+        function(n) subset(phy, rownames(P)[n %in% 1]))
+    names(subtrees) <- ids[!duplicated(ids)]
+
+    res <- as(x, "phylo4com")
+    res at subtrees <- subtrees
+    res at metadata$comms[] <- ids
+    
+    return(res)
+})
+
+
+## older approach: create a single phylo4d object with all data in it
+#phylo4com <- function(communityID, species, abundance, tree,
+#    missing=c("warn", "OK", "fail")) {
+#
+#    comm <- factor(communityID, levels=unique(communityID))
+#    taxa <- factor(species)
+#    dat <- matrix(0, nrow=nlevels(taxa), ncol=nlevels(comm),
+#      dimnames=list(levels(taxa), unique(comm)))
+#    dat[cbind(taxa, comm)] <- abundance
+#
+#    subtree <- subset(tree, tips.include=levels(taxa))
+#
+#    res <- addData(subtree, tip.data=dat, extra.data="OK")
+#    res at metadata$comms <- make.names(unique(comm))
+#    return(res)
+#
+#}
+
+## similar to above, but cast function from reshape package
+#phylo4com <- function(communityID, species, abundance, tree,
+#    missing=c("warn", "OK", "fail")) {
+#
+#    raw <- data.frame(.taxa=factor(species), .comm=factor(communityID),
+#      value=abundance, stringsAsFactors=FALSE)
+#    dat <- cast(.taxa ~ .comm, data=raw)
+#    row.names(dat) <- dat[[1]]
+#    dat <- dat[-1]
+#
+#    subtree <- subset(tree, tips.include=row.names(dat))
+#
+#    addData(subtree, tip.data=dat,
+#      metadata=list(comms=make.names(colnames(dat))), extra.data="OK")
+#
+#}



More information about the Ecopd-commits mailing list