[Ecopd-commits] r92 - in pkg: . R data inst/unitTests man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Nov 18 22:23:00 CET 2009


Author: regetz
Date: 2009-11-18 22:23:00 +0100 (Wed, 18 Nov 2009)
New Revision: 92

Added:
   pkg/R/aed.R
   pkg/R/coerce.R
   pkg/R/genera.R
   pkg/R/iac.R
   pkg/R/pae.R
   pkg/R/phylo4com.R
   pkg/R/simpson.R
   pkg/data/weeds.data.rda
   pkg/data/weeds.tree.rda
   pkg/man/aed.Rd
   pkg/man/coerce.Rd
   pkg/man/communities.Rd
   pkg/man/iac.Rd
   pkg/man/pae.Rd
   pkg/man/phylo4com-class.Rd
   pkg/man/phylo4com.Rd
   pkg/man/simpson.Rd
Removed:
   pkg/R/ecophylo.R
   pkg/man/ecophylo.Rd
Modified:
   pkg/DESCRIPTION
   pkg/NAMESPACE
   pkg/R/community.R
   pkg/R/indices.R
   pkg/R/pd.R
   pkg/R/utilities.R
   pkg/data/weeds.rda
   pkg/inst/unitTests/runit.indices.R
   pkg/man/abundance.Rd
   pkg/man/genera.Rd
   pkg/man/minTL.Rd
   pkg/man/pairdist.Rd
   pkg/man/pd.Rd
   pkg/man/richness.Rd
   pkg/man/siteBySpecies.Rd
   pkg/man/tipLength.Rd
   pkg/man/weeds.Rd
Log:
merged single-tree branch changes r54:91 into pkg


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/DESCRIPTION	2009-11-18 21:23:00 UTC (rev 92)
@@ -5,8 +5,9 @@
 Date: 2008-09-03
 Author: Jim Regetz, Marc Cadotte, Jonathan Davies
 Maintainer: Jim Regetz <regetz at nceas.ucsb.edu>
-Depends: ape, phylobase
+Depends: methods, phylobase
 Description: A set of metrics and functions for combining ecological and phylogenetic information
 License: GPL
+Collate: phylo4com.R coerce.R genera.R utilities.R community.R pd.R aed.R iac.R pae.R simpson.R indices.R plots.R
 LazyLoad: yes
 LazyData: yes

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/NAMESPACE	2009-11-18 21:23:00 UTC (rev 92)
@@ -1,35 +1,51 @@
+#----------------------------------------------------------------------
+
+#importMethodsFrom(phylobase, subset)
+#importMethodsFrom(phylobase, labels)
+import(phylobase)
+
+#----------------------------------------------------------------------
+
+exportClasses(phylo4com)
+
+#----------------------------------------------------------------------
+
+# constructor methods
+exportMethods(phylo4com)
+
+# metric methods
+exportMethods(pd, pae, iac, ed, eed, hed, aed, eaed, haed, value)
+exportMethods(simpson)
+
+#----------------------------------------------------------------------
+
+# phylogeny helper functions
 export(tipLength)
 export(pairdist)
-export(abundance)
-export(`abundance<-`)
-export(minTL)
-export(`minTL<-`)
 
-export(caterpillar)
+# community helper functions
+export(abundance, "abundance<-")
+export(presence)
+export(communities)
 export(genera)
-export(getMinTL)
-export(pd)
 export(richness)
-export(simp.phy)
 export(siteBySpecies)
-export(weightByAbund)
 
+# pd-related functions
+export(minTL)
+export(`minTL<-`)
+
+# miscellaneous
+export(caterpillar)
+
+# reference data sets
 export(Families)
 export(LookupTL)
 export(Supertree)
 
-export(phylo4com)
-export(pd)
-export(PAE)
-export(IAC)
-export(ED)
-export(EED)
-export(HED)
-export(AED)
-export(EAED)
-export(HAED)
-export(value)
+#----------------------------------------------------------------------
 
-#importMethodsFrom(phylobase, subset)
-#importMethodsFrom(phylobase, labels)
-import(phylobase)
+# unexported functions
+#export(getSubtrees)
+#export(getMinTL)
+#export(weightByAbund)

Copied: pkg/R/aed.R (from rev 91, branches/single-tree/R/aed.R)
===================================================================
--- pkg/R/aed.R	                        (rev 0)
+++ pkg/R/aed.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,221 @@
+##
+## ED and related methods
+##
+
+setGeneric("ed", function(x, na.rm=TRUE) {
+    standardGeneric("ed")
+})
+
+setMethod("ed", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    ed(phyc, na.rm=na.rm)
+})
+
+# TODO: This function includes its own code for not counting root edge
+# length. Maybe this should maybe be done at a higher level?
+setMethod("ed", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+
+    comms <- x at metadata$comms
+    if (is.null(comms)) {
+        stop("no community data specified in phylo4com object")
+    }
+    subtrees <- x at subtrees[unique(as.character(comms))]
+    .edi <- function(tree) {
+        # set length of root edge to zero
+        edgeLength(tree)[edgeId(tree, "root")] <- 0
+
+        all.nodes <- nodeId(tree, type = "all")
+        des <- descendants(tree, all.nodes, type="tips")
+        nv <- edgeLength(tree, all.nodes) / sapply(des, length)
+        names(nv) <- all.nodes
+
+        tip.nodes <- nodeId(tree, "tip")
+        anc <- ancestors(tree, tip.nodes, "ALL")
+
+        res <- sapply(anc, function(n) sum(nv[as.character(n)], na.rm=TRUE))
+        names(res) <- tipLabels(tree)
+        res
+    }
+    res <- lapply(subtrees, .edi)[as.character(comms)]
+    names(res) <- names(comms)
+    return(res)
+
+})
+
+setGeneric("hed", function(x, na.rm=TRUE) {
+    standardGeneric("hed")
+})
+
+setMethod("hed", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    hed(phyc, na.rm=na.rm)
+})
+
+setMethod("hed", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+    ED <- ed(x)
+    PD <- pd(x)
+    res <- sapply(seq_len(length(PD)), function(i) {
+        scaledED <- ED[[i]] / PD[[i]]
+        -sum(scaledED * log(scaledED))
+    })
+    names(res) <- names(PD)
+    return(res)
+})
+
+setGeneric("eed", function(x, na.rm=TRUE) {
+    standardGeneric("eed")
+})
+
+setMethod("eed", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    eed(phyc, na.rm=na.rm)
+})
+
+setMethod("eed", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+    comms <- x at metadata$comms
+    if (is.null(comms)) {
+        stop("no community data specified in phylo4com object")
+    }
+    subtrees <- x at subtrees[unique(as.character(comms))]
+    hed(x) / log(sapply(subtrees, nTips)[as.character(comms)])
+})
+
+setGeneric("aed", function(x, na.rm=TRUE) {
+    standardGeneric("aed")
+})
+
+setMethod("aed", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    aed(phyc, na.rm=na.rm)
+})
+
+# TODO: This function includes its own code for not counting root edge
+# length. Maybe this should maybe be done at a higher level?
+setMethod("aed", signature(x="phylo4com"),
+  function(x, na.rm=TRUE) {
+
+    comms <- x at metadata$comms
+    if (is.null(comms)) {
+        stop("no community data specified in phylo4com object")
+    }
+    subtrees <- x at subtrees[unique(as.character(comms))]
+    .isD <- function(tree) {
+        # get all node IDs, but excluding root node
+        nonroot.nodes <- setdiff(nodeId(tree), rootNode(tree))
+        # Create logical matrix indicating which tips (in columns) are
+        # descendants of each node (in rows), self-inclusive
+        t(sapply(ancestors(tree, tipLabels(tree), "ALL"),
+            function(n) nonroot.nodes %in% n))
+    }
+
+    .elen <- function(tree) {
+        nonroot.nodes <- setdiff(nodeId(tree), rootNode(tree))
+        edgeLength(tree, nonroot.nodes)
+    }
+    isDescendant <- lapply(subtrees, .isD)[as.character(comms)]
+    edge.length <- lapply(subtrees, .elen)[as.character(comms)]
+    N <- abundance(x)
+
+    res <- lapply(seq_along(N), function(i) {
+        spp <- row.names(isDescendant[[i]])        
+        dAbund <- N[spp, i] * isDescendant[[i]]
+
+        # Calculate individual-based AED of each species
+        AED <- colSums(edge.length[[i]] * t(prop.table(dAbund, margin=2)))
+        AED/N[spp, i]
+    })
+    names(res) <- names(comms)
+    return(res)
+
+})
+
+#    .isD <- function(tree, template) {
+#        # get all node IDs, but excluding root node
+#        nonroot.nodes <- setdiff(nodeId(tree), rootNode(tree))
+#        # Create logical matrix indicating which tips (in columns) are
+#        # descendants of each node (in rows), self-inclusive
+#        res <- t(sapply(ancestors(tree, tipLabels(tree), "ALL"),
+#          function(n) nonroot.nodes %in% n))
+#        template[match(rownames(res), rownames(template)), ] <- res
+#        template
+#    }
+#
+#    .el <- function(tree) {
+#        nonroot.nodes <- setdiff(nodeId(tree), rootNode(tree))
+#        edgeLength(tree, nonroot.nodes)
+#    }
+#    tmp <- setNames(rep(NA, nTips(x)), tipLabels(x))
+#    tmp <- matrix(NA, nrow=nTips(x), ncol=nNodes(x)+nTips(x)-1)
+#    rownames(tmp) <- tipLabels(x)
+#    isDescendant <- lapply(subtrees, .isD, tmp)
+#    isDescendant <- array(unlist(isDescendant), dim=c(nTips(x),
+#        nTips(x)+nNodes(x)-1, length(subtrees)))
+#
+#    # Create vector of ancestral edge lengths
+#    edge.length <- sapply(subtrees, .elen)
+#
+#    # Create matrix containing number of individuals of each species
+#    # descending from each interior node
+#    N <- as.matrix(abundance(x))
+#    dAbund <- sweep(isDescendant, c(1,3), N, "*")
+#
+#    # Calculate individual-based AED of each species
+#    pt <- prop.table(dAbund, margin=c(2,3))
+#    AED <- sweep(pt, c(2,3), edge.length, "*")
+#    AED <- apply(AED, 3, rowSums) / N
+#
+#    return(AED)
+
+setGeneric("haed", function(x, na.rm=TRUE) {
+    standardGeneric("haed")
+})
+
+setMethod("haed", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    haed(phyc, na.rm=na.rm)
+})
+
+setMethod("haed", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+    # Recast AED in terms of individuals
+    AED <- aed(x)
+    PD <- pd(x)
+    N <- abundance(x)
+    scaled.AED <- lapply(seq_along(N), function(i) {
+        spp <- names(AED[[i]])        
+        rep(unname(AED[[i]]), N[spp, i]) / PD[[i]]
+    })
+    res <- sapply(scaled.AED, function(x) -sum(x * log(x)))
+    names(res) <- names(AED)
+    return(res)
+})
+
+setGeneric("eaed", function(x, na.rm=TRUE) {
+    standardGeneric("eaed")
+})
+
+setMethod("eaed", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    eaed(phyc, na.rm=na.rm)
+})
+
+setMethod("eaed", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+  haed(x) / log(colSums(abundance(x)))
+})
+
+setGeneric("value",
+  function(x, na.rm=TRUE) {
+    standardGeneric("value")
+})
+
+setMethod("value", signature(x="phylo4d"),
+  function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    value(phyc, na.rm=na.rm)
+})
+
+setMethod("value", signature(x="phylo4com"),
+  function(x, na.rm=TRUE) {
+    AED <- aed(x)
+    lapply(AED, function(x) x/sum(x))
+})
+

Copied: pkg/R/coerce.R (from rev 91, branches/single-tree/R/coerce.R)
===================================================================
--- pkg/R/coerce.R	                        (rev 0)
+++ pkg/R/coerce.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,48 @@
+setMethod("phylo4d", c("phylo4com"),
+  function(x, community) {
+    if (missing(community)) {
+        return(as(x, "phylo4d"))
+    }
+    if (length(community)!=1) {
+        stop("a single community label must be provided")
+    }
+
+    ## get the tree
+    tree <- phylo4(x, community)
+
+    ## get abundance values, and drop species with zero abundance
+    N <- abundance(x, community)
+    N <- N[N[[community]]!=0, , drop=FALSE]
+
+    ## combine and return phylo4d object
+    phylo4d(tree, tip.data=N)
+})
+
+setMethod("phylo4", c("phylo4com"),
+  function(x, community) {
+    if (missing(community)) {
+        return(as(x, "phylo4"))
+    }
+    if (length(community)!=1) {
+        stop("a single community label must be provided")
+    }
+    getSubtrees(x, community)[[1]]
+})
+
+## internal helper function for extracting a list of the subtrees
+## associated with each communities
+getSubtrees <- function(x, community) {
+    communities <- x at metadata$comms
+    if (missing(community)) {
+        community <- names(communities)
+    }
+    doNotExist <- !community %in% names(communities)
+    if (any(doNotExist)) {
+        stop("one or more communities not found in x: ",
+            paste(community[doNotExist], collapse=", "))
+    }
+    res <- x at subtrees[communities[names(communities) %in% community]]
+    names(res) <- community
+    return(res)
+}
+    

Modified: pkg/R/community.R
===================================================================
--- pkg/R/community.R	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/R/community.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -1,51 +1,14 @@
 # Generate site-by-species matrix
-siteBySpecies <- function(phylo4com, presence.only=FALSE,
-  transpose=FALSE) {
+siteBySpecies <- function(phy, presence.only=FALSE,
+    na.zero=FALSE, transpose=FALSE) {
 
-  ## create and populate the matrix
-  spp <- unique(unlist(lapply(phylo4com, tipLabels)))
-  mat <- sapply(phylo4com, function(x) {
-    vec <- tipData(x)[spp, "abundance"]
-    vec[!spp %in% tipLabels(x)] <- 0
-    vec
-  })
-  rownames(mat) <- spp
+    if (presence.only) {
+        dat <- presence(phy, na.zero=na.zero)
+    } else {
+        dat <- abundance(phy, na.zero=na.zero)
+    }
+    mat <- as.matrix(dat)
+    if (!transpose) mat <- t(mat)
+    return(mat)
 
-  if (presence.only) {
-    mat[mat>0] <- 1
-    mat[mat<=0] <- 0
-  }
-
-  if (!transpose) mat <- t(mat)
-
-  return(mat)
-
 }
-
-# Calculate species richness
-richness <- function(phylo4com, na.rm=FALSE) {
-  sapply(phylo4com, function(x) sum(abundance(x)>0, na.rm=na.rm))
-}
-
-# simpson's index with and without phylogenetic distances using
-# commmunity matrix 'x', species names as colnames and community names
-# as rownames. phy=FALSE returns traditional simpson's index
-simp.phy <- function(x, tr, phy=TRUE) {
-
-  x <- as.matrix(x)
-  x <- x[, order(colnames(x)), drop=FALSE]
-  x <- prop.table(x, 1)
-    
-  if (phy==TRUE){
-    phy.mat <- cophenetic(tr)
-    phy.mat <- phy.mat[order(rownames(phy.mat)), order(colnames(phy.mat))]
-    out <- apply(x, 1, function(x) sum((x %o% x)*phy.mat))
-  } else {
-    bin.mat <- matrix(1, dim(x)[2], dim(x)[2])
-    diag(bin.mat) <- 0
-    out <- apply(x, 1, function(x) sum((x %o% x)*bin.mat))
-  }   
-        
-  return(out) 
-}
-

Deleted: pkg/R/ecophylo.R
===================================================================
--- pkg/R/ecophylo.R	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/R/ecophylo.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -1,32 +0,0 @@
-phylo4com <- function(communityID, species, abundance, tree,
-  missing=c("warn", "OK", "fail")) {
-
-  ## species should only ever match against tip *labels* (not node IDs)
-  species <- as.character(species)
-
-  ## identify and exclude any species missing from tree
-#  tips <- getNode(tree, unique(species), type="tip", missing=missing)
-#  isInTree <- species %in% names(tips[!is.na(tips)])
-#  species <- species[isInTree]
-#  abundance <- abundance[isInTree]
-#  communityID <- communityID[isInTree]
-
-  communities <- split(data.frame(species, abundance,
-    stringsAsFactors=FALSE), factor(communityID,
-    unique(as.character(communityID))))
-  
-  subtrees <- lapply(communities, function(community) {
-    subtree <- subset(tree, tips.include=community$species)
-    cdata <- with(community, data.frame(abundance, row.names=species))
-    tipData(subtree, extra.data="OK") <- cdata
-    subtree
-  })
-
-  return(subtrees)
-
-  # alternative conceptualization...
-  #result <- list(tree=tree, community=subtrees)
-  #class(result) <- "phylo4com"
-  #return(result)
-
-}

Copied: pkg/R/genera.R (from rev 91, branches/single-tree/R/genera.R)
===================================================================
--- pkg/R/genera.R	                        (rev 0)
+++ pkg/R/genera.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,24 @@
+setGeneric("genera", function(x, ...) {
+    standardGeneric("genera")
+})
+
+setMethod("genera", c("phylo4"),
+  function(x) {
+    # From taxa names in tree, remove "_" and species name after
+    gsub("_.*$", "", tipLabels(x))
+})
+
+setMethod("genera", c("phylo4com"),
+  function(x, community) {
+    if (missing(community)) {
+        community <- communities(x)
+    }
+    doNotExist <- !community %in% communities(x)
+    if (any(doNotExist)) {
+        stop("one or more communities not found in x: ",
+            paste(community[doNotExist], collapse=", "))
+    }
+    g <- lapply(getSubtrees(x, community), genera)
+    names(g) <- community
+    return(g)
+})

Copied: pkg/R/iac.R (from rev 91, branches/single-tree/R/iac.R)
===================================================================
--- pkg/R/iac.R	                        (rev 0)
+++ pkg/R/iac.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,52 @@
+##
+## IAC methods
+##
+
+setGeneric("iac", function(x, na.rm=TRUE) {
+    standardGeneric("iac")
+})
+
+setMethod("iac", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    iac(phyc, na.rm=na.rm)
+})
+
+setMethod("iac", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+
+    comms <- x at metadata$comms
+    if (is.null(comms)) {
+        stop("no community data specified in phylo4com object")
+    }
+    subtrees <- x at subtrees[unique(as.character(comms))]
+    .denom <-  function(tree, template) {
+        # Count number of lineages originating at each internal node
+        # (i.e. number of splits)
+        int.nodes <- nodeId(tree, "internal")
+        nSplits <- sapply(int.nodes, function(x) length(children(tree,
+            x)))
+        names(nSplits) <- int.nodes
+        # For each tip, take the product of the number of splits across
+        # all of its ancestral nodes
+        res <- sapply(ancestors(tree, tipLabels(tree)), function(x)
+            prod(nSplits[as.character(x)]))
+        template[match(names(res), names(template))] <- res
+        template
+    }
+
+    # now for each subtree...
+    tmp <- setNames(rep(NA, nTips(x)), tipLabels(x))
+    denom <- lapply(subtrees, .denom, tmp)
+    denom <- do.call("cbind", denom[as.character(comms)])
+    nnodes <- sapply(x at subtrees, nNodes)[as.character(comms)]
+
+    # Calculate expected number of individuals under null hypothesis
+    # of equal allocation to each lineage at each (node) split 
+    N <- abundance(x)
+    expected <- t(colSums(N, na.rm=na.rm) / t(denom))
+
+    # IAC: summed absolute difference between expected and observed
+    # abundances, divided by number of nodes
+    colSums(abs(expected - N), na.rm=na.rm) / nnodes
+
+})
+

Modified: pkg/R/indices.R
===================================================================
--- pkg/R/indices.R	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/R/indices.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -1,113 +0,0 @@
-PAE <- function(tree) {
-
-  # Calculate PD
-  PD <- pd(tree)
-
-  # Extract tip lengths and abundances of those taxa
-  TL <- tipLength(tree)
-  N <- abundance(tree)
-
-  # Calculate PAE
-  numer <- PD + sum(TL * (N-1))
-  denom <- PD + (mean(N)-1) * sum(TL)
-  return(numer/denom)
-
-}
-
-IAC <- function(tree) {
-
-  # Count number of lineages originating at each internal node (i.e.
-  # number of splits)
-  int.nodes <- nodeId(tree, "internal")
-  nSplits <- sapply(int.nodes, function(x) length(children(tree, x)))
-  names(nSplits) <- int.nodes
-
-  # For each tip, take the product of the number of splits across all of
-  # its ancestral nodes
-  denom <- sapply(ancestors(tree, nodeId(tree, "tip")), function(x)
-    prod(nSplits[as.character(x)]))
-
-  abundance <- abundance(tree)
-
-  # Calculate expected number of individuals under null hypothesis of
-  # equal allocation to each lineage at each (node) split 
-  expected <- sum(abundance) / denom
-
-  # IAC: summed absolute difference between expected and observed
-  # abundances, divide by number of nodes
-  sum(abs(expected-abundance)) / nNodes(tree)
-
-}
-
-# TODO: This function includes its own code for not counting root edge
-# length. Maybe this should maybe be done at a higher level?
-ED <- function(tree) {
-
-  # set length of root edge to zero
-  edgeLength(tree)[edgeId(tree, "root")] <- 0
-
-  all.nodes <- nodeId(tree, type = "all")
-  des <- descendants(tree, all.nodes, type="tips")
-  nv <- edgeLength(tree, all.nodes) / sapply(des, length)
-  names(nv) <- all.nodes
-
-  tip.nodes <- nodeId(tree, "tip")
-  anc <- ancestors(tree, tip.nodes, "ALL")
-  EDI <- sapply(anc, function(n) sum(nv[as.character(n)], na.rm=TRUE))
-  names(EDI) <- tipLabels(tree)
-
-  return(EDI)
-
-}
-
-HED <- function(tree) {
-  scaledED <- ED(tree) / pd(tree)
-  return(-sum(scaledED * log(scaledED)))
-}
-
-EED <- function(tree) {
-  HED(tree) / log(nTips(tree))
-}
-
-AED <- function(tree) {
-
-  # get all node IDs, but excluding root node
-  nonroot.nodes <- setdiff(nodeId(tree), rootNode(tree))
-  # Create logical matrix indicating which tips (in columns) are
-  # descendants of each node (in rows), self-inclusive
-  isDescendant <- sapply(ancestors(tree, tipLabels(tree), "ALL"),
-    function(n) nonroot.nodes %in% n)
-
-  # Create vector of ancestral edge lengths
-  edge.length <- edgeLength(tree, nonroot.nodes)
-
-  # Create matrix containing number of individuals of each species
-  # descending from each interior node
-  abundance <- abundance(tree)
-  dAbund <- abundance * t(isDescendant)
-
-  # Calculate individual-based AED of each species
-  AED <- colSums(edge.length * t(prop.table(dAbund, margin=2)))
-  AED <- AED/abundance
-
-  return(AED)
-
-}
-
-HAED <- function(tree) {
-  # Recast AED in terms of individuals
-  AED <- AED(tree)
-  abundance <- abundance(tree)
-  scaledIndivAED <- rep(AED, abundance) / pd(tree)
-  return(-sum(scaledIndivAED * log(scaledIndivAED)))
-}
-
-EAED <- function(tree) {
-  HAED(tree) / log(sum(abundance(tree)))
-}
-
-value <- function(tree) {
-  aed <- AED(tree)
-  aed/sum(aed)
-}
-

Copied: pkg/R/pae.R (from rev 91, branches/single-tree/R/pae.R)
===================================================================
--- pkg/R/pae.R	                        (rev 0)
+++ pkg/R/pae.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,41 @@
+##
+## PAE methods
+##
+
+setGeneric("pae", function(x, na.rm=TRUE) {
+    standardGeneric("pae")
+})
+
+setMethod("pae", signature(x="phylo4d"), function(x, na.rm=TRUE) {
+    phyc <- phylo4com(x)
+    pae(phyc, na.rm=na.rm)
+})
+
+setMethod("pae", signature(x="phylo4com"), function(x, na.rm=TRUE) {
+
+    N <- abundance(x)
+    comms <- x at metadata$comms
+    if (is.null(comms)) {
+        stop("no community data specified in phylo4com object")
+    }
+    subtrees <- x at subtrees[unique(as.character(comms))]
+
+    # now for each subtree...
+    #PD <- sapply(subtrees, pd)[as.character(comms)]
+    PD <- pd(x)
+    tmp <- setNames(rep(0, nTips(x)), tipLabels(x))
+    TL <- lapply(subtrees, function(tree) {
+        res <- tipLength(tree)
+        tmp[match(names(res), names(tmp))] <- res
+        tmp
+    })
+    TL <- do.call("cbind", TL[as.character(comms)])
+
+    numer <- PD + colSums(TL * (N - 1))
+    denom <- PD + (colSums(N, na.rm = na.rm) / colSums(presence(x),
+        na.rm=na.rm) - 1) * colSums(TL)
+    res <- numer/denom
+    names(res) <- names(comms)
+    return(res)
+
+})

Modified: pkg/R/pd.R
===================================================================
--- pkg/R/pd.R	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/R/pd.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -1,20 +1,55 @@
-# basic pd calculation
-pd <- function(phy, method=c("traditional", "polytomy", "yule")) {
-  ## if using non-traditional pd, adjust tip lengths based on abundance
-  method <- match.arg(method)
-  if (method %in% c("polytomy", "yule")) {
-    phy <- weightByAbund(phy, method)
-  } 
-  # exclude root edge from calculation (if it exists)
-  if (isRooted(phy)) {
-    nonroot.nodes <- setdiff(nodeId(phy), rootNode(phy))
-    tot.length <- sum(edgeLength(phy, nonroot.nodes))
-  } else {
-    tot.length <- sum(edgeLength(phy))
+##
+## PD methods
+##
+
+setGeneric("pd", function(x, ...) {
+    standardGeneric("pd")
+})
+
+setMethod("pd", signature(x="phylo4"),
+  function(x, method=c("traditional")) {
+    method <- match.arg(method)
+    if (isRooted(x)) {
+        nonroot.nodes <- setdiff(nodeId(x), rootNode(x))
+        tot.length <- sum(edgeLength(x, nonroot.nodes))
+    } else {
+        tot.length <- sum(edgeLength(x))
+    }
+    return(tot.length)
   }
-  return(tot.length)
-}
+)
 
+setMethod("pd", signature(x="phylo4d"), function(x,
+  method=c("traditional", "polytomy", "yule"), ...) {
+    phyc <- phylo4com(x, ...)
+    pd(phyc, method=method)
+})
+
+setMethod("pd", signature(x="phylo4com"), function(x,
+  method=c("traditional", "polytomy", "yule")) {
+
+    method <- match.arg(method)
+
+    if (method %in% c("polytomy", "yule")) {
+        message("estimating minTL values from Supertree")
+        res <- sapply(communities(x), function(community) {
+            phyd <- phylo4d(x, community)
+            n <- tipData(phyd)[[community]]
+            pd(extractTree(weightByAbund(phyd, n, method)))
+        })
+    } else if (method == "traditional") {
+        comms <- x at metadata$comms
+        if (is.null(comms)) {
+            return(.pd(extractTree(x), method))
+        }
+        subtrees <- x at subtrees[as.character(comms)]
+        res <- sapply(subtrees, pd)[as.character(comms)]
+        names(res) <- names(comms)
+    }
+    return(res)
+
+})
+
 # lookup function for minimum tip length
 getMinTL <- function(tree, genera) {
 
@@ -74,26 +109,17 @@
 }
 
 # function to weight tip length based on abundance
-weightByAbund <- function(tree, method=c("polytomy", "yule")) {
+weightByAbund <- function(tree, n, method=c("polytomy", "yule")) {
 
   method <- match.arg(method)
 
-  if (is.null(abundance(tree))) {
-    stop("tree contains no abundance information")
-  }
-
   if (is.null(minTL(tree))) {
-    stop("tree contains no minTL information")
+    minLength <- getMinTL(tree, genera(tree))
+  } else {
+    minLength <- minTL(tree)
   }
 
-  n <- abundance(tree)
-  minLength <- minTL(tree)
   tipLen <- tipLength(tree)
- 
-  # Test statement:
-  if (!identical(names(n), names(minLength))) {
-    stop("mismatch between abundance and minTL vectors")
-  }
 
   if (method=="polytomy") {
     newLength <- tipLen + (n-1) * minLength

Copied: pkg/R/phylo4com.R (from rev 91, branches/single-tree/R/phylo4com.R)
===================================================================
--- pkg/R/phylo4com.R	                        (rev 0)
+++ pkg/R/phylo4com.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,123 @@
+setClass("phylo4com",
+    representation(subtrees="list"),
+    prototype = list(subtrees = list()),
+    validity = checkPhylo4,
+    contains="phylo4d")
+
+## create single tree with all data in it, plus a list of subtrees (sans
+## data) for each unique species composition
+setGeneric("phylo4com", function(x, n, ...) {
+    standardGeneric("phylo4com")
+})
+
+setMethod("phylo4com", c("phylo", "ANY"),
+  function(x, n, ..., check.node.labels="keep") {
+    x <- phylo4(x, check.node.labels)
+    phylo4com(x, n, ...)
+})
+
+setMethod("phylo4com", c("phylo4", "numeric"),
+  function(x, n, communityID, species,
+    missing=c("warn", "OK", "fail")) {
+
+    ## create site-by-species abundance matrix
+    comm <- factor(communityID, levels=unique(communityID))
+    taxa <- factor(species)
+    dat <- matrix(0, nrow=nlevels(taxa), ncol=nlevels(comm),
+        dimnames=list(levels(taxa), unique(comm)))
+    dat[cbind(taxa, comm)] <- n
+
+    ## hand off to the phylo4com matrix method
+    phylo4com(x, dat, missing)
+
+})
+
+setMethod("phylo4com", c("phylo4", "matrix"),
+  function(x, n, missing=c("warn", "OK", "fail")) {
+
+    taxa <- rownames(n)
+    if (any(duplicated(taxa))) {
+        stop("duplicated taxa are not permitted")
+    }
+    comm <- colnames(n)
+    if (any(duplicated(comm))) {
+        stop("duplicated community IDs are not permitted")
+    }
+
+    phy <- subset(x, tips.include=taxa)
+    phyd <- addData(phy, tip.data=n, extra.data="OK")
+    phylo4com(phyd, cols=comm)
+
+})
+
+setMethod("phylo4com", c("phylo4", "data.frame"),
+  function(x, n, missing=c("warn", "OK", "fail")) {
+    n <- as.matrix(n)
+    phylo4com(x, n, missing)
+})
+
+setMethod("phylo4com", c("phylo4d", "missing"),
+  function(x, n, cols) {
+
+    if (missing(cols)) {
+        cols <- names(tipData(x))
+    }
+    if (is.null(cols)) {
+        res <- as(x, "phylo4com")
+        res at metadata$comms <- NULL
+        return(res)
+    }
+    x at metadata$comms <- setNames(rep(NA, length(cols)),
+        make.names(cols))
+
+    # create trees for each unique community wrt composition only
+    P <- presence(x)
+    phy <- extractTree(x)
+    ids <- as.character(as.numeric(factor(sapply(P, paste,
+        collapse=""))))
+    subtrees <- lapply(P[!duplicated(ids)],
+        function(n) subset(phy, rownames(P)[n %in% 1]))
+    names(subtrees) <- ids[!duplicated(ids)]
+
+    res <- as(x, "phylo4com")
+    res at subtrees <- subtrees
+    res at metadata$comms[] <- ids
+    
+    return(res)
+})
+
+
+## older approach: create a single phylo4d object with all data in it
+#phylo4com <- function(communityID, species, abundance, tree,
+#    missing=c("warn", "OK", "fail")) {
+#
+#    comm <- factor(communityID, levels=unique(communityID))
+#    taxa <- factor(species)
+#    dat <- matrix(0, nrow=nlevels(taxa), ncol=nlevels(comm),
+#      dimnames=list(levels(taxa), unique(comm)))
+#    dat[cbind(taxa, comm)] <- abundance
+#
+#    subtree <- subset(tree, tips.include=levels(taxa))
+#
+#    res <- addData(subtree, tip.data=dat, extra.data="OK")
+#    res at metadata$comms <- make.names(unique(comm))
+#    return(res)
+#
+#}
+
+## similar to above, but cast function from reshape package
+#phylo4com <- function(communityID, species, abundance, tree,
+#    missing=c("warn", "OK", "fail")) {
+#
+#    raw <- data.frame(.taxa=factor(species), .comm=factor(communityID),
+#      value=abundance, stringsAsFactors=FALSE)
+#    dat <- cast(.taxa ~ .comm, data=raw)
+#    row.names(dat) <- dat[[1]]
+#    dat <- dat[-1]
+#
+#    subtree <- subset(tree, tips.include=row.names(dat))
+#
+#    addData(subtree, tip.data=dat,
+#      metadata=list(comms=make.names(colnames(dat))), extra.data="OK")
+#
+#}

Copied: pkg/R/simpson.R (from rev 91, branches/single-tree/R/simpson.R)
===================================================================
--- pkg/R/simpson.R	                        (rev 0)
+++ pkg/R/simpson.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -0,0 +1,41 @@
+##
+## Simpson's index with and without phylogenetic distances
+##
+
+setGeneric("simpson",
+  function(x, method=c("phylogenetic", "traditional")) {
+    standardGeneric("simpson")
+})
+
+setMethod("simpson", signature(x="phylo4d"),
+  function(x, method=c("phylogenetic", "traditional")) {
+    phyc <- phylo4com(x)
+    simpson(phyc, method=method)
+})
+
+setMethod("simpson", signature(x="phylo4com"),
+  function(x, method=c("phylogenetic", "traditional")) {
+    method <- match.arg(method)
+    N.relative <- prop.table(t(abundance(x)), 1)
+    if (method=="phylogenetic") {
+        dmat <- pairdist(x, type="tip")
+    } else {
+        dmat <- matrix(1, nTips(x), nTips(x))
+        diag(dmat) <- 0
+    }   
+    out <- apply(N.relative, 1, function(n) sum((n %o% n)*dmat))
+    return(out) 
+})
+
+## earlier version: works on a single phylo4d tree with abundance data
+#simpson <- function(phy, method=c("traditional", "phylogenetic")) {
+#  method <- match.arg(method)
+#  x <- prop.table(abundance(phy))
+#  if (method=="phylogenetic") {
+#    dmat <- pairdist(phy, type="tip")
+#  } else {
+#    dmat <- matrix(1, nTips(phy), nTips(phy))
+#    diag(dmat) <- 0
+#  }   
+#  sum((x %o% x) * dmat)
+#}

Modified: pkg/R/utilities.R
===================================================================
--- pkg/R/utilities.R	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/R/utilities.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -45,19 +45,55 @@
 }
 
 # abundance extractor
-abundance <- function(phy) {
-  abund <- tipData(phy)$abundance
-  if (is.null(abund)) abund <- rep(NA_real_, nTips(phy))
-  names(abund) <- row.names(tipData(phy))
-  return(abund)
+abundance <- function(phy, comm, tip, na.zero=FALSE) {
+    communities <- names(phy at metadata$comms)
+    if (missing(comm)) {
+        comm <- communities
+    }
+    doNotExist <- !comm %in% communities
+    if (any(doNotExist)) {
+        stop("one or more communities not found in phy: ",
+            paste(comm[doNotExist], collapse=", "))
+    }
+    if (missing(tip)) {
+        tip <- tipLabels(phy)
+    } else {
+        tip <- getNode(phy, tip, type="tip", missing="warn")
+        tip <- names(tip)[!is.na(tip)]
+    }
+    N <- tipData(phy)[tip, comm, drop=FALSE]
+    if (na.zero) N[is.na(N)] <- 0
+    return(N)
 }
 
 # abundance assignment function
-`abundance<-` <- function(phy, value) {
-  tipData(phy)$abundance <- value
-  return(phy)
+`abundance<-` <- function(phy, comm, tip, value) {
+    if (!is.atomic(comm) || length(comm)!=1) {
+        stop("when replacing, comm must be a vector of length 1")
+    } else if (!comm %in% names(phy at metadata$comms)) {
+        stop(paste("community", comm, "not found in phy", sep=" "))
+    }
+    if (missing(tip)) {
+        tip <- tipLabels(phy)
+    } else {
+        tip <- names(getNode(phy, tip, type="tip", missing="fail"))
+    }
+    tipData(phy)[tip, comm] <- value
+    return(phy)
 }
 
+presence <- function(phy, comm, tip, na.zero=FALSE) {
+    N <- abundance(phy, comm, tip, na.zero=na.zero)
+    N[N > 0] <- 1
+    N[N <= 0] <- 0
+    N
+}
+
+richness <- function(phy, comm, na.zero=FALSE) {
+    P <- presence(phy, comm, na.zero=na.zero)
+    colSums(P)
+}
+
 # minTL extractor
 minTL <- function(phy) {
   minTL <- tipData(phy)$minTL
@@ -79,13 +115,11 @@
   return(phy)
 }
 
-# genera extractor
-genera <- function(phy) {
-  #From taxa names in tree, remove "_" and species name after
-  gsub("_.*$", "", tipLabels(phy))
+# community labels extractor
+communities <- function(x) {
+  names(x at metadata$comms)
 }
 
-
 ## this works as implementation of dist.nodes for phylo4 objects, albeit
 ## about 1.5x slower than dist.nodes
 pairdist <- function(phy, type=c("all", "tip"), use.labels=FALSE) {

Copied: pkg/data/weeds.data.rda (from rev 91, branches/single-tree/data/weeds.data.rda)
===================================================================
(Binary files differ)

Modified: pkg/data/weeds.rda
===================================================================
(Binary files differ)

Copied: pkg/data/weeds.tree.rda (from rev 91, branches/single-tree/data/weeds.tree.rda)
===================================================================
(Binary files differ)

Modified: pkg/inst/unitTests/runit.indices.R
===================================================================
--- pkg/inst/unitTests/runit.indices.R	2009-11-18 21:04:44 UTC (rev 91)
+++ pkg/inst/unitTests/runit.indices.R	2009-11-18 21:23:00 UTC (rev 92)
@@ -1,167 +1,152 @@
 #
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/ecopd -r 92


More information about the Ecopd-commits mailing list