[Phylobase-commits] r694 - in pkg: R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Oct 15 00:20:00 CEST 2009
Author: regetz
Date: 2009-10-15 00:19:59 +0200 (Thu, 15 Oct 2009)
New Revision: 694
Added:
pkg/src/ancestors.c
Modified:
pkg/R/treewalk.R
pkg/man/treewalk.Rd
pkg/src/descendants.c
Log:
vectorized ancestors and descendants, both using C functions for
performance; augmented documentation
Modified: pkg/R/treewalk.R
===================================================================
--- pkg/R/treewalk.R 2009-10-14 20:56:12 UTC (rev 693)
+++ pkg/R/treewalk.R 2009-10-14 22:19:59 UTC (rev 694)
@@ -70,47 +70,55 @@
}
## get descendants [recursively]
-descendants <- function (phy, node, type=c("tips","children","all"))
-{
- ## FIXME: allow vector of nodes? (or just let people lapply?)
+descendants <- function (phy, node, type=c("tips","children","all")) {
type <- match.arg(type)
- if (type=="children") {
- return(children(phy, node))
- }
- node <- getNode(phy, node)
- if (is.na(node)) stop("node ", node, " not found in tree")
- n <- nTips(phy)
- if (node <= n) {
- return(node)
- }
- ## edge matrix must be in preorder for the C function!
- if (phy at order=="preorder") {
- edge <- phy at edge
+ ## look up nodes, warning about and excluding invalid nodes
+ oNode <- node
+ node <- getNode(phy, node, missing="warn")
+ isValid <- !is.na(node)
+ node <- as.integer(node[isValid])
+
+ if (type == "children") {
+ res <- lapply(node, function(x) children(phy, x))
} else {
- edge <- reorder(phy, order="preorder")@edge
- }
- ## deal with NA root node
- edge[is.na(edge)] <- 0
- ## extract edge columns
- ancestor <- as.integer(edge[, 1])
- descendant <- as.integer(edge[, 2])
- ## create indicator vector and seed it based on children
- isChild <- rep(0L, length=nrow(edge))
- isChild[ancestor==node] <- 1L
- numEdges <- as.integer(length(isChild))
+ ## edge matrix must be in preorder for the C function!
+ if (phy at order=="preorder") {
+ edge <- phy at edge
+ } else {
+ edge <- reorder(phy, order="preorder")@edge
+ }
+ ## extract edge columns
+ ancestor <- as.integer(edge[, 1])
+ descendant <- as.integer(edge[, 2])
- ## returned value of isChild will indicate *all* descendants
- isDescendant <- .C("descendants", isChild, ancestor, descendant,
- numEdges)[[1]]
+ ## return indicator matrix of ALL descendants (including self)
+ isDes <- .Call("descendants", node, ancestor, descendant)
+ storage.mode(isDes) <- "logical"
- l <- descendant[as.logical(isDescendant)]
- if (type=="tips") {
- l <- l[l<=n]
+ ## for internal nodes only, drop self (not sure why this rule?)
+ int.node <- intersect(node, nodeId(phy, "internal"))
+ isDes[cbind(match(int.node, descendant),
+ match(int.node, node))] <- FALSE
+ ## if only tips desired, drop internal nodes
+ if (type=="tips") {
+ isDes[descendant %in% nodeId(phy, "internal"),] <- FALSE
+ }
+ res <- lapply(seq_along(node), function(n) getNode(phy,
+ descendant[isDes[,n]]))
}
+ names(res) <- as.character(oNode[isValid])
+ ## if just a single node, return as a single vector
+ if (length(res)==1) res <- res[[1]]
+ res
+
## Original pure R implementation of the above
## (note that it does not require preorder ordering)
+ ##n <- nTips(phy)
+ ##if (node <= n) {
+ ## return(node)
+ ##}
##l <- numeric()
##d <- children(phy, node)
##for (j in d) {
@@ -120,8 +128,6 @@
## descendants(phy,j,type="all"))
## else l <- c(l, descendants(phy,j,type=type))
##}
-
- return(getNode(phy, l))
}
siblings <- function(phy, node, include.self=FALSE) {
@@ -131,26 +137,57 @@
}
## get ancestors (all nodes)
-ancestors <- function (phy, node, type=c("all","parent","ALL"))
-{
+ancestors <- function (phy, node, type=c("all","parent","ALL")) {
+
type <- match.arg(type)
- if (type=="parent") return(ancestor(phy,node))
- oNode <- node <- getNode(phy,node)
- if (is.na(node)) stop("node ",node," not found in tree")
- res <- numeric(0)
- n <- nTips(phy)
- ## correct behavior when node==root
- if(node == rootNode(phy)) return(NULL)
+ ## look up nodes, warning about and excluding invalid nodes
+ oNode <- node
+ node <- getNode(phy, node, missing="warn")
+ isValid <- !is.na(node)
+ node <- as.integer(node[isValid])
- repeat {
- anc <- ancestor(phy,node)
- res <- c(res,anc)
- node <- anc
- if (anc==n+1) break
+ if (type == "parent") {
+ res <- lapply(node, function(x) ancestor(phy, x))
+ } else {
+ ## edge matrix must be in postorder for the C function!
+ if (phy at order=="postorder") {
+ edge <- phy at edge
+ } else {
+ edge <- reorder(phy, order="postorder")@edge
+ }
+ ## extract edge columns
+ ancestor <- as.integer(edge[, 1])
+ descendant <- as.integer(edge[, 2])
+
+ ## return indicator matrix of ALL ancestors (including self)
+ isAnc <- .Call("ancestors", node, ancestor, descendant)
+ storage.mode(isAnc) <- "logical"
+
+ ## drop self if needed
+ if (type=="all") {
+ isAnc[cbind(match(node, descendant), seq_along(node))] <- FALSE
+ }
+ res <- lapply(seq_along(node), function(n) getNode(phy,
+ descendant[isAnc[,n]]))
}
- if(type == "ALL") res <- c(oNode, res)
- return(getNode(phy,res))
+ names(res) <- as.character(oNode[isValid])
+
+ ## if just a single node, return as a single vector
+ if (length(res)==1) res <- res[[1]]
+ res
+
+ ## Original pure R implementation of the above
+ ## (note that it does not require preorder ordering)
+ ##if (node == rootNode(phy))
+ ## return(NULL)
+ ##repeat {
+ ## anc <- ancestor(phy, node)
+ ## res <- c(res, anc)
+ ## node <- anc
+ ## if (anc == n + 1)
+ ## break
+ ##}
}
MRCA <- function(phy, ...) {
Modified: pkg/man/treewalk.Rd
===================================================================
--- pkg/man/treewalk.Rd 2009-10-14 20:56:12 UTC (rev 693)
+++ pkg/man/treewalk.Rd 2009-10-14 22:19:59 UTC (rev 694)
@@ -19,7 +19,7 @@
siblings(phy,node,include.self=FALSE)
children(phy, node)
descendants(phy, node, type=c("tips","children","all"))
-MRCA(phy,\dots)
+MRCA(phy, \dots)
shortestPath(phy, node1, node2)
\S4method{sumEdgeLength}{phylo4}(x, node)
}
@@ -30,7 +30,10 @@
\item{x}{a \linkS4class{phylo4} object (or one inheriting from
\linkS4class{phylo4}, e.g. a \linkS4class{phylo4d} object)
}
- \item{node}{a node ID number (or name)}
+ \item{node}{either an integer corresponding to a node ID number, or a
+ character corresponding to a node label; for \code{ancestors} and
+ \code{descendants}, this may be a vector of multiple node numbers or
+ names}
\item{type}{(\code{ancestors}) specify whether to return just direct
ancestor ("parent"), all ancestor nodes ("all"), or all ancestor
nodes including self ("ALL"); (\code{descendants})
@@ -43,14 +46,35 @@
\item{node1}{a node number (or name)}
\item{node2}{a node number (or name)}
}
+\details{
+ \code{ancestors} and \code{descendants} can take \code{node} vectors
+ of arbitrary length, returning a list of output vectors if the number
+ of valid input nodes is greater than one. List element names are taken
+ directly from the input node vector.
+
+ If any supplied nodes are not found in the tree, the behavior
+ currently varies across functions. Invalid nodes are automatically
+ omitted by \code{ancestors} and \code{descendants}, with a warning.
+ \code{ancestor} will return \code{NA} for any invalid nodes, with
+ a warning. Both \code{children} and \code{siblings} will return an empty
+ vector, again with a warning. In contrast, \code{MRCA} and
+ \code{shortestPath} will throw an immediate error if any input nodes
+ are invalid.
+}
\value{
\item{\code{ancestors} and \code{descendants}}{
- return named vectors of the ancestors and descendants of a node; \code{ancestor} is a synonym
- for \code{ancestors(\dots, type="parent")} (i.e. direct ancestor only), while \code{children}
- is a synonym for \code{descendants(\dots, type="children")} (i.e. direct descendants only)}
+ return a named vector (or a list of such vectors in the case of
+ multiple input nodes) of the ancestors and descendants of a node}
+ \item{\code{ancestor} and \code{children}}{
+ \code{ancestor} is analogous to \code{ancestors(\dots,
+ type="parent")} (i.e. direct ancestor only), but returns a single
+ concatenated vector in the case of multiple input nodes;
+ \code{children} is analogous to \code{descendants(\dots,
+ type="children")} (i.e. direct descendants only), but is not
+ currently intended to be used with multiple input nodes }
\item{\code{siblings}}{
returns sibling nodes (children of the same parent)}
- \item{\code{mrca}}{
+ \item{\code{MRCA}}{
returns the most recent common ancestor of two or more nodes}
\item{\code{shortestPath}}{
returns the nodes of the shortest path from one node to another (excluding \code{node1} and
@@ -65,18 +89,18 @@
\examples{
data(geospiza)
nodeLabels(geospiza) <- LETTERS[1:nNodes(geospiza)]
- plot(as(geospiza,"phylo4"), show.node.label=TRUE)
- ancestor(geospiza,"E")
- children(geospiza,"C")
- descendants(geospiza,"D",type="tips")
- descendants(geospiza,"D",type="all")
- ancestors(geospiza,"D")
- MRCA(geospiza,"conirostris","difficilis","fuliginosa")
- MRCA(geospiza,"olivacea","conirostris")
+ plot(as(geospiza, "phylo4"), show.node.label=TRUE)
+ ancestor(geospiza, "E")
+ children(geospiza, "C")
+ descendants(geospiza, "D", type="tips")
+ descendants(geospiza, "D", type="all")
+ ancestors(geospiza, "D")
+ MRCA(geospiza, "conirostris", "difficilis", "fuliginosa")
+ MRCA(geospiza, "olivacea", "conirostris")
## shortest path between 2 nodes
- shortestPath(geospiza, "fortis","fuliginosa")
- shortestPath(geospiza, "F","L")
+ shortestPath(geospiza, "fortis", "fuliginosa")
+ shortestPath(geospiza, "F", "L")
## FIXME
## if(require(ape)){ edgelabels() }
Added: pkg/src/ancestors.c
===================================================================
--- pkg/src/ancestors.c (rev 0)
+++ pkg/src/ancestors.c 2009-10-14 22:19:59 UTC (rev 694)
@@ -0,0 +1,53 @@
+/*
+ ancestors.c:
+ Identify all ancestors of each node in the input vector. Function
+ inputs are derived from a phylo4 edge matrix, which *must* be in
+ postorder order. The isAncestor output is an indicator matrix of
+ which nodes (rows, corresponding to the decendant vector) are
+ ancestors of each input node (columns, corresponding to the nodes
+ vector). It will contain 1 for each ancestor of the node, *including
+ itself*, and 0 for all other nodes.
+
+ Jim Regetz (NCEAS)
+*/
+
+#include <R.h>
+#include <Rinternals.h>
+
+SEXP ancestors(SEXP nod, SEXP anc, SEXP des) {
+
+ int numEdges = length(anc);
+ int numNodes = length(nod);
+
+ int* nodes = INTEGER(nod);
+ int* ancestor = INTEGER(anc);
+ int* descendant = INTEGER(des);
+
+ int parent = 0;
+ SEXP isAncestor;
+
+ PROTECT(isAncestor = allocMatrix(INTSXP, numEdges, numNodes));
+ for (int n=0; n<numNodes; n++) {
+ for (int i=0; i<numEdges; i++) {
+ if (nodes[n]==descendant[i]) {
+ INTEGER(isAncestor)[i + n*numEdges] = 1;
+ } else {
+ INTEGER(isAncestor)[i + n*numEdges] = 0;
+ }
+ }
+ }
+ for (int n=0; n<numNodes; n++) {
+ for (int i=0; i<numEdges; i++) {
+ if (INTEGER(isAncestor)[i + n*numEdges]==1) {
+ parent = ancestor[i];
+ for (int j=i+1; j<numEdges; j++) {
+ if (descendant[j]==parent) {
+ INTEGER(isAncestor)[j + n*numEdges]=1;
+ }
+ }
+ }
+ }
+ }
+ UNPROTECT(1);
+ return isAncestor;
+}
Property changes on: pkg/src/ancestors.c
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: pkg/src/descendants.c
===================================================================
--- pkg/src/descendants.c 2009-10-14 20:56:12 UTC (rev 693)
+++ pkg/src/descendants.c 2009-10-14 22:19:59 UTC (rev 694)
@@ -1,24 +1,53 @@
/*
descendants.c:
- Identify all descendants of a given node. Function inputs are
- derived from a phylo4 edge matrix, which *must* be in preorder order.
- The isDescendant input vector should contain 1 for the immediate
- children of the node, and 0 otherwise. The function returns this
- vector updated to include all further descendants.
+ Identify all descendants of each node in the input vector. Function
+ inputs are derived from a phylo4 edge matrix, which *must* be in
+ preorder order. The isDescendant output is an indicator matrix of
+ which nodes (rows, corresponding to the decendant vector) are
+ descendants of each input node (columns, corresponding to the nodes
+ vector). It will contain 1 for each descendant of the node, *including
+ itself*, and 0 for all other nodes.
+
+ Jim Regetz (NCEAS)
*/
#include <R.h>
+#include <Rinternals.h>
-void descendants(int *isDescendant, int *ancestor, int *descendant,
- int *numEdges) {
+SEXP descendants(SEXP nod, SEXP anc, SEXP des) {
- int child=0;
- for (int i=0; i<*numEdges; i++) {
- if (isDescendant[i]==1) {
- child = descendant[i];
- for (int j=i+1; j<*numEdges; j++) {
- if (ancestor[j]==child) isDescendant[j]=1;
+ int numEdges = length(anc);
+ int numNodes = length(nod);
+
+ int* nodes = INTEGER(nod);
+ int* ancestor = INTEGER(anc);
+ int* descendant = INTEGER(des);
+
+ int child = 0;
+ SEXP isDescendant;
+
+ PROTECT(isDescendant = allocMatrix(INTSXP, numEdges, numNodes));
+ for (int n=0; n<numNodes; n++) {
+ for (int i=0; i<numEdges; i++) {
+ if (nodes[n]==descendant[i]) {
+ INTEGER(isDescendant)[i + n*numEdges] = 1;
+ } else {
+ INTEGER(isDescendant)[i + n*numEdges] = 0;
}
}
}
+ for (int n=0; n<numNodes; n++) {
+ for (int i=0; i<numEdges; i++) {
+ if (INTEGER(isDescendant)[i + n*numEdges]==1) {
+ child = descendant[i];
+ for (int j=i+1; j<numEdges; j++) {
+ if (ancestor[j]==child) {
+ INTEGER(isDescendant)[j + n*numEdges] = 1;
+ }
+ }
+ }
+ }
+ }
+ UNPROTECT(1);
+ return isDescendant;
}
More information about the Phylobase-commits
mailing list