[Phylobase-commits] r918 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Apr 8 23:13:38 CEST 2014


Author: francois
Date: 2014-04-08 23:13:38 +0200 (Tue, 08 Apr 2014)
New Revision: 918

Removed:
   pkg/R/descendants.R
Log:
descendants.R not needed anymore, content moved to ancestors.R and getNode-methods.R

Deleted: pkg/R/descendants.R
===================================================================
--- pkg/R/descendants.R	2014-04-08 21:12:54 UTC (rev 917)
+++ pkg/R/descendants.R	2014-04-08 21:13:38 UTC (rev 918)
@@ -1,496 +0,0 @@
-## matching node labels with node numbers ...
-## e.g.
-## 14 tips, 13 int nodes
-## N04 = nodeLabels[4]
-##   <-> node 18
-## x = n-nTips(phy)
-## so:     n = x+nTips(phy)
-
-
-
-#' node and edge look-up functions
-#' 
-#' Functions for retrieving node and edge IDs (possibly with corresponding
-#' labels) from a phylogenetic tree.
-#' 
-#' \code{getNode} and \code{getEdge} are primarily intended for looking up the
-#' IDs either of nodes themselves or of edges associated with those nodes. Note
-#' that they behave quite differently. With \code{getNode}, any input nodes are
-#' looked up against tree nodes of the specified type, and those that match are
-#' returned as numeric node IDs with node labels (if they exist) as element
-#' names. With \code{getEdge}, any input nodes are looked up against edge ends
-#' of the specified type, and those that match are returned as character edge
-#' IDs with the corresponding node ID as element names.
-#' 
-#' If \code{missing} is \dQuote{warn} or \dQuote{OK}, \code{NA} is returned for
-#' any nodes that are unmatched for the specified type. This can provide a
-#' mechanism for filtering a set of nodes or edges.
-#' 
-#' \code{nodeId} provides similar output to \code{getNode} in the case when no
-#' node is supplied, but it is faster and returns an unnamed vector of the
-#' numeric IDs of all nodes of the specified node type.  Similarly,
-#' \code{edgeId} simply returns an unnamed vector of the character IDs of all
-#' edges for which the descendant node is of the specified node type.
-#' 
-#' @aliases getNode getEdge nodeId nodeId,phylo4-method edgeId
-#' edgeId,phylo4-method
-#' @param x a \linkS4class{phylo4} object (or one inheriting from
-#' \linkS4class{phylo4}, e.g. a \linkS4class{phylo4d} object)
-#' @param node either an integer vector corresponding to node ID numbers, or a
-#' character vector corresponding to node labels; if missing, all nodes
-#' appropriate to the specified type will be returned by \code{getNode}, and
-#' all edges appropriate to the specified type will be returned by
-#' \code{getEdge}.
-#' @param type (\code{getNode}) specify whether to return nodes matching "all"
-#' tree nodes (default), only "tip" nodes, or only "internal" nodes;
-#' (\code{nodeId, edgeId}) specify whether to return "all" tree nodes, or only
-#' those corresponding to "tip", "internal", or "root" nodes; (\code{getEdge})
-#' specify whether to look up edges based on their descendant node
-#' ("descendant") or ancestral node ("ancestor")
-#' @param missing what to do if some requested node IDs or names are not in the
-#' tree: warn, do nothing, or stop with an error
-#' @return \item{list("getNode")}{returns a named integer vector of node IDs,
-#' in the order of input nodes if provided, otherwise in nodeId order}
-#' \item{list("getEdge")}{returns a named character vector of edge IDs, in the
-#' order of input nodes if provide, otherwise in nodeId order}
-#' \item{list("nodeId")}{returns an unnamed integer vector of node IDs, in
-#' ascending order} \item{list("getEdge")}{returns an unnamed character vector
-#' of edge IDs, in edge matrix order}
-#' @keywords misc
-#' @examples
-#' 
-#'   data(geospiza)
-#'   nodeLabels(geospiza) <- LETTERS[1:nNodes(geospiza)]
-#'   plot(as(geospiza, "phylo4"), show.node.label=TRUE)
-#'   getNode(geospiza, 18)
-#'   getNode(geospiza, "D")
-#'   getEdge(geospiza, "D")
-#'   getEdge(geospiza, "D", type="ancestor")
-#' 
-#'   ## match nodes only to tip nodes, flagging invalid cases as NA
-#'   getNode(geospiza, c(1, 18, 999), type="tip", missing="OK")
-#' 
-#'   ## get all edges that descend from internal nodes
-#'   getEdge(geospiza, type="ancestor")
-#' 
-#'   ## identify an edge from its terminal node
-#'   getEdge(geospiza, c("olivacea", "B", "fortis"))
-#'   getNode(geospiza, c("olivacea", "B", "fortis"))
-#'   edges(geospiza)[c(26, 1, 11),]
-#' 
-#'   ## quickly get all tip node IDs and tip edge IDs
-#'   nodeId(geospiza, "tip")
-#'   edgeId(geospiza, "tip")
-#' 
-getNode <- function(x, node, type=c("all", "tip", "internal"),
-    missing=c("warn","OK","fail")) {
-
-    type <- match.arg(type)
-    missing <- match.arg(missing)
-
-    ## if missing node arg, get all nodes of specified type
-    if (missing(node)) {
-        node <- nodeId(x, type)
-    }
-
-    if (length(node) == 0) {
-      rval <- integer(0)
-      names(rval) <- character(0)
-      return(rval)
-    }
-    
-    lblTmp <- labels(x, type)
-    
-    ## match node to tree
-    if (is.character(node)) {
-        ndTmp <- paste("^\\Q", node, "\\E$", sep="")        
-        irval <- lapply(ndTmp, function(ND) {
-            grep(ND, lblTmp, perl=TRUE)
-        })
-        irvalL <- sapply(irval, length)
-        irval[irvalL == 0] <- 0
-        irval <- unlist(irval)
-    } else if (is.numeric(node) && all(floor(node) == node, na.rm=TRUE)) {
-        irval <- match(as.character(node), names(lblTmp))
-    } else {
-        stop("Node must be a vector of class \'integer\' or \'character\'.")
-    }
-
-    ## node numbers
-    rval <- names(lblTmp)[irval]
-    rval[is.na(node)] <- NA # return NA for any NA_character_ inputs, not needed but ensure rval has correct length
-    rval <- as.integer(rval)
-
-    ## node labels
-    nmNd <- lblTmp[irval]
-    names(rval) <- nmNd
-    
-    ## deal with nodes that don't match
-    if (any(is.na(rval))) {
-        missnodes <- node[is.na(rval)]
-        msg <- paste("Some nodes not found among", type, "nodes in tree:",
-            paste(missnodes,collapse=", "))
-        if (missing=="fail") {
-            stop(msg)
-        } else if (missing=="warn") {
-            warning(msg)
-        }
-    }
-    return(rval)
-}
-
-
-#' tree traversal and utility functions
-#' 
-#' Functions for describing relationships among phylogenetic nodes (i.e.
-#' internal nodes or tips).
-#' 
-#' \code{ancestors} and \code{descendants} can take \code{node} vectors of
-#' arbitrary length, returning a list of output vectors if the number of valid
-#' input nodes is greater than one. List element names are taken directly from
-#' the input node vector.
-#' 
-#' If any supplied nodes are not found in the tree, the behavior currently
-#' varies across functions. Invalid nodes are automatically omitted by
-#' \code{ancestors} and \code{descendants}, with a warning.  \code{ancestor}
-#' will return \code{NA} for any invalid nodes, with a warning. Both
-#' \code{children} and \code{siblings} will return an empty vector, again with
-#' a warning. In contrast, \code{MRCA} and \code{shortestPath} will throw an
-#' immediate error if any input nodes are invalid.
-#' 
-#' @aliases children descendants ancestor ancestors siblings MRCA shortestPath
-#' sumEdgeLength sumEdgeLength,phylo4-method
-#' @param phy a \linkS4class{phylo4} object (or one inheriting from
-#' \linkS4class{phylo4}, e.g. a \linkS4class{phylo4d} object)
-#' @param x a \linkS4class{phylo4} object (or one inheriting from
-#' \linkS4class{phylo4}, e.g. a \linkS4class{phylo4d} object)
-#' @param node either an integer corresponding to a node ID number, or a
-#' character corresponding to a node label; for \code{ancestors} and
-#' \code{descendants}, this may be a vector of multiple node numbers or names
-#' @param type (\code{ancestors}) specify whether to return just direct
-#' ancestor ("parent"), all ancestor nodes ("all"), or all ancestor nodes
-#' including self ("ALL"); (\code{descendants}) specify whether to return just
-#' direct descendants ("children"), all extant descendants ("tips"), or all
-#' descendant nodes ("all")
-#' @param include.self whether to include self in list of siblings
-#' @param \dots a list of node numbers or names, or a vector of node numbers or
-#' names
-#' @param node1 a node number (or name)
-#' @param node2 a node number (or name)
-#' @return \item{list("ancestors")}{ return a named vector (or a list of such
-#' vectors in the case of multiple input nodes) of the ancestors and
-#' descendants of a node}\item{ and }{ return a named vector (or a list of such
-#' vectors in the case of multiple input nodes) of the ancestors and
-#' descendants of a node}\item{list("descendants")}{ return a named vector (or
-#' a list of such vectors in the case of multiple input nodes) of the ancestors
-#' and descendants of a node} \item{list("ancestor")}{ \code{ancestor} is
-#' analogous to \code{ancestors(\dots{}, type="parent")} (i.e. direct ancestor
-#' only), but returns a single concatenated vector in the case of multiple
-#' input nodes; \code{children} is analogous to \code{descendants(\dots{},
-#' type="children")} (i.e. direct descendants only), but is not currently
-#' intended to be used with multiple input nodes }\item{ and }{ \code{ancestor}
-#' is analogous to \code{ancestors(\dots{}, type="parent")} (i.e. direct
-#' ancestor only), but returns a single concatenated vector in the case of
-#' multiple input nodes; \code{children} is analogous to
-#' \code{descendants(\dots{}, type="children")} (i.e. direct descendants only),
-#' but is not currently intended to be used with multiple input nodes
-#' }\item{list("children")}{ \code{ancestor} is analogous to
-#' \code{ancestors(\dots{}, type="parent")} (i.e. direct ancestor only), but
-#' returns a single concatenated vector in the case of multiple input nodes;
-#' \code{children} is analogous to \code{descendants(\dots{}, type="children")}
-#' (i.e. direct descendants only), but is not currently intended to be used
-#' with multiple input nodes } \item{list("siblings")}{ returns sibling nodes
-#' (children of the same parent)} \item{list("MRCA")}{ returns the most recent
-#' common ancestor of two or more nodes} \item{list("shortestPath")}{ returns
-#' the nodes of the shortest path from one node to another (excluding
-#' \code{node1} and \code{node2})} \item{list("sumEdgeLength")}{ returns the
-#' sum of branch length for branches starting at nodes provided}
-#' @note \code{MRCA} is uppercase to avoid conflict with \code{mrca} in ape
-#' @seealso \code{\link[ape]{mrca}}, in the ape package, gives a list of all
-#' subtrees
-#' @keywords misc
-#' @examples
-#' 
-#'   data(geospiza)
-#'   nodeLabels(geospiza) <- LETTERS[1:nNodes(geospiza)]
-#'   plot(as(geospiza, "phylo4"), show.node.label=TRUE)
-#'   ancestor(geospiza, "E")
-#'   children(geospiza, "C")
-#'   descendants(geospiza, "D", type="tips")
-#'   descendants(geospiza, "D", type="all")
-#'   ancestors(geospiza, "D")
-#'   MRCA(geospiza, "conirostris", "difficilis", "fuliginosa")
-#'   MRCA(geospiza, "olivacea", "conirostris")
-#' 
-#'   ## shortest path between 2 nodes
-#'   shortestPath(geospiza, "fortis", "fuliginosa")
-#'   shortestPath(geospiza, "F", "L")
-#' 
-#'   ## branch length from a tip to the root
-#'   sumEdgeLength(geospiza, ancestors(geospiza, "fortis", type="ALL"))
-ancestor <- function(phy,node) {
-    node2 <- getNode(phy,node)
-    ## r <- which(edges(phy)[,2]==node)
-    r <- match(node2,edges(phy)[,2])
-    return(getNode(phy,edges(phy)[r,1],missing="OK"))
-}
-
-
-children <- function(phy,node) {
-    node2 <- getNode(phy,node)
-    r <- which(edges(phy)[,1]==node2)
-    getNode(phy,edges(phy)[r,2])
-}
-
-## get descendants [recursively]
-descendants <- function (phy, node, type=c("tips","children","all")) {
-    type <- match.arg(type)
-
-    ## look up nodes, warning about and excluding invalid nodes
-    oNode <- node
-    node <- getNode(phy, node, missing="warn")
-    isValid <- !is.na(node)
-    node <- as.integer(node[isValid])
-
-    if (type == "children") {
-        res <- lapply(node, function(x) children(phy, x))
-        ## if just a single node, return as a single vector
-        if (length(res)==1) res <- res[[1]]
-    } else {
-        ## edge matrix must be in preorder for the C function!
-        if (phy at order=="preorder") {
-            edge <- phy at edge
-        } else {
-            edge <- reorder(phy, order="preorder")@edge
-        }
-        ## extract edge columns
-        ancestor <- as.integer(edge[, 1])
-        descendant <- as.integer(edge[, 2])
-
-        ## return indicator matrix of ALL descendants (including self)
-        isDes <- .Call("descendants", node, ancestor, descendant)
-        storage.mode(isDes) <- "logical"
-
-        ## for internal nodes only, drop self (not sure why this rule?)
-        int.node <- intersect(node, nodeId(phy, "internal"))
-        isDes[cbind(match(int.node, descendant),
-            match(int.node, node))] <- FALSE
-        ## if only tips desired, drop internal nodes
-        if (type=="tips") {
-            isDes[descendant %in% nodeId(phy, "internal"),] <- FALSE
-        }
-        ## res <- lapply(seq_along(node), function(n) getNode(phy,
-        ##     descendant[isDes[,n]]))
-        res <- getNode(phy, descendant[isDes[, seq_along(node)]])
-    }
-    ## names(res) <- as.character(oNode[isValid])
-
-    res
-
-    ## Original pure R implementation of the above
-    ## (note that it does not require preorder ordering)
-    ##n <- nTips(phy)
-    ##if (node <= n) {
-    ##    return(node)
-    ##}
-    ##l <- numeric()
-    ##d <- children(phy, node)
-    ##for (j in d) {
-    ##    if (j <= n)
-    ##      l <- c(l,j)
-    ##    else if (type=="all") l <- c(l,j,
-    ##               descendants(phy,j,type="all"))
-    ##    else l <- c(l, descendants(phy,j,type=type))
-    ##}
-}
-
-siblings <- function(phy, node, include.self=FALSE) {
-    v <- children(phy,ancestor(phy,node))
-    if (!include.self) v <- v[v!=getNode(phy,node)]
-    v
-}
-
-## get ancestors (all nodes)
-ancestors <- function (phy, node, type=c("all","parent","ALL")) {
-
-    type <- match.arg(type)
-
-    ## look up nodes, warning about and excluding invalid nodes
-    oNode <- node
-    node <- getNode(phy, node, missing="warn")
-    isValid <- !is.na(node)
-    node <- as.integer(node[isValid])
-
-    if (length(node) == 0) {
-      return(NA)
-    }
-    
-    if (type == "parent") {
-        res <- lapply(node, function(x) ancestor(phy, x))
-    } else {
-        ## edge matrix must be in postorder for the C function!
-        if (phy at order=="postorder") {
-            edge <- phy at edge
-        } else {
-            edge <- reorder(phy, order="postorder")@edge
-        }
-        ## extract edge columns
-        ancestor <- as.integer(edge[, 1])
-        descendant <- as.integer(edge[, 2])
-
-        ## return indicator matrix of ALL ancestors (including self)
-        isAnc <- .Call("ancestors", node, ancestor, descendant)
-        storage.mode(isAnc) <- "logical"
-
-        ## drop self if needed
-        if (type=="all") {
-            isAnc[cbind(match(node, descendant), seq_along(node))] <- FALSE
-        }
-        res <- lapply(seq_along(node), function(n) getNode(phy,
-            descendant[isAnc[,n]]))
-    }
-    names(res) <- as.character(oNode[isValid])
-
-    ## if just a single node, return as a single vector
-    if (length(res)==1) res <- res[[1]]
-    res
-
-    ## Original pure R implementation of the above
-    ## (note that it does not require preorder ordering)
-    ##if (node == rootNode(phy))
-    ##    return(NULL)
-    ##repeat {
-    ##    anc <- ancestor(phy, node)
-    ##    res <- c(res, anc)
-    ##    node <- anc
-    ##    if (anc == n + 1)
-    ##        break
-    ##}
-}
-
-MRCA <- function(phy, ...) {
-    nodes <- list(...)
-    ## if length==1 and first element is a vector,
-    ##   use it as the list
-    if (length(nodes)==1 && length(nodes[[1]])>1) {
-        nodes <- as.list(nodes[[1]])
-    }
-
-    ## Correct behavior when the root is part of the nodes
-    testNodes <- lapply(nodes, getNode, x=phy)
-    ## BMB: why lapply, not sapply?
-    lNodes <- unlist(testNodes)
-    if (any(is.na(lNodes)))
-      stop("nodes not found in tree: ",paste(names(lNodes)[is.na(lNodes)],
-                                             collapse=", "))
-    uniqueNodes <- unique(testNodes)
-    root <- nTips(phy)+1
-    if(root %in% uniqueNodes) {
-        res <- getNode(phy, root)
-        return(res)
-    }
-    ## Correct behavior in case of MRCA of identical taxa
-    if(length(uniqueNodes) == 1) {
-        res <- uniqueNodes[[1]]
-        return(res)
-    }
-    else {
-        ancests <- lapply(nodes, ancestors, phy=phy, type="ALL")
-        res <- getNode(phy, max(Reduce(intersect, ancests)))
-        return(res)
-    }
-} # end MRCA
-
-
-###############
-# shortestPath
-###############
-shortestPath <- function(phy, node1, node2){
-
-  ## conversion from phylo, phylo4 and phylo4d
-  if (class(phy) == "phylo4d") {
-    x <- extractTree(phy)
-  }
-  else if (class(phy) != "phylo4"){
-    x <- as(phy, "phylo4")
-  }
-
-    ## some checks
-    ## if (is.character(checkval <- checkPhylo4(x))) stop(checkval) # no need
-    t1 <- getNode(x, node1)
-    t2 <- getNode(x, node2)
-    if(any(is.na(c(t1,t2)))) stop("wrong node specified")
-    if(t1==t2) return(NULL)
-
-    ## main computations
-    comAnc <- MRCA(x, t1, t2) # common ancestor
-    desComAnc <- descendants(x, comAnc, type="all")
-    ancT1 <- ancestors(x, t1, type="all")
-    path1 <- intersect(desComAnc, ancT1) # path: common anc -> t1
-
-    ancT2 <- ancestors(x, t2, type="all")
-    path2 <- intersect(desComAnc, ancT2) # path: common anc -> t2
-
-    res <- union(path1, path2) # union of the path
-    ## add the common ancestor if it differs from t1 or t2
-    if(!comAnc %in% c(t1,t2)){
-        res <- c(comAnc,res)
-    }
-
-    res <- getNode(x, res)
-
-    return(res)
-} # end shortestPath
-
-
-
-###########
-# getEdge
-###########
-getEdge <- function(x, node, type=c("descendant", "ancestor"),
-    missing=c("warn", "OK", "fail")) {
-
-    if(!identical(class(x), "phylo4")) x <- as(x, "phylo4")
-
-    type <- match.arg(type)
-    missing <- match.arg(missing)
-    if (missing(node)) {
-        if (type=="descendant") {
-            node <- nodeId(x, "all")
-        } else if (type=="ancestor") {
-            node <- nodeId(x, "internal")
-        }
-    }
-
-    node.id <- getNode(x, node, missing="OK")
-
-    nd <- lapply(node.id, function(nid) {
-        if (is.na(nid)) {
-            res <- NA
-        } else {
-            res <- switch(type,
-                descendant = edgeId(x)[edges(x)[,2] %in% nid],
-                ancestor = edgeId(x)[edges(x)[,1] %in% nid])
-            ## hack to return NA for tip nodes when type='ancestor'
-            if(length(res)==0) res <- NA
-            names(res) <- rep(nid, length(res))
-        }
-        names(res) <- rep(nid, length(res))
-        res
-    })
-
-    ## warn or stop if necessary
-    is.missing <- is.na(nd)
-    if (missing!="OK" && any(is.missing)) {
-        msg <- paste("Not all nodes are ", type, "s in this tree: ",
-            paste(node[is.missing], collapse=", "), sep="")
-        if (missing=="fail") {
-            stop(msg)
-        } else if (missing=="warn") {
-            warning(msg)
-        }
-    }
-
-    return(unlist(unname(nd)))
-
-}



More information about the Phylobase-commits mailing list