[Phylobase-commits] r178 - in pkg: R man tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon May 26 15:58:30 CEST 2008


Author: bbolker
Date: 2008-05-26 15:58:29 +0200 (Mon, 26 May 2008)
New Revision: 178

Modified:
   pkg/R/checkdata.R
   pkg/R/methods-phylo4.R
   pkg/R/treestruc.R
   pkg/man/check.phylo4.Rd
   pkg/man/hasSingles.Rd
   pkg/tests/phylotorture.R
Log:
 added checks for structure, to match phylo
structure rules



Modified: pkg/R/checkdata.R
===================================================================
--- pkg/R/checkdata.R	2008-05-22 14:08:01 UTC (rev 177)
+++ pkg/R/checkdata.R	2008-05-26 13:58:29 UTC (rev 178)
@@ -1,19 +1,54 @@
 
 ## REQUIRED for all trees
 check_phylo4 <- function(object) {
+    check_tree(object)
+}
+
+check_tree <- function(object,warn="retic",err=NULL) {
     ## FIXME: check for cyclicity?
     N <- nrow(object at edge)  
     if (hasEdgeLength(object) && length(object at edge.length) != N)
       return("edge lengths do not match number of edges")
     ## if (length(object at tip.label)+object at Nnode-1 != N) # does not work with multifurcations
     ##  return("number of tip labels not consistent with number of edges and nodes")
-    if(length(object at tip.label) != nTips(object)) return("number of tip labels not consistent with number of tips")
-    nAncest <- tabulate(edges(object)[, 2])
+    ## check: internal node numbers = 1:m
+    
+    ## check: tip numbers = (m+1):(m+n)
+    ntips <- nTips(object)
+    if(length(object at tip.label) != ntips)
+      return("number of tip labels not consistent with number of tips")
+    E <- edges(object)
+    tips <- sort(E[,2][!E[,2] %in% E[,1]])
+    nodes <- unique(sort(c(E)))
+    intnodes <- nodes[!nodes %in% tips]
+    if (!(all(tips==1:ntips) && all(nodes=(ntips+1):(ntips+length(intnodes)))))
+      return("tips and nodes incorrectly numbered")
+    nAncest <- tabulate(E[, 2])
+    nDesc <- tabulate(E[,1])
+    nTips <- sum(nDesc==0)
+    if (!all(nDesc[1:nTips]==0))
+      return("nodes 1 to nTips must all be tips")
+    if (!all(nDesc[(nTips+1):(nTips+nNodes(object))]>0))
+      return("nodes (nTips+1) to (nTips+nNodes) must all be internal nodes")
+    if (any(nDesc>1)) {
+        if ("poly" %in% err)
+          return("tree includes polytomies")
+        if ("poly" %in% warn)
+          warning("tree includes polytomies")
+    }
     nRoots <- sum(nAncest==0)
+    msg <- character(0)
     if (nRoots>1)
-      return("tree has more than one root")
+      msg <- "tree has more than one root"
     if (any(nAncest>1))
-      return("some nodes have multiple ancestors")
+      msg <- c(msg,"some nodes have multiple ancestors")
+    msg <- paste(msg,collapse=", ")
+    if (nzchar(msg)) {
+        if ("retic" %in% err)
+          return(paste("tree is reticulated:",msg))
+        if ("retic" %in% warn)
+          warning("tree is reticulated:",msg)
+    }
     return(TRUE)
 }
 

Modified: pkg/R/methods-phylo4.R
===================================================================
--- pkg/R/methods-phylo4.R	2008-05-22 14:08:01 UTC (rev 177)
+++ pkg/R/methods-phylo4.R	2008-05-26 13:58:29 UTC (rev 178)
@@ -7,10 +7,11 @@
 
 setMethod("nTips", "phylo4", function(x, ...) {
     E <- edges(x)
-#    res <- sum(!E[, 2] %in% E[, 1])
-## change suggested by Aaron Mackey, handles reticulated networks better
-##  sum(tabulate(E[,1]) == 0) would work too (which is faster??)
-    res <- sum(!(unique(E[,2]) %in% E[,1]))
+    ## doesn't handle reticulated networks
+    ##    res <- sum(!E[, 2] %in% E[, 1])
+    res <- sum(tabulate(E[,1]) == 0) ## twice as fast as ...
+    ## change suggested by Aaron Mackey, handles reticulated networks better
+    ## res <- sum(!(unique(E[,2]) %in% E[,1]))
     return(res)
 })
 

Modified: pkg/R/treestruc.R
===================================================================
--- pkg/R/treestruc.R	2008-05-22 14:08:01 UTC (rev 177)
+++ pkg/R/treestruc.R	2008-05-26 13:58:29 UTC (rev 178)
@@ -18,7 +18,13 @@
   any(degree == 1)
 }
 
+hasRetic <- function(object) {
+  if(!check_phylo4(object)) stop("to be used with a phylo4 object")
+  ancest <- tabulate(edges(object)[, 2])
+  any(ancest>1)
+}
 
+
 ### TO BE FINISHED - Thibaut
 
 # Returns a vector of logical 

Modified: pkg/man/check.phylo4.Rd
===================================================================
--- pkg/man/check.phylo4.Rd	2008-05-22 14:08:01 UTC (rev 177)
+++ pkg/man/check.phylo4.Rd	2008-05-26 13:58:29 UTC (rev 178)
@@ -1,21 +1,32 @@
 \name{check_phylo4}
 \alias{check_phylo4}
+\alias{check_tree}
 \title{Validity checking for phylo4 objects}
 \description{
   Basic checks on the validity of S4 phylogenetic objects
 }
 \usage{
 check_phylo4(object)
+check_tree(object,warn="retic",err=NULL)
 }
 \arguments{
   \item{object}{A prospective S4 object}
+  \item{warn}{a character vector listing phenomena to warn about:
+    current options are "poly" and "retic"}
+  \item{err}{a character vector listing phenomena to trigger errors}
 }
 \value{
   As required by \code{\link{validObject}}, returns an
   error string (describing problems) or TRUE if everything is OK
 }
+\note{\code{check_phylo4} is an (inflexible) wrapper for
+  \code{check_tree} that uses the default settings}
 \seealso{
-  the \code{\link{phylo4}} constructor, the \linkS4class{phylo4} class. See also the \code{\link{check_data}}, the \code{\link{phylo4d}} constructor and the \linkS4class{phylo4d} class. See \code{\link{coerce-methods}} for translation functions.
+  the \code{\link{phylo4}} constructor and \linkS4class{phylo4} class;
+  \code{\link{check_data}}, the \code{\link{phylo4d}} constructor and
+  the \linkS4class{phylo4d}
+  class do checks for the data associated with trees.
+  See \code{\link{coerce-methods}} for translation functions.
 }
 \author{Ben Bolker}
 \keyword{misc}

Modified: pkg/man/hasSingles.Rd
===================================================================
--- pkg/man/hasSingles.Rd	2008-05-22 14:08:01 UTC (rev 177)
+++ pkg/man/hasSingles.Rd	2008-05-26 13:58:29 UTC (rev 178)
@@ -1,14 +1,18 @@
 \name{hasSingles}
 \alias{hasSingles}
 \alias{hasPoly}
-\title{Test trees for polytomies or inline nodes}
+\alias{hasRetic}
+\title{Test trees for polytomies, inline nodes,
+or reticulation}
 \description{checks to see whether trees have
-  (structural) polytomies or inline nodes (i.e.,
-  nodes with a single descendant)
+  (structural) polytomies, inline nodes (i.e.,
+  nodes with a single descendant), or reticulation
+  (i.e., nodes with more than one ancestor)
 }
 \usage{
 hasSingles(object)
 hasPoly(object)
+hasRetic(object)
 }
 \arguments{
   \item{object}{an object inheriting from class \code{phylo4}}
@@ -19,7 +23,8 @@
 \author{Ben Bolker}
 \note{
   Some algorithms are unhappy with structural polytomies (i.e., >2
-  descendants from a node) or with single-descendant nodes; these
+  descendants from a node), with single-descendant nodes,
+  or with reticulation; these
   functions check those properties.
   We haven't bothered to check for zero branch lengths:
   the consensus is that it doesn't come up much,

Modified: pkg/tests/phylotorture.R
===================================================================
--- pkg/tests/phylotorture.R	2008-05-22 14:08:01 UTC (rev 177)
+++ pkg/tests/phylotorture.R	2008-05-26 13:58:29 UTC (rev 178)
@@ -6,6 +6,8 @@
 ## don't want to slow down R CMD check by doing this every time:
 ## n <- 10000
 for (i in 1:n) {
+##    e2 <- c(sample(1:5,replace=FALSE,size=5),sample(6:10,replace=FALSE,size=5))
+##    e1 <- sample(6:10,replace=TRUE
     e <- matrix(sample(1:10,replace=TRUE,size=10),ncol=2)
     p1[[i]] <- try(phylo4(e),silent=TRUE)
 }
@@ -17,6 +19,7 @@
     length(p2)
     has.poly <- sapply(p2,hasPoly)
     has.sing <- sapply(p2,hasSingles)
+    has.retic <- sapply(p2,hasRetic)   
     ##
     if (any(has.sing)) {
         p4 <- p2[has.sing]
@@ -31,6 +34,8 @@
         ## plot(p2[[13]])
     }
 }
+
+## elements 8 and 34 are 
 ## what SHOULD the rules for trees be?
 
 ## (a) reduce node numbers to 1 ... N ?



More information about the Phylobase-commits mailing list