[Phylobase-commits] r773 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Apr 1 21:48:46 CEST 2010
Author: francois
Date: 2010-04-01 21:48:45 +0200 (Thu, 01 Apr 2010)
New Revision: 773
Modified:
pkg/R/checkdata.R
Log:
cleaning up checkTree(), improve consistency in the way the error/warning messages are handled, addresses bug 682
Modified: pkg/R/checkdata.R
===================================================================
--- pkg/R/checkdata.R 2010-03-29 20:56:54 UTC (rev 772)
+++ pkg/R/checkdata.R 2010-04-01 19:48:45 UTC (rev 773)
@@ -19,23 +19,11 @@
## get options
opt <- phylobase.options()
- ## FIXME: check for cyclicity?
- nedges <- nrow(object at edge)
+ ## Storage of error/warning messages
+ err <- wrn <- character(0)
- if (hasEdgeLength(object)) {
- if (length(object at edge.length) != nedges)
- return("edge lengths do not match number of edges")
- if(!is.numeric(object at edge.length))
- return("edge lengths are not numeric")
- ## presumably we shouldn't allow NAs mixed
- ## with numeric branch lengths except at the root
- if (sum(is.na(object at edge.length)) > 1)
- return("NAs in edge lengths")
- ## Strip root edge branch length (if set to NA)
- if (any(object at edge.length[!is.na(object at edge.length)] < 0))
- return("edge lengths must be non-negative")
- }
-
+ ## Define variables
+ nedges <- nEdges(object)
ntips <- nTips(object)
E <- edges(object)
tips <- unique(sort(E[,2][!E[,2] %in% E[,1]]))
@@ -43,28 +31,53 @@
intnodes <- nodes[!nodes %in% tips]
roots <- E[which(is.na(E[,1])),2]
nRoots <- length(roots)
+
+ ## Check edge lengths
+ if (hasEdgeLength(object)) {
+ if (length(object at edge.length) != nedges)
+ err <- c(err, "edge lengths do not match number of edges")
+ if(!is.numeric(object at edge.length))
+ err <- c(err, "edge lengths are not numeric")
+ ## presumably we shouldn't allow NAs mixed
+ ## with numeric branch lengths except at the root
+ if (sum(is.na(object at edge.length)) > 1)
+ err <- c(err, "NAs in edge lengths")
+ ## Strip root edge branch length (if set to NA)
+ if (any(object at edge.length[!is.na(object at edge.length)] < 0))
+ err <- c(err, "edge lengths must be non-negative")
+ ## Check edge length labels
+ elen.msg <- "Use edgeLength<- to update them."
+ if (is.null(names(object at edge.length))) {
+ err <- c(err, paste("Edge lengths must have names matching edge IDs.",
+ elen.msg))
+ }
+ if (!all(names(object at edge.length) %in% edgeId(object, "all"))) {
+ err <- c(err, paste("One or more edge lengths has an unmatched ID name.",
+ elen.msg))
+ }
+ }
+
+ ## Make sure tips and
if (!(all(tips==1:ntips) && all(nodes=(ntips+1):(ntips+length(intnodes)))))
- return("tips and nodes incorrectly numbered")
+ err <- c(err, "tips and nodes incorrectly numbered")
##careful - nAncest does not work for counting nRoots in unrooted trees
nAncest <- tabulate(na.omit(E)[, 2],nbins=max(nodes)) ## bug fix from Jim Regetz
nDesc <- tabulate(na.omit(E[,1]))
nTips <- sum(nDesc==0)
if (!all(nDesc[1:nTips]==0))
- return("nodes 1 to nTips must all be tips")
+ err <- c(err, "nodes 1 to nTips must all be tips")
if (nRoots > 0) {
if (sum(E[, 1] == 0) != 1) {
- return("for a rooted tree, edge matrix must contain (exactly one) explicit root edge with ancestor==0")
+ err <- c(err, "for a rooted tree, edge matrix must contain (exactly one) explicit root edge with ancestor==0")
}
root.node <- unname(E[which(E[,1] == 0), 2])
- if (!root.node == nTips + 1)
- ## TODO this isn't actually a requirement
- return("root node must be first row of edge matrix")
}
+ ## Check that nodes are correctly numbered
if (!all(nDesc[(nTips+1):(nTips+nNodes(object))]>0))
- return("nodes (nTips+1) to (nTips+nNodes) must all be internal nodes")
+ err <- c(err, "nodes (nTips+1) to (nTips+nNodes) must all be internal nodes")
## how do we identify loops???
## EXPERIMENTAL: could be time-consuming for large trees?
@@ -73,101 +86,90 @@
Emat[E] <- 1
}
if (!object at order %in% phylo4_orderings) {
- stop("unknown order: allowed values are ",
- paste(phylo4_orderings,collapse=","))
+ err <- c(err, paste("unknown order: allowed values are",
+ paste(phylo4_orderings,collapse=",")))
}
## make sure tip/node labels have internal names that match node IDs
lab.msg <- "Use tipLabels<- (and nodeLabels<- if needed) to update them."
if (is.null(names(object at label))) {
- stop(c("Tip and node labels must have names matching node IDs. ",
- lab.msg))
+ err <- c(err, paste("Tip and node labels must have names matching node IDs.",
+ lab.msg))
} else {
if (!all(tips %in% names(na.omit(object at label)))) {
- stop(c("All tips must have associated tip labels. ",
- lab.msg))
+ err <- c(err, paste("All tips must have associated tip labels.",
+ lab.msg))
}
if (!all(names(object at label) %in% nodeId(object, "all"))) {
- stop(c("One or more tip/node label has an unmatched ID name ",
- lab.msg))
+ err <- c(err, paste("One or more tip/node label has an unmatched ID name",
+ lab.msg))
}
}
- ## make sure edge lengths have internal names that match the edges
- elen.msg <- "Use edgeLength<- to update them."
- if(hasEdgeLength(object)) {
- if (is.null(names(object at edge.length))) {
- stop(c("Edge lengths must have names matching edge IDs. ",
- elen.msg))
- }
- if (!all(names(object at edge.length) %in% edgeId(object, "all"))) {
- stop(c("One or more edge lengths has an unmatched ID name. ",
- elen.msg))
- }
- }
-
## make sure edge labels have internal names that match the edges
elab.msg <- "Use edgeLabels<- to update them."
if(hasEdgeLabels(object)) {
if (is.null(names(object at edge.label))) {
- stop(c("Edge labels must have names matching edge IDs. ",
- elab.msg))
+ err <- c(err, paste("Edge labels must have names matching edge IDs.",
+ elab.msg))
}
if (!all(names(object at edge.label) %in% edgeId(object, "all"))) {
- stop(c("One or more edge labels has an unmatched ID name. ",
- elab.msg))
+ err <- c(err, paste("One or more edge labels has an unmatched ID name.",
+ elab.msg))
}
}
- ## all done with fatal errors. Now construct a list
- ## of warnings and paste them together
- msg <- character(0)
-
## make sure that tip and node labels are unique
if (hasDuplicatedLabels(object)) {
currmsg <- "Labels are not unique"
if (opt$allow.duplicated.labels == "fail")
- return(currmsg)
+ err <- c(err, currmsg)
if (opt$allow.duplicated.labels == "warn")
- msg <- c(msg, currmsg)
+ wrn <- c(wrn, currmsg)
}
if (any(nDesc>2)) {
currmsg <- "tree includes polytomies"
if (opt$poly == "fail")
- return(currmsg)
+ err <- c(err, currmsg)
if (opt$poly == "warn")
- msg <- c(msg, currmsg)
+ wrn <- c(wrn, currmsg)
}
if (nRoots>1) {
currmsg <- "tree has more than one root"
if (opt$multiroot == "fail")
- return(currmsg)
+ err <- c(err, currmsg)
if (opt$multiroot == "warn")
- msg <- c(msg,currmsg)
+ wrn <- c(wrn,currmsg)
}
if (any(nDesc==1)) {
currmsg <- "tree contains singleton nodes"
if (opt$singleton == "fail")
- return(currmsg)
+ err <- c(err, currmsg)
if (opt$singleton == "warn")
- msg <- c(msg, currmsg)
+ wrn <- c(wrn, currmsg)
}
if (any(nAncest>1)) {
currmsg <- paste("tree is reticulated [most functions in phylobase haven't",
"been tested with reticulated trees]")
if (opt$retic == "fail")
- return(currmsg)
+ err <- c(err, currmsg)
if (opt$retic == "warn")
- msg <- c(msg, currmsg)
+ wrn <- c(wrn, currmsg)
}
- if (length(msg)>0) {
- msg <- paste(msg, collapse=", ")
- warning(msg)
+ if (length(wrn) > 0) {
+ wrn <- paste(wrn, collapse=", ")
+ warning(wrn)
}
- return(TRUE)
+ if (length(err) > 0) {
+ err <- paste(err, collapse=", ")
+ return(err) #failures are returned as text
+ }
+ else {
+ return(TRUE)
+ }
}
checkPhylo4Data <- function(object) {
@@ -175,19 +177,6 @@
## These are just some basic tests to make sure that the user does not
## alter the object in a significant way
-# JR: I don't think this part is necessary. All that matters is that all
-# rows in the data have names corresponding to (valid) node numbers
-# ntips <- nTips(object)
-# nnodes <- nNodes(object)
-#
-# ## Check dimensions
-# if (nrow(object at tip.data) > 0 && nrow(object at tip.data) != ntips)
-# stop("The number of tip data does not match the number ",
-# "of tips in the tree")
-# if (nrow(object at node.data) > 0 && nrow(object at node.data) != nnodes)
-# stop("The number of node data does not match the number ",
-# "of internal nodes in the tree")
-
## Check rownames
if (nrow(object at data) > 0 &&
!all(row.names(object at data) %in% nodeId(object, "all")))
More information about the Phylobase-commits
mailing list