[Phylobase-commits] r358 - in pkg: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Sat Dec 20 01:13:38 CET 2008


Author: francois
Date: 2008-12-20 01:13:38 +0100 (Sat, 20 Dec 2008)
New Revision: 358

Modified:
   pkg/R/checkdata.R
   pkg/man/check.phylo4d.Rd
Log:
added new arguments to deal with non-unique labels for tips and nodes

Modified: pkg/R/checkdata.R
===================================================================
--- pkg/R/checkdata.R	2008-12-19 23:58:52 UTC (rev 357)
+++ pkg/R/checkdata.R	2008-12-20 00:13:38 UTC (rev 358)
@@ -6,11 +6,11 @@
 
 check_tree <- function(object,warn="retic",err=NULL) {
     ## FIXME: check for cyclicity?
-    N <- nrow(object at edge)  
+    N <- nrow(object at edge)
     if (hasEdgeLength(object) && length(object at edge.length) != N)
       return("edge lengths do not match number of edges")
     ## if (length(object at tip.label)+object at Nnode-1 != N) # does not work with multifurcations
-    ##  return("number of tip labels not consistent with number of edges and nodes")    
+    ##  return("number of tip labels not consistent with number of edges and nodes")
     ## check: tip numbers = (m+1):(m+n)
     ntips <- nTips(object)
     if(length(object at tip.label) != ntips)
@@ -42,7 +42,7 @@
     ##if (any(nAncest==0) && E[1,1]!=nTips+1) {
     ##  return("root node must be first row of edge matrix")
     ##}
-    
+
     ##
     ## how do we identify loops???
     ## EXPERIMENTAL: could be time-consuming for large trees?
@@ -75,7 +75,6 @@
     return(TRUE)
 }
 
-
 check_data <- function(object,
                        label.type=c("row.names","column"),
                        label.column=1,
@@ -84,41 +83,48 @@
                        extra.tip.data=c("fail","OK","warn"),
                        default.tip.names=c("warn","OK","fail"),
                        use.node.names=FALSE,
-                       missing.node.data=c("OK","warn","fail"),		
-                       extra.node.data=c("OK","warn","fail"),												
-                       default.node.names=c("warn","OK","fail"),...)							 
-
+                       missing.node.data=c("OK","warn","fail"),
+                       extra.node.data=c("OK","warn","fail"),
+                       default.node.names=c("warn","OK","fail"),
+                       non.unique.tips=c("warn", "OK", "fail"),
+                       non.unique.nodes=c("warn", "OK", "fail"),
+                       ...)
 {
 
     ## name matching default: use row.names of data frame
-    label.type = match.arg(label.type)
+    label.type <- match.arg(label.type)
     if (identical(label.type, "row.names")) {
         tip.names <- row.names(object at tip.data)
         node.names <- row.names(object at node.data)
     }
     else {
         tip.names <- object at tip.data[,label.column]
-        node.names <- object at node.data[,label.column]        
+        node.names <- object at node.data[,label.column]
     }
-    
+
     ## tip default: use names, require names, must match exactly
     missing.tip.data <- match.arg(missing.tip.data)
     extra.tip.data <- match.arg(extra.tip.data)
     default.tip.names <- match.arg(default.tip.names)
-    
+
     ## node default: don't use node names, don't require names, do not need to match exactly
     missing.node.data <- match.arg(missing.node.data)
     extra.node.data <- match.arg(extra.node.data)
     default.node.names <- match.arg(default.node.names)
-    
+
+    ## non unique tip default: by default if some tip names are non-unique they
+    ## all get associated the same value and this is done with a warning. Other
+    ## options are: association of data without warning and require uniqueness of tips.
+    non.unique.tips <- match.arg(non.unique.tips)
+
     ## for each set of data, check for names, missing and extra data and take appropriate actions
-    
+
     ## tip data checks
     ## if tip.data exist
     if (!all(dim(object at tip.data)==0)) {
         ## if we want to use tip.names
         if (use.tip.names) {
-            
+
             ## check for default names
             if (all(tip.names == 1:length(tip.names))) {
                 ## no tip.names
@@ -131,18 +137,32 @@
                             "Consider using the use.tip.names=FALSE option.")
                 }
             }
-            
+
             ## check tip names
             ## check for missing or extra tip data (relative to tree taxa)
             if (setequal(tip.names, object at tip.label)) {
-                ## names are perfect match - ok
-                return(TRUE)
+                if(length(tip.names) == nTips(object)) {
+                    ## names are perfect match - ok
+                    return(TRUE)
+                }
+                else {
+                    ## Some tips are non-unique
+                    tipsTable <- table(labels(object))
+                    if(any(nU <- tipsTable > 1)) {
+                        nonUnique <- paste(names(tipsTable[nU]), collapse=", ")
+                        nonUniqueMsg <- paste("Tip \'", nonUnique, "\' not unique", sep = "")
+                        if(non.unique.tips == "fail")
+                            stop(nonUniqueMsg)
+                        if(non.unique.tips == "warn")
+                            warning(nonUniqueMsg)
+                    }
+                }
             }
             else {
                 ## we know the tree taxa and tip.data taxa are not a perfect match
                 ## if tip.data taxa are subset of tree taxa, check missing.tip.data arg and act accordingly
                 tips.in.rownames <- object at tip.label %in% tip.names
-                rownames.in.tips <- tip.names %in% object at tip.label 
+                rownames.in.tips <- tip.names %in% object at tip.label
                 missing.data.names <- object at tip.label[!tips.in.rownames]
                 missing.data.name.msg <- if (length(missing.data.names)==0) "" else {
                     paste("\n(missing data names: ",
@@ -180,7 +200,7 @@
                       }
                     ##else ok
                 }
-                
+
                 ##if tree taxa are subset of tip.data, check extra.tip arg and act accordingly
                 if (!all(tip.names %in% object at tip.label)) {
                     ##we know it's not an exact match - we have extra.tip.data - take action
@@ -197,9 +217,9 @@
                     }
                     ##else ok
                 }
-                
+
                 return(TRUE)
-            } 
+            }
         }
         else
           {
@@ -215,9 +235,9 @@
     if (!all(dim(object at node.data)==0)) {
         ## if we want to use node.names
         if (use.node.names) {
-            
+
             ## check for default names
-            if (all(node.names == 1:length(node.names)) 
+            if (all(node.names == 1:length(node.names))
                 || all(node.names == (nTips(object)+1):nEdges(object))) {
                 ## no node.names
                 if (default.node.names == "fail") {
@@ -229,18 +249,32 @@
                             "Consider using the use.node.names=FALSE option.")
                 }
             }
-            
+
             ## check node names
             ## check for missing or extra node data (relative to tree taxa)
             if (setequal(node.names, object at node.label)) {
-                ## names are perfect match - ok
-                return(TRUE)
+                if(length(node.names) == nNodes(object)) {
+                    ## names are perfect match - ok
+                    return(TRUE)
+                }
+                else {
+                    ## Some nodes are non-unique
+                    nodesTable <- table(nodeLabels(object))
+                    if(any(nU <- nodesTable > 1)) {
+                        nonUnique <- paste(names(nodesTable[nU]), collapse=", ")
+                        nonUniqueMsg <- paste("Node \'", nonUnique, "\' not unique", sep = "")
+                        if(non.unique.nodes == "fail")
+                            stop(nonUniqueMsg)
+                        if(non.unique.nodes == "warn")
+                            warning(nonUniqueMsg)
+                    }
+                }
             }
             else {
                 ## we know the tree taxa and node.data taxa are not a perfect match
                 ## if node.data taxa are subset of tree taxa, check missing.node.data arg and act accordingly
                 nodes.in.rownames <- object at node.label %in% node.names
-                rownames.in.nodes <- node.names %in% object at node.label 
+                rownames.in.nodes <- node.names %in% object at node.label
                 missing.data.names <- object at node.label[!nodes.in.rownames]
                 missing.data.name.msg <- if (length(missing.data.names)==0) "" else {
                     paste("\n(missing data names: ",
@@ -278,7 +312,7 @@
                       }
                     ##else ok
                 }
-                
+
                 ##if tree taxa are subset of node.data, check extra.node arg and act accordingly
                 if (!all(node.names %in% object at node.label)) {
                     ##we know it's not an exact match - we have extra.node.data - take action
@@ -295,9 +329,9 @@
                     }
                     ##else ok
                 }
-                
+
                 return(TRUE)
-            } 
+            }
         }
         else
           {
@@ -314,9 +348,9 @@
                         label.column=1,
                         use.tip.names=TRUE,
                         use.node.names=FALSE,
-                        ...)							 
+                        ...)
 {
-    
+
     ## assumes data have already been checked by check_data!
     ## name matching default: use row.names of data frame
     label.type = match.arg(label.type)
@@ -326,12 +360,12 @@
     }
     else {
         tip.names <- object at tip.data[,label.column]
-        node.names <- object at node.data[,label.column]        
+        node.names <- object at node.data[,label.column]
     }
 
 
     ## for each set of data, take appropriate actions
-    
+
     ## tip data operations:
     ## if tip.data exist
     if (!all(dim(object at tip.data)==0)) {
@@ -341,7 +375,7 @@
         }
         #tip.names <- object at tip.label
     }
-    
+
     ## node data operations
     if (!all(dim(object at node.data)==0)) {
         ## if we want to use tip.names
@@ -350,7 +384,7 @@
         }
         #node.names <- object at node.label
     }
-    
+
     return(object)
-    
+
 }

Modified: pkg/man/check.phylo4d.Rd
===================================================================
--- pkg/man/check.phylo4d.Rd	2008-12-19 23:58:52 UTC (rev 357)
+++ pkg/man/check.phylo4d.Rd	2008-12-20 00:13:38 UTC (rev 358)
@@ -7,8 +7,12 @@
   (i.e. \linkS4class{phylo4d} objects).
 }
 \usage{
-check_data(object, label.type = c("row.names", "column"), 
-label.column = 1, use.tip.names=TRUE, missing.tip.data=c("fail","OK","warn"), extra.tip.data=c("fail","OK","warn"), default.tip.names=c("warn","OK","fail"), use.node.names=FALSE, missing.node.data=c("OK","warn","fail"), extra.node.data=c("OK","warn","fail"), default.node.names=c("warn","OK","fail"),\dots)
+check_data(object, label.type = c("row.names", "column"),
+label.column = 1, use.tip.names=TRUE, missing.tip.data=c("fail","OK","warn"),
+extra.tip.data=c("fail","OK","warn"), default.tip.names=c("warn","OK","fail"),
+use.node.names=FALSE, missing.node.data=c("OK","warn","fail"),
+extra.node.data=c("OK","warn","fail"), default.node.names=c("warn","OK","fail"),
+non.unique.tips=c("warn", "OK", "fail"), non.unique.nodes=c("warn", "OK", "fail"), \dots)
 
 attach_data(object, label.type = c("row.names", "column"), label.column = 1,
 use.tip.names = TRUE, use.node.names = FALSE,\dots)
@@ -21,26 +25,35 @@
   \item{use.tip.names}{Use tip data names if present (default=\code{TRUE})}
   \item{missing.tip.data}{Can tip data taxa be a subset of tree taxa? (default=\code{fail})}
   \item{extra.tip.data}{Can tip data taxa be a superset of tree taxa? (default=\code{fail})}
-  \item{default.tip.names}{Do tip data have the default (uninformative) names? (default=\code{warn}}
+  \item{default.tip.names}{Do tip data have the default (uninformative) names? (default=\code{warn})}
   \item{use.node.names}{Use node data names if present
     (default=\code{FALSE})}
   \item{missing.node.data}{Can node data taxa be a subset of tree taxa?
     (default=\code{OK})}
   \item{extra.node.data}{Can node data taxa be a superset of tree taxa?
     (default=\code{OK})}
-  \item{default.node.names}{Do tip data have the default (uninformative) names? (default=\code{warn}}
-  \item{\dots}{Further arguments (for consistency)}  
+  \item{default.node.names}{Do tip data have the default (uninformative) names?
+  (default=\code{warn})}
+  \item{non.unique.tips}{Can you attach data to non-unique tip? (default=\code{warn})}
+  \item{non.unique.nodes}{Can you attach data to non-unique nodes? (default=\code{warn})}
+  \item{\dots}{Further arguments (for consistency)}
 }
 \value{
   \code{check_data} returns an error string (describing problems) or \code{TRUE} if everything is OK. If option \code{fail} is specified and a problem is encountered, returns an error string. If option \code{warn} is specified and a problem is encountered, returns \code{TRUE} plus a warning message.
-  
+
   By default, tip data names are used to attach the tip data to the tree, must be present, and must match the set of tree taxa exactly. By default node data names are not used to attach the data to the tree, and thus node data names are not required.
-  
+
   \code{attach_data} returns the phylo4 object with tipdata and nodedata (if present) attached to the phylo4 tree.
 }
 \details{
-  \code{check_data} checks validity of a phylo4d object. For tipdata and nodedata, checks for presence of tip and node names, whether tip or node data taxa and tree taxa are an exact match, and whether tip or node data taxa are a superset or subset of the tree taxa.
-  
+  \code{check_data} checks validity of a phylo4d object. For tipdata and nodedata, checks
+  for presence of tip and node names, whether tip or node data taxa and tree taxa are an
+  exact match, and whether tip or node data taxa are a superset or subset of the tree
+  taxa. By default, if several tip or node labels are non-unique, they all receive the same
+  value. This is only true if you use names to attach data to your tree. You can prevent
+  this behavior by changing the value of the arguments \code{non.unique.tips} and
+  \code{non.unique.nodes} to \code{fail}.
+
   \code{attach_data} takes a phylo4d object that has been checked, and attaches the tipdata and nodedata to the object. This involves the data being subset to include only the taxa present in the tree, with missing data taxa assigned NA values. The data are sorted to match the order of the tip.labels of the tree to allow use with comparative analysis functions.
 }
 \seealso{the \code{\link{phylo4d}} constructor, the \linkS4class{phylo4d} class. See also the \code{\link{check_phylo4}}, the \code{\link{phylo4}} constructor and the \linkS4class{phylo4} class. See \code{\link{coerce-methods}} for translation functions.}
@@ -77,7 +90,7 @@
 p4d.nod <- phylo4d(p4, node.data = nod.dat, which = "node")
 
 #create phylo4 objects with node and tip data
-p4d.all1 <- phylo4d(p4, node.data = nod.dat, tip.data = dat, 
+p4d.all1 <- phylo4d(p4, node.data = nod.dat, tip.data = dat,
   which = "all")
 p4d.all2 <- phylo4d(p4, all.data = rbind(dat, nod.dat), which = "all")
 }



More information about the Phylobase-commits mailing list