[Phylobase-commits] r673 - in pkg: R data inst/unitTests man tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Sep 29 07:52:39 CEST 2009


Author: regetz
Date: 2009-09-29 07:52:39 +0200 (Tue, 29 Sep 2009)
New Revision: 673

Modified:
   pkg/R/checkdata.R
   pkg/R/class-phylo4.R
   pkg/R/class-phylo4d.R
   pkg/R/formatData.R
   pkg/R/methods-phylo4.R
   pkg/R/methods-phylo4d.R
   pkg/R/prune.R
   pkg/R/setAs-Methods.R
   pkg/R/subset.R
   pkg/R/tbind.R
   pkg/R/treePlot.R
   pkg/data/geospiza.rda
   pkg/inst/unitTests/runit.class-phylo4d.R
   pkg/inst/unitTests/runit.labelsReplaceMethod.R
   pkg/inst/unitTests/runit.methods-phylo4.R
   pkg/inst/unitTests/runit.methods-phylo4d.R
   pkg/inst/unitTests/runit.setAs-Methods.R
   pkg/inst/unitTests/runit.subset.R
   pkg/man/addData.Rd
   pkg/man/formatData.Rd
   pkg/man/phylo4-class.Rd
   pkg/man/phylo4-labels.Rd
   pkg/man/phylo4d-class.Rd
   pkg/man/phylo4d-hasData.Rd
   pkg/man/phylo4d.Rd
   pkg/man/tdata.Rd
   pkg/tests/misctests.R
   pkg/tests/misctests.Rout.save
   pkg/tests/phylo4dtests.R
   pkg/tests/phylo4dtests.Rout.save
   pkg/tests/phylosubtest.R
   pkg/tests/phylotorture.R
   pkg/tests/phylotorture.Rout.save
   pkg/tests/testprune.Rout.save
Log:
merging slot-mods branch changes r657:672 into trunk


Modified: pkg/R/checkdata.R
===================================================================
--- pkg/R/checkdata.R	2009-09-29 04:05:17 UTC (rev 672)
+++ pkg/R/checkdata.R	2009-09-29 05:52:39 UTC (rev 673)
@@ -16,8 +16,7 @@
 
     ## case of empty phylo4 object
     if(nrow(object at edge) == 0 && length(object at edge.length) == 0 &&
-       length(object at node.label) == 0 &&
-       length(object at tip.label) == 0 && length(object at edge.label) == 0)
+       length(object at label) == 0 && length(object at edge.label) == 0)
         return(TRUE)
 
     ## FIXME: check for cyclicity?
@@ -27,7 +26,7 @@
       if (length(object at edge.length) != nedges)
         return("edge lengths do not match number of edges")
       if(!is.numeric(object at edge.length))
-          stop("Edge lengths are not numeric.")
+          return("edge lengths are not numeric")
       ## presumably we shouldn't allow NAs mixed
       ## with numeric branch lengths except at the root
       if (sum(is.na(object at edge.length)) > 1)
@@ -41,8 +40,6 @@
     ##  return("number of tip labels not consistent with number of edges and nodes")
     ## check: tip numbers = (m+1):(m+n)
     ntips <- nTips(object)
-    if(length(object at tip.label) != ntips)
-      return("number of tip labels not consistent with number of tips")
     E <- edges(object)
     tips <- unique(sort(E[,2][!E[,2] %in% E[,1]]))
     nodes <- unique(sort(c(E)))
@@ -82,49 +79,33 @@
            paste(phylo4_orderings,collapse=","))
     }
 
-    ## make sure that nodes and edges have internal names
-    ## and that they match the nodes
-    if (is.null(names(object at tip.label))) {
-        if(length(object at tip.label) == nTips(object)) {
-            stop("There is no internal name associated with your tips. Use the ",
-                 "function tipLabels <- to change your tip labels.")
+    ## make sure tip/node labels have internal names that match node IDs
+    lab.msg <- "Use tipLabels<- (and nodeLabels<- if needed) to update them."
+    if (is.null(names(object at label))) {
+        return(c("Tip and node labels must have names matching node IDs. ",
+            lab.msg))
+             
+    } else {
+        if (!all(tips %in% names(na.omit(object at label)))) {
+            return(c("All tips must have associated tip labels. ",
+                lab.msg))
         }
-        else
-            stop("Your object doesn't have internal node names and the number of ",
-                 "tip labels doesn't match the number tips.")
-    }
-    else {
-        if(!all(names(object at tip.label) %in%  nodeId(object, "tip")))
-            stop("Internal names for tips don't match tip ID numbers")
-    }
-
-    if (is.null(names(object at node.label))) {
-        if(length(object at node.label) == nNodes(object)) {
-            stop("There is no internal names associated with internal ",
-                 "nodes. Use the function nodeLabels <- to create or ",
-                 "change your internal node labels.")
+        if (!all(names(object at label) %in% nodeId(object, "all"))) {
+            return(c("One or more tip/node label has an unmatched ID name ",
+                lab.msg))
         }
-        else
-            stop("Your object doesn't have internal node names and the number of ",
-                 "node labels doesn't match the number nodes.")
     }
-    else {
-        if(!all(names(object at node.label) %in%  nodeId(object, "internal")))
-            stop("Internal names for nodes don't match node ID numbers")
-    }
 
+    ## make sure edge lengths have internal names that match the edges
+    elen.msg <- "Use edgeLength<- to update them."
     if(hasEdgeLength(object)) {
-        if(is.null(names(object at edge.length))) {
-            warning("Your edges don't have internal names. Use the function ",
-                    "edgeLength <- to update the the branch lengths of your ",
-                    "tree.")
+        if (is.null(names(object at edge.length))) {
+            return(c("Edge lengths must have names matching edge IDs. ",
+                elen.msg))
         }
-        else {
-            tEdgLbl <- paste(object at edge[,1], object at edge[,2], sep="-")
-            if(!all(names(object at edge.length) %in% tEdgLbl))
-                stop("There is something wrong with your internal edge length ",
-                     "labels. Use the function edgeLength <- to update the the ",
-                     "branch lengths of your tree.")
+        if (!all(names(object at edge.length) %in% edgeId(object, "all"))) {
+            return(c("One or more edge lengths has an unmatched ID name. ",
+                elen.msg))
         }
     }
 
@@ -184,24 +165,23 @@
     ## These are just some basic tests to make sure that the user does not
     ## alter the object in a significant way
 
-    ntips <- nTips(object)
-    nnodes <- nNodes(object)
+# JR: I don't think this part is necessary. All that matters is that all
+# rows in the data have names corresponding to (valid) node numbers
+#    ntips <- nTips(object)
+#    nnodes <- nNodes(object)
+#
+#    ## Check dimensions
+#    if (nrow(object at tip.data) > 0 && nrow(object at tip.data) != ntips)
+#        stop("The number of tip data does not match the number ",
+#             "of tips in the tree")
+#    if (nrow(object at node.data) > 0 && nrow(object at node.data) != nnodes)
+#        stop("The number of node data does not match the number ",
+#             "of internal nodes in the tree")
 
-    ## Check dimensions
-    if (nrow(object at tip.data) > 0 && nrow(object at tip.data) != ntips)
-        stop("The number of tip data does not match the number ",
-             "of tips in the tree")
-    if (nrow(object at node.data) > 0 && nrow(object at node.data) != nnodes)
-        stop("The number of node data does not match the number ",
-             "of internal nodes in the tree")
-
     ## Check rownames
-    if (nrow(object at tip.data) > 0 &&
-       !all(rownames(object at tip.data) %in% nodeId(object, "tip")))
-        stop("The row names of tip data do not match the tip numbers")
-    if (nrow(object at node.data) > 0 &&
-        !all(rownames(object at node.data) %in% nodeId(object, "internal")))
-        stop("The row names of node data do not match the node numbers")
+    if (nrow(object at data) > 0 &&
+        !all(row.names(object at data) %in% nodeId(object, "all")))
+        stop("The row names of tree data do not match the node numbers")
 
     return(TRUE)
 }

Modified: pkg/R/class-phylo4.R
===================================================================
--- pkg/R/class-phylo4.R	2009-09-29 04:05:17 UTC (rev 672)
+++ pkg/R/class-phylo4.R	2009-09-29 05:52:39 UTC (rev 673)
@@ -1,8 +1,7 @@
 setClass("phylo4",
          representation(edge = "matrix",
                         edge.length = "numeric",
-                        node.label = "character",
-                        tip.label = "character",
+                        label = "character",
                         edge.label = "character",
                         order = "character",
                         annote = "list"),
@@ -10,8 +9,7 @@
                         edge = matrix(nrow = 0, ncol = 2,
                             dimname = list(NULL, c("ancestor", "descendant"))),
                         edge.length = numeric(0),
-                        tip.label = character(0),
-                        node.label = character(0),
+                        label = character(0),
                         edge.label = character(0),
                         order = "unknown",
                         annote = list()
@@ -23,12 +21,12 @@
 #####################
 
 .createLabels <- function(value, ntips, nnodes, use.names = TRUE,
-                          type = c("tip", "internal", "allnode")) {
+                          type = c("all", "tip", "internal")) {
 
     type <- match.arg(type)
 
     ## set up final length of object to return
-    lgthRes <- switch(type, tip=ntips, internal=nnodes, allnode=ntips+nnodes)
+    lgthRes <- switch(type, tip=ntips, internal=nnodes, all=ntips+nnodes)
 
     ## create NA character vector of node labels
     res <- character(lgthRes)
@@ -38,7 +36,7 @@
     names(res) <- switch(type,
                          tip = 1:ntips,
                          internal = seq(from=ntips+1, length=lgthRes),
-                         allnode = 1:(ntips+nnodes))
+                         all = 1:(ntips+nnodes))
 
 
     ## if no values are provided
@@ -140,8 +138,7 @@
     res <- new("phylo4")
     res at edge <- edge
     res at edge.length <- edge.length
-    res at tip.label <- tip.label
-    res at node.label <- node.label
+    res at label <- c(tip.label, node.label)
     res at edge.label <- edge.label
     res at order <- order
     res at annote <- annote

Modified: pkg/R/class-phylo4d.R
===================================================================
--- pkg/R/class-phylo4d.R	2009-09-29 04:05:17 UTC (rev 672)
+++ pkg/R/class-phylo4d.R	2009-09-29 05:52:39 UTC (rev 673)
@@ -2,12 +2,11 @@
 ## phylo4d class
 ## extend: phylo with data
 setClass("phylo4d",
-         representation(tip.data="data.frame",
-                        node.data="data.frame",
+         representation(data="data.frame",
                         metadata = "list"),
 
-         prototype = list( tip.data = data.frame(NULL),
-           node.data = data.frame(NULL),
+         prototype = list(
+           data = data.frame(NULL),
            metadata = list()),
 
          validity = checkPhylo4,
@@ -30,122 +29,59 @@
                         rownamesAsLabels=FALSE,
                         ...) {
 
-    ## Make sure that data provided are a data frame
-    classData <- function(someData) {
-        if(!is.null(someData)) {
-            if(is.vector(someData))
-                someData <- as.data.frame(someData)
-            if(!is.data.frame(someData)) {
-                nmSomeData <- substitute(someData)
-                stop(paste(nmSomeData, "must be a vector or a data frame"))
-            }
-            someData
-        }
-    }
-
     ## Check validity of phylo4 object
     if(is.character(checkval <- checkPhylo4(x))) stop(checkval)
 
-    ## Check/Transform provided data to data.frame
-    all.data <- classData(all.data)
-    tip.data <- classData(tip.data)
-    node.data <- classData(node.data)
+    ## apply formatData to ensure data have node number rownames and
+    ## correct dimensions
+    all.data <- formatData(phy=x, dt=all.data, type="all",
+        match.data=match.data, rownamesAsLabels=rownamesAsLabels, ...)
+    tip.data <- formatData(phy=x, dt=tip.data, type="tip",
+        match.data=match.data, rownamesAsLabels=rownamesAsLabels, ...)
+    node.data <- formatData(phy=x, dt=node.data, type="internal",
+        match.data=match.data, rownamesAsLabels=rownamesAsLabels, ...)
 
-    is.empty <- function(x) { is.null(x) || all(dim(x)==0) }
+    # don't allow all.data columns of same name as tip.data or node.data
+    colnamesTipOrNode <- union(names(tip.data), names(node.data))
+    if (any(names(all.data) %in% colnamesTipOrNode)) {
+        stop("all.data column names must be distinct from ",
+             "tip.data and node.data column names")
+    }
 
-    ## Replacing node labels by node numbers and formatting the data to make sure
-    ## they have the correct dimensions
-    if(!is.empty(all.data))
-        all.data <- formatData(phy=x, dt=all.data, type="all",
-                               match.data=match.data,
-                               rownamesAsLabels=rownamesAsLabels, ...)
-
-    if(!is.empty(tip.data))
-        tip.data <- formatData(phy=x, dt=tip.data, type="tip",
-                               match.data=match.data,
-                               rownamesAsLabels=rownamesAsLabels, ...)
-
-    if(!is.empty(node.data))
-        node.data <- formatData(phy=x, dt=node.data, type="internal",
-                                match.data=match.data,
-                                rownamesAsLabels=rownamesAsLabels, ...)
-
-    ## Merging dataset
-    if(!is.empty(all.data)) {
-        tmpData <- all.data
-        if(!is.empty(tip.data)) {
-            emptyNodeData <- array(, dim = c(nNodes(x), ncol(tip.data)),
-                                   dimnames = list(nodeId(x, "internal"),
-                                   colnames(tip.data)))
-            tmpTipData <- rbind(tip.data, emptyNodeData)
-
-            tmpTipData <- tmpTipData[match(rownames(all.data),
-                                           rownames(tmpTipData)) ,,
-                                     drop = FALSE]
-            tmpData <- cbind(all.data, tmpTipData)
-        }
-        if(!is.empty(node.data)) {
-            emptyTipData <- array(, dim = c(nTips(x), ncol(node.data)),
-                                  dimnames = list(nodeId(x, "tip"),
-                                  colnames(node.data)))
-            tmpNodeData <- rbind(emptyTipData, node.data)
-            tmpNodeData <- tmpNodeData[match(rownames(all.data),
-                                             rownames(tmpNodeData)) ,,
-                                       drop = FALSE]
-            tmpData <- cbind(tmpData, tmpNodeData)
-        }
-
-        tip.data <- tmpData[rownames(tmpData) %in% nodeId(x, "tip") ,,
-                            drop = FALSE]
-        node.data <- tmpData[rownames(tmpData) %in% nodeId(x, "internal") ,,
-                             drop = FALSE]
+    ## combine common columns and move into all.data if merging,
+    ## otherwise rename them
+    colsToMerge <- intersect(names(tip.data), names(node.data))
+    if (merge.data && length(colsToMerge)>0) {
+        ##TODO could really just index rows directly on 1:nTip and
+        ## (nTip+1):(nTip+nNode) in the next two statements for speed,
+        ## but this is more robust to changes in node numbering rules
+        tip.rows <- tip.data[match(nodeId(x, "tip"),
+            row.names(tip.data)), colsToMerge, drop=FALSE]
+        node.rows <- node.data[match(nodeId(x, "internal"),
+            row.names(tip.data)), colsToMerge, drop=FALSE]
+        merge.data <- rbind(tip.rows, node.rows)
+        all.data <- data.frame(all.data, merge.data)
+    } else {
+        names(tip.data)[names(tip.data) %in% colsToMerge] <-
+            paste(colsToMerge, "tip", sep=".")
+        names(node.data)[names(node.data) %in% colsToMerge] <-
+            paste(colsToMerge, "node", sep=".")
     }
+    ## now separate tips-only and nodes-only data
+    tip.only.data <- tip.data[setdiff(names(tip.data), names(node.data))]
+    node.only.data <- node.data[setdiff(names(node.data), names(tip.data))]
 
-    else {
-        if(!is.empty(tip.data) && !is.empty(node.data)) {
-            if(identical(colnames(tip.data), colnames(node.data)) && merge.data) {
-                tmpAllData <- rbind(tip.data, node.data)
-                tip.data <- tmpAllData[rownames(tmpAllData) %in%
-                                           nodeId(x, "tip") ,, drop=FALSE]
-                node.data <- tmpAllData[rownames(tmpAllData) %in%
-                                            nodeId(x, "internal") ,, drop=FALSE]
-            }
-            else {
-                emptyTipData <- array(, dim = c(nTips(x), ncol(node.data)),
-                                      dimnames = list(nodeId(x, "tip"),
-                                      colnames(node.data)))
-                emptyNodeData <- array(, dim = c(nNodes(x), ncol(tip.data)),
-                                       dimnames = list(nodeId(x, "internal"),
-                                       colnames(tip.data)))
-                tmpTipData <- rbind(tip.data, emptyNodeData)
-                tmpNodeData <- rbind(emptyTipData, node.data)
-                tmpNodeData <- tmpNodeData[rownames(tmpTipData) ,, drop=FALSE]
+    ## combine all data
+    complete.data <- data.frame(all.data, tip.only.data, node.only.data)
 
-                tmpData <- cbind(tmpTipData, tmpNodeData)
-
-                if(match.data) {
-                    tip.data <- tmpData[rownames(tmpData) %in%
-                                            nodeId(x, "tip") ,, drop=FALSE]
-                    node.data <- tmpData[rownames(tmpData) %in%
-                                             nodeId(x, "internal") ,, drop=FALSE]
-                }
-                else {
-                    tip.data <- tmpData[1:nTips(x) ,, drop=FALSE]
-                    node.data <- tmpData[-(1:nTips(x)) ,, drop=FALSE]
-                }
-            }
-        }
-        else {
-            ## at this point provide NULL data frame for empty arguments
-            if(is.empty(tip.data)) tip.data <- data.frame(NULL)
-            if(is.empty(node.data)) node.data <- data.frame(NULL)
-
-            tip.data <- tip.data
-            node.data <- node.data
-        }
+    ## drop any rows that only contain NAs
+    if (ncol(complete.data)==0) {
+        return(data.frame())
+    } else {
+        empty.rows <- as.logical(rowSums(!is.na(complete.data)))
+        return(complete.data[empty.rows, , drop=FALSE])
     }
 
-    return(list(tip.data=tip.data, node.data=node.data))
 }
 
 
@@ -156,17 +92,14 @@
                    match.data=TRUE, merge.data=TRUE, rownamesAsLabels=FALSE,
                    metadata = list(),
                    ...) {
-
-    ## prepare the data
-    tmpData <- .phylo4Data(x=x, tip.data=tip.data, node.data=node.data,
+    ## coerce tree to phylo4d
+    res <- as(x, "phylo4d")
+    ## add any data
+    res at data <- .phylo4Data(x=x, tip.data=tip.data, node.data=node.data,
                            all.data=all.data, match.data=match.data,
                            merge.data=merge.data,
                            rownamesAsLabels=rownamesAsLabels, ...)
-
-    ## coerce to phylo4d and add data/metadata
-    res <- as(x, "phylo4d")
-    res at tip.data <- tmpData$tip.data
-    res at node.data <- tmpData$node.data
+    ## add any metadata
     res at metadata <- metadata
     return(res)
 })

Modified: pkg/R/formatData.R
===================================================================
--- pkg/R/formatData.R	2009-09-29 04:05:17 UTC (rev 672)
+++ pkg/R/formatData.R	2009-09-29 05:52:39 UTC (rev 673)
@@ -2,114 +2,123 @@
                        match.data=TRUE, rownamesAsLabels=FALSE,
                        label.type=c("rownames", "column"),
                        label.column=1, missing.data=c("fail", "warn", "OK"),
-                       extra.data=c("warn", "OK", "fail")
+                       extra.data=c("warn", "OK", "fail"), keep.all=TRUE
                        ) {
 
+    ## determine whether to return rows for all nodes, or just 'type'
     type <- match.arg(type)
+    if (keep.all) {
+        ids.out <- nodeId(phy, "all")
+    } else {
+        ids.out <- nodeId(phy, type)
+    }
+
+    ## if null, return empty data frame with node numbers as row names
+    if (is.null(dt)) {
+        return(data.frame(row.names=ids.out))
+    }
+    ## if vector, coerce to data.frame
+    if (is.vector(dt)) {
+        dt <- as.data.frame(dt)
+    }
+    ## before proceeding, make sure that data provided are a data frame
+    if (!is.data.frame(dt)) {
+        nmSomeData <- substitute(dt)
+        stop(paste(nmSomeData, "must be a vector or a data frame"))
+    }
+    ## if lacking rows or columns, return a placeholder data frame with
+    ## node numbers as row names
+    if (any(dim(dt)==0)) {
+        return(data.frame(row.names=ids.out))
+    }
+        
     label.type <- match.arg(label.type)
     stopifnot(label.column %in% 1:ncol(dt))
     missing.data <- match.arg(missing.data)
     extra.data <- match.arg(extra.data)
 
-    nr <- switch(type,
-                 tip = nTips(phy),
-                 internal = nNodes(phy),
-                 all = nTips(phy)+nNodes(phy))
-
-    tmpDt <- array(, dim=c(nr, ncol(dt)),
-                   dimnames=list(nodeId(phy, type), colnames(dt)))
-    tmpDt <- data.frame(tmpDt)
-
     if(match.data) {
-        ## Replace node labels by node numbers
+        ## extract values to be matched to nodes
         ndNames <- switch(label.type,
                           rownames = rownames(dt),
                           column = dt[,label.column])
-        ndDt <- lapply(ndNames, function(nd) {
-            if(nchar(gsub("[0-9]", "", nd)) == 0 && !rownamesAsLabels)
-                getNode(phy, as.integer(nd), missing="OK")
-            else getNode(phy, nd, missing="OK")
-        })
-        ndDt <- unlist(ndDt)
+        ## either force matching on labels, or match on node
+        ## numbers for any number-like elements and labels otherwise
+        if (rownamesAsLabels) {
+            ids.in <- getNode(phy, as.character(ndNames), missing="OK")
+        } else {
+            ids.in <- as.numeric(rep(NA, length(ndNames)))
+            treatAsNumber <- nchar(gsub("[0-9]", "", ndNames))==0
+            ids.in[treatAsNumber] <- getNode(phy,
+                as.integer(ndNames[treatAsNumber]), missing="OK")
+            ids.in[!treatAsNumber] <- getNode(phy,
+                as.character(ndNames[!treatAsNumber]), missing="OK")
+        }
 
         ## Make sure that data are matched to appropriate nodes
-        if(type != "all") {
-            switch(type,
-                   tip = {
-                     ## BMB: don't bother trying to match NAs
-                       if(any(na.omit(names(ndDt)) %in% labels(phy, "internal")))
-                           stop("You are trying to match tip data to internal ",
-                                "nodes. Make sure that your data identifiers ",
-                                "are correct.")
-                   },
-                   internal = {
-                       if(any(na.omit(names(ndDt)) %in% labels(phy, "tip")))
-                           stop("You are trying to match node data to tip ",
-                                "nodes. Make sure that your data identifiers ",
-                                "are correct.")
-                   })
+        if (type=="tip" && any(na.omit(ids.in) %in% nodeId(phy,
+            "internal"))) {
+            stop("Your tip data are being matched to internal ",
+                "nodes. Make sure that your data identifiers ",
+                "are correct.")
         }
+        if (type=="internal" && any(na.omit(ids.in) %in% nodeId(phy,
+            "tip"))) {
+            stop("Your node data are being matched to tip ",
+                "nodes. Make sure that your data identifiers ",
+                "are correct.")
+        }
 
         ## Check differences
-        extra <- names(ndDt[is.na(ndDt)])
-        mssng <- nodeId(phy, type)[! nodeId(phy, type) %in% ndDt]
-
+        mssng <- setdiff(nodeId(phy, type), ids.in)
         if(length(mssng) > 0 && missing.data != "OK") {
+            ## provide label if it exists and node number otherwise
+            mssng <- getNode(phy, mssng)
+            mssng <- ifelse(is.na(names(mssng)), mssng, names(mssng))
             msg <- "The following nodes are not found in the dataset: "
-
-            ## provides label if it exists and node number otherwise
-            mssng <- sapply(mssng, function(m) {
-                m <- getNode(phy, m)
-                if (is.na(names(m)) || is.null(names(m)))
-                    m
-                else
-                    names(m)
-            })
-
             msg <- paste(msg, paste(mssng, collapse=", "))
             switch(missing.data,
                    warn = warning(msg),
                    fail = stop(msg))
         }
-
+        extra <- ndNames[is.na(ids.in)]
         if(length(extra) > 0 && extra.data != "OK") {
             msg <- "The following names are not found in the tree: "
             msg <- paste(msg, paste(extra, collapse=", "))
             switch(extra.data,
                    warn = warning(msg),
                    fail = stop(msg))
-
         }
         ## Format data to have correct dimensions
-        dt <- dt[!is.na(ndDt) ,, drop=FALSE]
-        rownames(dt) <- ndDt[!is.na(ndDt)]
-        tmpDt[,] <- dt[match(rownames(tmpDt), rownames(dt)) ,, drop=FALSE]
-        if(label.type == "column") tmpDt <- tmpDt[, -label.column, drop=FALSE]
-    }
-    else {
-        ## Remove rownames in data provided
-        rownames(dt) <- NULL
+        dt <- dt[!is.na(ids.in), , drop=FALSE]
+        rownames(dt) <- ids.in[!is.na(ids.in)]
+        dt.out <- dt[match(ids.out, rownames(dt)), , drop=FALSE]
+        rownames(dt.out) <- ids.out
+        if(label.type == "column") dt.out <- dt.out[, -label.column, drop=FALSE]
 
-        ## Tips before internal nodes for all.data
-        if (type == "all")
-            rownames(tmpDt) <- 1:nr
+    } else {
 
-        ## Check differences between dataset and tree
-        diffNr <- nrow(dt) - nr
-        if(diffNr > 0 && extra.data != "OK") {
+        ## Check if too many or not enough rows in input data
+        expected.nrow <- length(nodeId(phy, type))
+        diffNr <- nrow(dt) - expected.nrow
+        if(nrow(dt) > expected.nrow && extra.data != "OK") {
             msg <- paste("There are", diffNr, "extra rows.")
             switch(extra.data,
                    warn = warning(msg),
                    fail = stop(msg))
         }
-        if(diffNr < 0 && missing.data != "OK") {
+        if(nrow(dt) < expected.nrow && missing.data != "OK") {
             msg <- paste("There are", abs(diffNr), "missing rows.")
             switch(missing.data,
                    warn = warning(msg),
                    fail = stop(msg))
         }
-        tmpDt[,] <- dt[1:min(nrow(dt), nr) ,, drop = FALSE]
+        ## truncate rows of input data frame if necessary
+        dt <- dt[1:min(nrow(dt), expected.nrow) ,, drop = FALSE]
+        rownames(dt) <- nodeId(phy, type)[seq_len(nrow(dt))]
+        dt.out <- dt[match(ids.out, rownames(dt)) ,, drop=FALSE]
+        rownames(dt.out) <- ids.out
     }
 
-    tmpDt
+    dt.out
 }

Modified: pkg/R/methods-phylo4.R
===================================================================
--- pkg/R/methods-phylo4.R	2009-09-29 04:05:17 UTC (rev 672)
+++ pkg/R/methods-phylo4.R	2009-09-29 05:52:39 UTC (rev 673)
@@ -202,7 +202,7 @@
 setReplaceMethod("edgeLength", signature(x="phylo4"),
  function(x, use.names=TRUE, ..., value) {
     if(use.names && !is.null(names(value))) {
-        if(!all(names(value) %in% names(edgeLength(x))))
+        if(!all(names(value) %in% edgeId(x, "all")))
             stop("Names provided don't match internal edge labels")
         x at edge.length[match(names(value), names(x at edge.length))] <- value
     }
@@ -257,89 +257,54 @@
     type <- match.arg(type)
     ## [JR: below, using match for ordering rather than direct character
     ## indexing b/c the latter is slow for vectors of a certain size]
-    lbl <- switch(type,
-                  all={
-                      all <- c(object at tip.label, object at node.label)
-                      all[match(nodeId(object, "all"), names(all))]
-                  },
-                  tip={
-                      tip <- object at tip.label
-                      tip[match(nodeId(object, "tip"), names(tip))]
-                  },
-                  internal={
-                      int <- object at node.label
-                      int[match(nodeId(object, "internal"), names(int))]
-                  })
+    label <- object at label
+    id <- nodeId(object, type)
+    lbl <- label[match(id, names(label))]
+    # reassign names b/c any unmatched will be NA (could instead assign
+    # names only to the unmatched ones, but this seems simpler)
+    names(lbl) <- id
     return(lbl)
 })
 
 setReplaceMethod("labels",
                  signature(x="phylo4", type="ANY",
                            use.names="ANY", value="character"),
-   function(x, type = c("tip", "internal", "allnode"),
+   function(x, type = c("all", "tip", "internal"),
             use.names, ..., value) {
 
        ## Default options
        if(missing(type))
-           type <- "tip"
+           type <- "all"
        if (missing(use.names))
            use.names <- FALSE
 
        type <- match.arg(type)
 
+       ## generate new labels of the desired type
+       new.label <- .createLabels(value, nTips(x), nNodes(x), use.names,
+           type=type)
 
-       ob <- switch(type,
-              ## If 'tip'
-              tip = {
-                  x at tip.label <- .createLabels(value, nTips(x),
-                                                    nNodes(x), use.names,
-                                                    type="tip")
-                  x
-              },
-              ## If 'internal'
-              internal = {
-                  x at node.label <- .createLabels(value, nTips(x),
-                                                     nNodes(x), use.names,
-                                                     type="internal")
-                  x
-              },
-              ## If 'allnode'
-              allnode = {
-                  if(use.names) {
-                      tipVal <- value[names(value) %in% nodeId(x, "tip")]
-                      nodVal <- value[names(value) %in% nodeId(x, "internal")]
-                      x at tip.label <- .createLabels(tipVal, nTips(x),
-                                                        nNodes(x), use.names,
-                                                        type="tip")
-                      x at node.label <- .createLabels(nodVal, nTips(x),
-                                                         nNodes(x), use.names,
-                                                         type="internal")
-                  }
-                  else {
-                      ntips <- nTips(x)
-                      nedges <- nTips(x) + nNodes(x)
-                      x at tip.label <- .createLabels(value[1:ntips], nTips(x),
-                                                        nNodes(x), use.names,
-                                                        type="tip")
-                      x at node.label <- .createLabels(value[(ntips+1):nedges],
-                                                         nTips(x),
-                                                         nNodes(x), use.names,
-                                                         type="internal")
-                  }
-                  x
-              })
+       ## replace existing labels and add new ones as needed
+       old.label <- x at label
+       old.index <- match(names(new.label), names(old.label))
+       isNew <- is.na(old.index)
+       old.label[old.index[!isNew]] <- new.label[!isNew]
+       updated.label <- c(old.label, new.label[isNew])
 
-       if(is.character(checkval <- checkPhylo4(ob)))
+       ## for efficiency, drop any NA labels
+       x at label <- updated.label[!is.na(updated.label)]
+
+       if(is.character(checkval <- checkPhylo4(x)))
            stop(checkval)
        else
-           return(ob)
+           return(x)
    })
 
 
 ### Node Labels
 setMethod("hasNodeLabels", signature(x="phylo4"),
  function(x) {
-    !all(is.na(x at node.label))
+    !all(is.na(nodeLabels(x)))
 })
 
 setMethod("nodeLabels", signature(x="phylo4"),

Modified: pkg/R/methods-phylo4d.R
===================================================================
--- pkg/R/methods-phylo4d.R	2009-09-29 04:05:17 UTC (rev 672)
+++ pkg/R/methods-phylo4d.R	2009-09-29 05:52:39 UTC (rev 673)
@@ -1,5 +1,5 @@
 setMethod("tdata", signature(x="phylo4d"),
-  function(x, type=c("tip", "internal", "allnode"),
+  function(x, type=c("tip", "internal", "all"),
            label.type=c("row.names","column"),
            empty.columns=TRUE, ...) {
 
@@ -10,112 +10,43 @@
       type <- match.arg(type)
       label.type <- match.arg(label.type)
 
-      if (type == "tip") {
-          if (all(dim(x at tip.data) == 0)) {
-              return(x at tip.data)
-          }
-          tdata <- x at tip.data
-          data.names <- tipLabels(x)[match(names(tipLabels(x)), rownames(tdata))]
-          if ( label.type ==  "row.names" ) {
-              if (!any(duplicated(data.names)) &&
-                  !any(is.na(data.names)) ) {
-                  row.names(tdata) <- data.names
-              }
-              else {
-                  warning("Non-unique or missing labels found, ",
-                          "labels cannot be coerced to tdata row.names. ",
-                          "Use the label.type argument to include labels ",
-                          "as first column of data.")
-              }
-          }
-          if (identical(label.type,"column")) {
-              tdata <- data.frame(label=data.names, tdata)
-          }
-      }
+      ids <- nodeId(x, type)
+      labs <- labels(x, type)
+      ## replace any missing labels with node numbers
+      labs[is.na(labs)] <- names(labs)[is.na(labs)]
 
-      if (type == "internal") {
-          if (all(dim(x at node.data)==0)) {
-              return(x at node.data)
-          }
-          tdata <- x at node.data
-          if(hasNodeLabels(x))
-              data.names <- nodeLabels(x)[match(names(nodeLabels(x)), rownames(tdata))]
-          else
-              data.names <- nodeId(x, "internal")
+      tdata <- x at data[match(ids, row.names(x at data)), , drop=FALSE]
+      row.names(tdata) <- ids
+      data.names <- labs[match(names(labs), rownames(tdata))]
 
-          if ( identical(label.type, "row.names") ) {
-              if ( length(data.names) > 0 &&
-                  !any(duplicated(data.names)) &&
-                  !(any(is.na(data.names)))) {
-                  row.names(tdata) <- data.names
-              }
-              else {
-                  warning("Non-unique or missing labels found, ",
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/phylobase -r 673


More information about the Phylobase-commits mailing list