[Phylobase-commits] r688 - in pkg: R man tests

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed Oct 7 00:37:55 CEST 2009


Author: regetz
Date: 2009-10-07 00:37:55 +0200 (Wed, 07 Oct 2009)
New Revision: 688

Modified:
   pkg/R/class-phylo4d.R
   pkg/R/methods-phylo4d.R
   pkg/man/addData.Rd
   pkg/man/formatData.Rd
   pkg/man/phylo4d.Rd
   pkg/tests/misctests.Rout.save
Log:
pulled formatData out of .phylo4Data (except for null case) and cleaned
up argument lists in calling functions; updated docs to make formatData
the definitive help page for non-default data matching options


Modified: pkg/R/class-phylo4d.R
===================================================================
--- pkg/R/class-phylo4d.R	2009-10-06 05:13:11 UTC (rev 687)
+++ pkg/R/class-phylo4d.R	2009-10-06 22:37:55 UTC (rev 688)
@@ -25,16 +25,15 @@
 
 ## Core part that takes care of the data
 .phylo4Data <- function(x, tip.data=NULL, node.data=NULL, all.data=NULL,
-                        merge.data=TRUE, ...) {
+                        merge.data=TRUE) {
 
     ## Check validity of phylo4 object
-    if(is.character(checkval <- checkPhylo4(x))) stop(checkval)
+    if (is.character(checkval <- checkPhylo4(x))) stop(checkval)
 
-    ## apply formatData to ensure data have node number rownames and
-    ## correct dimensions
-    all.data <- formatData(phy=x, dt=all.data, type="all", ...)
-    tip.data <- formatData(phy=x, dt=tip.data, type="tip", ...)
-    node.data <- formatData(phy=x, dt=node.data, type="internal", ...)
+    ## Create placeholder data frames for any null data arguments
+    if (is.null(tip.data)) tip.data <- formatData(x, NULL, "tip")
+    if (is.null(node.data)) node.data <- formatData(x, NULL, "internal")
+    if (is.null(all.data)) all.data <- formatData(x, NULL, "all")
 
     # don't allow all.data columns of same name as tip.data or node.data
     colnamesTipOrNode <- union(names(tip.data), names(node.data))
@@ -84,16 +83,19 @@
 ### phylo4d class rewrite
 setMethod("phylo4d", "phylo4",
           function(x, tip.data=NULL, node.data=NULL, all.data=NULL,
-                   match.data=TRUE, merge.data=TRUE, rownamesAsLabels=FALSE,
-                   metadata = list(),
-                   ...) {
+                   merge.data=TRUE, metadata = list(), ...) {
     ## coerce tree to phylo4d
     res <- as(x, "phylo4d")
+
+    ## apply formatData to ensure data have node number rownames and
+    ## correct dimensions
+    tip.data <- formatData(phy=x, dt=tip.data, type="tip", ...)
+    node.data <- formatData(phy=x, dt=node.data, type="internal", ...)
+    all.data <- formatData(phy=x, dt=all.data, type="all", ...)
+
     ## add any data
     res at data <- .phylo4Data(x=x, tip.data=tip.data, node.data=node.data,
-                           all.data=all.data, match.data=match.data,
-                           merge.data=merge.data,
-                           rownamesAsLabels=rownamesAsLabels, ...)
+        all.data=all.data, merge.data=merge.data)
     ## add any metadata
     res at metadata <- metadata
     return(res)

Modified: pkg/R/methods-phylo4d.R
===================================================================
--- pkg/R/methods-phylo4d.R	2009-10-06 05:13:11 UTC (rev 687)
+++ pkg/R/methods-phylo4d.R	2009-10-06 22:37:55 UTC (rev 688)
@@ -52,7 +52,7 @@
     type <- match.arg(type)
 
     ## format new data
-    value <- formatData(x, value, type, keep.all=FALSE, ...)
+    value <- formatData(x, value, type, keep.all=TRUE, ...)
 
     ## get old data to keep (if any)
     if (clear.all || type=="all") {
@@ -60,8 +60,10 @@
     } else {
         if (type=="tip") {
             keep <- tdata(x, type="internal", empty.column=FALSE)
+            keep <- formatData(x, keep, "internal", match.data=FALSE)
         } else if (type=="internal") {
             keep <- tdata(x, type="tip", empty.column=FALSE)
+            keep <- formatData(x, keep, "tip", match.data=FALSE)
         }
     }
 
@@ -109,14 +111,19 @@
 
 ### Add new data
 setMethod("addData", signature(x="phylo4d"),
-  function(x, tip.data=NULL, node.data=NULL,
-           all.data=NULL, pos=c("after", "before"),
-           merge.data=TRUE, match.data=TRUE,  ...) {
+  function(x, tip.data=NULL, node.data=NULL, all.data=NULL,
+           merge.data=TRUE, pos=c("after", "before"), ...) {
 
     pos <- match.arg(pos)
 
+    ## apply formatData to ensure data have node number rownames and
+    ## correct dimensions
+    tip.data <- formatData(phy=x, dt=tip.data, type="tip", ...)
+    node.data <- formatData(phy=x, dt=node.data, type="internal", ...)
+    all.data <- formatData(phy=x, dt=all.data, type="all", ...)
+    ## combine data as needed
     new.data <- .phylo4Data(x=x, tip.data=tip.data, node.data=node.data,
-        all.data=all.data, merge.data=merge.data, match.data=match.data, ...)
+        all.data=all.data, merge.data=merge.data)
 
     if (all(dim(new.data) == 0)) {
         return(x)
@@ -140,11 +147,10 @@
 })
 
 setMethod("addData", signature(x="phylo4"),
-  function(x, tip.data=NULL, node.data=NULL,
-           all.data=NULL, pos=c("after", "before"),
-           merge.data=TRUE, match.data=TRUE, ...) {
+  function(x, tip.data=NULL, node.data=NULL, all.data=NULL,
+           merge.data=TRUE, pos=c("after", "before"), ...) {
     phylo4d(x, tip.data=tip.data, node.data=node.data, all.data=all.data,
-            merge.data=merge.data, match.data=match.data, ...)
+            merge.data=merge.data, ...)
 })
 
 ## Alternative phylo4d summary method, using phylo4 summary

Modified: pkg/man/addData.Rd
===================================================================
--- pkg/man/addData.Rd	2009-10-06 05:13:11 UTC (rev 687)
+++ pkg/man/addData.Rd	2009-10-06 22:37:55 UTC (rev 688)
@@ -11,9 +11,9 @@
 }
 \usage{
 \S4method{addData}{phylo4}(x, tip.data, node.data, all.data,
-pos=c("after", "before"), merge.data=TRUE, match.data=TRUE, \dots)
+  merge.data=TRUE, pos=c("after", "before"), \dots)
 \S4method{addData}{phylo4d}(x, tip.data, node.data, all.data,
-pos=c("after", "before"), merge.data=TRUE, match.data=TRUE, \dots)
+  merge.data=TRUE, pos=c("after", "before"), \dots)
 }
 \arguments{
   \item{x}{a phylo4 or a phylo4d object}
@@ -23,23 +23,23 @@
     containing only node data}
   \item{all.data}{a data frame (or object to be coerced to one)
     containing both tip and node data}
-  \item{match.data}{(logical), should the row names provided in
-    \code{tip.data}, \code{node.data} and/or \code{all.data} be used to
-    match data against the tree labels?}
-  \item{pos}{should the new data provided be bound \code{before} or
-    \code{after} the pre-existing data?}
   \item{merge.data}{if both \code{tip.data} and \code{node.data} are
     provided, it determines whether columns with common names will be
     merged together (default TRUE). If FALSE, columns with common names
     will be preserved separately, with ".tip" and ".node" appended to
     the names. This argument has no effect if \code{tip.data} and
     \code{node.data} have no column names in common.}
+  \item{pos}{should the new data provided be bound \code{before} or
+    \code{after} the pre-existing data?}
   \item{\dots}{additional arguments to be passed to \link{formatData}}
 }
 \value{
   \code{addData} returns a \code{phylo4d} object. 
 }
 \details{
+  Rules for matching data to tree nodes are identical to those used by
+  the \code{\link{phylo4d}} constructor.
+
   If any column names in the original data are the same as columns in
   the new data, ".old" is appended to the former column names and ".new"
   is appended to the new column names.
@@ -55,7 +55,7 @@
   data(geospiza)
   nDt <- data.frame(a=rnorm(nNodes(geospiza)), b=1:nNodes(geospiza),
     row.names=nodeId(geospiza, "internal"))
-  t1 <-addData(geospiza, node.data=nDt)
+  t1 <- addData(geospiza, node.data=nDt)
 }
 \author{Francois Michonneau}
 \keyword{methods}

Modified: pkg/man/formatData.Rd
===================================================================
--- pkg/man/formatData.Rd	2009-10-06 05:13:11 UTC (rev 687)
+++ pkg/man/formatData.Rd	2009-10-06 22:37:55 UTC (rev 688)
@@ -1,11 +1,11 @@
 \name{formatData}
 \alias{formatData}
 
-\title{Formatting data before attaching them to a phylo4 object}
+\title{Format data for use in phylo4d objects}
 
 \description{
-  Basic checks on the validity of S4 phylogenetic plus data objects
-  (i.e. \linkS4class{phylo4d} objects).
+  Associates data with tree nodes and applies consistent formatting
+  rules.
 }
 
 \usage{
@@ -20,8 +20,12 @@
   \item{phy}{a valid \code{phylo4} object}
   \item{dt}{a data frame, matrix, vector, or factor}
   \item{type}{type of data to attach}
-  \item{match.data}{(logical) Are data labels matched against tree
-    labels?} 
+  \item{match.data}{(logical) should the rownames of the data frame
+    be used to be matched against tip and internal node identifiers? See
+    details.}
+  \item{rownamesAsLabels}{(logical), should the row names of the data
+    provided be matched only to labels (TRUE), or should any number-like
+    row names be matched to node numbers (FALSE and default)}
   \item{label.type}{character, \code{rownames} or \code{column}: should
     the labels be taken from the row names of \code{dt} or from
     the \code{label.column} column of \code{dt}?}
@@ -31,12 +35,10 @@
     there are data labels that don't match}
   \item{extra.data}{action to take if there are extra data or if there
     are labels that don't match} 
-  \item{rownamesAsLabels}{(logical), in the case of number-like labels
-    should the row names of the data provided be considered as labels
-    (TRUE) or node numbers (FALSE and default)}
-  \item{keep.all}{(logical), should the data have rows for all nodes
-    (with NA values for internal rows when type='tip', and vice versa) 
-    (TRUE and default) or only rows corresponding to type argument}
+  \item{keep.all}{(logical), should the returned data have rows for all
+    nodes (with NA values for internal rows when type='tip', and vice
+    versa) (TRUE and default) or only rows corresponding to the type
+    argument}
 }
 
 \value{
@@ -47,22 +49,28 @@
 
 \details{
 
-  \code{formatData} is an internal function that should not be called by
-  the user. It formats properly the data provided by the user before
-  being attached to a tree.
+  \code{formatData} is an internal function that should not be called
+  directly by the user. It is used to format data provided by the user
+  before associating it with a tree, and is called internally by the
+  \code{phylo4d}, \code{tdata}, and \code{addData} methods. However,
+  users may pass additional arguments to these methods in order to
+  control how the data are matched to nodes.
   
-  By default, data names are used to attach tip data to the
-  tree. In this case data names must be present and must match the set
-  of tip or internal nodes exactly. The matching process can be done
-  either by using \code{phylo4} labels (i.e. tip or node) or by using
-  node numbers. It is possible to provide both at the same time by using
-  the option \code{rownamesAsLabels=FALSE}.
+  Rules for matching rows of data to tree nodes are determined jointly
+  by the \code{match.data} and \code{rownamesAsLabels} arguments. If
+  \code{match.data} is TRUE, data frame rows will be matched exclusively
+  against tip and node labels if \code{rownamesAsLabels} is also TRUE,
+  whereas any all-digit row names will be matched against tip and node
+  numbers if \code{rownamesAsLabels} is FALSE (the default). If
+  \code{match.data} is FALSE, \code{rownamesAsLabels} has no effect, and
+  row matching is purely positional with respect to the order returned
+  by \code{nodeId(phy, type)}.
   
   \code{formatData} (1) converts labels provided in the data into node
   numbers, (2) makes sure that the data are appropriately matched
-  against tip or nodes, (3) checks for differences between data and
-  tree, (4) creates a data frame with the correct dimensions given a
-  tree.
+  against tip and/or internal nodes, (3) checks for differences between
+  data and tree, (4) creates a data frame with the correct dimensions
+  given a tree.
   
 }
 

Modified: pkg/man/phylo4d.Rd
===================================================================
--- pkg/man/phylo4d.Rd	2009-10-06 05:13:11 UTC (rev 687)
+++ pkg/man/phylo4d.Rd	2009-10-06 22:37:55 UTC (rev 688)
@@ -27,37 +27,29 @@
         all.data = NULL, check.node.labels = c("keep", "drop", "asdata"), 
         annote=list(), metadata=list(), ...)
      \S4method{phylo4d}{phylo4}(x, tip.data = NULL, node.data = NULL,
-        all.data = NULL, match.data = TRUE, merge.data = TRUE, 
-        rownamesAsLabels = FALSE, metadata = list(), ...)
+        all.data = NULL, merge.data = TRUE, metadata = list(), ...)
      \S4method{phylo4d}{matrix}(x, tip.data = NULL, node.data = NULL,
         all.data = NULL, metadata = list(), ...)
 }
 
 \arguments{
   \item{x}{an object of class \code{phylo4}, \code{phylo} or a matrix of
-  edges (see above)}
+    edges (see above)}
   \item{tip.data}{a data frame (or object to be coerced to one)
     containing only tip data}
   \item{node.data}{a data frame (or object to be coerced to one)
     containing only node data}
   \item{all.data}{a data frame (or object to be coerced to one)
     containing both tip and node data}
-  \item{match.data}{(logical) should the rownames of the data frame
-    provided in \code{tip.data}, \code{node.data} and/or \code{all.data}
-    be used to be matched against tip and internal node identifiers? See
-    details.}
   \item{merge.data}{if both \code{tip.data} and \code{node.data} are
     provided, should columns with common names will be merged together
     (default TRUE) or not (FALSE)? See details.}
   \item{check.node.labels}{if \code{x} is of class \code{phylo}, use
-    either "keep" (the default) to retain internal node labels, "drop"
-    to drop them, or "asdata" to convert them to numeric tree data. This
-    argument is useful if the \code{phylo} object has non-unique node
-    labels or node labels with informative data (e.g., posterior
-    probabilities).}
-  \item{rownamesAsLabels}{(logical), should the row names of the data
-    provided be matched only to labels (TRUE), or should any number-like
-    row names be matched to node numbers (FALSE and default)}
+    either \dQuote{keep} (the default) to retain internal node labels,
+    \dQuote{drop} to drop them, or \dQuote{asdata} to convert them to
+    numeric tree data. This argument is useful if the \code{phylo}
+    object has non-unique node labels or node labels with informative
+    data (e.g., posterior probabilities).}
   \item{annote}{any additional annotation data to be passed to the new object}
   \item{metadata}{any additional metadata to be passed to the new object}
   \item{\dots}{further arguments to be passed to
@@ -68,23 +60,22 @@
 }
 
 \details{
+
   You can provide several data frames to define traits associated with
-  tip and/or internal nodes. Rules for matching rows of data to tree
-  nodes are determined jointly by the \code{match.data} and
-  \code{rownamesAsLabels} arguments, and these will apply the same way
-  to all supplied data frames. If \code{match.data} is TRUE, data frame
-  rows will be matched exclusively against tip and node labels if
-  \code{rownamesAsLabels} is also TRUE, whereas any all-digit row names
-  will be matched against tip and node numbers if
-  \code{rownamesAsLabels} is FALSE (the default). If \code{match.data}
-  is FALSE, \code{rownamesAsLabels} has no effect, and row matching is
-  purely positional with respect to the order returned by
-  \code{nodeId(x, "tip")} for tip.data, \code{nodeId(x, "internal")} for
-  node.data, and \code{nodeId(x, "all")} for all.data.
+  tip and/or internal nodes. By default, data row names are used to
+  link data to nodes in the tree, with any number-like names (e.g.,
+  \dQuote{10}) matched against node ID numbers, and any non-number-like
+  names (e.g., \dQuote{n10}) matched against node labels. Alternative
+  matching rules can be specified by passing additional arguments to
+  \code{formatData}; these include positional matching, matching
+  exclusively on node labels, and matching based on a column of data
+  rather than on row names. See \code{\link{formatData}} for more
+  information.
 
+  Matching rules will apply the same way to all supplied data frames.
   This means that you need to be consistent with the row names of your
   data frames. It is good practice to use tip and node labels (or node
-  numbers) when you merge data with a tree.
+  numbers) when you combine data with a tree.
 
   If you provide both \code{tip.data} and \code{node.data}, the
   treatment of columns with common names will depend on the
@@ -92,9 +83,9 @@
   both data frames will be merged; when merging columns of different
   data types, coercion to a common type will follow standard R rules. If
   \code{merge.data} is FALSE, columns with common names will be
-  preserved independently, with ".tip" and ".node" appended to the
-  names. This argument has no effect if \code{tip.data} and
-  \code{node.data} have no column names in common.
+  preserved independently, with \dQuote{.tip} and \dQuote{.node}
+  appended to the names. This argument has no effect if \code{tip.data}
+  and \code{node.data} have no column names in common.
 
   If you provide \code{all.data} along with either of \code{tip.data}
   and \code{node.data}, it must have distinct column names, otherwise an
@@ -174,7 +165,7 @@
 tipLabels(trGeo) <- as.character(sample(1:nTips(trGeo)))
 (exGeo6 <- phylo4d(trGeo, tip.data=tDt, rownamesAsLabels=TRUE))
 (exGeo7 <- phylo4d(trGeo, tip.data=tDt, rownamesAsLabels=FALSE))
-(exGeo8 <- phylo4d(trGeo, tip.data=tDt, rownamesAsLabels=FALSE, match.data=FALSE))
+(exGeo8 <- phylo4d(trGeo, tip.data=tDt, match.data=FALSE))
 
 require(ape) ## for rcoal
 ## generate a tree and some data

Modified: pkg/tests/misctests.Rout.save
===================================================================
--- pkg/tests/misctests.Rout.save	2009-10-06 05:13:11 UTC (rev 687)
+++ pkg/tests/misctests.Rout.save	2009-10-06 22:37:55 UTC (rev 688)
@@ -375,7 +375,7 @@
 26   N26   26       25     0.05167  internal <NA>
 27   N27   27       26     0.01500  internal <NA>
 
-phyl4d> (exGeo8 <- phylo4d(trGeo, tip.data=tDt, rownamesAsLabels=FALSE, match.data=FALSE))
+phyl4d> (exGeo8 <- phylo4d(trGeo, tip.data=tDt, match.data=FALSE))
    label node ancestor edge.length node.type    x
 1      2    1       24     0.05500       tip    a
 2     12    2       24     0.05500       tip    b



More information about the Phylobase-commits mailing list