[Phylobase-commits] r203 - in pkg: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Jul 18 10:14:52 CEST 2008
Author: skembel
Date: 2008-07-18 10:14:52 +0200 (Fri, 18 Jul 2008)
New Revision: 203
Modified:
pkg/R/checkdata.R
pkg/R/methods-phylo4d.R
pkg/R/setAs-Methods.R
pkg/R/subset.R
pkg/man/as-methods.Rd
pkg/man/check.phylo4d.Rd
pkg/man/phylo4d.Rd
pkg/man/printphylo4.Rd
Log:
Updated phylo4d data representation. Added ability to match data to tree based on either row.names or a data.frame column. Now works fine with missing/non-unique node names. Subsetting methods may still need work.
Modified: pkg/R/checkdata.R
===================================================================
--- pkg/R/checkdata.R 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/R/checkdata.R 2008-07-18 08:14:52 UTC (rev 203)
@@ -54,6 +54,8 @@
check_data <- function(object,
+ label.type=c("row.names","column"),
+ label.column=1,
use.tip.names=TRUE,
missing.tip.data=c("fail","OK","warn"),
extra.tip.data=c("fail","OK","warn"),
@@ -65,6 +67,17 @@
{
+ ## name matching default: use row.names of data frame
+ label.type = match.arg(label.type)
+ if (identical(label.type, "row.names")) {
+ tip.names <- row.names(object at tip.data)
+ node.names <- row.names(object at node.data)
+ }
+ else {
+ tip.names <- object at tip.data[,label.column]
+ node.names <- object at node.data[,label.column]
+ }
+
## tip default: use names, require names, must match exactly
missing.tip.data <- match.arg(missing.tip.data)
extra.tip.data <- match.arg(extra.tip.data)
@@ -84,7 +97,7 @@
if (use.tip.names) {
## check for default names
- if (all(row.names(object at tip.data) == 1:length(row.names(object at tip.data)))) {
+ if (all(tip.names == 1:length(tip.names))) {
## no tip.names
if (default.tip.names == "fail") {
stop("Tip data have default names and may not match tree tip labels. ",
@@ -98,21 +111,21 @@
## check tip names
## check for missing or extra tip data (relative to tree taxa)
- if (setequal(row.names(object at tip.data), object at tip.label)) {
+ if (setequal(tip.names, object at tip.label)) {
## names are perfect match - ok
return(TRUE)
}
else {
## we know the tree taxa and tip.data taxa are not a perfect match
## if tip.data taxa are subset of tree taxa, check missing.tip.data arg and act accordingly
- tips.in.rownames <- object at tip.label %in% row.names(object at tip.data)
- rownames.in.tips <- row.names(object at tip.data) %in% object at tip.label
+ tips.in.rownames <- object at tip.label %in% tip.names
+ rownames.in.tips <- tip.names %in% object at tip.label
missing.data.names <- object at tip.label[!tips.in.rownames]
missing.data.name.msg <- if (length(missing.data.names)==0) "" else {
paste("\n(missing data names: ",
paste(missing.data.names,collapse=","),")",sep="")
}
- extra.data.names <- row.names(object at tip.data)[!rownames.in.tips]
+ extra.data.names <- tip.names[!rownames.in.tips]
extra.data.name.msg <- if (length(extra.data.names)==0) "" else {
paste("\n(extra data names: ",
paste(extra.data.names,collapse=","),")",sep="")
@@ -146,7 +159,7 @@
}
##if tree taxa are subset of tip.data, check extra.tip arg and act accordingly
- if (!all(row.names(object at tip.data) %in% object at tip.label)) {
+ if (!all(tip.names %in% object at tip.label)) {
##we know it's not an exact match - we have extra.tip.data - take action
##fail
errmsg <- paste("Tip data names are a superset of tree tip labels",
@@ -181,8 +194,8 @@
if (use.node.names) {
## check for default names
- if (all(row.names(object at node.data) == 1:length(row.names(object at node.data)))
- || all(row.names(object at node.data) == (nTips(object)+1):nEdges(object))) {
+ if (all(node.names == 1:length(node.names))
+ || all(node.names == (nTips(object)+1):nEdges(object))) {
## no node.names
if (default.node.names == "fail") {
stop("Node data have default names and may not match tree node labels. ",
@@ -196,21 +209,21 @@
## check node names
## check for missing or extra node data (relative to tree taxa)
- if (setequal(row.names(object at node.data), object at node.label)) {
+ if (setequal(node.names, object at node.label)) {
## names are perfect match - ok
return(TRUE)
}
else {
## we know the tree taxa and node.data taxa are not a perfect match
## if node.data taxa are subset of tree taxa, check missing.node.data arg and act accordingly
- nodes.in.rownames <- object at node.label %in% row.names(object at node.data)
- rownames.in.nodes <- row.names(object at node.data) %in% object at node.label
+ nodes.in.rownames <- object at node.label %in% node.names
+ rownames.in.nodes <- node.names %in% object at node.label
missing.data.names <- object at node.label[!nodes.in.rownames]
missing.data.name.msg <- if (length(missing.data.names)==0) "" else {
paste("\n(missing data names: ",
paste(missing.data.names,collapse=","),")",sep="")
}
- extra.data.names <- row.names(object at node.data)[!rownames.in.nodes]
+ extra.data.names <- node.names[!rownames.in.nodes]
extra.data.name.msg <- if (length(extra.data.names)==0) "" else {
paste("\n(extra data names: ",
paste(extra.data.names,collapse=","),")",sep="")
@@ -244,7 +257,7 @@
}
##if tree taxa are subset of node.data, check extra.node arg and act accordingly
- if (!all(row.names(object at node.data) %in% object at node.label)) {
+ if (!all(node.names %in% object at node.label)) {
##we know it's not an exact match - we have extra.node.data - take action
##fail
errmsg <- paste("Node data names are a superset of tree node labels",
@@ -274,13 +287,26 @@
}
attach_data <- function(object,
+ label.type=c("row.names","column"),
+ label.column=1,
use.tip.names=TRUE,
use.node.names=FALSE,
...)
{
## assumes data have already been checked by check_data!
+ ## name matching default: use row.names of data frame
+ label.type = match.arg(label.type)
+ if (identical(label.type, "row.names")) {
+ tip.names <- row.names(object at tip.data)
+ node.names <- row.names(object at node.data)
+ }
+ else {
+ tip.names <- object at tip.data[,label.column]
+ node.names <- object at node.data[,label.column]
+ }
+
## for each set of data, take appropriate actions
## tip data operations:
@@ -288,18 +314,18 @@
if (!all(dim(object at tip.data)==0)) {
## if we want to use tip.names
if (use.tip.names) {
- object at tip.data <- object at tip.data[match(object at tip.label,row.names(object at tip.data)),,drop=FALSE]
+ object at tip.data <- object at tip.data[match(object at tip.label,tip.names),,drop=FALSE]
}
- row.names(object at tip.data) <- object at tip.label
+ #tip.names <- object at tip.label
}
## node data operations
if (!all(dim(object at node.data)==0)) {
## if we want to use tip.names
if (use.node.names) {
- object at node.data <- object at node.data[match(object at node.label,row.names(object at node.data)),,drop=FALSE]
+ object at node.data <- object at node.data[match(object at node.label,node.names),,drop=FALSE]
}
- row.names(object at node.data) <- object at node.label
+ #node.names <- object at node.label
}
return(object)
Modified: pkg/R/methods-phylo4d.R
===================================================================
--- pkg/R/methods-phylo4d.R 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/R/methods-phylo4d.R 2008-07-18 08:14:52 UTC (rev 203)
@@ -3,17 +3,81 @@
setMethod("show", "phylo4d", function(object) printphylo4(object))
setMethod("tdata", "phylo4d", function(x, which = c("tip",
- "node", "allnode"), ...) {
+ "node", "allnode"), label.type=c("row.names","column"), ...) {
which <- match.arg(which)
+ label.type <- match.arg(label.type)
+ if (which == "tip") {
+ if (all(dim(x at tip.data)==0)) {
+ return(x at tip.data)
+ }
+ tdata <- x at tip.data
+ data.names <- x at tip.label
+ if ( identical(label.type,"row.names") ) {
+ if ( identical(data.names,unique(data.names)) || !(any(is.na(data.names))) ) {
+ row.names(tdata) <- data.names
+ }
+ else {
+ warning("Non-unique or missing labels found, labels cannot be coerced to tdata row.names. Use the label.type argument to include labels as first column of data.")
+ }
+ }
+ if (identical(label.type,"column")) {
+ tdata <- data.frame(label=data.names,tdata)
+ }
+ return(tdata)
+ }
+
+ if (which == "node") {
+ if (all(dim(x at node.data)==0)) {
+ return(x at node.data)
+ }
+ tdata <- x at node.data
+ data.names <- x at node.label
+ if ( identical(label.type,"row.names") ) {
+ if ( identical(data.names,unique(data.names)) || !(any(is.na(data.names))) ) {
+ row.names(tdata) <- data.names
+ }
+ else {
+ warning("Non-unique or missing labels found, labels cannot be coerced to tdata row.names. Use the label.type argument to include labels as first column of data.")
+ }
+ }
+ if (identical(label.type,"column")) {
+ tdata <- data.frame(label=data.names,tdata)
+ }
+ return(tdata)
+ }
+
if (which == "allnode") {
- namesmatch <- all(colnames(x at tip.data) == colnames(x at node.data))
- classmatch <- all(sapply(x at tip.data, class) == sapply(x at node.data,
- class))
- if (!(classmatch && namesmatch))
- stop("Node and tip columns do not match, access tip and node data separately")
+ if (all(dim(x at node.data)==0)) {
+ nodedata <- data.frame(label=x at node.label)
+ }
+ else {
+ nodedata <- tdata(x, "node", label.type="column")
+ }
+ if (all(dim(x at tip.data)==0)) {
+ tipdata <- data.frame(label=x at tip.label)
+ }
+ else {
+ tipdata <- tdata(x, "tip", label.type="column")
+ }
+
+ data.names <- c(as.character(nodedata$label),as.character(tipdata$label))
+ tipdata$label <- (x at Nnode+1):(x at Nnode+length(x at tip.label))
+ nodedata$label <- 1:x at Nnode
+ ## FIXME - kludgy merge and subsequent cleanup - make robust
+ tdata <- merge(nodedata,tipdata, all=TRUE,sort=FALSE)[,-1,drop=FALSE]
+ tdata <- data.frame(label=data.names,tdata)
+
+ if ( identical(label.type,"row.names") ) {
+ if ( identical(data.names,unique(data.names)) || !(any(is.na(data.names))) ) {
+ tdata <- data.frame(tdata[,-1,drop=FALSE])
+ row.names(tdata) <- data.names
+ }
+ else {
+ stop("Non-unique or missing labels found, labels cannot be coerced to tdata row.names. Use the label.type argument to include labels as first column of data.")
+ }
+ }
+ return(tdata)
}
- switch(which, tip = x at tip.data, node = x at node.data, allnode = rbind(x at tip.data,
- x at node.data))
})
setMethod("tdata<-", "phylo4d", function(object, which = c("tip",
@@ -28,7 +92,7 @@
"you should access tip and node data separately")
}
switch(which,
- ## FIXME: add checks for matching row names etc ...
+ ## FIXME: add checks for matching row names etc ... use check_data
tip = object at tip.data <- value,
node = object at node.data <- value,
allnode = stop("for now, must set tip and node data separately"))
@@ -70,13 +134,13 @@
setMethod("nodeLabels<-", "phylo4d", function(object, ...,
value) {
object at node.label <- value
- rownames(object at node.data) <- value
+ #rownames(object at node.data) <- value
object
})
setMethod("labels<-", "phylo4d", function(object, ..., value) {
object at tip.label <- value
- rownames(object at tip.data) <- value
+ #rownames(object at tip.data) <- value
object
})
@@ -84,8 +148,8 @@
## (don't even know how that should be done in this case)
setMethod("na.omit", "phylo4d", function(object, ...) {
tipdata <- tdata(object, "tip")
- na.names <- rownames(tipdata)[!complete.cases(tipdata)]
- prune(object, tip = na.names)
+ na.index <- which(!complete.cases(tipdata))
+ prune(object, tip = na.index)
})
setMethod("names", signature(x = "phylo4d"), function(x) {
Modified: pkg/R/setAs-Methods.R
===================================================================
--- pkg/R/setAs-Methods.R 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/R/setAs-Methods.R 2008-07-18 08:14:52 UTC (rev 203)
@@ -115,29 +115,22 @@
nl <- rep(NA, nNodes(x))
}
tl <- labels(x)
- taxon.name <- c(nl, tl)
+ label <- c(nl, tl)
if (!isRooted(x)) {
node.type <- c(rep("internal", n.int), rep("tip",
n.tip))
}
else node.type <- c("root", rep("internal", n.int - 1),
rep("tip", n.tip))
- return(data.frame(taxon.name, node, ancestor, branch.length,
+ return(data.frame(label, node, ancestor, branch.length,
node.type))
})
setAs(from = "phylo4d", to = "data.frame", function(from) {
tree <- as(from, "phylo4") # get tree
t_df <- as(tree, "data.frame") # convert to data.frame
- dat <- tdata(from, "allnode") # get data
- old.ord <- t_df$taxon.name # save roworder of tree
-
- ## merge data.frames of tree and data
- tdat <- merge(t_df, dat, by.x = "taxon.name", by.y = "row.names",
- all.x = TRUE, all.y = FALSE, sort = FALSE)
-
- ## restore the correct order (i.e. the one of the tree data.frame)
- idx <- match(old.ord, tdat$taxon.name)
- res <- tdat[idx, ]
- return(res) # drop "order"
+ dat <- tdata(from, "allnode", label.type="column") # get data
+ tdat <- cbind(t_df,dat[,-1,drop=FALSE])
+ #tdat <- dat[,-1,drop=FALSE]
+ return(tdat)
})
Modified: pkg/R/subset.R
===================================================================
--- pkg/R/subset.R 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/R/subset.R 2008-07-18 08:14:52 UTC (rev 203)
@@ -78,6 +78,7 @@
#### data handling
## for now handle only tip data
+ ## FIXME update to remove dependency on row.names
tab <- tdata(x, which="tip")[i, j, ...,drop=FALSE]
oldtabnames <- row.names(tdata(x,which="tip"))
Modified: pkg/man/as-methods.Rd
===================================================================
--- pkg/man/as-methods.Rd 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/man/as-methods.Rd 2008-07-18 08:14:52 UTC (rev 203)
@@ -74,7 +74,7 @@
}}
}
-\author{Ben Bolker, Thibaut Jombart \email{jombart at biomserv.univ-lyon1.fr}, Marguerite Butler}
+\author{Ben Bolker, Thibaut Jombart \email{jombart at biomserv.univ-lyon1.fr}, Marguerite Butler, Steve Kembel}
\seealso{ generic \code{\link[methods]{as}}, \code{\link{phylo4}}, \code{\link{phylo4d}}, \code{\link{extract.tree}}, the original \code{\link[ade4]{phylog}} from the
\code{ade4} package and \code{\link[ape]{as.phylo}} from the \code{ape} package.
}
Modified: pkg/man/check.phylo4d.Rd
===================================================================
--- pkg/man/check.phylo4d.Rd 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/man/check.phylo4d.Rd 2008-07-18 08:14:52 UTC (rev 203)
@@ -7,14 +7,17 @@
(i.e. \linkS4class{phylo4d} objects).
}
\usage{
-check_data(object, use.tip.names=TRUE, missing.tip.data=c("fail","OK","warn"), extra.tip.data=c("fail","OK","warn"), default.tip.names=c("warn","OK","fail"), use.node.names=FALSE, missing.node.data=c("OK","warn","fail"), extra.node.data=c("OK","warn","fail"), default.node.names=c("warn","OK","fail"),\dots)
+check_data(object, label.type = c("row.names", "column"),
+label.column = 1, use.tip.names=TRUE, missing.tip.data=c("fail","OK","warn"), extra.tip.data=c("fail","OK","warn"), default.tip.names=c("warn","OK","fail"), use.node.names=FALSE, missing.node.data=c("OK","warn","fail"), extra.node.data=c("OK","warn","fail"), default.node.names=c("warn","OK","fail"),\dots)
-attach_data(object, use.tip.names = TRUE, use.node.names = FALSE,
- \dots)
+attach_data(object, label.type = c("row.names", "column"), label.column = 1,
+use.tip.names = TRUE, use.node.names = FALSE,\dots)
}
\arguments{
\item{object}{A phylo4d object}
+ \item{label.type}{Obtain labels for matching data to tree labels from the row.names or a column of the data? (default=\code{row.names})}
+ \item{label.column}{If label.type="column", number or name of column to use for matching data to tree labels (default=\code{1})}
\item{use.tip.names}{Use tip data names if present (default=\code{TRUE})}
\item{missing.tip.data}{Can tip data taxa be a subset of tree taxa? (default=\code{fail})}
\item{extra.tip.data}{Can tip data taxa be a superset of tree taxa? (default=\code{fail})}
Modified: pkg/man/phylo4d.Rd
===================================================================
--- pkg/man/phylo4d.Rd 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/man/phylo4d.Rd 2008-07-18 08:14:52 UTC (rev 203)
@@ -47,7 +47,7 @@
\seealso{
\code{\link{coerce-methods}} for translation functions. The \linkS4class{phylo4d} class, the \code{\link{check_data}}
function to check the validity of \code{phylo4d} objects; \linkS4class{phylo4} class and \link{phylo4} constructor.}
-\author{Ben Bolker, Thibaut Jombart}
+\author{Ben Bolker, Thibaut Jombart, Steve Kembel}
\note{
Checking on matches will be done by the validity checker
(label matches between data and tree tips, number of rows
Modified: pkg/man/printphylo4.Rd
===================================================================
--- pkg/man/printphylo4.Rd 2008-07-14 17:41:00 UTC (rev 202)
+++ pkg/man/printphylo4.Rd 2008-07-18 08:14:52 UTC (rev 203)
@@ -19,7 +19,7 @@
A data.frame with a row for each node (descendant), sorted as
follows: root first, then other internal nodes, and finally tips.\cr
The returned data.frame has the following columns:\cr
- \item{taxon.name}{Label for the taxon at the node (usually species name).}
+ \item{label}{Label for the taxon at the node (usually species name).}
\item{node}{Node number, i.e. the number identifying the node in \code{x at edge}.}
\item{ancestor}{Node number of the node's ancestor.}
\item{branch.length}{The branch length connecting the node to its
@@ -30,6 +30,7 @@
\author{
Marguerite Butler
Thibaut Jombart \email{jombart at biomserv.univ-lyon1.fr}
+ Steve Kembel
}
\note{This is the default show() method for phylo4, phylo4d. It prints the user-supplied information for building a phylo4 object. For a full description of the phylo4 S4 object and slots, see \code{\link{phylo4}}. }
\seealso{See Also as \code{\link{printphylo}} for an \code{ape}-like version of \code{print()}. }
More information about the Phylobase-commits
mailing list