[Phylobase-commits] r427 - in pkg: . R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Apr 21 17:13:57 CEST 2009


Author: francois
Date: 2009-04-21 17:13:57 +0200 (Tue, 21 Apr 2009)
New Revision: 427

Modified:
   pkg/DESCRIPTION
   pkg/R/checkdata.R
   pkg/R/class-phylo4.R
   pkg/R/methods-phylo4.R
   pkg/R/readNexus.R
   pkg/R/setAs-Methods.R
Log:
'edges.length' now has internal names that can be used to match explicitly the correct
	length to an edge

The internal names of node.label now match the nodeId

If all node.labels are numerical values, they are automatically converted as data. Useful
	when importing consensus tree from MrBayes for example.
	
Changed nodeId(): now returns correct nodes in case of unrooted trees

Changed setAs(phylo4, data.frame):
	- cleaned up the code and removed obsolete parts
	- match edge.length based on the identity of the edge.
	- explicitly name row names of the data frame exported by the nodeId
	- now associates data explicitly based on nodeId
	


Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION	2009-01-18 19:20:00 UTC (rev 426)
+++ pkg/DESCRIPTION	2009-04-21 15:13:57 UTC (rev 427)
@@ -2,7 +2,7 @@
 Type: Package
 Title: Base package for phylogenetic structures and comparative data
 Version: 0.4
-Date: 2008-07-27
+Date: 2009-04-21
 Depends: methods, grid, ape(>= 2.1)
 Suggests: ade4, MASS, gridBase
 Author: R Hackathon (Ben Bolker, Marguerite Butler, Peter Cowan,  Damien de Vienne, Thibaut Jombart, Steve Kembel, Francois Michonneau, David Orme, Brian O'Meara, Emmanuel Paradis, Derrick Zwickl)

Modified: pkg/R/checkdata.R
===================================================================
--- pkg/R/checkdata.R	2009-01-18 19:20:00 UTC (rev 426)
+++ pkg/R/checkdata.R	2009-04-21 15:13:57 UTC (rev 427)
@@ -38,11 +38,6 @@
     nTips <- sum(nDesc==0)
     if (!all(nDesc[1:nTips]==0))
       return("nodes 1 to nTips must all be tips")
-    #nRoots <- sum(nAncest==0)
-    ## no longer
-    ##if (which(nAncest==0)!=nTips+1) {
-    ##  return("root node is not at position (nTips+1)")
-    ##}
 
     if (nRoots>0) {
       if (sum(is.na(E[,1]))!=1) {

Modified: pkg/R/class-phylo4.R
===================================================================
--- pkg/R/class-phylo4.R	2009-01-18 19:20:00 UTC (rev 426)
+++ pkg/R/class-phylo4.R	2009-04-21 15:13:57 UTC (rev 427)
@@ -40,48 +40,82 @@
     } else {
         edge.length <- as.numeric(NULL)
     }
+    if(length(edge.length) != nrow(edge))
+        stop("The number of edge length is different from the number of edges.")
+    ## FM - 2009-04-19
+    ## edge.length is named according to the nodes the edge links together
+    ## (ancestor-descendant). This should allow more robust edge/edge.length
+    ## association and limit the problems associated with reordering trees.
+    names(edge.length) <- paste(edge[,1], edge[,2], sep="-")
 
     ## tip.label
     ntips <- sum(tabulate(na.omit(edge[, 1])) == 0)
     if(is.null(tip.label)) {
         tip.label <- .genlab("T", ntips)
     } else {
-        if(length(tip.label) != ntips) stop("the tip labels are not consistent with the number of tips")
+        if(length(tip.label) != ntips)
+            stop("the tip labels are not consistent with the number of tips")
         tip.label <- as.character(tip.label)
     }
     names(tip.label) <- seq(along=tip.label)
+
     ## node.label for internal nodes
     nnodes <- length(unique(na.omit(c(edge)))) - ntips
-    ##    if(is.null(node.label)) {
-    ##        node.label <- .genlab("N", nnodes)
-    ## } else {
+
     if(is.null(node.label)) {
       node.label <- character(0) ## empty node labels
-      ## node.label <- character(nnodes)
-      ## is.na(node.label) <- TRUE ## ???
-    } else if (length(node.label)>0 && length(node.label) != nnodes)
-      stop("number of node labels is not consistent with the number of nodes")
-    names(node.label) <- seq(along=node.label)
+    }
+    else {
+        if(length(node.label)>0 && length(node.label) != nnodes)
+            stop("number of node labels is not consistent with the number of nodes")
+    }
+    names(node.label) <- seq(from=ntips+1, along=node.label)
+
+
     ## edge.label
-    ## an edge is named by the descendant
     if(is.null(edge.label)) {
       edge.label <- character(0)
-    ##        edge.label <- paste("E", edge[, 2], sep = "")
     } else if (length(edge.label)>0 && length(edge.label) != nrow(edge))
       stop("number of edge labels is not consistent with the number of edges")
+
+
     ## fill in the result
     res <- new("phylo4")
     res at edge <- edge
     res at edge.length <- edge.length
     res at Nnode <- nnodes
     res at tip.label <- tip.label
-    res at node.label <- node.label
     res at edge.label <- edge.label
     res at order <- order
 
-    ## checkPhylo4 will return a character string if object is
-    ##  bad, otherwise TRUE
-    if (is.character(checkval <- checkPhylo4(res))) stop(checkval)
+    ## Tweak to deal with numerical values returned as node labels (ie. MrBayes)
+    if(!all(is.na(node.label)) && any(nchar(node.label) > 0) &&
+            !length(grep("[a-zA-Z]", node.label))) {
+        warning("All node labels are numeric values and converted as data.")
+        res at node.label <- character(0)
+        node.data <- node.label
+
+        node.data[!nzchar(node.data)] <- NA
+
+        node.label <- character(nnodes)
+        is.na(node.label) <- TRUE
+
+        node.data <- data.frame(labelValues=as.numeric(node.data))
+        res at node.label <- node.label
+
+        res <- phylo4d(res, node.data=node.data, use.node.names=F)
+        if(is.character(checkval <- checkPhylo4(res))) stop(checkval)
+
+        return(res)
+
+    }
+    else {
+        res at node.label <- node.label
+        ## checkPhylo4 will return a character string if object is
+        ##  bad, otherwise TRUE
+        if (is.character(checkval <- checkPhylo4(res))) stop(checkval)
+    }
+
     return(res)
 }
 

Modified: pkg/R/methods-phylo4.R
===================================================================
--- pkg/R/methods-phylo4.R	2009-01-18 19:20:00 UTC (rev 426)
+++ pkg/R/methods-phylo4.R	2009-04-21 15:13:57 UTC (rev 427)
@@ -89,9 +89,12 @@
     if (!hasEdgeLength(x))
         NULL
     else {
-      if (missing(which)) return(x at edge.length)
-      n <- getNode(x,which)
-      return(x at edge.length[n])
+      if (missing(which))
+          return(x at edge.length)
+      else {
+          n <- getNode(x,which)
+          return(x at edge.length[n])
+      }
     }
 })
 
@@ -212,12 +215,14 @@
 
 setMethod("nodeId", "phylo4", function(x,which=c("internal","tip","allnode")) {
   which <- match.arg(which)
+  tipNid <- x at edge[x at edge[,2]<=nTips(x),2]
+  allNid <- unique(as.vector(x at edge))
+  intNid <- allNid[! allNid %in% tipNid]
   nid <- switch(which,
-                internal=x at edge[x at edge[,2]>nTips(x),2],
-                tip = x at edge[x at edge[,2]<=nTips(x),2],
-                allnode = x at edge[,2])
-  #sort(nid)
-  return(nid)
+                internal = intNid,
+                tip = tipNid,
+                allnode = allNid)
+  return(nid[!is.na(nid)])
 })
 
 setReplaceMethod("nodeLabels", signature(object="phylo4", value="character"),

Modified: pkg/R/readNexus.R
===================================================================
--- pkg/R/readNexus.R	2009-01-18 19:20:00 UTC (rev 426)
+++ pkg/R/readNexus.R	2009-04-21 15:13:57 UTC (rev 427)
@@ -1,43 +1,43 @@
 readNexus <- function (file, simplify=TRUE, which=c("all","tree","data"), char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=TRUE) {
 #file = input nexus file
-#simplify = 
-#which = specify whether to return trees+data as phylo4d object ("all") if 
+#simplify =
+#which = specify whether to return trees+data as phylo4d object ("all") if
 #        both are found, returning a data.frame or phylo4 object if only one
-#        is found, "tree": return a phylo4 object only, regardless of 
+#        is found, "tree": return a phylo4 object only, regardless of
 #        whether there are data, "data": return a data.frame (no tree), even
 #        if a tree is present
 #char.all = if TRUE, includes even excluded chars in the nexus file
-#polymorphic.convert = if TRUE, convert polymorphic characters to missing 
+#polymorphic.convert = if TRUE, convert polymorphic characters to missing
 #                      characters
-#levels.uniform = if TRUE, categorical data are loaded with the same levels, 
+#levels.uniform = if TRUE, categorical data are loaded with the same levels,
 #             even if one character is missing a state
-	output<-c("Failure")
-	if (which=="all" || which=="data") {
+        output<-c("Failure")
+        if (which=="all" || which=="data") {
         params <- list(filename=file, allchar=char.all, polymorphictomissing=polymorphic.convert, levelsall=levels.uniform)
-        
+
 # Check that params is properly formatted.
         if(!is.list(params) || length(params) == 0) {
             stop("The params parameter must be a non-empty list")
         }
-        
+
         incharsstring <- .Call("ReadCharsWithNCL",params,
                                PACKAGE="phylobase")
 #print(incharsstring)
-        tipdata<-eval(parse(text=incharsstring))	
+        tipdata<-eval(parse(text=incharsstring))
     }
     if (which=="all" || which=="tree") {
         trees<-c("Failure");
         params <- list(filename=file)
-        
+
 # Check that params is properly formatted.
         if(!is.list(params) || length(params) == 0) {
             stop("The params parameter must be a non-empty list");
         }
-        
+
 # Finally ready to make the call...
         intreesstring <- .Call("ReadTreesWithNCL", params,
                                PACKAGE="phylobase")
-        
+        print(intreesstring)
         intreesphylolist <- read.nexustreestring(intreesstring);
         if (length(intreesphylolist)>1 || !simplify) {
             trees<-list()
@@ -49,7 +49,7 @@
             trees<-as(intreesphylolist[[1]], "phylo4");
         }
     }
-    if (which=="tree" || length(tipdata) == 0 ) { 
+    if (which=="tree" || length(tipdata) == 0 ) {
         output<-trees;
     }
     else if (which=="data") {
@@ -64,10 +64,10 @@
         }
         else {
             output<-phylo4d(as(intreesphylolist[[1]], "phylo4"), tip.data = tipdata)
-        }        
+        }
     }
-    
-	output
+
+        output
 }
 
 read.nexustreestring <- function(X)
@@ -75,15 +75,15 @@
 #Returns list of phylo objects (not multi.phylo, and always a list, even if there is only one element
 #X is a character vector, each element is one line from a treefile
 #This is based almost entirely on read.nexus from APE (Emmanuel Paradis).
-	
+
     X<-unlist(strsplit(unlist(X),c("\n")))
-    
+
 ## first remove all the comments
-    
+
 ## BCO took out the "speedier removal of comments" code -- it keeps [&R] as a node label, replaced it with original APE code
 ## speedier removal of comments pc 13 April 2008
 ##X <- lapply(X, gsub, pattern = "\\[[^\\]]*\\]", replacement = "")
-    
+
     LEFT <- grep("\\[", X)
     RIGHT <- grep("\\]", X)
     if (length(LEFT)) { # in case there are no comments at all
@@ -148,7 +148,7 @@
     STRING <- gsub(" ", "", STRING)
     colon <- grep(":", STRING)
     if (!length(colon)) {
-#TODO: recode clado.build & .treeBuildWithTokens from ape to phylobase
+#TODO: recode clado.build, tree.build & .treeBuildWithTokens from ape to phylobase
         trees <- lapply(STRING, clado.build)
     } else if (length(colon) == Ntree) {
         trees <-
@@ -189,4 +189,4 @@
         }
     }
     trees
-}
\ No newline at end of file
+}

Modified: pkg/R/setAs-Methods.R
===================================================================
--- pkg/R/setAs-Methods.R	2009-01-18 19:20:00 UTC (rev 426)
+++ pkg/R/setAs-Methods.R	2009-04-21 15:13:57 UTC (rev 427)
@@ -8,7 +8,7 @@
         int.idx <- (nTips(from)+1):dim(from$edge)[1]
         root.node <- as.numeric(setdiff(unique(from$edge[,1]), unique(from$edge[,2])))
         #from$edge <- rbind(from$edge,c(NA,root.edge))
-        from$edge <- rbind(from$edge[tip.idx,],c(NA,root.node),from$edge[int.idx,])        
+        from$edge <- rbind(from$edge[tip.idx,],c(NA,root.node),from$edge[int.idx,])
         if (!is.null(from$edge.length)) {
             if (is.null(from$root.edge)) {
                 from$edge.length <- c(from$edge.length[tip.idx],as.numeric(NA),from$edge.length[int.idx])
@@ -130,78 +130,54 @@
 #######################################################
 ## Exporting to dataframe
 setAs(from = "phylo4", to = "data.frame", def = function(from) {
-    if (is.character(checkval <- checkPhylo4(from))) # check the phylo4
+
+    ## Check the phylo4
+    if (is.character(checkval <- checkPhylo4(from)))
         stop(checkval)
     x <- from
-    if (isRooted(x)) {
-        E <- edges(x) # E: matrix of edges
-        ancestor <- E[, 1]
-        node <- E[, 2]
-        root <- which(is.na(ancestor))
-        int.node <-  node[(node %in% ancestor)]
-        tip <- node[!(node %in% ancestor)]
-        n.tip <- length(tip)
-        n.int <- length(int.node)
-        ## node <- c(root, node) # doesn't fit the ordering: root, other internal nodes, tips
-        #node <- c(int.node, tip)
-        ## retrieve the ancestor of each node
-        #idx <- match(node, E[, 2]) # new ordering of the descendants/edges
-        ## if (length(ancestor)>0) ancestor <- c(NA, ancestor)
-        #ancestor <- E[idx, 1]
-        ## branch.length <- c(x at root.edge, x at edge.length) # root.edge is not an edge length
-        branch.length <- edgeLength(x)#[idx]
-        if (is.null(edgeLength(x))) {
-            branch.length <- rep(NA, length(node))
-        }
-        ## node and tip labels ##
-        ## beware: they cannot be NULL
-        ## there are always tip labels (or checkPhylo4 complains)
-        ## there may not be node labels (character(0))
-        label <- labels(x,which="all")[nodeId(x,"all")]
-        node.type <- nodeType(x)[node]
-        d <- data.frame(label, node, ancestor, branch.length,
-            node.type)
-        d$label <- as.character(d$label)
-        return(d)
+
+    node <- nodeId(x, "all")
+    ancestr <- ancestor(x, node)
+    ndType <- nodeType(x)
+    intNode <- names(ndType[ndType == "internal"])
+    tip <- names(ndType[ndType == "tip"])
+
+    E <- data.frame(node, ancestr)
+    ## !! in phylobase, the order is node-ancestors whereas in ape it's
+    ## ancestor-node
+    nmE <- paste(E[,2], E[,1], sep="-")
+
+    if (hasEdgeLength(x)) {
+        edge.length <- edgeLength(x)[match(nmE, names(x at edge.length))]
     }
     else {
-        E <- edges(x) # E: matrix of edges
-        node <- unique(c(E))
-        ancestor <- E[, 1][node]
-        #orphan <- setdiff(E[,1],E[,2])
-        branch.length <- edgeLength(x)[node]
-        if (!hasEdgeLength(x)) {
-          branch.length <- rep(NA, length(node))
-        }
-        ## node and tip labels ##
-        ## beware: they cannot be NULL
-        ## there are always tip labels (or checkPhylo4 complains)
-        ## there may not be node labels (character(0))
-        label <- labels(x,which="all")[node]
-        node.type <- nodeType(x)[node]
-        d <- data.frame(label, node, ancestor, branch.length,
-            node.type)
-        d$label <- as.character(d$label)
-        return(d)
+        edge.length <- rep(NA, nNodes(x))
     }
+
+    label <- labels(x,which="all")[node]
+
+    d <- data.frame(label, node, ancestr, edge.length, node.type=ndType[node],
+                    row.names=node)
+    d$label <- as.character(d$label)
+    d
 })
 
 setAs(from = "phylo4d", to = "data.frame", function(from) {
 
-    tree <- extractTree(from) ## as(from, "phylo4") # get tree
-    t_df <- as(tree, "data.frame") # convert to data.frame
+    tree <- extractTree(from)
+    ## Convert to data.frame
+    tDf <- as(tree, "data.frame")
 
     dat <- tdata(from, "allnode", label.type="column") # get data
+
     ## reorder data to edge matrix order, drop labels (first column)
-    dat2 <- dat[nodeId(from,"all"),-1,drop=FALSE] 
-    tdat <- cbind(t_df, dat2)
-##     if(nrow(dat) > 0 && ncol(dat) > 1) {
-##         dat <- dat[match(t_df$label, dat$label), ]
-##         tdat <- cbind(t_df, dat[ ,-1 , drop=FALSE])
-##     }
-##     else {
-##         tdat <- t_df
-##         cat("No data associated with the tree\n")
-##     }
-    return(tdat)
+    if(nrow(dat) > 0 && ncol(dat) > 1) {
+         dat <- dat[match(rownames(tDf), rownames(dat)), ]
+         tdat <- cbind(tDf, dat[ ,-1 , drop=FALSE])
+     }
+     else {
+        tdat <- tDf
+        cat("No data associated with the tree\n")
+     }
+    tdat
 })



More information about the Phylobase-commits mailing list