[Phylobase-commits] r812 - in pkg: R man src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Tue Sep 7 23:45:40 CEST 2010


Author: francois
Date: 2010-09-07 23:45:40 +0200 (Tue, 07 Sep 2010)
New Revision: 812

Modified:
   pkg/R/readNCL.R
   pkg/man/readNCL.Rd
   pkg/src/GetNCL.cpp
Log:
clean up code related to builing edge matrix within NCL, added option check.names to readNCL

Modified: pkg/R/readNCL.R
===================================================================
--- pkg/R/readNCL.R	2010-08-09 01:08:26 UTC (rev 811)
+++ pkg/R/readNCL.R	2010-09-07 21:45:40 UTC (rev 812)
@@ -7,8 +7,11 @@
                     char.all=FALSE, polymorphic.convert=TRUE,
                     levels.uniform=FALSE, quiet=TRUE,
                     check.node.labels=c("keep", "drop", "asdata"),
-                    return.labels=TRUE, file.format=c("nexus", "newick"), ...) {
+                    return.labels=TRUE, file.format=c("nexus", "newick"),
+                    check.names=TRUE, ...) {
 
+  ## turn on to TRUE to test new way of building trees in NCL
+ experimental <- FALSE
 
  type <- match.arg(type)
  check.node.labels <- match.arg(check.node.labels)
@@ -111,7 +114,7 @@
        }
      }
    }
-   tipData <- data.frame(tipData)
+   tipData <- data.frame(tipData, check.names=check.names)
    if (length(ncl$taxaNames) == nrow(tipData)) {
      rownames(tipData) <- ncl$taxaNames
    }
@@ -123,39 +126,53 @@
 
  if (returnTrees && length(ncl$trees) > 0) {
    listTrees <- vector("list", length(ncl$trees))
-   for (i in 1:length(ncl$trees)) {
-     if (length(grep(":", ncl$trees[i]))) {
-       listTrees[[i]] <- tree.build(ncl$trees[i])
-     }
-     else {
-       listTrees[[i]] <- clado.build(ncl$trees[i])
-     }
-   }
-   listTrees <- lapply(listTrees, function(tr) {       
-     if (length(ncl$taxaNames) == nTips(tr)) {
-       tr$tip.label <- ncl$taxaNames[as.numeric(tr$tip.label)]     
-     }
-     else stop("phylobase doesn't deal with multiple taxa block at this time.")
-     if (is.null(tr$node.label)) {
-       if (check.node.labels == "asdata") {
-         warning("Could not use value \"asdata\" for ",
-                 "check.node.labels because there are no ",
-                 "labels associated with the tree")
-         check.node.labels <- "drop"
+   
+   if (!experimental) {
+     for (i in 1:length(ncl$trees)) {
+       if (length(grep(":", ncl$trees[i]))) {
+         listTrees[[i]] <- tree.build(ncl$trees[i])
        }
-       tr <- phylo4(tr, check.node.labels=check.node.labels, ...)       
+       else {
+         listTrees[[i]] <- clado.build(ncl$trees[i])
+       }
      }
-     else {
-       if (check.node.labels == "asdata") {
-         tr <- phylo4d(tr, check.node.labels=check.node.labels, ...)
+     listTrees <- lapply(listTrees, function(tr) {       
+       if (length(ncl$taxaNames) == nTips(tr)) {
+         tr$tip.label <- ncl$taxaNames[as.numeric(tr$tip.label)]     
        }
+       else stop("phylobase doesn't deal with multiple taxa block at this time.")
+       if (is.null(tr$node.label)) {
+         if (check.node.labels == "asdata") {
+           warning("Could not use value \"asdata\" for ",
+                   "check.node.labels because there are no ",
+                   "labels associated with the tree")
+           check.node.labels <- "drop"
+         }
+         tr <- phylo4(tr, check.node.labels=check.node.labels, ...)       
+       }
        else {
-         tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+         if (check.node.labels == "asdata") {
+           tr <- phylo4d(tr, check.node.labels=check.node.labels, ...)
+         }
+         else {
+           tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+         }
        }
+     })
+     if (length(listTrees) == 1 || simplify)
+       listTrees <- listTrees[[1]]
+   }
+   else {
+     edgeMat <- cbind(ncl$parentVector, c(1:length(ncl$parentVector)))
+     edgeLgth <- ncl$branchLengthVector
+     edgeLgth[edgeLgth == -1] <- NA
+     if (length(ncl$taxaNames) != min(ncll$parentVector)-1) {
+       stop("phylobase doesn't deal with multiple taxa block at this time.")
      }
-   })
-   if (length(listTrees) == 1 || simplify)
-     listTrees <- listTrees[[1]]
+     ## TODO: code node labels in GetNCL
+     tr <- phylo4(x=edgeMat, edge.length=edgeLgth, tip.label=ncl$taxaNames,
+                  ...)
+   }
  }
  else {
    listTrees <- NULL
@@ -204,12 +221,13 @@
                        char.all=FALSE, polymorphic.convert=TRUE,
                        levels.uniform=FALSE, quiet=TRUE,
                        check.node.labels=c("keep", "drop", "asdata"),
-                       return.labels=TRUE, ...) {
+                       return.labels=TRUE, check.names=TRUE, ...) {
 
   return(readNCL(file=file, simplify=simplify, type=type, char.all=char.all,
           polymorphic.convert=polymorphic.convert, levels.uniform=levels.uniform,
           quiet=quiet, check.node.labels=check.node.labels,
-          return.labels=return.labels, file.format="nexus", ...))
+          return.labels=return.labels, file.format="nexus",
+          check.names=check.names, ...))
 }
 
 readNewick <- function(file, simplify=FALSE, quiet=TRUE,

Modified: pkg/man/readNCL.Rd
===================================================================
--- pkg/man/readNCL.Rd	2010-08-09 01:08:26 UTC (rev 811)
+++ pkg/man/readNCL.Rd	2010-09-07 21:45:40 UTC (rev 812)
@@ -19,7 +19,7 @@
 readNexus(file, simplify=FALSE, type=c("all", "tree", "data"),
           char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=FALSE,
           quiet=TRUE, check.node.labels=c("keep", "drop", "asdata"),
-          return.labels=TRUE, ...)
+          return.labels=TRUE, check.names=TRUE, ...)
 
 readNewick(file, simplify=FALSE, quiet=TRUE,
            check.node.labels=c("keep", "drop", "asdata"), ...)
@@ -57,6 +57,12 @@
   
   \item{return.labels}{Determines whether state names (if TRUE) or state
     codes should be returned.}
+
+  \item{check.names}{logical. If \sQuote{TRUE} then the names of the
+          characters from the NEXUS file are checked to ensure that they
+          are syntactically valid variable names and are not duplicated.
+          If necessary they are adjusted (by \sQuote{make.names}) so
+          that they are.}
   
   \item{\dots}{Additional arguments to be passed to phylo4 or phylo4d
     constructor (see Details)}
@@ -110,7 +116,8 @@
  
 }
 \note{Underscores in state labels (i.e. trait or taxon names)
-  will be translated to spaces when read by NCL; trait names will
+  will be translated to spaces when read by NCL. Unless
+  \code{check.names=FALSE}, trait names will
   be converted to valid R names (see \code{\link{make.names}}) on
   input to R, so spaces will be translated to periods.}
 \value{

Modified: pkg/src/GetNCL.cpp
===================================================================
--- pkg/src/GetNCL.cpp	2010-08-09 01:08:26 UTC (rev 811)
+++ pkg/src/GetNCL.cpp	2010-09-07 21:45:40 UTC (rev 812)
@@ -82,13 +82,10 @@
     std::vector<std::string> charLabels;     //labels for the characters
     std::vector<std::string> stateLabels;    //labels for the states
     std::vector<int> nbStates;               //number of states for each character (for Standard datatype)
-#   if defined (NEW_TREE_RETURN_TYPE)
-        std::vector<std::string> taxonLabelVector; //Index of the parent. 0 means no parent.
-        std::vector<unsigned> parentVector; //Index of the parent. 0 means no parent.
-        std::vector<double> branchLengthVector; 
-#   else
-        std::vector<std::string> trees;          //vector of Newick strings holding the names
-#   endif
+    Rcpp::List lTaxaLabelVector = Rcpp::List::create();
+    Rcpp::List lParentVector = Rcpp::List::create();
+    Rcpp::List lBranchLengthVector = Rcpp::List::create();
+    std::vector<std::string> trees;          //vector of Newick strings holding the names
     std::vector<std::string> treeNames;      //vector of tree names
     std::vector<std::string> taxaNames;      //vector of taxa names
     std::string errorMsg;                    //error message
@@ -182,11 +179,8 @@
 	    taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
 	}
 
-#   if defined (NEW_TREE_RETURN_TYPE)
-        taxonLabelVector.reserve(nTax);
-        parentVector.reserve(2*nTax);
-        branchLengthVector.reserve(2*nTax);
-#   endif
+      
+
 	/* Get trees */
 	if (returnTrees) {
 	    if (nTreesBlocks == 0) {
@@ -196,77 +190,74 @@
 		NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
 		const unsigned nTrees = treeBlock->GetNumTrees();
 		if (nTrees > 0) {
+		    // lTaxaLabelVector.reserve(nTrees);
+		    // lParentVector.reserve(nTrees);
+		    // lBranchLengthVector.reserve(nTrees);
+
 		    for (unsigned k = 0; k < nTrees; k++) {
-#           if defined(NEW_TREE_RETURN_TYPE)
-                taxonLabelVector.clear();
-                parentVector.clear();
-                branchLengthVector.clear();
-                
-                const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k); 
-                treeNames.push_back(ftd.GetName());
-                NxsSimpleTree simpleTree(ftd, -1, -1.0);
-                std::vector<const NxsSimpleNode *> ndVector =  simpleTree.GetPreorderTraversal();
-                unsigned internalNdIndex = nTax;
-                for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin(); ndIt != ndVector.end(); ++ndIt)
-                    {
-                    NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
-                    unsigned nodeIndex;
-                    if (nd->IsTip())
-                        {
-                        nodeIndex = nd->GetTaxonIndex();
-                        taxonLabelVector.push_back(taxaNames[nodeIndex]);
-                        std::cout << " leaf node # = " <<  nodeIndex << '\n';
-                        }
-                    else
-                        {
-                        nodeIndex = internalNdIndex++;
-                        nd->SetTaxonIndex(nodeIndex);
-                        std::cout << " internal node # = " << nd->GetTaxonIndex()  << '\n';
-                        }
-                    if (parentVector.size() < nodeIndex + 1)
-                        {
-                        parentVector.resize(nodeIndex + 1);
-                        }
-                    if (branchLengthVector.size() < nodeIndex + 1)
-                        {
-                        branchLengthVector.resize(nodeIndex + 1);
-                        }
-                    NxsSimpleEdge edge = nd->GetEdgeToParent();
-    
-                    NxsSimpleNode * par = 0L;
-                    par = (NxsSimpleNode *) edge.GetParent();
-                    if (par != 0L)
-                        {
-                        parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
-                        branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
-                        }
-                    else
-                        {
-                        parentVector[nodeIndex] = 0;
-                        branchLengthVector[nodeIndex] = -1.0;
-                        }
-                    }
-                std::cout << "Parents = [";
-                for (std::vector<unsigned>::const_iterator nIt = parentVector.begin(); nIt != parentVector.end(); ++nIt)
-                    {
-                    std::cout << *nIt << ", ";				
-                    }
-                std::cout << "]\nbranch lengths = [";
-                for (std::vector<double>::const_iterator nIt = branchLengthVector.begin(); nIt != branchLengthVector.end(); ++nIt)
-                    {
-                    std::cout << *nIt << ", ";				
-                    }
-                std::cout << "]\n";
 
+			std::vector<std::string> taxonLabelVector; //Index of the parent. 0 means no parent.
+			std::vector<unsigned> parentVector;        //Index of the parent. 0 means no parent.
+			std::vector<double> branchLengthVector;   
 
+			taxonLabelVector.reserve(nTax);
+			parentVector.reserve(2*nTax);
+			branchLengthVector.reserve(2*nTax); 
 
+			taxonLabelVector.clear();
+			parentVector.clear();
+			branchLengthVector.clear();
+                
+			const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k); 
+			treeNames.push_back(ftd.GetName());
+			NxsSimpleTree simpleTree(ftd, -1, -1.0);
+			std::vector<const NxsSimpleNode *> ndVector =  simpleTree.GetPreorderTraversal();
+			unsigned internalNdIndex = nTax;
+			for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin(); ndIt != ndVector.end(); ++ndIt)
+			{
+			    NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
+			    unsigned nodeIndex;
+			    if (nd->IsTip())
+			    {
+				nodeIndex = nd->GetTaxonIndex();
+				taxonLabelVector.push_back(taxaNames[nodeIndex]);				
+			    }
+			    else {
+				nodeIndex = internalNdIndex++;
+				nd->SetTaxonIndex(nodeIndex);
+			    }
+			    if (parentVector.size() < nodeIndex + 1)
+			    {
+				parentVector.resize(nodeIndex + 1);
+			    }
+			    if (branchLengthVector.size() < nodeIndex + 1)
+			    {
+				branchLengthVector.resize(nodeIndex + 1);
+			    }
+			    NxsSimpleEdge edge = nd->GetEdgeToParent();
+			    
+			    NxsSimpleNode * par = 0L;
+			    par = (NxsSimpleNode *) edge.GetParent();
+			    if (par != 0L)
+			    {
+				parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
+				branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
+			    }
+			    else
+			    {
+				parentVector[nodeIndex] = 0;
+				branchLengthVector[nodeIndex] = -1.0;
+			    }
+			}
 
-#           else
-    			NxsString trNm = treeBlock->GetTreeName(k);
-	    		treeNames.push_back(trNm);
-    			NxsString ts = treeBlock->GetTreeDescription(k);
-    			trees.push_back (ts);
-#           endif
+			NxsString trNm = treeBlock->GetTreeName(k);
+			treeNames.push_back (trNm);
+			NxsString ts = treeBlock->GetTreeDescription(k);
+			trees.push_back (ts);
+
+			lTaxaLabelVector.push_back (taxonLabelVector);
+			lParentVector.push_back (parentVector);
+			lBranchLengthVector.push_back (branchLengthVector);
 		    }
 		}
 		else {
@@ -361,13 +352,11 @@
     /* Prepare list to return */
     Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
 					Rcpp::Named("treeNames") = treeNames,
-#               if defined (NEW_TREE_RETURN_TYPE)
-                    Rcpp::Named("parentVector") = parentVector,
-                    Rcpp::Named("branchLengthVector") = branchLengthVector,
-#               else
-					Rcpp::Named("trees") = trees,
-#               endif
-                    Rcpp::Named("dataTypes") = dataTypes,
+					Rcpp::Named("taxonLabelVector") = lTaxaLabelVector,
+					Rcpp::Named("parentVector") = lParentVector,
+					Rcpp::Named("branchLengthVector") = lBranchLengthVector,
+					Rcpp::Named("trees") = trees,					
+					Rcpp::Named("dataTypes") = dataTypes,
 					Rcpp::Named("nbCharacters") = nbCharacters,
 					Rcpp::Named("charLabels") = charLabels,
 					Rcpp::Named("nbStates") = nbStates,



More information about the Phylobase-commits mailing list