[Phylobase-commits] r812 - in pkg: R man src
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Sep 7 23:45:40 CEST 2010
Author: francois
Date: 2010-09-07 23:45:40 +0200 (Tue, 07 Sep 2010)
New Revision: 812
Modified:
pkg/R/readNCL.R
pkg/man/readNCL.Rd
pkg/src/GetNCL.cpp
Log:
clean up code related to builing edge matrix within NCL, added option check.names to readNCL
Modified: pkg/R/readNCL.R
===================================================================
--- pkg/R/readNCL.R 2010-08-09 01:08:26 UTC (rev 811)
+++ pkg/R/readNCL.R 2010-09-07 21:45:40 UTC (rev 812)
@@ -7,8 +7,11 @@
char.all=FALSE, polymorphic.convert=TRUE,
levels.uniform=FALSE, quiet=TRUE,
check.node.labels=c("keep", "drop", "asdata"),
- return.labels=TRUE, file.format=c("nexus", "newick"), ...) {
+ return.labels=TRUE, file.format=c("nexus", "newick"),
+ check.names=TRUE, ...) {
+ ## turn on to TRUE to test new way of building trees in NCL
+ experimental <- FALSE
type <- match.arg(type)
check.node.labels <- match.arg(check.node.labels)
@@ -111,7 +114,7 @@
}
}
}
- tipData <- data.frame(tipData)
+ tipData <- data.frame(tipData, check.names=check.names)
if (length(ncl$taxaNames) == nrow(tipData)) {
rownames(tipData) <- ncl$taxaNames
}
@@ -123,39 +126,53 @@
if (returnTrees && length(ncl$trees) > 0) {
listTrees <- vector("list", length(ncl$trees))
- for (i in 1:length(ncl$trees)) {
- if (length(grep(":", ncl$trees[i]))) {
- listTrees[[i]] <- tree.build(ncl$trees[i])
- }
- else {
- listTrees[[i]] <- clado.build(ncl$trees[i])
- }
- }
- listTrees <- lapply(listTrees, function(tr) {
- if (length(ncl$taxaNames) == nTips(tr)) {
- tr$tip.label <- ncl$taxaNames[as.numeric(tr$tip.label)]
- }
- else stop("phylobase doesn't deal with multiple taxa block at this time.")
- if (is.null(tr$node.label)) {
- if (check.node.labels == "asdata") {
- warning("Could not use value \"asdata\" for ",
- "check.node.labels because there are no ",
- "labels associated with the tree")
- check.node.labels <- "drop"
+
+ if (!experimental) {
+ for (i in 1:length(ncl$trees)) {
+ if (length(grep(":", ncl$trees[i]))) {
+ listTrees[[i]] <- tree.build(ncl$trees[i])
}
- tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+ else {
+ listTrees[[i]] <- clado.build(ncl$trees[i])
+ }
}
- else {
- if (check.node.labels == "asdata") {
- tr <- phylo4d(tr, check.node.labels=check.node.labels, ...)
+ listTrees <- lapply(listTrees, function(tr) {
+ if (length(ncl$taxaNames) == nTips(tr)) {
+ tr$tip.label <- ncl$taxaNames[as.numeric(tr$tip.label)]
}
+ else stop("phylobase doesn't deal with multiple taxa block at this time.")
+ if (is.null(tr$node.label)) {
+ if (check.node.labels == "asdata") {
+ warning("Could not use value \"asdata\" for ",
+ "check.node.labels because there are no ",
+ "labels associated with the tree")
+ check.node.labels <- "drop"
+ }
+ tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+ }
else {
- tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+ if (check.node.labels == "asdata") {
+ tr <- phylo4d(tr, check.node.labels=check.node.labels, ...)
+ }
+ else {
+ tr <- phylo4(tr, check.node.labels=check.node.labels, ...)
+ }
}
+ })
+ if (length(listTrees) == 1 || simplify)
+ listTrees <- listTrees[[1]]
+ }
+ else {
+ edgeMat <- cbind(ncl$parentVector, c(1:length(ncl$parentVector)))
+ edgeLgth <- ncl$branchLengthVector
+ edgeLgth[edgeLgth == -1] <- NA
+ if (length(ncl$taxaNames) != min(ncll$parentVector)-1) {
+ stop("phylobase doesn't deal with multiple taxa block at this time.")
}
- })
- if (length(listTrees) == 1 || simplify)
- listTrees <- listTrees[[1]]
+ ## TODO: code node labels in GetNCL
+ tr <- phylo4(x=edgeMat, edge.length=edgeLgth, tip.label=ncl$taxaNames,
+ ...)
+ }
}
else {
listTrees <- NULL
@@ -204,12 +221,13 @@
char.all=FALSE, polymorphic.convert=TRUE,
levels.uniform=FALSE, quiet=TRUE,
check.node.labels=c("keep", "drop", "asdata"),
- return.labels=TRUE, ...) {
+ return.labels=TRUE, check.names=TRUE, ...) {
return(readNCL(file=file, simplify=simplify, type=type, char.all=char.all,
polymorphic.convert=polymorphic.convert, levels.uniform=levels.uniform,
quiet=quiet, check.node.labels=check.node.labels,
- return.labels=return.labels, file.format="nexus", ...))
+ return.labels=return.labels, file.format="nexus",
+ check.names=check.names, ...))
}
readNewick <- function(file, simplify=FALSE, quiet=TRUE,
Modified: pkg/man/readNCL.Rd
===================================================================
--- pkg/man/readNCL.Rd 2010-08-09 01:08:26 UTC (rev 811)
+++ pkg/man/readNCL.Rd 2010-09-07 21:45:40 UTC (rev 812)
@@ -19,7 +19,7 @@
readNexus(file, simplify=FALSE, type=c("all", "tree", "data"),
char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=FALSE,
quiet=TRUE, check.node.labels=c("keep", "drop", "asdata"),
- return.labels=TRUE, ...)
+ return.labels=TRUE, check.names=TRUE, ...)
readNewick(file, simplify=FALSE, quiet=TRUE,
check.node.labels=c("keep", "drop", "asdata"), ...)
@@ -57,6 +57,12 @@
\item{return.labels}{Determines whether state names (if TRUE) or state
codes should be returned.}
+
+ \item{check.names}{logical. If \sQuote{TRUE} then the names of the
+ characters from the NEXUS file are checked to ensure that they
+ are syntactically valid variable names and are not duplicated.
+ If necessary they are adjusted (by \sQuote{make.names}) so
+ that they are.}
\item{\dots}{Additional arguments to be passed to phylo4 or phylo4d
constructor (see Details)}
@@ -110,7 +116,8 @@
}
\note{Underscores in state labels (i.e. trait or taxon names)
- will be translated to spaces when read by NCL; trait names will
+ will be translated to spaces when read by NCL. Unless
+ \code{check.names=FALSE}, trait names will
be converted to valid R names (see \code{\link{make.names}}) on
input to R, so spaces will be translated to periods.}
\value{
Modified: pkg/src/GetNCL.cpp
===================================================================
--- pkg/src/GetNCL.cpp 2010-08-09 01:08:26 UTC (rev 811)
+++ pkg/src/GetNCL.cpp 2010-09-07 21:45:40 UTC (rev 812)
@@ -82,13 +82,10 @@
std::vector<std::string> charLabels; //labels for the characters
std::vector<std::string> stateLabels; //labels for the states
std::vector<int> nbStates; //number of states for each character (for Standard datatype)
-# if defined (NEW_TREE_RETURN_TYPE)
- std::vector<std::string> taxonLabelVector; //Index of the parent. 0 means no parent.
- std::vector<unsigned> parentVector; //Index of the parent. 0 means no parent.
- std::vector<double> branchLengthVector;
-# else
- std::vector<std::string> trees; //vector of Newick strings holding the names
-# endif
+ Rcpp::List lTaxaLabelVector = Rcpp::List::create();
+ Rcpp::List lParentVector = Rcpp::List::create();
+ Rcpp::List lBranchLengthVector = Rcpp::List::create();
+ std::vector<std::string> trees; //vector of Newick strings holding the names
std::vector<std::string> treeNames; //vector of tree names
std::vector<std::string> taxaNames; //vector of taxa names
std::string errorMsg; //error message
@@ -182,11 +179,8 @@
taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
}
-# if defined (NEW_TREE_RETURN_TYPE)
- taxonLabelVector.reserve(nTax);
- parentVector.reserve(2*nTax);
- branchLengthVector.reserve(2*nTax);
-# endif
+
+
/* Get trees */
if (returnTrees) {
if (nTreesBlocks == 0) {
@@ -196,77 +190,74 @@
NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
const unsigned nTrees = treeBlock->GetNumTrees();
if (nTrees > 0) {
+ // lTaxaLabelVector.reserve(nTrees);
+ // lParentVector.reserve(nTrees);
+ // lBranchLengthVector.reserve(nTrees);
+
for (unsigned k = 0; k < nTrees; k++) {
-# if defined(NEW_TREE_RETURN_TYPE)
- taxonLabelVector.clear();
- parentVector.clear();
- branchLengthVector.clear();
-
- const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k);
- treeNames.push_back(ftd.GetName());
- NxsSimpleTree simpleTree(ftd, -1, -1.0);
- std::vector<const NxsSimpleNode *> ndVector = simpleTree.GetPreorderTraversal();
- unsigned internalNdIndex = nTax;
- for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin(); ndIt != ndVector.end(); ++ndIt)
- {
- NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
- unsigned nodeIndex;
- if (nd->IsTip())
- {
- nodeIndex = nd->GetTaxonIndex();
- taxonLabelVector.push_back(taxaNames[nodeIndex]);
- std::cout << " leaf node # = " << nodeIndex << '\n';
- }
- else
- {
- nodeIndex = internalNdIndex++;
- nd->SetTaxonIndex(nodeIndex);
- std::cout << " internal node # = " << nd->GetTaxonIndex() << '\n';
- }
- if (parentVector.size() < nodeIndex + 1)
- {
- parentVector.resize(nodeIndex + 1);
- }
- if (branchLengthVector.size() < nodeIndex + 1)
- {
- branchLengthVector.resize(nodeIndex + 1);
- }
- NxsSimpleEdge edge = nd->GetEdgeToParent();
-
- NxsSimpleNode * par = 0L;
- par = (NxsSimpleNode *) edge.GetParent();
- if (par != 0L)
- {
- parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
- branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
- }
- else
- {
- parentVector[nodeIndex] = 0;
- branchLengthVector[nodeIndex] = -1.0;
- }
- }
- std::cout << "Parents = [";
- for (std::vector<unsigned>::const_iterator nIt = parentVector.begin(); nIt != parentVector.end(); ++nIt)
- {
- std::cout << *nIt << ", ";
- }
- std::cout << "]\nbranch lengths = [";
- for (std::vector<double>::const_iterator nIt = branchLengthVector.begin(); nIt != branchLengthVector.end(); ++nIt)
- {
- std::cout << *nIt << ", ";
- }
- std::cout << "]\n";
+ std::vector<std::string> taxonLabelVector; //Index of the parent. 0 means no parent.
+ std::vector<unsigned> parentVector; //Index of the parent. 0 means no parent.
+ std::vector<double> branchLengthVector;
+ taxonLabelVector.reserve(nTax);
+ parentVector.reserve(2*nTax);
+ branchLengthVector.reserve(2*nTax);
+ taxonLabelVector.clear();
+ parentVector.clear();
+ branchLengthVector.clear();
+
+ const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k);
+ treeNames.push_back(ftd.GetName());
+ NxsSimpleTree simpleTree(ftd, -1, -1.0);
+ std::vector<const NxsSimpleNode *> ndVector = simpleTree.GetPreorderTraversal();
+ unsigned internalNdIndex = nTax;
+ for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin(); ndIt != ndVector.end(); ++ndIt)
+ {
+ NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
+ unsigned nodeIndex;
+ if (nd->IsTip())
+ {
+ nodeIndex = nd->GetTaxonIndex();
+ taxonLabelVector.push_back(taxaNames[nodeIndex]);
+ }
+ else {
+ nodeIndex = internalNdIndex++;
+ nd->SetTaxonIndex(nodeIndex);
+ }
+ if (parentVector.size() < nodeIndex + 1)
+ {
+ parentVector.resize(nodeIndex + 1);
+ }
+ if (branchLengthVector.size() < nodeIndex + 1)
+ {
+ branchLengthVector.resize(nodeIndex + 1);
+ }
+ NxsSimpleEdge edge = nd->GetEdgeToParent();
+
+ NxsSimpleNode * par = 0L;
+ par = (NxsSimpleNode *) edge.GetParent();
+ if (par != 0L)
+ {
+ parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
+ branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
+ }
+ else
+ {
+ parentVector[nodeIndex] = 0;
+ branchLengthVector[nodeIndex] = -1.0;
+ }
+ }
-# else
- NxsString trNm = treeBlock->GetTreeName(k);
- treeNames.push_back(trNm);
- NxsString ts = treeBlock->GetTreeDescription(k);
- trees.push_back (ts);
-# endif
+ NxsString trNm = treeBlock->GetTreeName(k);
+ treeNames.push_back (trNm);
+ NxsString ts = treeBlock->GetTreeDescription(k);
+ trees.push_back (ts);
+
+ lTaxaLabelVector.push_back (taxonLabelVector);
+ lParentVector.push_back (parentVector);
+ lBranchLengthVector.push_back (branchLengthVector);
}
}
else {
@@ -361,13 +352,11 @@
/* Prepare list to return */
Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
Rcpp::Named("treeNames") = treeNames,
-# if defined (NEW_TREE_RETURN_TYPE)
- Rcpp::Named("parentVector") = parentVector,
- Rcpp::Named("branchLengthVector") = branchLengthVector,
-# else
- Rcpp::Named("trees") = trees,
-# endif
- Rcpp::Named("dataTypes") = dataTypes,
+ Rcpp::Named("taxonLabelVector") = lTaxaLabelVector,
+ Rcpp::Named("parentVector") = lParentVector,
+ Rcpp::Named("branchLengthVector") = lBranchLengthVector,
+ Rcpp::Named("trees") = trees,
+ Rcpp::Named("dataTypes") = dataTypes,
Rcpp::Named("nbCharacters") = nbCharacters,
Rcpp::Named("charLabels") = charLabels,
Rcpp::Named("nbStates") = nbStates,
More information about the Phylobase-commits
mailing list