[Phylobase-commits] r778 - in pkg: . src src/ncl
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Fri Apr 2 00:19:29 CEST 2010
Author: francois
Date: 2010-04-02 00:19:27 +0200 (Fri, 02 Apr 2010)
New Revision: 778
Modified:
pkg/DESCRIPTION
pkg/src/ncl/nxstreesblock.h
pkg/src/nxsstring.cpp
pkg/src/nxstoken.cpp
pkg/src/nxstreesblock.cpp
Log:
update to NCL 2.1.12, update phylobase to 0.5.10
Modified: pkg/DESCRIPTION
===================================================================
--- pkg/DESCRIPTION 2010-04-01 20:49:07 UTC (rev 777)
+++ pkg/DESCRIPTION 2010-04-01 22:19:27 UTC (rev 778)
@@ -1,8 +1,8 @@
Package: phylobase
Type: Package
Title: Base package for phylogenetic structures and comparative data
-Version: 0.5.9
-Date: 2010-03-29
+Version: 0.5.10
+Date: 2010-04-01
Depends: methods, grid, ape(>= 2.1), Rcpp (>= 0.7.4)
Suggests: ade4, MASS
Author: R Hackathon et al. (alphabetically: Ben Bolker, Marguerite Butler, Peter Cowan, Damien de Vienne, Thibaut Jombart, Steve Kembel, Francois Michonneau, David Orme, Brian O'Meara, Emmanuel Paradis, Jim Regetz, Derrick Zwickl)
Modified: pkg/src/ncl/nxstreesblock.h
===================================================================
--- pkg/src/ncl/nxstreesblock.h 2010-04-01 20:49:07 UTC (rev 777)
+++ pkg/src/ncl/nxstreesblock.h 2010-04-01 22:19:27 UTC (rev 778)
@@ -36,10 +36,19 @@
virtual bool IsDefaultTree(unsigned i) = 0;
virtual bool IsRootedTree(unsigned i) = 0;
};
+/*! This function provides rudimentary support for parsing of NHX comments.
+ It is called during the creation of a NxsSimpleTree to handle any NHX comments
-std::string parseNHXComment(const std::string comment, std::map<std::string, std::string> *infoMap);
+ It fills `infoMap` with the key value pairs parsed from a comment that starts with
+ &&NHX
+ \returns the unparsed portion of the comment
+*/
+std::string parseNHXComment(const std::string comment, /*! the comment without the [] braces. If the comment does not start with &&NHX then the entire comment will be returned*/
+ std::map<std::string, std::string> *infoMap); /*!< the destination for key value pairs parsed out of the NHX comment */
class NxsFullTreeDescription;
class NxsSimpleNode;
+/*! The edge used by the NxsSimpleTree class.
+*/
class NxsSimpleEdge
{
public:
@@ -68,9 +77,10 @@
return unprocessedComments;
}
- /// returns true if `key` was processed from a comment.
- /// If found, (and value is not NULL), the *value will hold the
- /// value on exit
+ /*! \returns true if `key` was processed from a comment.
+ If the key was found and `value` pointer is not NULL, then the
+ *value will hold the value on exit
+ */
bool GetInfo(const std::string &key, std::string *value) const
{
std::map<std::string, std::string>::const_iterator kvit = parsedInfo.find(key);
@@ -80,7 +90,16 @@
*value = kvit->second;
return true;
}
+ /*! Returns a reference to the map that stores information in a generic
+ key to value mapping where both elements are strings.
+ This map is populated by the information from NHX comments during the creation of
+ a NxsSimpleTree.
+ */
+ const std::map<std::string, std::string> & GetInfo() const
+ {
+ return parsedInfo;
+ }
const NxsSimpleNode * GetParent() const
{
return parent;
@@ -151,6 +170,8 @@
friend class NxsSimpleNode;
};
+/*! The node used by the NxsSimpleTree class.
+*/
class NxsSimpleNode
{
public:
@@ -260,7 +281,10 @@
unsigned taxIndex; // present for every leaf. UINT_MAX for internals labeled with taxlabels
friend class NxsSimpleTree;
};
-
+/*! A simple tree class.
+ Internally NCL stores trees as newick strings with metadata (see the NxsFullTreeDescription class)
+ but you can create a NxsSimpleTree
+*/
class NxsSimpleTree
{
public:
@@ -337,6 +361,25 @@
NxsSimpleTree & operator=(const NxsSimpleTree &); //not defined. Not copyable
};
+/*! A class that encapsulates a newick string description of a tree and metadata about the tree.
+
+ the NxsTreesBlock stores the trees as NxsFullTreeDescription because during its parse
+ and validation of a tree string.
+ By default, NCL will "process" each tree -- converting the taxon labels to
+ numbers for the taxa (the number will be 1 + the taxon index).
+ During this processing, the trees block detects things about the tree such as whether
+ there are branch lengths on the tree, whether there are polytomies...
+
+ This data about the tree is then stored in a NxsFullTreeDescription
+ so that the client code can access some information about a tree before it parses
+ the newick string.
+
+ If you do not want to parse the newick string yourself, you can construct a
+ NxsSimpleTree object from a NxsFullTreeDescription object if the NxsFullTreeDescription
+ is "processed"
+
+ If the NxsTreesBlock is configured NOT to process trees (see NxsTreesBlock::SetProcessAllTreesDuringParse())
+*/
class NxsFullTreeDescription
{
public:
@@ -357,86 +400,125 @@
NXS_SOME_NEGATIVE_EDGE_LEN_BIT = 0x1000,
NXS_TREE_PROCESSED = 0x2000
};
- NxsFullTreeDescription(const std::string & newickStr, const std::string &treeName, int infoFlags)
+ /*! Creates a Tree description from a newick string, name and int with bits that indicate
+ some metadata about the tree.
+ */
+ NxsFullTreeDescription(const std::string & newickStr, /*!< the newick string */
+ const std::string &treeName, /*!< the name of the tree */
+ int infoFlags) /*!< union of the relevant bits from TreeDescFlags */
:newick(newickStr),
name(treeName),
flags(infoFlags),
minIntEdgeLen(INT_MAX),
minDblEdgeLen(DBL_MAX)
{}
+ /*! Tokenizes the tree into a vector of NEXUS tokens.
+ This makes it easier for to parse.
+ */
std::vector<std::string> GetTreeTokens() const;
- /** returns a newick string with 1-based numbers corresponding to (1 + Taxa block's index of taxon)*/
+ /** returns a newick string.
+ If the NxsFullTreeDescription is processed, then the string will have
+ 1-based numbers corresponding to (1 + Taxa block's index of taxon)
+ If it is not processed, then it will correspond with the exact string
+ in the file. Handling unprocessed newick strings requires that the
+ client code consult the Translation table and implement NEXUS'
+ numeric interpretation of labels in order to decode correctly
+ decode all taxon labels
+ */
const std::string & GetNewick() const
{
return newick;
}
+ /*! \returns the name of the tree */
const std::string & GetName() const
{
return name;
}
+ /*! \returns true if the newick string has been processed. */
bool IsProcessed() const
{
return (flags&NXS_TREE_PROCESSED) != 0;
}
+ /*! \throws a NxsNCLAPIException if the tree has not been "processed" */
void AssertProcessed() const
{
if (!IsProcessed())
throw NxsNCLAPIException("Tree description queries are only supported on processed tree descriptions.");
}
+ /*! \returns true if the tree was rooted. */
bool IsRooted() const
{
AssertProcessed();
return (flags&NXS_IS_ROOTED_BIT) != 0;
}
+ /*! \returns true all of the edges in the tree have edge length.
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool AllEdgesHaveLengths() const
{
AssertProcessed();
return (flags&NXS_EDGE_LENGTH_UNION) == NXS_HAS_SOME_EDGE_LENGTHS_BIT;
}
+ /*! \returns true at least one edge in the tree have edge length
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool SomeEdgesHaveLengths() const
{
AssertProcessed();
return (flags&NXS_HAS_SOME_EDGE_LENGTHS_BIT) != 0;
}
+ /*! \returns true all of the edge lengths that are specified can be read as integers
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool EdgeLengthsAreAllIntegers() const
{
AssertProcessed();
return (flags&NXS_INT_EDGE_LENGTHS_BIT) != 0;
}
+ /*! \returns true if the tree contains all of the taxa listed in the NxsTaxaBlock associated with the trees block that generated this NxsFullTreeDescription
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool AllTaxaAreIncluded() const
{
AssertProcessed();
return (flags&NXS_HAS_ALL_TAXA_BIT) != 0;
}
+ /*! \returns true if some of the edges in the tree have New Hampshire Extended style comments (see http://www.phylosoft.org/NHX)
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool HasNHXComments() const
{
AssertProcessed();
return (flags&NXS_HAS_NHX_BIT) != 0;
}
+ /*! \returns true if the tree has polytomies
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool HasPolytomies() const
{
AssertProcessed();
return (flags&NXS_HAS_POLYTOMY_BIT) != 0;
}
+ /*! \returns true if the tree some internal nodes that only have one child.
+ \raises a NxsNCLAPIException if the tree has not been processed!
+ */
bool HasDegreeTwoNodes() const
{
AssertProcessed();
return (flags&NXS_HAS_DEG_TWO_NODES_BIT) != 0;
}
- /**---------------------------------------------------------------------
- | If EdgeLengthsAreAllIntegers returns true then this will return the
- | shortest edge length in the tree (useful as means of checking for
- | constraints by programs that prohibit 0 or negative branch lengths)
+ /*! If EdgeLengthsAreAllIntegers returns true then this will return the
+ shortest edge length in the tree (useful as means of checking for
+ constraints by programs that prohibit 0 or negative branch lengths)
*/
int smallestIntEdgeLength() const
{
return minIntEdgeLen;
}
- /**---------------------------------------------------------------------
- | If EdgeLengthsAreAllIntegers returns false then this will return the
- | shortest edge length in the tree (useful as means of checking for
- | constraints by programs that prohibit 0 or negative branch lengths)
+ /*! If EdgeLengthsAreAllIntegers returns false then this will return the
+ shortest edge length in the tree (useful as means of checking for
+ constraints by programs that prohibit 0 or negative branch lengths)
*/
double smallestRealEdgeLength() const
{
@@ -454,29 +536,19 @@
class NxsTreesBlock;
typedef bool (* ProcessedTreeValidationFunction)(NxsFullTreeDescription &, void *, NxsTreesBlock *);
/*!
- This class handles reading and storage for the NEXUS block TREES. It overrides the member functions Read and Reset,
- which are abstract virtual functions in the base class NxsBlock. The translation table (if one is supplied) is
- stored in the `translateList'. The tree names are stored in `treeName' and the tree descriptions in
- `treeDescription'. Information about rooting of trees is stored in `rooted'. Note that no checking is done to
- ensure that the tree descriptions are valid. The validity of the tree descriptions could be checked after the TREES
- block has been read (but before the next block in the file has been read) by overriding the NxsReader::ExitingBlock
- member function, but no functionality for this is provided by the NCL. Below is a table showing the correspondence
- between the elements of a TREES block and the variables and member functions that can be used to access each piece
- of information stored.
->
- NEXUS command Data members Member functions
- -----------------------------------------------------
- TRANSLATE translateList
+ This class handles reading and storage for the NEXUS block TREES.
+ The class can read the TRANSLATE and TREE commands.
- TREE treeName GetTreeName
- GetTreeDescription
- GetNumTrees
- GetNumDefaultTree
- IsDefaultTree
+ The tree is validated during the parse and then stored as a NxsFullTreeDescription
+ object which will hold the newick string. This newick string will have
+ numbers rather than names. The numbers in the tree string start at 1 (like other NEXUS numbering),
+ but they are simply 1 + the taxon index.
- rooted IsRootedTree
- -----------------------------------------------------
->
+ In previous versions of NCL (before v2.1), the client code would have to use the translate
+ table to convert the newick string into the taxon numbers.
+
+ As of v2.1, NCL now does this translation.
+
*/
class NxsTreesBlock
: public NxsTreesBlockAPI, public NxsTaxaBlockSurrogate
@@ -491,15 +563,51 @@
return NxsLabelToIndicesMapper::GetIndicesFromSets(label, toFill, treeSets);
}
+ /*! \returns the index of the default tree (the last tree in the TREES block with a * before its name)
+ if no default tree was specified than the first index (0) will be returned
+ */
unsigned GetNumDefaultTree();
+ /*! \returns the number of trees stored */
unsigned GetNumTrees();
+ /*! \returns the number of trees stored */
unsigned GetNumTrees() const;
+ /*! \returns the NxsFullTreeDescription for tree with index `i`
+ `i` should be in the range [0, num_trees)
+
+ If the NxsFullTreeDescription is processed (see NxsFullTreeDescription::IsProcessed())
+ then its newick string will have numbers rather than names. The numbers in the tree
+ string start at 1 (like other NEXUS numbering), but they are simply 1 + the taxon index.
+
+ In previous versions of NCL (before v2.1), the client code would have to use the translate
+ table to convert the newick string into the taxon numbers.
+
+
+ */
const NxsFullTreeDescription & GetFullTreeDescription(unsigned i) const;
+ /*! \returns a 1-based number for the last tree read that has the name `name` */
unsigned TreeLabelToNumber(const std::string & name) const;
+ /*! \returns the tree name for the tree with index `i`
+ i should be in the range [0, ntrees)
+ */
NxsString GetTreeName(unsigned i);
+ /*! \returns the tree description object for the tree with index `i`
+ i should be in the range [0, ntrees)
+ */
NxsString GetTreeDescription(unsigned i);
+ /*! \returns the newick string for the tree with index i. The string will have
+ the taxon names rather than numbers (or other translate table keys) in it.
+ i should be in the range [0, ntrees)
+ */
NxsString GetTranslatedTreeDescription(unsigned i);
+ /*! \returns true if the tree with index i is the default tree
+ i should be in the range [0, ntrees)
+ */
bool IsDefaultTree(unsigned i);
+ /*! \returns true if the tree is thought to be rooted (could be rooted
+ because this is NCL's default, or it could indicate that a [&R]
+ comment was encountered.
+ i should be in the range [0, ntrees)
+ */
bool IsRootedTree(unsigned i);
virtual void Report(std::ostream &out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
virtual void BriefReport(NxsString &s) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
@@ -541,6 +649,13 @@
{
return allowImplicitNames;
}
+ /*! \returns true if the block uses the v2.1 style of parsing in which the tree is interpretted and converted into
+ a newick string with standard taxon numbering
+ If false, then the NxsTreesBlock uses the v2.0 API in which the tree reader simply stores the tree string
+ as written in the file (so the client code has to check the translate table in order to interpret
+ the newick stream).
+ true by default.
+ */
bool GetProcessAllTreesDuringParse() const
{
return processAllTreesDuringParse;
@@ -549,15 +664,42 @@
{
allowImplicitNames = s;
}
+ /*! If true then the block will use the v2.1 style of parsing in which the tree is interpretted and converted into
+ a newick string with standard taxon numbering
+ If false, then the NxsTreesBlock will use the v2.0 API in which the tree reader simply stores the tree string
+ as written in the file (so the client code has to check the translate table in order to interpret
+ the newick stream).
+ true by default.
+ */
void SetProcessAllTreesDuringParse(bool s)
{
processAllTreesDuringParse = s;
}
- /*Interprets the newick string as a tree (builds trees as in memory
- structures, which may reveal illegal newick strings that were not
- detected as illegal on the parse).
+ /* Interprets the newick string as a tree. This converts the newick string
+ into one in which 1-based numbers are used for taxon labels (raw newick
+ strings can contain numbers, taxon labels, tax set names or translate
+ table keys as taxon identifiers).
+
+ \raises NxsException
+ This function builds trees as in memory. It may reveal illegal newick strings that were not
+ detected as illegal on the parse, so NxsExceptions may be raised.
+
+ Explicitly calling this function is not necessary unless
+ processAllTreesDuringParse is false (because of a previous call to
+ SetProcessAllTreesDuringParse()).
*/
void ProcessTree(NxsFullTreeDescription &treeDesc) const;
+ /* Convenience function that calls ProcessTree() one each stored
+ NxsFullTreeDescription instance.
+
+ \raises NxsException
+ This function builds trees as in memory. It may reveal illegal newick strings that were not
+ detected as illegal on the parse, so NxsExceptions may be raised.
+
+ Explicitly calling this function is not necessary unless
+ processAllTreesDuringParse is false (because of a previous call to
+ SetProcessAllTreesDuringParse()).
+ */
void ProcessAllTrees() const
{
std::vector<NxsFullTreeDescription>::iterator trIt = trees.begin();
Modified: pkg/src/nxsstring.cpp
===================================================================
--- pkg/src/nxsstring.cpp 2010-04-01 20:49:07 UTC (rev 777)
+++ pkg/src/nxsstring.cpp 2010-04-01 22:19:27 UTC (rev 778)
@@ -17,12 +17,12 @@
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
#include <climits>
-#include "ncl/nxsdefs.h"
-#include "ncl/nxsstring.h"
#include <cstdarg>
#include <cmath>
#include <cfloat>
-#include <stdlib.h>
+#include <cstdlib>
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsstring.h"
using namespace std;
Modified: pkg/src/nxstoken.cpp
===================================================================
--- pkg/src/nxstoken.cpp 2010-04-01 20:49:07 UTC (rev 777)
+++ pkg/src/nxstoken.cpp 2010-04-01 22:19:27 UTC (rev 778)
@@ -17,6 +17,7 @@
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
#include <cstdlib>
+#include <cassert>
#include "ncl/nxstoken.h"
using namespace std;
@@ -698,24 +699,63 @@
// level by one, so that we know we can stop when level becomes 0.
//
int level = 1;
-
+ std::vector<NxsComment> prevEmbedded = embeddedComments;
+ embeddedComments.clear();
char ch;
+ ch = GetNextChar();
for(;;)
{
- ch = GetNextChar();
if (atEOF)
break;
- if (ch == ')')
- level--;
- else if (ch == '(')
- level++;
+ if (ch == '\'')
+ {
+ AppendToToken('\'');
+ GetQuoted();
+ AppendToToken('\'');
+ ch = saved;
+ saved = '\0';
+ if (atEOF)
+ {
+ if (ch == ')' && level == 1)
+ {
+ AppendToToken(')');
+ break;
+ }
+ else
+ {
+ NxsX_UnexpectedEOF x(*this);
+ x.msg << "(end-of-file inside () statement)";
+ }
+ }
+ continue;
+ }
+ if (ch == '[')
+ {
+ GetComment();
+ assert(embeddedComments.size() == 1);
+ AppendToToken('[');
+ const std::string & body = embeddedComments[0].GetText();
+ token.append(body.begin(), body.end());
+ AppendToToken(']');
+ embeddedComments.clear();
- AppendToToken(ch);
+ }
+ else
+ {
+ if (ch == ')')
+ level--;
+ else if (ch == '(')
+ level++;
+ AppendToToken(ch);
+ }
+
if (level == 0)
break;
+ ch = GetNextChar();
}
+ embeddedComments = prevEmbedded;
}
/*!
Modified: pkg/src/nxstreesblock.cpp
===================================================================
--- pkg/src/nxstreesblock.cpp 2010-04-01 20:49:07 UTC (rev 777)
+++ pkg/src/nxstreesblock.cpp 2010-04-01 22:19:27 UTC (rev 778)
@@ -512,11 +512,6 @@
return pathDistMat;
}
-/*!
- Fills `infoMap` with the key value pairs parsed from a comment that starts with
- &&NHX
- returns unparsed component
-*/
std::string parseNHXComment(const std::string comment, std::map<std::string, std::string> *infoMap)
{
if (comment.length() < 6 || comment[0] != '&' || comment[1] != '&' || comment[2] != 'N' ||comment[3] != 'H' || comment[4] != 'X' )
@@ -792,9 +787,11 @@
NCL_ASSERT(tb != NULL);
taxa = tb;
}
-/*!
- Returns the description of the tree stored at position `i' in `treeDescription'. Assumes that `i' will be in the
+/*! \returns the description of the tree stored at position `i' in `treeDescription'. Assumes that `i' will be in the
range [0..`ntrees').
+
+ in NCL version 2.1 and greater, this newick string is guaranteed to use taxon numbers (1-based)
+ in the newick string. This makes it easier to parse.
*/
NxsString NxsTreesBlock::GetTreeDescription(
unsigned i) /* the index of the tree for which the description is to be returned */
@@ -1651,10 +1648,12 @@
}
}
}
-/*!
- Returns the description of the tree with index `i' where i is in [0..ntrees).
+/*! Returns the description of the tree with index `i' where i is in [0..ntrees).
Node numbers will be translated to names in the resulting tree description.
Use GetTreeDescription if translation is not desired.
+
+ Note that if the Names are complex they may complicate simple parses of the tree
+ For example "A (" is a valid NEXUS taxon name (though one that I hope no one is crazy enough to use.
*/
NxsString NxsTreesBlock::GetTranslatedTreeDescription(
unsigned i) /* the index of the tree for which the description is to be returned */
More information about the Phylobase-commits
mailing list