[Phylobase-commits] r808 - in pkg/src: . ncl

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Fri Aug 6 21:03:54 CEST 2010


Author: francois
Date: 2010-08-06 21:03:53 +0200 (Fri, 06 Aug 2010)
New Revision: 808

Modified:
   pkg/src/GetNCL.cpp
   pkg/src/ncl/nxscharactersblock.h
   pkg/src/ncl/nxscxxdiscretematrix.h
   pkg/src/ncl/nxsexception.h
   pkg/src/ncl/nxsmultiformat.h
   pkg/src/ncl/nxsreader.h
   pkg/src/ncl/nxsstring.h
   pkg/src/ncl/nxstreesblock.h
   pkg/src/nxscharactersblock.cpp
   pkg/src/nxscxxdiscretematrix.cpp
   pkg/src/nxsexception.cpp
   pkg/src/nxsmultiformat.cpp
   pkg/src/nxsreader.cpp
   pkg/src/nxssetreader.cpp
   pkg/src/nxsstring.cpp
   pkg/src/nxstoken.cpp
   pkg/src/nxstreesblock.cpp
   pkg/src/nxsunalignedblock.cpp
Log:
update to 2.1.13, allow NCL interface to read multiple file formats, code to build edge matrix within the NCL interface (all code by Mark Holder)

Modified: pkg/src/GetNCL.cpp
===================================================================
--- pkg/src/GetNCL.cpp	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/GetNCL.cpp	2010-08-06 19:03:53 UTC (rev 808)
@@ -1,6 +1,8 @@
 #include <Rcpp.h>
 #include "ncl/nxsmultiformat.h"
 
+//#define NEW_TREE_RETURN_TYPE
+
 NxsString contData(NxsCharactersBlock& charBlock, NxsString& charString, 
 		   const int& eachChar, const int& nTax) {
     for (int taxon=0; taxon < nTax; ++taxon) {
@@ -80,7 +82,13 @@
     std::vector<std::string> charLabels;     //labels for the characters
     std::vector<std::string> stateLabels;    //labels for the states
     std::vector<int> nbStates;               //number of states for each character (for Standard datatype)
-    std::vector<std::string> trees;          //vector of Newick strings holding the names
+#   if defined (NEW_TREE_RETURN_TYPE)
+        std::vector<std::string> taxonLabelVector; //Labels of the tips of the current tree, in the order the tips are visited.
+        std::vector<unsigned> parentVector; //Index of the parent. 0 means no parent.
+        std::vector<double> branchLengthVector; 
+#   else
+        std::vector<std::string> trees;          //vector of Newick strings holding the names
+#   endif
     std::vector<std::string> treeNames;      //vector of tree names
     std::vector<std::string> taxaNames;      //vector of taxa names
     std::string errorMsg;                    //error message
@@ -103,9 +111,56 @@
     treesB->SetAllowImplicitNames(true);
     nexusReader.cullIdenticalTaxaBlocks(true);
     /* End of making NCL less strict */
-    
+
+    MultiFormatReader::DataFormatType fileFormat =  MultiFormatReader::NEXUS_FORMAT;
+    std::string fileFormatString = list["fileFormat"];
+    if (!fileFormatString.empty())
+        {
+        fileFormat = MultiFormatReader::formatNameToCode(fileFormatString);
+        if (fileFormat == MultiFormatReader::UNSUPPORTED_FORMAT)
+            {
+            std::string m = "Unsupported format \"";
+            m.append(fileFormatString);
+            m.append("\"");
+            Rcpp::List res = Rcpp::List::create(Rcpp::Named("ErrorMsg") = m);
+	        return res;
+            }
+        }
+/* 
+
+fileFormatString should be one of these: 	"nexus",
+								"dnafasta",
+								"aafasta",
+								"rnafasta",
+								"dnaphylip",
+								"rnaphylip",
+								"aaphylip",
+								"discretephylip",
+								"dnaphylipinterleaved",
+								"rnaphylipinterleaved",
+								"aaphylipinterleaved",
+								"discretephylipinterleaved",
+								"dnarelaxedphylip",
+								"rnarelaxedphylip",
+								"aarelaxedphylip",
+								"discreterelaxedphylip",
+								"dnarelaxedphylipinterleaved",
+								"rnarelaxedphylipinterleaved",
+								"aarelaxedphylipinterleaved",
+								"discreterelaxedphylipinterleaved",
+								"dnaaln",
+								"rnaaln",
+								"aaaln",
+								"phyliptree",
+								"relaxedphyliptree",
+								"nexml",
+								"dnafin",
+								"aafin",
+								"rnafin"
+							};
+							*/
     try {
-	nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), MultiFormatReader::NEXUS_FORMAT);  
+	nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), fileFormat);  
     }
     catch (NxsException &x) {
 	errorMsg = x.msg;
@@ -127,6 +182,11 @@
 	    taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
 	}
 
+#   if defined (NEW_TREE_RETURN_TYPE)
+        taxonLabelVector.reserve(nTax);
+        parentVector.reserve(2*nTax);
+        branchLengthVector.reserve(2*nTax);
+#   endif
 	/* Get trees */
 	if (returnTrees) {
 	    if (nTreesBlocks == 0) {
@@ -137,10 +197,76 @@
 		const unsigned nTrees = treeBlock->GetNumTrees();
 		if (nTrees > 0) {
 		    for (unsigned k = 0; k < nTrees; k++) {
-			NxsString ts = treeBlock->GetTreeDescription(k);
-			NxsString trNm = treeBlock->GetTreeName(k);
-			treeNames.push_back(trNm);
-			trees.push_back (ts);
+#           if defined(NEW_TREE_RETURN_TYPE)
+                taxonLabelVector.clear();
+                parentVector.clear();
+                branchLengthVector.clear();
+                
+                const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k); 
+                treeNames.push_back(ftd.GetName());
+                NxsSimpleTree simpleTree(ftd, -1, -1.0);
+                std::vector<const NxsSimpleNode *> ndVector =  simpleTree.GetPreorderTraversal();
+                unsigned internalNdIndex = nTax;
+                for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin(); ndIt != ndVector.end(); ++ndIt)
+                    {
+                    NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
+                    unsigned nodeIndex;
+                    if (nd->IsTip())
+                        {
+                        nodeIndex = nd->GetTaxonIndex();
+                        taxonLabelVector.push_back(taxaNames[nodeIndex]);
+                        std::cout << " leaf node # = " <<  nodeIndex << '\n';
+                        }
+                    else
+                        {
+                        nodeIndex = internalNdIndex++;
+                        nd->SetTaxonIndex(nodeIndex);
+                        std::cout << " internal node # = " << nd->GetTaxonIndex()  << '\n';
+                        }
+                    if (parentVector.size() < nodeIndex + 1)
+                        {
+                        parentVector.resize(nodeIndex + 1);
+                        }
+                    if (branchLengthVector.size() < nodeIndex + 1)
+                        {
+                        branchLengthVector.resize(nodeIndex + 1);
+                        }
+                    NxsSimpleEdge edge = nd->GetEdgeToParent();
+    
+                    NxsSimpleNode * par = 0L;
+                    par = (NxsSimpleNode *) edge.GetParent();
+                    if (par != 0L)
+                        {
+                        parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
+                        branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
+                        }
+                    else
+                        {
+                        parentVector[nodeIndex] = 0;
+                        branchLengthVector[nodeIndex] = -1.0;
+                        }
+                    }
+                std::cout << "Parents = [";
+                for (std::vector<unsigned>::const_iterator nIt = parentVector.begin(); nIt != parentVector.end(); ++nIt)
+                    {
+                    std::cout << *nIt << ", ";				
+                    }
+                std::cout << "]\nbranch lengths = [";
+                for (std::vector<double>::const_iterator nIt = branchLengthVector.begin(); nIt != branchLengthVector.end(); ++nIt)
+                    {
+                    std::cout << *nIt << ", ";				
+                    }
+                std::cout << "]\n";
+
+
+
+
+#           else
+    			NxsString trNm = treeBlock->GetTreeName(k);
+	    		treeNames.push_back(trNm);
+    			NxsString ts = treeBlock->GetTreeDescription(k);
+    			trees.push_back (ts);
+#           endif
 		    }
 		}
 		else {
@@ -235,8 +361,13 @@
     /* Prepare list to return */
     Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
 					Rcpp::Named("treeNames") = treeNames,
+#               if defined (NEW_TREE_RETURN_TYPE)
+                    Rcpp::Named("parentVector") = parentVector,
+                    Rcpp::Named("branchLengthVector") = branchLengthVector,
+#               else
 					Rcpp::Named("trees") = trees,
-					Rcpp::Named("dataTypes") = dataTypes,
+#               endif
+                    Rcpp::Named("dataTypes") = dataTypes,
 					Rcpp::Named("nbCharacters") = nbCharacters,
 					Rcpp::Named("charLabels") = charLabels,
 					Rcpp::Named("nbStates") = nbStates,

Modified: pkg/src/ncl/nxscharactersblock.h
===================================================================
--- pkg/src/ncl/nxscharactersblock.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxscharactersblock.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -487,13 +487,13 @@
 		enum DataTypesEnum /*! values used to represent different basic types of data stored in a CHARACTERS block, and used with the data member `datatype' */
 			{
 			standard = 1, /*! indicates `matrix' holds characters with arbitrarily-assigned, discrete states, such as discrete morphological data */
-			dna, /*! indicates `matrix' holds DNA sequences (states A, C, G, T) */
-			rna, /*! indicates `matrix' holds RNA sequences (states A, C, G, U) */
-			nucleotide, /*! indicates `matrix' holds nucleotide sequences */
-			protein, /*! indicates `matrix' holds amino acid sequences */
-			codon, /*! AAA=>0, AAC=1, AAAG=>2, AAU=>3, ACA=>4... UUU=>63 */
-			continuous, /*! indicates `matrix' holds continuous data */
-			mixed /*! indicates that there are multiple datatype mappers that must be used to decode the columns of the matrix (one mapper per column, but not one mapper per matrix). A MrBayes NEXUS feature*/
+			dna = 2, /*! indicates `matrix' holds DNA sequences (states A, C, G, T) */
+			rna = 3, /*! indicates `matrix' holds RNA sequences (states A, C, G, U) */
+			nucleotide = 4, /*! indicates `matrix' holds nucleotide sequences */
+			protein = 5, /*! indicates `matrix' holds amino acid sequences */
+			continuous = 6, /*! indicates `matrix' holds continuous data */
+			codon = 7, /*! AAA=>0, AAC=>1, AAG=>2, AAU=>3, ACA=>4... UUU=>63 */
+			mixed = 8 /*! indicates that there are multiple datatype mappers that must be used to decode the columns of the matrix (one mapper per column, but not one mapper per matrix). A MrBayes NEXUS feature*/
 			};
 		enum StatesFormatEnum
 			{
@@ -1002,7 +1002,16 @@
 			return SurrogateSwapEquivalentTaxaBlock(tb);
 		}
 
+		/*! Writes a range of character states as NEXUS to out.
 
+		*/
+		void WriteStatesForMatrixRow(std::ostream &out, /*!< ostream that will be written to.*/
+									unsigned taxon, /*!< index of the row (taxon) to be written.  Should be in [0,ntax). */
+									unsigned first_taxon, /*!< UINT_MAX to avoid using the matchchar in output. Otherwise the [0,ntax) index of the taxon that is printed first. */
+									unsigned begChar, /*!< first character index to write. Should be in [0, nchar). */
+									unsigned endChar) const; /*!< end of character range. This index is one greater than the last index to be printed. Should be in the range (begChar, nchar] */
+
+
 	protected:
 		// This function should not be called to remove characters, it is only used in the creation of new char blocks from existing blocks
 		void SetNChar(unsigned nc)
@@ -1016,7 +1025,6 @@
 			}
 
 		NxsString GetStateLabelImpl(unsigned i, unsigned j) const; /*v2.1to2.2 4 */
-		void WriteStatesForMatrixRow(std::ostream &out, unsigned taxon, unsigned first_taxon, unsigned begChar, unsigned endChar) const;
 
 		NxsDiscreteDatatypeMapper * GetMutableDatatypeMapperForChar(unsigned charIndex);
 		bool IsInSymbols(char ch) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
@@ -1130,8 +1138,8 @@
 
 
 
-		static void GenerateNxsExceptionMatrixReading(const char *, unsigned taxInd, unsigned charInd, NxsToken &, const NxsString &nameStr);
-		static void GenerateNxsExceptionMatrixReading(const std::string &s, unsigned taxInd, unsigned charInd, NxsToken & token, const NxsString &nameStr)
+		static void GenerateNxsExceptionMatrixReading(const char *, unsigned taxInd, unsigned charInd, NxsToken *, const NxsString &nameStr);
+		static void GenerateNxsExceptionMatrixReading(const std::string &s, unsigned taxInd, unsigned charInd, NxsToken * token, const NxsString &nameStr)
 			{
 			GenerateNxsExceptionMatrixReading(s.c_str(), taxInd, charInd, token, nameStr);
 			}
@@ -1233,7 +1241,7 @@
 			return PositionInSymbols(currChar);
 			}
 		std::string StateCodeToNexusString(NxsDiscreteStateCell, bool demandSymbols = true) const;
-		NxsDiscreteStateCell StateCodeForNexusChar(const char currChar, NxsToken & token,
+		NxsDiscreteStateCell StateCodeForNexusChar(const char currChar, NxsToken * token,
 								  unsigned taxInd, unsigned charInd,
 								  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr) const;
 		void WriteStartOfFormatCommand(std::ostream & out) const;
@@ -1286,7 +1294,7 @@
 		void DebugWriteMapperFields(std::ostream & out) const;
 	private:
 		NxsDiscreteStateCell AddStateSet(const std::set<NxsDiscreteStateCell> & states, char nexusSymbol, bool symRespectCase, bool isPolymorphic);
-		NxsDiscreteStateCell StateCodeForNexusMultiStateSet(const char nexusSymbol, const std::string & stateAsNexus, NxsToken & token,
+		NxsDiscreteStateCell StateCodeForNexusMultiStateSet(const char nexusSymbol, const std::string & stateAsNexus, NxsToken * token,
 								  unsigned taxInd, unsigned charInd,
 								  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr);
 		NxsDiscreteStateCell StateCodeForNexusPossibleMultiStateSet(const char nexusSymbol, const std::string & stateAsNexus, NxsToken & token,
@@ -1326,6 +1334,7 @@
 		mutable IsStateSubsetMatrix isStateSubsetMatrixGapsMissing;
 
 		friend class NxsCharactersBlock;
+		friend class MultiFormatReader;
 	};
 
 inline unsigned NxsDiscreteDatatypeMapper::GetNumStatesIncludingGap() const
@@ -1412,10 +1421,10 @@
 	{
 	const unsigned tlen = (unsigned) stateAsNexus.length();
 	if (tlen == 0)
-		GenerateNxsExceptionMatrixReading("Unexpected empty token encountered", taxInd, charInd, token, nameStr);
+		GenerateNxsExceptionMatrixReading("Unexpected empty token encountered", taxInd, charInd, &token, nameStr);
 	if (tlen == 1)
-		return StateCodeForNexusChar(stateAsNexus[0], token, taxInd, charInd, firstTaxonRow, nameStr);
-	return StateCodeForNexusMultiStateSet('\0', stateAsNexus, token, taxInd, charInd, firstTaxonRow, nameStr);
+		return StateCodeForNexusChar(stateAsNexus[0], &token, taxInd, charInd, firstTaxonRow, nameStr);
+	return StateCodeForNexusMultiStateSet('\0', stateAsNexus, &token, taxInd, charInd, firstTaxonRow, nameStr);
 	}
 
 /*! MrBayes introduced the datatype=restriction syntax for 01 symbols.

Modified: pkg/src/ncl/nxscxxdiscretematrix.h
===================================================================
--- pkg/src/ncl/nxscxxdiscretematrix.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxscxxdiscretematrix.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -41,9 +41,9 @@
 			Initialize(0L, false);
 			}
 		NxsCXXDiscreteMatrix(const NxsCDiscreteMatrix & );
-		NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool convertGapsToMissing);
+		NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L);
 
-		void Initialize(const NxsCharactersBlock * cb, bool convertGapsToMissing);
+		void Initialize(const NxsCharactersBlock * cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L);
 
 		const NxsCDiscreteMatrix & getConstNativeC() const
 			{

Modified: pkg/src/ncl/nxsexception.h
===================================================================
--- pkg/src/ncl/nxsexception.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxsexception.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -48,6 +48,10 @@
 			return msg.empty() ? "Unknown Nexus Exception" : msg.c_str();
 			}
 		const char * nxs_what () const;
+		void addPositionInfo(const NxsToken & t);
+		void addPositionInfo(const ProcessedNxsToken & t);
+		void addPositionInfo(const NxsTokenPosInfo & t);
+		void addPositionInfo(file_pos fp, long fl, long fc);
 	};
 
 typedef NxsException XNexus;

Modified: pkg/src/ncl/nxsmultiformat.h
===================================================================
--- pkg/src/ncl/nxsmultiformat.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxsmultiformat.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -71,6 +71,9 @@
 				PHYLIP_TREE_FORMAT,
 				RELAXED_PHYLIP_TREE_FORMAT,
 				NEXML_FORMAT,
+				FIN_DNA_FORMAT,
+				FIN_AA_FORMAT,
+				FIN_RNA_FORMAT,
 				UNSUPPORTED_FORMAT // keep this last
 			};
 
@@ -132,6 +135,7 @@
 		void moveDataToMatrix(std::list<NxsDiscreteStateRow> & matList,  NxsDiscreteStateMatrix &mat);
 		void moveDataToUnalignedBlock(const std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, NxsUnalignedBlock * uB);
 		bool readFastaSequences(FileToCharBuffer & ftcb, const NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, size_t & longest);
+		bool readFinSequences(FileToCharBuffer & ftcb, NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, size_t & longest);
 		void readPhylipFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt, bool relaxedNames, bool interleaved);
 		void readPhylipTreeFile(std::istream & inf, bool relaxedNames);
 		void readAlnFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt);
@@ -142,6 +146,11 @@
 		void readInterleavedPhylipData(FileToCharBuffer & ftcb, const NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, const unsigned n_taxa, const unsigned n_char, bool relaxedNames);
 		std::string readPhylipName(FileToCharBuffer & ftcb, unsigned i, bool relaxedNames);
 
+		/*! A convenience function for reading .fin files
+			\arg inf the input stream to read
+			\arg dt a facet of  NxsCharactersBlock::DataTypesEnum that indicates the expected datatype
+		*/
+		void readFinFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt);
 
 };
 

Modified: pkg/src/ncl/nxsreader.h
===================================================================
--- pkg/src/ncl/nxsreader.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxsreader.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -130,7 +130,7 @@
 			};
 		/** Enum different levels of warnings.  See NxsReader::SetWarningOutputLevel*/
 		enum NxsWarnLevel
-			{
+			{ //TODO: we need another warning level for status messages.
 			UNCOMMON_SYNTAX_WARNING = 0,  /**< Legal but uncommon syntax that could indicate a typo */
 			SKIPPING_CONTENT_WARNING = 1, /**< Content is being skipped by NCL */
 			OVERWRITING_CONTENT_WARNING = 2, /**< New content is replacing old information (eg. CharSets with the same name as a previously defined CharSet)*/

Modified: pkg/src/ncl/nxsstring.h
===================================================================
--- pkg/src/ncl/nxsstring.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxsstring.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -28,6 +28,16 @@
 #include <string>
 #include "ncl/nxsdefs.h"
 
+
+
+
+
+// Define HAVE_NCL_NXSSTRING_ENDL if your code needs it
+#if ! defined (HAVE_NCL_NXSSTRING_ENDL)
+#   define HIDE_NCL_NXSSTRING_ENDL
+#else
+#   warning "use of endl with NxsString instances has been deprecated"
+#endif
 class IndexSet;
 
 /*!
@@ -130,7 +140,9 @@
 		NxsString			&operator<<(char c);
 		NxsString			&operator<<(const std::string &s);
 		NxsString			&operator<<(const IndexSet &s);
-		//NxsString			&operator<<(NxsString &(*funcPtr)(NxsString	&));
+#       if ! defined(HIDE_NCL_NXSSTRING_ENDL)
+            NxsString			&operator<<(NxsString &(*funcPtr)(NxsString	&));
+#       endif
 
 		// Functions that should be in base class string but aren't
 		void				clear();
@@ -536,17 +548,18 @@
 		}
 	return false;
 	}
-#if 0
-/*!
+
+# if ! defined(HIDE_NCL_NXSSTRING_ENDL)
+
 	Allows functions that take and return references to NxsString strings to be placed in a series of << operators.
 	See the NxsString endl function.
-*/
 inline NxsString &NxsString::operator<<(
   NxsString &(*funcPtr)(NxsString &))	/* pointer to a function returning a reference to a NxsString */
 	{
 	return funcPtr(*this);
 	}
 #endif
+
 /*!
 	Returns true if `c' is any Nexus punctuation character:
 >
@@ -648,6 +661,8 @@
 	return (*this += s);
 	}
 
+
+
 /*!
 	Returns string as a Pascal string (array of unsigned characters with the length in the first byte).
 */
@@ -660,6 +675,8 @@
 	return buffer;
 	}
 
+
+
 // ############################# start of standalone functions ##########################
 
 # if ! defined(HIDE_NCL_NXSSTRING_ENDL)

Modified: pkg/src/ncl/nxstreesblock.h
===================================================================
--- pkg/src/ncl/nxstreesblock.h	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/ncl/nxstreesblock.h	2010-08-06 19:03:53 UTC (rev 808)
@@ -180,6 +180,10 @@
 			return edgeToPar;
 			}
 
+		bool IsTip() const
+			{
+			return (lChild == 0L);
+			}
 		NxsSimpleNode *GetFirstChild() const
 			{
 			return lChild;
@@ -219,6 +223,12 @@
 			return taxIndex;
 			}
 
+		// present for every leaf. UINT_MAX for internals labeled with taxlabels
+		void SetTaxonIndex(unsigned i)
+			{
+			taxIndex = i;
+			}
+
 		// non-empty only for internals that are labelled with names that are NOT taxLabels
 		std::string GetName() const
 			{
@@ -728,6 +738,7 @@
 			allowImplicitNames = other.allowImplicitNames;
 			processAllTreesDuringParse = other.processAllTreesDuringParse;
 			writeFromNodeEdgeDataStructure = other.writeFromNodeEdgeDataStructure;
+			validateInternalNodeLabels = other.validateInternalNodeLabels;
 			constructingTaxaBlock = other.constructingTaxaBlock;
 			newtaxa = other.newtaxa;
 			trees = other.trees;
@@ -746,8 +757,8 @@
 			*a = *this;
 			return a;
 			}
-		static void ProcessTokenVecIntoTree(const ProcessedNxsCommand & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false);
-		static void ProcessTokenStreamIntoTree(NxsToken & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false);
+		static void ProcessTokenVecIntoTree(const ProcessedNxsCommand & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false, const bool validateInternalNodeLabels=true);
+		static void ProcessTokenStreamIntoTree(NxsToken & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false, const bool validateInternalNodeLabels=true);
 
 		void SetWriteFromNodeEdgeDataStructure(bool v)
 			{
@@ -804,6 +815,29 @@
 		{
 			this->writeTranslateTable = wtt;
 		}
+		/*! Sets the boolean field that determines whether or not the trees
+			block will validate internal node labels, treating them
+			as taxon labels during the parse. In this case the labels will
+			be checked against the taxa block (true is the default).
+
+			This can cause problems if the internal node names are integers that
+			are not intended to be taxon labels (eg. support statements for the
+			subtending branches).
+		*/
+		void setValidateInternalNodeLabels(bool x) {
+			this->validateInternalNodeLabels = x; /** if true then labels that occur for internal nodes will be validated via the taxa block (true is the default).  This can cause problems if the internal node names are integers that are not intended to be taxon labels. */
+		}
+		/*! \returns true if the block will validate internal node labels, treating them
+			as taxon labels during the parse. In this case the labels will
+			be checked against the taxa block (true is the default).
+
+			This can cause problems if the internal node names are integers that
+			are not intended to be taxon labels (eg. support statements for the
+			subtending branches).
+		*/
+		bool getValidateInternalNodeLabels() const {
+			return this->validateInternalNodeLabels;
+		}
 	protected :
 		void ReadTreeFromOpenParensToken(NxsFullTreeDescription &td, NxsToken & token);
 
@@ -815,6 +849,7 @@
 		bool processAllTreesDuringParse; /** true by default, false speeds processing but disables detection of errors*/
 		bool constructingTaxaBlock; /** true if new names are being tolerated */
 		bool writeFromNodeEdgeDataStructure; /**this will probably only ever be set to true in testing code. If true the WriteTrees function will convert each tree to NxsSimpleTree object to write the newick*/
+		bool validateInternalNodeLabels; /** if true then labels that occur for internal nodes will be validated via the taxa block (true is the default).  This can cause problems if the internal node names are integers that are not intended to be taxon labels. */
 
 		mutable std::vector<NxsFullTreeDescription> trees;
 		mutable std::map<std::string, unsigned> capNameToInd;

Modified: pkg/src/nxscharactersblock.cpp
===================================================================
--- pkg/src/nxscharactersblock.cpp	2010-08-04 15:34:33 UTC (rev 807)
+++ pkg/src/nxscharactersblock.cpp	2010-08-06 19:03:53 UTC (rev 808)
@@ -29,6 +29,7 @@
 #include "ncl/nxsassumptionsblock.h"
 #include "ncl/nxssetreader.h"
 #include <algorithm>
+#include <iterator>
 using namespace std;
 
 CodonRecodingStruct getCodonRecodingStruct(NxsGeneticCodesEnum gCode);
@@ -206,87 +207,87 @@
 	std::vector<NxsDiscreteStateCell> v;
 	if(gCode == NXS_GCODE_STANDARD) {
 		const NxsDiscreteStateCell trnxs_gcode_standard[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
-		std::copy(trnxs_gcode_standard, trnxs_gcode_standard + 64, back_inserter(v));
+		std::copy(trnxs_gcode_standard, trnxs_gcode_standard + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_VERT_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_vert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, -1, 46, -1, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59};
-		std::copy(trnxs_gcode_vert_mito, trnxs_gcode_vert_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_vert_mito, trnxs_gcode_vert_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_YEAST_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_yeast_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_yeast_mito, trnxs_gcode_yeast_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_yeast_mito, trnxs_gcode_yeast_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_MOLD_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_mold_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_mold_mito, trnxs_gcode_mold_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_mold_mito, trnxs_gcode_mold_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_INVERT_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_invert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_invert_mito, trnxs_gcode_invert_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_invert_mito, trnxs_gcode_invert_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_CILIATE) {
 		const NxsDiscreteStateCell trnxs_gcode_ciliate[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, -1, 56, 57, 58, 59, 60, 61, 62};
-		std::copy(trnxs_gcode_ciliate, trnxs_gcode_ciliate + 64, back_inserter(v));
+		std::copy(trnxs_gcode_ciliate, trnxs_gcode_ciliate + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_ECHINO_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_echino_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_echino_mito, trnxs_gcode_echino_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_echino_mito, trnxs_gcode_echino_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_EUPLOTID) {
 		const NxsDiscreteStateCell trnxs_gcode_euplotid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_euplotid, trnxs_gcode_euplotid + 64, back_inserter(v));
+		std::copy(trnxs_gcode_euplotid, trnxs_gcode_euplotid + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_PLANT_PLASTID) {
 		const NxsDiscreteStateCell trnxs_gcode_plant_plastid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
-		std::copy(trnxs_gcode_plant_plastid, trnxs_gcode_plant_plastid + 64, back_inserter(v));
+		std::copy(trnxs_gcode_plant_plastid, trnxs_gcode_plant_plastid + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_ALT_YEAST) {
 		const NxsDiscreteStateCell trnxs_gcode_alt_yeast[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, -1, 54, 55, 56, 57, 58, 59, 60};
-		std::copy(trnxs_gcode_alt_yeast, trnxs_gcode_alt_yeast + 64, back_inserter(v));
+		std::copy(trnxs_gcode_alt_yeast, trnxs_gcode_alt_yeast + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_ASCIDIAN_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_ascidian_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_ascidian_mito, trnxs_gcode_ascidian_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_ascidian_mito, trnxs_gcode_ascidian_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_ALT_FLATWORM_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_alt_flatworm_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, -1, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62};
-		std::copy(trnxs_gcode_alt_flatworm_mito, trnxs_gcode_alt_flatworm_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_alt_flatworm_mito, trnxs_gcode_alt_flatworm_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_BLEPHARISMA_MACRO) {
 		const NxsDiscreteStateCell trnxs_gcode_blepharisma_macro[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, 49, 50, 51, 52, 53, 54, -1, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_blepharisma_macro, trnxs_gcode_blepharisma_macro + 64, back_inserter(v));
+		std::copy(trnxs_gcode_blepharisma_macro, trnxs_gcode_blepharisma_macro + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_CHLOROPHYCEAN_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_chlorophycean_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, 49, 50, 51, 52, 53, 54, -1, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_chlorophycean_mito, trnxs_gcode_chlorophycean_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_chlorophycean_mito, trnxs_gcode_chlorophycean_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_TREMATODE_MITO) {
 		const NxsDiscreteStateCell trnxs_gcode_trematode_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -1, 48, -1, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61};
-		std::copy(trnxs_gcode_trematode_mito, trnxs_gcode_trematode_mito + 64, back_inserter(v));
+		std::copy(trnxs_gcode_trematode_mito, trnxs_gcode_trematode_mito + 64, std::back_inserter(v));
 		return v;
 	}
 	if(gCode == NXS_GCODE_SCENEDESMUS_MITO) {
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/phylobase -r 808


More information about the Phylobase-commits mailing list