[Phylobase-commits] r766 - libncl/current/ncl

Mon Mar 29 20:44:30 CEST 2010

Author: francois
Date: 2010-03-29 20:44:30 +0200 (Mon, 29 Mar 2010)
New Revision: 766

Modified:
   libncl/current/ncl/nxscharactersblock.cpp
   libncl/current/ncl/nxscharactersblock.h
   libncl/current/ncl/nxsemptyblock.cpp
   libncl/current/ncl/nxspublicblocks.cpp
   libncl/current/ncl/nxsreader.cpp
   libncl/current/ncl/nxsreader.h
Log:
changes to vendor branch to reflect phylobase customizations to ncl

Modified: libncl/current/ncl/nxscharactersblock.cpp
===================================================================

--- libncl/current/ncl/nxscharactersblock.cpp	2010-03-25 21:33:47 UTC (rev 765)
+++ libncl/current/ncl/nxscharactersblock.cpp	2010-03-29 18:44:30 UTC (rev 766)
@@ -13,7 +13,7 @@
 //	GNU General Public License for more details.
 //
 //	You should have received a copy of the GNU General Public License
-//	along with NCL; if not, write to the Free Software Foundation, Inc., 
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
 //	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 //
 /**
@@ -82,9 +82,9 @@
 
 
 /*******************************************************************************
- * deletes "fundamental" states (rather than gaps or ambiguity codes) from a 
+ * deletes "fundamental" states (rather than gaps or ambiguity codes) from a
  * datatype mapper.
- * Equates (default or user-defined) are not supported in the current version of the function 
+ * Equates (default or user-defined) are not supported in the current version of the function
  *	(so this will only work on standard or codons data).
  */
 
@@ -112,9 +112,9 @@
 	const unsigned oldNStates = nStates;
 	std::vector<NxsDiscreteStateSetInfo> oldStateSetsVec = this->stateSetsVec;
 	symbols = nsym;
-	
+
 	this->RefreshMappings(0L);
-	
+
 	for (unsigned i = oldNStates - sclOffset; i < oldStateSetsVec.size(); ++i)
 		{
 		const NxsDiscreteStateSetInfo & ssi = oldStateSetsVec[i];
@@ -415,7 +415,7 @@
 		throw NxsException("Characters block must be of the type codons when RemoveStopCodons is called");
 	if (mapper->geneticCode != NXS_GCODE_NO_CODE)
 		throw NxsException("Characters block must be an uncompressed codons type when RemoveStopCodons is called");
-	
+
 	const std::vector<int> v = getToCodonRecodingMapper(gCode);
 	CodonRecodingStruct c = getCodonRecodingStruct(gCode);
 	const unsigned nRS = c.compressedCodonIndToAllCodonsInd.size();
@@ -480,7 +480,7 @@
 					if (c == NXS_MISSING_CODE)
 						nAmbig++;
 					}
-				else 
+				else
 					{
 					if (c != NXS_GAP_STATE_CODE || treatGapsAsMissing)
 						nAmbig++;
@@ -502,7 +502,7 @@
 					if (sc == NXS_MISSING_CODE)
 						nAmbig++;
 					}
-				else 
+				else
 					{
 					if (sc != NXS_GAP_STATE_CODE || treatGapsAsMissing)
 						nAmbig++;
@@ -514,9 +514,9 @@
 }
 
 bool NxsCharactersBlock::FirstTaxonStatesAreSubsetOfSecond(
-  const unsigned firstTaxonInd, 
-  const unsigned secondTaxonInd, 
-  const NxsUnsignedSet * charIndices, 
+  const unsigned firstTaxonInd,
+  const unsigned secondTaxonInd,
+  const NxsUnsignedSet * charIndices,
   const bool treatAmbigAsMissing,
   const bool treatGapAsMissing) const
 {
@@ -565,9 +565,9 @@
 }
 
 std::pair<unsigned, unsigned> NxsCharactersBlock::GetPairwiseDist(
-  const unsigned firstTaxonInd, 
-  const unsigned secondTaxonInd, 
-  const NxsUnsignedSet * charIndices, 
+  const unsigned firstTaxonInd,
+  const unsigned secondTaxonInd,
+  const NxsUnsignedSet * charIndices,
   const bool treatAmbigAsMissing,
   const bool treatGapAsMissing) const
 {
@@ -663,11 +663,11 @@
 void NxsDiscreteDatatypeMapper::BuildStateIntersectionMatrix() const
 {
 	const std::set<int> emptySet;
-	
+
 	stateIntersectionMatrix.clear();
-	
+
 	const unsigned nsPlus = stateSetsVec.size();
-	const unsigned offset = (unsigned)(sclOffset + 2); 
+	const unsigned offset = (unsigned)(sclOffset + 2);
 	StateIntersectionRow emptyRow(nsPlus, emptySet);
 	stateIntersectionMatrix.assign(nsPlus, emptyRow);
 	for (unsigned i = offset; i < nsPlus; ++i)
@@ -685,7 +685,7 @@
 				stateIntersectionMatrix[j - NXS_GAP_STATE_CODE][i - NXS_GAP_STATE_CODE] = stateIntersectionMatrix[i - NXS_GAP_STATE_CODE][j - NXS_GAP_STATE_CODE];
 			}
 		}
-	
+
 	std::set<int> tmpSet;
 	NCL_ASSERT(1 == NXS_MISSING_CODE - NXS_GAP_STATE_CODE);
 	tmpSet.insert(NXS_GAP_STATE_CODE);
@@ -926,7 +926,7 @@
 	aaInd[62] = 9;
 	aaInd[63] = 4;
 	if (codeIndex == NXS_GCODE_VERT_MITO) {
-		aaInd[8] = 20;
+	        aaInd[8] = 20;
 		aaInd[10] = 20;
 		aaInd[12] = 10;
 		aaInd[56] = 18;
@@ -1005,17 +1005,17 @@
 	const NxsUnsignedSet * thirdPos = 0L;
 	for (NxsPartition::const_iterator pIt = codonPos.begin(); pIt != codonPos.end(); ++pIt)
 		{
-		if (pIt->first == "1") 
+		if (pIt->first == "1")
 			{
 			NCL_ASSERT(firstPos == 0L);
 			firstPos = &(pIt->second);
 			}
-		else if (pIt->first == "2") 
+		else if (pIt->first == "2")
 			{
 			NCL_ASSERT(secondPos == 0L);
 			secondPos = &(pIt->second);
 			}
-		else if (pIt->first == "3") 
+		else if (pIt->first == "3")
 			{
 			NCL_ASSERT(thirdPos == 0L);
 			thirdPos = &(pIt->second);
@@ -1040,7 +1040,7 @@
 /* allocates a new charaters block with amino acids for the codons in the characters block (which should have datatype = codon).
 */
 NxsCharactersBlock * NxsCharactersBlock::NewProteinCharactersBlock(
-  const NxsCharactersBlock * codonBlock, 
+  const NxsCharactersBlock * codonBlock,
   bool mapPartialAmbigToUnknown,
   bool gapToUnknown,
   NxsGeneticCodesEnum codeIndex)
@@ -1054,7 +1054,7 @@
 
 */
 NxsCharactersBlock * NxsCharactersBlock::NewProteinCharactersBlock(
-  const NxsCharactersBlock * codonBlock, 
+  const NxsCharactersBlock * codonBlock,
   bool mapPartialAmbigToUnknown,
   bool gapToUnknown,
   const std::vector<int> & aaIndices) /** the index of the amino acid symbols for the codon (where the order of codons is alphabetical: AAA, AAC, AAG, AAT, ACA, ...TTT **/
@@ -1062,9 +1062,9 @@
 	if (!codonBlock)
 		return NULL;
 	if (codonBlock->GetDataType() != NxsCharactersBlock::codon)
-		throw NxsException("NewProteinCharactersBlock must be called with a block of codon datatype"); 
+		throw NxsException("NewProteinCharactersBlock must be called with a block of codon datatype");
 	const unsigned nc = codonBlock->GetNCharTotal();
-	
+
 	/* create a new characters block with the same TAXA, but no ASSUMPTIONS block */
 	NxsTaxaBlockAPI * taxa = codonBlock->GetTaxaBlockPtr(NULL);
 	NxsCharactersBlock * aaBlock = new NxsCharactersBlock(taxa, NULL);
@@ -1115,26 +1115,26 @@
 		}
 	else
 		{
-		throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code."); 
+		throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
 		}
 	return aaBlock;
 }
 
 
-/* allocates a new charaters block with all of the active characters in `charBlock` 
+/* allocates a new charaters block with all of the active characters in `charBlock`
 	but with a 64-state codon datatype. The order of codons is:
 	 0   1   2   3   4   5  ... 63
 	AAA AAC AAG AAT ACA ACC ... TTT
 	The caller is responsible for deleting the new NxsCharactersBlock object
-	
-	If honorCharActive is true, then inactive characters are simply skipped in the reading 
+
+	If honorCharActive is true, then inactive characters are simply skipped in the reading
 	frame (treated as if they were introns) rather than being treated as missing.
 */
 NxsCharactersBlock * NxsCharactersBlock::NewCodonsCharactersBlock(
-  const NxsCharactersBlock * dnaBlock, 
-  bool mapPartialAmbigToUnknown, 
+  const NxsCharactersBlock * dnaBlock,
+  bool mapPartialAmbigToUnknown,
   bool gapsToUnknown,
-  bool honorCharActive, 
+  bool honorCharActive,
   const std::list<int> * charIndices,
   NxsCharactersBlock ** spareNucs)
 {
@@ -1147,20 +1147,20 @@
 	const std::list<int> * sourceChars;
 	std::list<int> culled;
 	NxsUnsignedSet untranslated;
-	
-		
-	
+
+
+
 	unsigned nc = dnaBlock->GetNCharTotal();
 
 	if (charIndices == NULL)
 		{
-		for (unsigned i = 0; i < nc; ++i) 
+		for (unsigned i = 0; i < nc; ++i)
 			charInds.push_back((int)i);
 		sourceChars = &charInds;
 		}
 	else
 		sourceChars = charIndices;
-	
+
 	if (honorCharActive)
 		{
 		for (std::list<int>::const_iterator cIt = sourceChars->begin(); cIt != sourceChars->end(); ++cIt)
@@ -1184,12 +1184,12 @@
 		for (unsigned c = 0; c < nc; ++c)
 			untranslated.insert(c);
 		}
-	
+
 	const unsigned nnucs = (const unsigned)sourceChars->size();
 	if (nnucs % 3)
-		throw NxsException("Cannot create a codons block with a number of characters that is not a multiple of 3"); 
+		throw NxsException("Cannot create a codons block with a number of characters that is not a multiple of 3");
 	const unsigned ncodons = nnucs/3;
-	
+
 	/* create a new characters block with the same TAXA, but no ASSUMPTIONS block */
 	NxsTaxaBlockAPI * taxa = dnaBlock->GetTaxaBlockPtr(NULL);
 	NxsCharactersBlock * codonsBlock = new NxsCharactersBlock(taxa, NULL);
@@ -1201,9 +1201,9 @@
 	codonsBlock->symbols.assign(64, '\0');
 	codonsBlock->tokens = false;
 	const char * gsl[] = {"AAA",  "AAC",  "AAG",  "AAT",  "ACA",  "ACC",  "ACG",  "ACT",  "AGA",  "AGC",  "AGG",  "AGT",  "ATA",  "ATC",  "ATG",  "ATT",  "CAA",  "CAC",  "CAG",  "CAT",  "CCA",  "CCC",  "CCG",  "CCT",  "CGA",  "CGC",  "CGG",  "CGT",  "CTA",  "CTC",  "CTG",  "CTT",  "GAA",  "GAC",  "GAG",  "GAT",  "GCA",  "GCC",  "GCG",  "GCT",  "GGA",  "GGC",  "GGG",  "GGT",  "GTA",  "GTC",  "GTG",  "GTT",  "TAA",  "TAC",  "TAG",  "TAT",  "TCA",  "TCC",  "TCG",  "TCT",  "TGA",  "TGC",  "TGG",  "TGT",  "TTA",  "TTC",  "TTG",  "TTT"};
-	
+
 	codonsBlock->globalStateLabels.reserve(64);
-	for (unsigned i = 0 ; i < 64; ++i)	
+	for (unsigned i = 0 ; i < 64; ++i)
 		codonsBlock->globalStateLabels.push_back(NxsString(gsl[i]));
 
 	/* equivalent of HandleFormat */
@@ -1268,12 +1268,12 @@
 		}
 	else
 		{
-		throw NxsException("NewCodonsCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code."); 
+		throw NxsException("NewCodonsCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
 		}
 	if (!untranslated.empty())
 		{
 		const unsigned nunt = (const unsigned)untranslated.size();
-			
+
 		NxsCharactersBlock * untBlock = new NxsCharactersBlock(taxa, NULL);
 		untBlock->SetNChar(nunt);
 		untBlock->SetNTax(ntax);
@@ -1284,8 +1284,8 @@
 		untBlock->originalDatatype = dnaBlock->originalDatatype;
 		untBlock->ResetSymbols();
 		untBlock->tokens = false;
-	
-	
+
+
 		untBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
 		untBlock->datatypeReadFromFormat = false;
 		untBlock->statesFormat = STATES_PRESENT;
@@ -1293,8 +1293,8 @@
 		untBlock->supportMixedDatatype = false;
 		untBlock->convertAugmentedToMixed = false;
 		untBlock->writeInterleaveLen = INT_MAX;
-	
-	
+
+
 		NxsDiscreteStateRow umatRow(nunt, 0);
 		untBlock->discreteMatrix.assign(ntax, umatRow);
 		if (mapPartialAmbigToUnknown && (gapsToUnknown || dnaBlock->GetGapSymbol() != '\0'))
@@ -1313,7 +1313,7 @@
 			}
 		else
 			{
-			throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code."); 
+			throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
 			}
 		*spareNucs = untBlock;
 		}
@@ -1325,7 +1325,7 @@
 
 std::vector<double>  NxsTransformationManager::GetDoubleWeights(const std::string &set_name) const
 	{
-	std::vector<double> r; 
+	std::vector<double> r;
 	const ListOfDblWeights *p = 0L;
 	std::map<std::string, ListOfDblWeights>::const_iterator dIt = dblWtSets.begin();
 	for (; dIt != dblWtSets.end(); ++dIt)
@@ -1359,7 +1359,7 @@
 
 std::vector<int> NxsTransformationManager::GetIntWeights(const std::string &set_name) const
 	{
-	std::vector<int> r; 
+	std::vector<int> r;
 	const ListOfIntWeights *p = 0L;
 	std::map<std::string, ListOfIntWeights>::const_iterator dIt = intWtSets.begin();
 	for (; dIt != intWtSets.end(); ++dIt)
@@ -1392,12 +1392,12 @@
 	}
 
 NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper(
-	NxsCharactersBlock::DataTypesEnum datatypeE, 
+	NxsCharactersBlock::DataTypesEnum datatypeE,
 	const std::string & symbolsStr,
-	char missingChar, 
-	char gap, 
+	char missingChar,
+	char gap,
 	char matchingChar,
-	bool respectingCase, 
+	bool respectingCase,
 	const std::map<char, NxsString> & moreEquates)
 	:geneticCode(NXS_GCODE_NO_CODE),
 	cLookup(NULL),
@@ -1456,7 +1456,7 @@
 	}
 
 /*----------------------------------------------------------------------------------------------------------------------
-|	Takes the parsed settings that pertain to the datatype and converts them into a set of NxsDiscreteDatatypeMapper 
+|	Takes the parsed settings that pertain to the datatype and converts them into a set of NxsDiscreteDatatypeMapper
 |	objects to be used to encode the characters.
 */
 void NxsCharactersBlock::CreateDatatypeMapperObjects(const NxsPartition & dtParts, const std::vector<DataTypesEnum> & dtcodes)
@@ -1465,7 +1465,7 @@
 	if (datatype != mixed)
 		{
 		NxsDiscreteDatatypeMapper d(datatype, symbols, missing, gap, matchchar, respectingCase, userEquates);
-		datatype = d.GetDatatype();	
+		datatype = d.GetDatatype();
 		DatatypeMapperAndIndexSet das(d, NxsUnsignedSet());
 		datatypeMapperVec.clear();
 		datatypeMapperVec.push_back(das);
@@ -1497,11 +1497,11 @@
 
 /*----------------------------------------------------------------------------------------------------------------------
 |	If you say FORMAT DATATYPE=DNA SYMBOLS="01" ; then the valid symbols become "ACGT01"
-|	
+|
 |	AugmentedSymbolsToMixed tries to split such a matrix into a datatype=mixed(dna:charset_dna,standard:charset_std)
 |	by inferring the charpartition (charset_dna,charset_std).  It does this by using GetNamedStateSetOfColumn to
 |	detect which states were listed in a column.
-|	
+|
 |	Returns true if the translation to mixed was performed.  This will only occur if GetOriginalDataType() == GetDataType()
 | 	because this is the symptom that there was symbol augmentation of a built in datatype.
 |
@@ -1510,13 +1510,13 @@
 |	Matrix 1:                     Matrix 2:
 |	s   ACGT10{ACGT01-}           s   ACGT10?
 |	t   ACGT100                   t   ACGT100
-|		The last character of the first taxon would be parsed as having the potential to have states {ACGT01-}. 
+|		The last character of the first taxon would be parsed as having the potential to have states {ACGT01-}.
 |		But when interperted with GetNamedStateSetOfColumn, Matrix 2 can be "explained" by four DNA columns, and three
 |		Standard (01) columns.  Matrix 1, on the other hand would be found to have four DNA columns, and two
 |		Standard (01) columns, and one standard ("ACGT01") column.
-|	Note: this function ignores the gap mode setting and treats gaps as newstates for the purposes of 
+|	Note: this function ignores the gap mode setting and treats gaps as newstates for the purposes of
 |		the conversion.
-| 
+|
 |	Temporary:  Will return false if userDefinedEquatesBeforeConversion is true
 */
 bool NxsCharactersBlock::AugmentedSymbolsToMixed()
@@ -1536,7 +1536,7 @@
 		if (!isdigit(*a))
 			return false;
 		}
-	
+
 	NxsUnsignedSet stdTypeChars;
 	NxsUnsignedSet origTypeChars;
 	std::set<int> torigStateInds;
@@ -1581,7 +1581,7 @@
 		}
 	this->gapMode = cached_gap_mode;
 	/* If we get here then the mapping to mixed type will succeed */
-	
+
 	/* copy the incoming matrix and mapper */
 	VecDatatypeMapperAndIndexSet mdm = datatypeMapperVec;
 	const NxsDiscreteDatatypeMapper & oldMapper = mdm[0].first;
@@ -1595,8 +1595,8 @@
 	datatypeMapperVec.push_back(DatatypeMapperAndIndexSet(o, origTypeChars));
 	NxsDiscreteDatatypeMapper s(NxsCharactersBlock::standard, augmentSymbols, missing, gap, matchchar, respectingCase, noEquates);
 	datatypeMapperVec.push_back(DatatypeMapperAndIndexSet(s, stdTypeChars));
-	
-	
+
+
 	NxsDiscreteDatatypeMapper & newOrigTMapper = datatypeMapperVec[0].first;
 	NxsDiscreteDatatypeMapper & newStdTMapper = datatypeMapperVec[1].first;
 
@@ -1611,7 +1611,7 @@
 		for (NxsDiscreteStateRow::iterator cell = row.begin(); cell != row.end(); ++cell, ++column)
 			{
 			const int initStateCode = *cell;
-			if (initStateCode  >= 0 ) //gap and missing codes do not need translation 
+			if (initStateCode  >= 0 ) //gap and missing codes do not need translation
 				{
 				std::map<int, int>::const_iterator otnIt = oldToNewStateCode.find(initStateCode);
 				if (otnIt == oldToNewStateCode.end())
@@ -1623,7 +1623,7 @@
 					const bool isPoly =  oldMapper.IsPolymorphic(initStateCode);
 					int newStateCode ;
 					if (isOrigT)
-						{ //old symbol indices will still be the new symbol indices 
+						{ //old symbol indices will still be the new symbol indices
 						newStateCode = newOrigTMapper.StateCodeForStateSet(oldSymbols, isPoly, true, oldNexusChar);
 						newOrigTMapper.StateCodeToNexusString(newStateCode);
 						}
@@ -1638,7 +1638,7 @@
 								{
 								NCL_ASSERT(*sIt < 0);
 								transSymbols.insert(*sIt);
-								}	
+								}
 							}
 						newStateCode = newStdTMapper.StateCodeForStateSet(transSymbols, isPoly, true, oldNexusChar);
 						newStdTMapper.StateCodeToNexusString(newStateCode);
@@ -1655,7 +1655,7 @@
 	return true;
 	}
 /*----------------------------------------------------------------------------------------------------------------------
-|	Called when FORMAT command needs to be parsed from within the DIMENSIONS block. Deals with everything after the 
+|	Called when FORMAT command needs to be parsed from within the DIMENSIONS block. Deals with everything after the
 |	token FORMAT up to and including the semicolon that terminates the FORMAT command.
 */
 void NxsCharactersBlock::HandleFormat(
@@ -1767,7 +1767,7 @@
 								dtv.push_back(nucleotide);
 							else if (name == "PROTEIN")
 								dtv.push_back(protein);
-							else 
+							else
 								{
 								errormsg << pIt->first <<  " is not a valid DATATYPE within a " <<  id << " block";
 								throw NxsException(errormsg, *wIt);
@@ -1798,10 +1798,10 @@
 		}
 	for (ProcessedNxsCommand::const_iterator wIt = tokenVec.begin(); wIt != tvEnd; ++wIt)
 		{
-		
+
 		if (wIt->Equals("DATATYPE"))// we should have already processed this
 			{
-			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE in FORMAT command"); // = 
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE in FORMAT command"); // =
 			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE = in FORMAT command"); // datatype
 			}
 		else if (wIt->Equals("RESPECTCASE"))
@@ -1815,7 +1815,7 @@
 		else if (wIt->Equals("MISSING"))
 			{
 			DemandEquals(wIt, tvEnd, "after keyword MISSING");
-			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MISSING = \" in FORMAT command");			
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MISSING = \" in FORMAT command");
 			const std::string t = wIt->GetToken();
 			if (t.length() != 1)
 				{
@@ -1837,7 +1837,7 @@
 		else if (wIt->Equals("GAP"))
 			{
 			DemandEquals(wIt, tvEnd, "after keyword GAP");
-			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"GAP = \" in FORMAT command");		
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"GAP = \" in FORMAT command");
 			const std::string t = wIt->GetToken();
 			if (t.length() != 1)
 				{
@@ -1859,7 +1859,7 @@
 		else if (wIt->Equals("MATCHCHAR"))
 			{
 			DemandEquals(wIt, tvEnd, "after keyword MATCHCHAR");
-			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MATCHCHAR = \" in FORMAT command");		
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MATCHCHAR = \" in FORMAT command");
 			const std::string t = wIt->GetToken();
 			if (t.length() != 1)
 				{
@@ -1919,7 +1919,7 @@
 				s += wIt->GetToken().c_str();
 				ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "closing \" of symbols list");
 				}
-				
+
 			const std::string tos = NxsString::strip_whitespace(s);
 			const char * to = tos.c_str();
 			unsigned tlen = (unsigned)tos.length();
@@ -1941,7 +1941,7 @@
 						errormsg << "A ~ in a SYMBOLS list is interpreted as a range of symbols.  The ~ cannot be the first or last character in the symbols list";
 						throw NxsException(errormsg, token);
 						}
-					const int jj = i - 1 ; 
+					const int jj = i - 1 ;
 					const char prevChar = to[jj];
 					const char nextChar = to[i+1];
 					if ((isdigit(prevChar) && isdigit(nextChar)) || (isalpha(prevChar) && isalpha(nextChar)))
@@ -1991,7 +1991,7 @@
 				else
 					processedS += *pp;
 				}
-			if (!processedS.empty()) 
+			if (!processedS.empty())
 				{
 				if (this->datatype == dna || this->datatype == rna || this->restrictionDataype || this->datatype == protein)
 					{
@@ -2059,7 +2059,7 @@
 					errormsg << "EQUATE symbol specified (" << wIt->GetToken() <<  ") is not valid; An Equate symbol cannot be a state symbol or identical to the  missing,  gap, or matchchar symbols.";
 					throw NxsException(errormsg, *wIt);
 					}
-				
+
 				DemandEquals(wIt, tvEnd, " in EQUATE definition");
 				ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "State or set of states in Equate definition");
 				NxsString s;
@@ -2122,7 +2122,7 @@
 					items.push_back(std::string(s.c_str()));
 					}
 				}
-			else 
+			else
 				{
 				if (!wIt->Equals("STATES"))
 					throw NxsException("Sorry, only ITEMS=STATES is supported for discrete datatypes at this time", *wIt);
@@ -2172,7 +2172,7 @@
 		errormsg << "The \"gap\" character \'" << gap << "\' may not be included in the SYMBOLS list.";
 		throw NxsException(errormsg, *tokenVec.begin());
 		}
-		
+
 	if (matchchar != '\0')
 		{
 		if ((matchchar == gap) || (!respectingCase && toupper(matchchar) == toupper(gap)))
@@ -2191,7 +2191,7 @@
 		errormsg << "Gap symbol and Missing symbol cannot be identical!  Both were set to " << missing;
 		throw NxsException(errormsg, *tokenVec.begin());
 		}
-	
+
 	// Perform some last checks before leaving the FORMAT command
 	//
 	if (!tokens && datatype == continuous)
@@ -2232,7 +2232,7 @@
 
 NxsDiscreteDatatypeMapper & NxsDiscreteDatatypeMapper::operator=(const NxsDiscreteDatatypeMapper& other)
 	{
-	symbols = other.symbols; 
+	symbols = other.symbols;
 	lcsymbols = other.lcsymbols;
 	nStates = other.nStates;
 	matchChar = other.matchChar;
@@ -2253,7 +2253,7 @@
 	}
 
 /*----------------------------------------------------------------------------------------------------------------------
-|	Must be called when the symbols list changes. 
+|	Must be called when the symbols list changes.
 |	Uses symbols, gap, missing, respectCase,  extraEquates, and datatype fields to establish new mappings.
 |	token can be NULL if the call is not triggered by the reading of a NEXUS token.
 */
@@ -2278,40 +2278,40 @@
 
 	if (missing == '\0')
 		throw NxsException("Cannot create a datatype mapper with no missing data symbol");
-	
+
 	charToStateCodeLookup.assign(384, NXS_INVALID_STATE_CODE); /*256+128 = 384 -- this way we can deal with signed or unsigned chars by pointing cLookup to element 128*/
 	cLookup = &charToStateCodeLookup[127];
 	stateIntersectionMatrix.clear();
 	isStateSubsetMatrix.clear();
 	isStateSubsetMatrixGapsMissing.clear();
-	
+
 	stateSetsVec.clear();
 	stateCodeLookupPtr = 0L;
 	sclOffset = (gapChar == '\0' ? -1 : -2);
-	
+
 	std::string bogus;
 	std::istringstream bogusStream(bogus);
 	NxsToken bogusToken(bogusStream);
 	token = (token == NULL ? &bogusToken : token);
-	
+
 	/* add the "fundamental" states. */
 	std::set<int> stSet;
 	std::set<int> missingSet;
 	if (gapChar != '\0')
 		{
 		stSet.insert(NXS_GAP_STATE_CODE);
-		/* this is the one of only 2 times that  we don't call AddStateSet to add a state set 
+		/* this is the one of only 2 times that  we don't call AddStateSet to add a state set
 			we do this to avoid illegal indexing of stateSets[1] when there
 			is only one element in the vector.
 		*/
 		stateSetsVec.push_back(NxsDiscreteStateSetInfo(stSet, false, gapChar));
 		cLookup[(int) gapChar] = NXS_GAP_STATE_CODE;
-		
+
 		missingSet.insert(NXS_GAP_STATE_CODE);
 		}
-	
-	
-		/* 
+
+
+		/*
 			Add the missing state code
 			this is the other time that we don't call AddStateSet (to avoid illegal indexing).
 		*/
@@ -2319,7 +2319,7 @@
 	NCL_ASSERT(nStates > 0);
 	for (int s = 0; s < (int) nStates; ++s)
 		missingSet.insert(s);
-	
+
 	char sym = (respectCase ? missing : (char) toupper(missing));
 	stateSetsVec.push_back(NxsDiscreteStateSetInfo(missingSet, false, sym));
 	const int stateCode = (const int)stateSetsVec.size() + sclOffset - 1;
@@ -2338,7 +2338,7 @@
 		stSet.insert(s);
 		AddStateSet(stSet, symbols[s], respectCase, false);
 		}
-	
+
 	/* add the default equates */
 	std::map<char, NxsString> defEq = NxsCharactersBlock::GetDefaultEquates(datatype);
 
@@ -2356,8 +2356,8 @@
 		defEq.insert(extraEquates.begin(), extraEquates.end());
 		extraEquates.clear();
 		defEq.swap(extraEquates);
-		/* respectcase is only "applicable" to Standard datatype 
-			Any symbol extension will be at the end of the symbols list, 
+		/* respectcase is only "applicable" to Standard datatype
+			Any symbol extension will be at the end of the symbols list,
 			so here we add the lower case symbols as equates.
 		*/
 		if (respectCase)
@@ -2398,8 +2398,8 @@
 			cLookup[(int) c] = sc;
 			}
 		}
-	
 
+
 	/* add user-defined equates, and only retain the new ones (those that are not datatype defaults). */
 	std::map<char, NxsString> neededExtraEquates;
 	for (eqIt = extraEquates.begin(); eqIt != extraEquates.end(); ++eqIt)
@@ -2429,17 +2429,17 @@
 			}
 		}
 	extraEquates = neededExtraEquates;
-	}   
+	}
 
 /*----------------------------------------------------------------------------------------------------------------------
 |	Returns the state code of a (possible new state set) `sset`.  This may trigger the reallocation of mapping info.
 |	nexusSymbol can be '\0' if there is not a single-character symbol that represents this state set.
 |
 |	if `addToLookup` is false and the state set is not found then NXS_INVALID_STATE_CODE will be returned.
-|	
-|	if the stateset is added with a `nexusSymbol` then the new "symbol" will be case-sensitive 
+|
+|	if the stateset is added with a `nexusSymbol` then the new "symbol" will be case-sensitive
 |	(this is an mechanism for entering equates and equates are always case sensitive).
-|	
+|
 |	New "fundamental" states can NOT be introduced using this function -- if unknown states are encountered, an exception will be generated.
 */
 int NxsDiscreteDatatypeMapper::StateCodeForStateSet(const std::set<int> & sset, bool isPolymorphic, bool addToLookup, char nexusSymbol)
@@ -2450,12 +2450,12 @@
 		ValidateStateIndex(c);
 		return c;
 		}
-	NCL_ASSERT(stateCodeLookupPtr); 
+	NCL_ASSERT(stateCodeLookupPtr);
 	NxsDiscreteStateSetInfo *sclStart = stateCodeLookupPtr + nStates;
 	const int nCodes = (int)stateSetsVec.size();
 
 	/*we can start at nStates, because < nStates will be handled in the sset.size() == 1 above */
-	for (int i = nStates - sclOffset; i < nCodes; ++i) 
+	for (int i = nStates - sclOffset; i < nCodes; ++i)
 		{
 		NxsDiscreteStateSetInfo & stateSetInfo = *sclStart++;
 		if (sset == stateSetInfo.states && isPolymorphic == stateSetInfo.isPolymorphic)
@@ -2471,11 +2471,11 @@
 		}
 	if (!addToLookup)
 		return NXS_INVALID_STATE_CODE;
-	return AddStateSet(sset, nexusSymbol, true, isPolymorphic); 
+	return AddStateSet(sset, nexusSymbol, true, isPolymorphic);
 	}
 
 /*----------------------------------------------------------------------------------------------------------------------
-|	Adds a new state set and returns its code. 
+|	Adds a new state set and returns its code.
 |	Does NOT check if the state set is present.
 |	It is also MANDATORY that this function be called with the fundamental states first (and in order) before
 |	 being called with any multi state sets (this is done by RefreshMappings)
@@ -2485,12 +2485,12 @@
 	stateIntersectionMatrix.clear();
 	isStateSubsetMatrix.clear();
 	isStateSubsetMatrixGapsMissing.clear();
-		
 
+
 	bool reallyIsPoly = (states.size() > 1 && isPolymorphic);
 	char sym = (symRespectCase ? nexusSymbol : (char) toupper(nexusSymbol));
 	stateSetsVec.push_back(NxsDiscreteStateSetInfo(states, reallyIsPoly, sym));
-	/* if we have gaps, then the sclOffset is -1 and we want to enable 
+	/* if we have gaps, then the sclOffset is -1 and we want to enable
 		stateCodeLookup[-1], so we set stateCodeLookup to &stateSets[1]
 		hence the -sclOffset below
 	*/
@@ -2531,12 +2531,12 @@
 		}
 	else if (c >= (int) nStates)
 		throw NxsNCLAPIException("Illegal usage of state index >= the number of states");
-	}	
+	}
 
 /*----------------------------------------------------------------------------------------------------------------------
 |	Throws an NxsNCLAPIException  if `c` is not a valid state code.
 */
-void NxsDiscreteDatatypeMapper::ValidateStateCode(int c) const 
+void NxsDiscreteDatatypeMapper::ValidateStateCode(int c) const
 	{
 	if (c < sclOffset)
 		{
@@ -2548,10 +2548,10 @@
 		}
 	else if (c >= (((int) stateSetsVec.size()) + sclOffset))
 		throw NxsNCLAPIException("Illegal usage of state code > the highest state code");
-	}	
+	}
 
-	
-void NxsDiscreteDatatypeMapper::GenerateNxsExceptionMatrixReading(char const* message, unsigned int taxInd, unsigned int charInd, 
+
+void NxsDiscreteDatatypeMapper::GenerateNxsExceptionMatrixReading(char const* message, unsigned int taxInd, unsigned int charInd,
 NxsToken& token, const NxsString &nameStr)
 	{
 	NxsString e = "Error reading character ";
@@ -2566,7 +2566,7 @@
 	e << ":\n" << message;
 	throw NxsException(e, token);
 	}
-	
+
 /*----------------------------------------------------------------------------------------------------------------------
 |	Returns true if the state code maps to a collection of states that were flagged as polymorphic.
 |	generates a NxsNCLAPIException if `c` is not a valid state code
@@ -2578,11 +2578,11 @@
 	return stateCodeLookupPtr[c].isPolymorphic;
 	}
 
-	
+
 /*----------------------------------------------------------------------------------------------------------------------
 |	Returns NXS_INVALID_STATE_CODE or the index of `c` in the symbols list.
 |	case-sensitivity is controlled by this->respectCase attribute.
-|	
+|
 |	NOTE: the gap "state" and missing characters are NOT in the symbols list.
 */
 int NxsDiscreteDatatypeMapper::PositionInSymbols(char c) const
@@ -2598,21 +2598,21 @@
 		}
 	return NXS_INVALID_STATE_CODE;
 	}
-	
 
+
 /*----------------------------------------------------------------------------------------------------------------------
-|	Returns the NEXUS reperesenation of the state code `scode` which may be a 
+|	Returns the NEXUS reperesenation of the state code `scode` which may be a
 |	multiple character string such as {DNY}
 |   Generates a NxsNCLAPIException if `c` is not a valid state code.
 |	If the string cannot be expressed (insufficient symbols are defined) then
-|	`demandSymbols` controls the behavior.  If `demandSymbols` is true than a 
+|	`demandSymbols` controls the behavior.  If `demandSymbols` is true than a
 |	NxsNCLAPIException is thrown. If `demandSymbols` is false then no output is
 |	written.
 */
 void NxsDiscreteDatatypeMapper::WriteStateCodeAsNexusString(std::ostream & out, int scode, bool demandSymbols) const
 	{
 	ValidateStateCode(scode);
-	const NxsDiscreteStateSetInfo * ssi = &(stateSetsVec.at(scode-sclOffset)); 
+	const NxsDiscreteStateSetInfo * ssi = &(stateSetsVec.at(scode-sclOffset));
 	const NxsDiscreteStateSetInfo & stateSetInfo =  stateCodeLookupPtr[scode];
 	NCL_ASSERT (ssi == &stateSetInfo);
 	char c = stateSetInfo.nexusSymbol;
@@ -2640,7 +2640,7 @@
 		else
 			return;
 		}
-		
+
 	out <<	(stateSetInfo.isPolymorphic ? '(' : '{');
 	out << towrite;
 	out <<	(stateSetInfo.isPolymorphic ? ')' : '}');
@@ -2671,7 +2671,7 @@
 		unsigned numDefStates = 4;
 		if (this->datatype == NxsCharactersBlock::protein)
 			numDefStates = 21;
-		else if (this->datatype == NxsCharactersBlock::standard)	
+		else if (this->datatype == NxsCharactersBlock::standard)
 			numDefStates = 0;
 		unsigned nSym = (unsigned)this->symbols.length();
 		if (nSym > numDefStates && this->datatype != NxsCharactersBlock::codon)
@@ -2721,7 +2721,7 @@
 		return false;
 	if (token.Equals("("))
 		{
-		token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);		   
+		token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
 		token.GetNextToken();
 		while (!token.Equals(")"))
 			{
@@ -2743,7 +2743,7 @@
 				v.push_back(t.ConvertToDouble());
 				scored.push_back(1);
 				}
-			token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);		   
+			token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
 			token.GetNextToken();
 			}
 		}
@@ -2837,10 +2837,10 @@
   NxsToken &token,
   unsigned taxNum,		/* the taxon index, in range [0..`ntax') */
   unsigned charNum,		/* the character index, in range [0..`nChar') */
-  const NxsDiscreteStateRow * firstTaxonRow, 
+  const NxsDiscreteStateRow * firstTaxonRow,
   const NxsString & nameStr) const
   	{
-  	int currState = cLookup[currChar];
+	int currState = cLookup[static_cast<int>(currChar)];
 	if (currState == NXS_INVALID_STATE_CODE)
 		{
 		NxsString emsg;
@@ -2865,12 +2865,12 @@
 	}
 
 bool NxsCharactersBlock::HandleNextDiscreteState(
-  NxsToken &token, 
-  unsigned taxNum, 
-  unsigned charNum, 
-  NxsDiscreteStateRow & row, 
-  NxsDiscreteDatatypeMapper &mapper, 
-  const NxsDiscreteStateRow * firstTaxonRow, 
+  NxsToken &token,
+  unsigned taxNum,
+  unsigned charNum,
+  NxsDiscreteStateRow & row,
+  NxsDiscreteDatatypeMapper &mapper,
+  const NxsDiscreteStateRow * firstTaxonRow,
   const NxsString & nameStr)
   	{
   	if (interleaving)
@@ -2887,7 +2887,7 @@
 	const NxsString &stateAsNexus = token.GetTokenReference();
 	int sc =  mapper.EncodeNexusStateString(stateAsNexus, token, taxNum, charNum, firstTaxonRow, nameStr);
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/phylobase -r 766