[Phylobase-commits] r820 - in pkg/src: . ncl
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Nov 23 21:04:36 CET 2010
Author: francois
Date: 2010-11-23 21:04:35 +0100 (Tue, 23 Nov 2010)
New Revision: 820
Modified:
pkg/src/ncl/ncl.h
pkg/src/ncl/nxsassumptionsblock.h
pkg/src/ncl/nxsblock.h
pkg/src/ncl/nxscxxdiscretematrix.h
pkg/src/ncl/nxsmultiformat.h
pkg/src/ncl/nxstaxablock.h
pkg/src/ncl/nxstoken.h
pkg/src/ncl/nxstreesblock.h
pkg/src/nxsassumptionsblock.cpp
pkg/src/nxscharactersblock.cpp
pkg/src/nxscxxdiscretematrix.cpp
pkg/src/nxsmultiformat.cpp
pkg/src/nxstaxablock.cpp
pkg/src/nxstoken.cpp
pkg/src/nxstreesblock.cpp
Log:
upgrade to NCL 2.1.14 (fixes issues with building in Windows)
Modified: pkg/src/ncl/ncl.h
===================================================================
--- pkg/src/ncl/ncl.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/ncl.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -20,20 +20,6 @@
#ifndef NCL_NCL_H
#define NCL_NCL_H
-#if defined(_MSC_VER)
-# pragma warning(disable:4786)
-# pragma warning(disable:4291)
-# if _MSC_VER >= 1500
-# include <cstdio>
-# if !defined(vsnprintf)
-# define vsnprintf _vsnprintf_s
-# endif
-# define sprintf sprintf_s
-# else
-# define vsnprintf _vsnprintf
-# endif
-#endif
-
#if !defined(__DECCXX)
# include <cctype>
# include <cmath>
Modified: pkg/src/ncl/nxsassumptionsblock.h
===================================================================
--- pkg/src/ncl/nxsassumptionsblock.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxsassumptionsblock.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -69,8 +69,21 @@
virtual void ReadTreesetDef(NxsString set_name, NxsToken &token, bool asterisked) = 0;
virtual NxsTransformationManager & GetNxsTransformationManagerRef() = 0;
+ virtual const NxsTransformationManager & GetNxsTransformationManagerConstRef() const = 0;
virtual NxsGeneticCodesManager & GetNxsGeneticCodesManagerRef() = 0;
virtual void SetGapsAsNewstate(bool v) = 0;
+
+ /*! delegates call to the NxsTransformationManager */
+ virtual std::vector<double> GetDefaultDoubleWeights() const
+ {
+ return GetNxsTransformationManagerConstRef().GetDefaultDoubleWeights();
+ }
+
+ /*! delegates call to the NxsTransformationManager */
+ virtual std::vector<int> GetDefaultIntWeights() const {
+ return GetNxsTransformationManagerConstRef().GetDefaultIntWeights();
+ }
+
};
/*!
@@ -158,6 +171,10 @@
NxsTaxaBlockAPI * GetTaxaBlockPtr(int *status=NULL); /*v2.1to2.2 13 */
NxsTreesBlockAPI * GetTreesBlockPtr(int *status=NULL); /*v2.1to2.2 13 */
+ const NxsTransformationManager & GetNxsTransformationManagerConstRef() const
+ {
+ return transfMgr;
+ }
NxsTransformationManager & GetNxsTransformationManagerRef()
{
return transfMgr;
Modified: pkg/src/ncl/nxsblock.h
===================================================================
--- pkg/src/ncl/nxsblock.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxsblock.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -46,6 +46,7 @@
public:
virtual ~NxsLabelToIndicesMapper(){}
virtual unsigned GetMaxIndex() const = 0;
+ virtual unsigned GetNumLabelsCurrentlyStored() const {return GetMaxIndex();}
/* Adds the 0-based indices corresponding to a label to the set.
\returns the number of indices that correspond to the label (and the number
Modified: pkg/src/ncl/nxscxxdiscretematrix.h
===================================================================
--- pkg/src/ncl/nxscxxdiscretematrix.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxscxxdiscretematrix.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -28,6 +28,7 @@
#include "ncl/nxscharactersblock.h"
#include "ncl/nxscdiscretematrix.h"
+class NxsCharacterPattern;
/**
* A C++ class that wraps a CDiscretMatrix in order to handle the memory
management more cleanly. This is intended to be an alternate, low-level way
@@ -41,9 +42,9 @@
Initialize(0L, false);
}
NxsCXXDiscreteMatrix(const NxsCDiscreteMatrix & );
- NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L);
+ NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L, bool standardizeCoding = true);
- void Initialize(const NxsCharactersBlock * cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L);
+ void Initialize(const NxsCharactersBlock * cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L, bool standardizeCoding = true);
const NxsCDiscreteMatrix & getConstNativeC() const
{
@@ -172,4 +173,79 @@
NxsCXXDiscreteMatrix & operator=(const NxsCXXDiscreteMatrix &); /** don't define, not copyable*/
};
+
+
+
+/*! One compressed column (site pattern) of a discrete matrix: the state codes of the included taxa plus bookkeeping that may be updated while the pattern is a std::set key. */
+class NxsCharacterPattern
+ {
+ public:
+ /* Zero-initialize the bookkeeping fields so that copying a freshly built pattern (e.g. into a std::set) never reads an indeterminate value. */
+ NxsCharacterPattern() :count(0), patternIndex(0), sumOfPatternWeights(0.0) {}
+ /* Ordering and equality consider only the state codes; the mutable fields below never affect a pattern's position in a std::set. */
+ bool operator < (const NxsCharacterPattern & other) const { return this->stateCodes < other.stateCodes; }
+ bool operator == (const NxsCharacterPattern & other) const { return this->stateCodes == other.stateCodes; }
+
+ std::vector<NxsCDiscreteState_t> stateCodes; // one state code per included taxon
+ mutable unsigned count; // number of characters that share this pattern
+ mutable unsigned patternIndex; // used as scratch space; not always valid!!!
+ mutable double sumOfPatternWeights; // stored as a double. Use NxsCXXDiscreteMatrix::hasIntWeights of the original matrix to see if these weights should be interpreted as ints
+ };
+
+
+/*----------------------------------------------------------------------------------------------------------------------
+| Fills `patternSet` with the compressed patterns found in `mat`
+|
+| A data structure for mapping from character indices in `mat` to these patterns can be obtained by the client
+| providing the `compressedIndexPattern` argument.
+|
+| Characters or taxa can be omitted by providing `taxaToInclude` or `charactersToInclude` arguments.
+| If these arguments are 0L (or not provided) then all data will be included. Note that skipping taxa
+| will cause the taxon indexing within a pattern to disagree with the overall taxon numbering because there will
+| be "frameshifts" for all of the skipped taxa. The included taxa will be present in the expected order, but it is
+| the caller code's responsibility to keep track of which taxa are included in the pattern.
+*/
+unsigned NxsCompressDiscreteMatrix(
+ const NxsCXXDiscreteMatrix & mat, /**< is the data source */
+ std::set<NxsCharacterPattern> & patternSet, /* set that will hold the compressed columns */
+ std::vector<const NxsCharacterPattern *> * compressedIndexPattern = 0L, /** if not 0L, this will be resized to the number of characters in `mat` and, for each included character, the element at that character's index will point to its pattern in `patternSet` */
+ const NxsUnsignedSet * taxaToInclude = 0L, /**< if not 0L, this should be the indices of the taxa in `mat` to include (if 0L all taxa will be included). Excluding taxa will result in shorter patterns (the skipped taxa will not be filled with empty codes, instead the taxon indexing will be frameshifted -- the client code must keep track of these frameshifts). */
+ const NxsUnsignedSet * charactersToInclude = 0L); /**< if not 0L, this should be the indices of the characters in `mat` to include (if 0L all characters will be included) */
+
+/*----------------------------------------------------------------------------------------------------------------------
+| Fills `compressedTransposedMatrix` with the compressed patterns found in `mat`
+|
+| Data structures for mapping between indices in these representations can be obtained by the client providing
+| the `originalIndexToCompressed` and/or `compressedIndexToOriginal` arguments.
+|
+| Characters or taxa can be omitted by providing `taxaToInclude` or `charactersToInclude` arguments.
+| If these arguments are 0L (or not provided) then all data will be included. Note that skipping taxa
+| will cause the taxon indexing within a pattern to disagree with the overall taxon numbering because there will
+| be "frameshifts" for all of the skipped taxa. The included taxa will be present in the expected order, but it is
+| the caller code's responsibility to keep track of which taxa are included in the pattern.
+*/
+unsigned NxsCompressDiscreteMatrix(
+ const NxsCXXDiscreteMatrix & mat, /**< is the data source */
+ std::vector<NxsCharacterPattern> & compressedTransposedMatrix, /* vector to which the compressed columns will be appended */
+ std::vector<int> * originalIndexToCompressed, /** if not 0L, this will be filled to map a character index in `mat` to the corresponding index in `compressedTransposedMatrix` (-1 in the vector indicates that the character was not included) */
+ std::vector<std::set<unsigned> > * compressedIndexToOriginal, /** if not 0L, this will be filled to map an index in `compressedTransposedMatrix` to the set of character indices in `mat` that have that pattern */
+ const NxsUnsignedSet * taxaToInclude = 0L, /**< if not 0L, this should be the indices of the taxa in `mat` to include (if 0L all taxa will be included). Excluding taxa will result in shorter patterns (the skipped taxa will not be filled with empty codes, instead the taxon indexing will be frameshifted -- the client code must keep track of these frameshifts). */
+ const NxsUnsignedSet * charactersToInclude = 0L); /**< if not 0L, this should be the indices of the characters in `mat` to include (if 0L all characters will be included) */
+
+
+void NxsConsumePatternSetToPatternVector(
+ std::set<NxsCharacterPattern> & patternSet, /* INPUT set of compressed columns; it is emptied by the call */
+ std::vector<NxsCharacterPattern> & compressedTransposedMatrix, /* OUTPUT vector to which the compressed columns are appended */
+ const std::vector<const NxsCharacterPattern *> * compressedIndexPattern = 0L, /** INPUT per-character pointers to the keys of `patternSet` (0L entries mark characters that were not included). This mapping must be provided if either `originalIndexToCompressed` or `compressedIndexToOriginal` is requested */
+ std::vector<int> * originalIndexToCompressed = 0L, /** OUTPUT if not 0L, this will be filled to map a character index to the corresponding index in `compressedTransposedMatrix` (-1 in the vector indicates that the character was not included) */
+ std::vector<std::set<unsigned> > * compressedIndexToOriginal = 0L); /** OUTPUT if not 0L, this will be filled to map a pattern index to the set of character indices that have that pattern */
+
+void NxsTransposeCompressedMatrix(
+ const std::vector<NxsCharacterPattern> & compressedTransposedMatrix,
+ ScopedTwoDMatrix<NxsCDiscreteStateSet> & destination,
+ std::vector<unsigned> * patternCounts = 0L,
+ std::vector<double> * patternWeights = 0L);
+
+
+
#endif // NXS_CXX_DISCRETE_MATRIX_H
Modified: pkg/src/ncl/nxsmultiformat.h
===================================================================
--- pkg/src/ncl/nxsmultiformat.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxsmultiformat.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -77,6 +77,17 @@
UNSUPPORTED_FORMAT // keep this last
};
+
+ void SetCoerceUnderscoresToSpaces(bool v)
+ {
+ this->coerceUnderscoresToSpaces = v;
+ }
+
+ bool GetCoerceUnderscoresToSpaces() const
+ {
+ return this->coerceUnderscoresToSpaces;
+ }
+
/*! \returns a vector with the "official" format names that can be used with formatNameToCode
Currently this list is: {"nexus", "dnafasta", "aafasta", "rnafasta", "dnaphylip", "rnaphylip", "aaphylip", "discretephylip", "dnaphylipinterleaved", "rnaphylipinterleaved", "aaphylipinterleaved", "discretephylipinterleaved", "dnarelaxedphylip", "rnarelaxedphylip", "aarelaxedphylip", "discreterelaxedphylip", "dnarelaxedphylipinterleaved", "rnarelaxedphylipinterleaved", "aarelaxedphylipinterleaved", "discreterelaxedphylipinterleaved", "dnaaln", "rnaaln", "aaaln", "phyliptree", "relaxedphyliptree", "nexml"}
@@ -98,7 +109,8 @@
that indicates where warning messages should be directed.
*/
MultiFormatReader(const int blocksToRead = -1, NxsReader::WarningHandlingMode mode=NxsReader::WARNINGS_TO_STDERR)
- :PublicNexusReader(blocksToRead, mode)
+ :PublicNexusReader(blocksToRead, mode),
+ coerceUnderscoresToSpaces(false)
{}
virtual ~MultiFormatReader(){}
/*! Read the specified format
@@ -151,6 +163,8 @@
\arg dt a facet of NxsCharactersBlock::DataTypesEnum that indicates the expected datatype
*/
void readFinFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt);
+
+ bool coerceUnderscoresToSpaces;
};
Modified: pkg/src/ncl/nxstaxablock.h
===================================================================
--- pkg/src/ncl/nxstaxablock.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxstaxablock.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -185,6 +185,7 @@
void WriteTaxLabelsCommand(std::ostream &out) const;
unsigned GetMaxIndex() const;
+ unsigned GetNumLabelsCurrentlyStored() const;
unsigned GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const;
bool AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds);
bool AddNewPartition(const std::string &label, const NxsPartition & inds);
Modified: pkg/src/ncl/nxstoken.h
===================================================================
--- pkg/src/ncl/nxstoken.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxstoken.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -360,6 +360,8 @@
static unsigned DemandPositiveInt(NxsToken &token, NxsString & errormsg, const char *contextString);
static std::map<std::string, std::string> ParseAsSimpleKeyValuePairs(const ProcessedNxsCommand & tv, const char *cmdName);
+ static std::vector<ProcessedNxsToken> Tokenize(const std::string & );
+
enum NxsTokenFlags /* For use with the variable labileFlags */
{
saveCommandComments = 0x0001, /* if set, command comments of the form [&X] are not ignored but are instead saved as regular tokens (without the square brackets, however) */
Modified: pkg/src/ncl/nxstreesblock.h
===================================================================
--- pkg/src/ncl/nxstreesblock.h 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/ncl/nxstreesblock.h 2010-11-23 20:04:35 UTC (rev 820)
@@ -128,6 +128,10 @@
lenAsString.assign(asString);
}
mutable void * scratch;
+ void SetParent(NxsSimpleNode *p)
+ {
+ this->parent = p;
+ }
private:
void WriteAsNewick(std::ostream &out, bool nhx) const;
void DealWithNexusComments(const std::vector<NxsComment> & ecs, bool NHXComments);
@@ -180,6 +184,16 @@
return edgeToPar;
}
+ const NxsSimpleEdge & GetEdgeToParentRef() const
+ {
+ return edgeToPar;
+ }
+
+ NxsSimpleEdge & GetMutableEdgeToParentRef()
+ {
+ return edgeToPar;
+ }
+
bool IsTip() const
{
return (lChild == 0L);
@@ -239,10 +253,7 @@
name = n;
}
mutable void * scratch;
- private:
- void WriteAsNewick(std::ostream &out, bool nhx, bool useLeafNames, bool escapeNames, const NxsTaxaBlockAPI *taxa=0L) const;
-
NxsSimpleNode(NxsSimpleNode *par, double edgeLen)
:scratch(0L),
lChild(0L),
@@ -253,6 +264,11 @@
edgeToPar.child = this;
}
+
+ public:
+ void WriteAsNewick(std::ostream &out, bool nhx, bool useLeafNames, bool escapeNames, const NxsTaxaBlockAPI *taxa=0L) const;
+
+
NxsSimpleNode(int edgeLen, NxsSimpleNode *par)
:scratch(0L),
lChild(0L),
@@ -282,8 +298,40 @@
else
lChild = n;
}
+
+ bool RemoveChild(NxsSimpleNode *n) /* unlinks child `n` from this node's child list; \returns false (changing nothing) if `n` is 0L or is not a child of this node */
+ {
+     if (n == 0L || lChild == 0L)
+         return false;
+     if (lChild == n)
+         lChild = lChild->rSib; // n was the first child: its next sibling becomes the first child
+     else
+     {
+         NxsSimpleNode * c = lChild;
+         for (;; c = c->rSib) // advance along the sibling list; without this step the scan could never get past the first child
+         {
+             if (c->rSib == n)
+             {
+                 c->rSib = n->rSib; // splice n out of the sibling chain
+                 break;
+             }
+             if (c->rSib == 0L) // reached the last child without finding n (also keeps the loop increment from yielding 0L)
+                 return false;
+         }
+     }
+     n->edgeToPar.parent = 0L; // n no longer has a parent; n's own rSib pointer is left untouched
+     return true;
+ }
void AddSelfAndDesToPreorder(std::vector<const NxsSimpleNode *> &p) const;
NxsSimpleNode * FindTaxonIndex(unsigned leafIndex);
+
+ void LowLevelSetFirstChild(NxsSimpleNode *nd) {
+ lChild = nd;
+ }
+ void LowLevelSetNextSib(NxsSimpleNode *nd) {
+ rSib = nd;
+ }
+ private:
NxsSimpleNode * lChild;
NxsSimpleNode * rSib;
NxsSimpleEdge edgeToPar;
@@ -333,7 +381,7 @@
if (root)
root->WriteAsNewick(out, nhx, useLeafNames, escapeNames, taxa);
}
- void RerootAt(unsigned leafIndex);
+ NxsSimpleNode * RerootAt(unsigned leafIndex);
const NxsSimpleNode * GetRootConst() const
{
@@ -346,7 +394,7 @@
int defIntEdgeLen;
double defDblEdgeLen;
bool realEdgeLens;
- private:
+ public:
NxsSimpleNode * AllocNewNode(NxsSimpleNode *p)
{
NxsSimpleNode * nd;
@@ -674,6 +722,10 @@
{
allowImplicitNames = s;
}
+ void SetTreatIntegerLabelsAsNumbers(bool s)
+ {
+ treatIntegerLabelsAsNumbers = s;
+ }
/*! If true then the block will use the v2.1 style of parsing in which the tree is interpretted and converted into
a newick string with standard taxon numbering
If false, then the NxsTreesBlock will use the v2.0 API in which the tree reader simply stores the tree string
@@ -736,6 +788,7 @@
virtual void CopyTreesBlockContents(const NxsTreesBlock &other)
{
allowImplicitNames = other.allowImplicitNames;
+ treatIntegerLabelsAsNumbers = other.treatIntegerLabelsAsNumbers;
processAllTreesDuringParse = other.processAllTreesDuringParse;
writeFromNodeEdgeDataStructure = other.writeFromNodeEdgeDataStructure;
validateInternalNodeLabels = other.validateInternalNodeLabels;
@@ -749,16 +802,22 @@
treePartitions = other.treePartitions;
processedTreeValidationFunction = other.processedTreeValidationFunction;
ptvArg = other.ptvArg;
+ treatAsRootedByDefault = other.treatAsRootedByDefault;
}
-
+ bool GetTreatAsRootedByDefault() const {
+ return treatAsRootedByDefault;
+ }
+ void SetTreatAsRootedByDefault(bool v) {
+ this->treatAsRootedByDefault = v;
+ }
virtual NxsTreesBlock * Clone() const
{
NxsTreesBlock * a = new NxsTreesBlock(taxa);
*a = *this;
return a;
}
- static void ProcessTokenVecIntoTree(const ProcessedNxsCommand & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false, const bool validateInternalNodeLabels=true);
- static void ProcessTokenStreamIntoTree(NxsToken & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false, const bool validateInternalNodeLabels=true);
+ static void ProcessTokenVecIntoTree(const ProcessedNxsCommand & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false, const bool validateInternalNodeLabels=true, const bool treatIntegerLabelsAsNumbers=false);
+ static void ProcessTokenStreamIntoTree(NxsToken & token, NxsFullTreeDescription & ftd, NxsLabelToIndicesMapper *, std::map<std::string, unsigned> &capNameToInd, bool allowNewTaxa, NxsReader * nexusReader, const bool respectCase=false, const bool validateInternalNodeLabels=true, const bool treatIntegerLabelsAsNumbers=false);
void SetWriteFromNodeEdgeDataStructure(bool v)
{
@@ -838,14 +897,15 @@
bool getValidateInternalNodeLabels() const {
return this->validateInternalNodeLabels;
}
+ void WriteTranslateCommand(std::ostream & out) const;
protected :
void ReadTreeFromOpenParensToken(NxsFullTreeDescription &td, NxsToken & token);
- void WriteTranslateCommand(std::ostream & out) const;
void WriteTreesCommand(std::ostream & out) const;
void ConstructDefaultTranslateTable(NxsToken &token, const char * cmd);
bool allowImplicitNames; /** false by default, true causes the trees block to create a taxa block from the labels found in the trees. */
+ bool treatIntegerLabelsAsNumbers; // if true and allowImplicitNames is true, then new taxon labels that are integers will be treated as the taxon number (rather than arbitrary labels)
bool processAllTreesDuringParse; /** true by default, false speeds processing but disables detection of errors*/
bool constructingTaxaBlock; /** true if new names are being tolerated */
bool writeFromNodeEdgeDataStructure; /**this will probably only ever be set to true in testing code. If true the WriteTrees function will convert each tree to NxsSimpleTree object to write the newick*/
@@ -861,7 +921,7 @@
ProcessedTreeValidationFunction processedTreeValidationFunction;
void * ptvArg;
-
+ bool treatAsRootedByDefault; /* true by default */
virtual void Read(NxsToken &token);
void HandleTranslateCommand(NxsToken &token);
void HandleTreeCommand(NxsToken &token, bool rooted);
Modified: pkg/src/nxsassumptionsblock.cpp
===================================================================
--- pkg/src/nxsassumptionsblock.cpp 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/nxsassumptionsblock.cpp 2010-11-23 20:04:35 UTC (rev 820)
@@ -32,6 +32,7 @@
:queried(false)
{}
virtual ~BogusToIndMapper(){}
+
virtual unsigned GetMaxIndex() const
{
return UINT_MAX;
@@ -276,7 +277,7 @@
if (wIt->second.count(index) > 0)
return wIt->first;
}
- return 1;
+ return -1;
}
/*!
Modified: pkg/src/nxscharactersblock.cpp
===================================================================
--- pkg/src/nxscharactersblock.cpp 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/nxscharactersblock.cpp 2010-11-23 20:04:35 UTC (rev 820)
@@ -2739,7 +2739,12 @@
throw NxsNCLAPIException("Illegal usage of unknown negative state index");
}
else if (c >= (((NxsDiscreteStateCell) stateSetsVec.size()) + sclOffset))
- throw NxsNCLAPIException("Illegal usage of state code > the highest state code");
+ {
+ NxsString err = "Illegal usage of state code > the highest state code. c = ";
+ err << int(c) << " (NxsDiscreteStateCell) stateSetsVec.size() = " << (NxsDiscreteStateCell) stateSetsVec.size();
+ err << " sclOffset = " << sclOffset;
+ throw NxsNCLAPIException(err);
+ }
}
@@ -2806,6 +2811,7 @@
*/
void NxsDiscreteDatatypeMapper::WriteStateCodeAsNexusString(std::ostream & out, NxsDiscreteStateCell scode, bool demandSymbols) const
{
+ //out << "WriteStateCodeAsNexusString-debug scode=" << scode<< '\n';
ValidateStateCode(scode);
const NxsDiscreteStateSetInfo & stateSetInfo = stateCodeLookupPtr[scode];
NCL_ASSERT (&(stateSetsVec.at(scode-sclOffset)) == &stateSetInfo);
Modified: pkg/src/nxscxxdiscretematrix.cpp
===================================================================
--- pkg/src/nxscxxdiscretematrix.cpp 2010-11-23 16:03:06 UTC (rev 819)
+++ pkg/src/nxscxxdiscretematrix.cpp 2010-11-23 20:04:35 UTC (rev 820)
@@ -25,14 +25,260 @@
using std::cout;
using std::endl;
+/**===========================================================================
+| Appends every pattern in `patternSet` to `compressedTransposedMatrix` and empties `patternSet`.
+|
+| If `originalIndexToCompressed` or `compressedIndexToOriginal` are requested
+| then the `compressedIndexPattern` mapping must be supplied (an NxsException is thrown otherwise).
+| `compressedIndexPattern` must contain pointers to the keys in `patternSet` (or 0L for characters that were
+| not included). Note that these pointers will be invalid after the call because `patternSet` will be emptied.
+*/
+void NxsConsumePatternSetToPatternVector(
+    std::set<NxsCharacterPattern> & patternSet, /* INPUT set of compressed columns; it is emptied by the call */
+    std::vector<NxsCharacterPattern> & compressedTransposedMatrix, /* OUTPUT vector to which the compressed columns are appended */
+    const std::vector<const NxsCharacterPattern *> * compressedIndexPattern, /** INPUT This mapping must be provided if either `originalIndexToCompressed` or `compressedIndexToOriginal` is requested */
+    std::vector<int> * originalIndexToCompressed, /** OUTPUT if not 0L, this will be filled to map a character index to the corresponding index in `compressedTransposedMatrix` (-1 in the vector indicates that the character was not included) */
+    std::vector<std::set<unsigned> > * compressedIndexToOriginal) /** OUTPUT if not 0L, this will be cleared and then filled to map an index in `compressedTransposedMatrix` to the set of character indices that have that pattern */
+{
+    // The new patterns are appended, so their indices start after any patterns already in the vector.
+    const unsigned patternIndexOffset = compressedTransposedMatrix.size();
+    const unsigned numCompressedPatterns = patternSet.size();
+    const unsigned totalNumPatterns = patternIndexOffset + numCompressedPatterns;
+    if (originalIndexToCompressed != 0L || compressedIndexToOriginal != 0L)
+    {
+        if (compressedIndexPattern == 0L)
+            throw NxsException("compressedIndexPattern must be provided in ConsumePatternSetToPatternVector if mappings are requested");
+        // Record in each pattern (via its mutable scratch field) the index it will have in the output vector.
+        unsigned patternIndex = patternIndexOffset;
+        for (std::set<NxsCharacterPattern>::iterator pIt = patternSet.begin(); pIt != patternSet.end(); ++pIt, ++patternIndex)
+        {
+            pIt->patternIndex = patternIndex;
+        }
+        if (originalIndexToCompressed)
+            originalIndexToCompressed->resize(compressedIndexPattern->size());
+        if (compressedIndexToOriginal)
+        {
+            // Sized by the final length of the output vector so that the offset pattern indices are valid positions.
+            compressedIndexToOriginal->clear();
+            compressedIndexToOriginal->resize(totalNumPatterns);
+        }
+        for (unsigned i = 0; i < compressedIndexPattern->size(); ++ i)
+        {
+            const NxsCharacterPattern * pat = (*compressedIndexPattern)[i];
+            if (pat)
+            {
+                if (originalIndexToCompressed)
+                    (*originalIndexToCompressed)[i] = pat->patternIndex;
+                if (compressedIndexToOriginal)
+                {
+                    NCL_ASSERT(pat->patternIndex < totalNumPatterns);
+                    compressedIndexToOriginal->at(pat->patternIndex).insert(i);
+                }
+            }
+            else
+            {
+                // 0L marks a character that was not included in any pattern.
+                if (originalIndexToCompressed)
+                    (*originalIndexToCompressed)[i] = -1;
+            }
+        }
+    }
+    compressedTransposedMatrix.reserve(totalNumPatterns);
+    // Copy each pattern and erase it from the set straight away; the loop leaves `patternSet` empty.
+    for (std::set<NxsCharacterPattern>::iterator pIt = patternSet.begin(); pIt != patternSet.end();)
+    {
+        compressedTransposedMatrix.push_back(*pIt);
+        std::set<NxsCharacterPattern>::iterator prevIt = pIt++;
+        patternSet.erase(prevIt);
+    }
+}
-NxsCXXDiscreteMatrix::NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool gapsToMissing, const NxsUnsignedSet * toInclude)
+
+/*----------------------------------------------------------------------------------------------------------------------
+| Adds the column patterns of `mat` to `patternSet`, merging columns that have identical state codes (the count and
+| the summed weight of a merged pattern are accumulated). Columns whose acting weight is not positive (this includes
+| the characters excluded in `mat`) and columns that are not in `charactersToInclude` are skipped.
+| Returns the number of patterns that were added to `patternSet` (0 if an inclusion set is supplied but empty).
+| Throws NxsException if the last index in `taxaToInclude` or `charactersToInclude` is out of range.
+*/
+unsigned NxsCompressDiscreteMatrix(
+    const NxsCXXDiscreteMatrix & mat, /**< is the data source */
+    std::set<NxsCharacterPattern> & patternSet, /* set that will hold the compressed columns */
+    std::vector<const NxsCharacterPattern *> * compressedIndexPattern, /** if not 0L, this will be resized to the number of characters in `mat` and, for each included character, the element at that character's index will point to its pattern in `patternSet` */
+    const NxsUnsignedSet * taxaToInclude, /**< if not 0L, this should be the indices of the taxa in `mat` to include (if 0L all taxa will be included). Excluding taxa will result in shorter patterns (the skipped taxa will not be filled with empty codes, instead the taxon indexing will be frameshifted -- the client code must keep track of these frameshifts). */
+    const NxsUnsignedSet * charactersToInclude) /**< if not 0L, this should be the indices of the characters in `mat` to include (if 0L all characters will be included) */
+{
+    const unsigned origNumPatterns = (unsigned) patternSet.size();
+    unsigned ntax = mat.getNTax();
+    unsigned patternLength = ntax;
+    unsigned nchar = mat.getNChar();
+    if (compressedIndexPattern)
+    {
+        compressedIndexPattern->resize(nchar);
+    }
+    NxsUnsignedSet allTaxaInds;
+    if (taxaToInclude)
+    {
+        if (taxaToInclude->empty())
+            return 0; // might want to warn about this!
+        const unsigned lastTaxonIndex = *(taxaToInclude->rbegin());
+        if (lastTaxonIndex >= ntax)
+            throw NxsException("Taxon index in taxaToInclude argument to NxsCompressDiscreteMatrix is out of range");
+        // A pattern holds one state code per *included* taxon.
+        patternLength = (unsigned) taxaToInclude->size();
+    }
+    else
+    {
+        for (unsigned i = 0; i < ntax; ++i)
+            allTaxaInds.insert(i);
+        taxaToInclude = &allTaxaInds;
+    }
+    if (charactersToInclude)
+    {
+        if (charactersToInclude->empty())
+            return 0; // might want to warn about this!
+        const unsigned lastColumnIndex = *(charactersToInclude->rbegin());
+        if (lastColumnIndex >= nchar)
+            throw NxsException("Character index in charactersToInclude argument to NxsCompressDiscreteMatrix is out of range");
+    }
+
+    // Create actingWeights vector and copy the integer weights from mat into it
+    // If there are no integer weights in mat, copy the floating point weights instead
+    // if floating point weights have been defined (otherwise every character has weight 1.0)
+    const std::vector<int> & iwts = mat.getIntWeightsConst();
+    std::vector<double> actingWeights(nchar, 1.0);
+    if (!iwts.empty())
+    {
+        NCL_ASSERT(iwts.size() >= nchar);
+        for (unsigned j = 0; j < nchar; ++j)
+            actingWeights[j] = (double)iwts.at(j);
+    }
+    else
+    {
+        const std::vector<double> & dwts = mat.getDblWeightsConst();
+        if (!dwts.empty())
+        {
+            actingWeights = dwts;
+            NCL_ASSERT(actingWeights.size() == nchar);
+        }
+    }
+
+    // Set corresponding actingWeights elements to zero if any characters have been excluded in mat
+    const NxsUnsignedSet & excl = mat.getExcludedCharIndices();
+    for (NxsUnsignedSet::const_iterator eIt = excl.begin(); eIt != excl.end(); ++eIt)
+    {
+        NCL_ASSERT(*eIt < nchar);
+        actingWeights[*eIt] = 0.0;
+    }
+
+    NxsCharacterPattern patternTemp;
+    patternTemp.count = 1;
+    patternTemp.patternIndex = 0; // scratch field: give it a defined value before the pattern is copied into the set
+    // The weights are read through the vector itself (not through a pointer to element 0) so that
+    // a matrix with no characters is handled without touching an empty vector.
+    for (unsigned j = 0; j < nchar; ++j)
+    {
+        const double patternWeight = actingWeights[j];
+        const bool shouldInclude = (charactersToInclude == 0L || (charactersToInclude->find(j) != charactersToInclude->end()));
+        if (patternWeight > 0.0 && shouldInclude)
+        {
+            // Build up a vector representing the pattern of state codes at this site
+            patternTemp.stateCodes.clear();
+            patternTemp.stateCodes.reserve(patternLength);
+            patternTemp.sumOfPatternWeights = patternWeight;
+
+            unsigned indexInPattern = 0;
+            for (NxsUnsignedSet::const_iterator taxIndIt = taxaToInclude->begin(); taxIndIt != taxaToInclude->end(); ++taxIndIt, ++indexInPattern)
+            {
+                const unsigned taxonIndex = *taxIndIt;
+                const NxsCDiscreteStateSet * row = mat.getRow(taxonIndex);
+                const NxsCDiscreteStateSet code = row[j];
+                patternTemp.stateCodes.push_back(code);
+            }
+            NCL_ASSERT(indexInPattern == patternLength);
+
+            // lower_bound gives either the equal pattern already stored or the insertion hint for a new one
+            std::set<NxsCharacterPattern>::iterator lowBoundLoc = patternSet.lower_bound(patternTemp);
+            if ((lowBoundLoc == patternSet.end()) || (patternTemp < *lowBoundLoc))
+            {
+                std::set<NxsCharacterPattern>::iterator insertedIt = patternSet.insert(lowBoundLoc, patternTemp);
+                if (compressedIndexPattern)
+                {
+                    const NxsCharacterPattern & patInserted = *insertedIt;
+                    (*compressedIndexPattern)[j] = &patInserted;
+                }
+            }
+            else
+            {
+                // Pattern already present: accumulate into its mutable bookkeeping fields
+                NCL_ASSERT(patternTemp == *lowBoundLoc);
+                lowBoundLoc->sumOfPatternWeights += patternWeight;
+                lowBoundLoc->count += 1;
+                if (compressedIndexPattern)
+                {
+                    (*compressedIndexPattern)[j] = &(*lowBoundLoc);
+                }
+            }
+        }
+    }
+    return patternSet.size() - origNumPatterns;
+}
+
+/*----------------------------------------------------------------------------------------------------------------------
+| Compresses the columns of `mat' into patterns and appends them to `compressedTransposedMatrix'. Each appended
+| pattern holds its state codes and the count of the number of sites having that pattern. Additionally, the vectors
+| `compressedIndexToOriginal' and `originalIndexToCompressed' are filled if they are not 0L: `compressedIndexToOriginal'
+| allows you to get the set of sites given a specific pattern index, and `originalIndexToCompressed' lets you find the
+| index of a pattern in `compressedTransposedMatrix' given an original site index. Returns the number of patterns added.
+*/
+unsigned NxsCompressDiscreteMatrix(
+ const NxsCXXDiscreteMatrix & mat,
+ std::vector<NxsCharacterPattern> & compressedTransposedMatrix,
+ std::vector<int> * originalIndexToCompressed,
+ std::vector<std::set<unsigned> > * compressedIndexToOriginal,
+ const NxsUnsignedSet * taxaToInclude,
+ const NxsUnsignedSet * charactersToInclude)
{
- Initialize(&cb, gapsToMissing, toInclude);
+ std::set<NxsCharacterPattern> patternSet;
+ std::vector<const NxsCharacterPattern *> toPatternMap;
+ std::vector<const NxsCharacterPattern *> *toPatternMapPtr = 0L;
+ if (originalIndexToCompressed != 0L || compressedIndexToOriginal != 0L)
+ toPatternMapPtr = &toPatternMap;
+
+ NxsCompressDiscreteMatrix(mat, patternSet, toPatternMapPtr, taxaToInclude, charactersToInclude);
+ const unsigned numPatternsAdded = patternSet.size();
+
+ NxsConsumePatternSetToPatternVector(patternSet, compressedTransposedMatrix, toPatternMapPtr, originalIndexToCompressed, compressedIndexToOriginal);
+ return numPatternsAdded;
}
-void NxsCXXDiscreteMatrix::Initialize(const NxsCharactersBlock * cb, bool gapsToMissing, const NxsUnsignedSet * toInclude)
+void NxsTransposeCompressedMatrix(
+ const std::vector<NxsCharacterPattern> & compressedTransposedMatrix,
+ ScopedTwoDMatrix<NxsCDiscreteStateSet> & destination,
+ std::vector<unsigned> * patternCounts,
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/phylobase -r 820
More information about the Phylobase-commits
mailing list