12 #ifndef __RD_SPARSE_INT_VECT_20070921__
13 #define __RD_SPARSE_INT_VECT_20070921__
27 template <
typename IndexType>
39 d_length = other.d_length;
41 d_data.insert(other.d_data.begin(), other.d_data.end());
46 initFromText(pkl.c_str(), pkl.size());
50 initFromText(pkl, len);
57 d_length = other.d_length;
59 d_data.insert(other.d_data.begin(), other.d_data.end());
67 #pragma clang diagnostic push
68 #pragma clang diagnostic ignored "-Wtautological-compare"
69 #elif (defined(__GNUC__) || defined(__GNUG__)) && \
70 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 1))
71 #if (__GNUC__ > 4 || __GNUC_MINOR__ > 5)
72 #pragma GCC diagnostic push
74 #pragma GCC diagnostic ignored "-Wtype-limits"
78 if (idx < 0 || idx >= d_length) {
82 typename StorageType::const_iterator iter = d_data.find(idx);
83 if (iter != d_data.end()) {
90 void setVal(IndexType idx,
int val) {
91 if (idx < 0 || idx >= d_length) {
101 #pragma clang diagnostic pop
102 #elif (defined(__GNUC__) || defined(__GNUG__)) && \
103 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5))
104 #pragma GCC diagnostic pop
116 typename StorageType::const_iterator iter;
117 for (iter = d_data.begin(); iter != d_data.end(); ++iter) {
121 res += abs(iter->second);
136 if (other.d_length != d_length) {
140 typename StorageType::iterator iter = d_data.begin();
141 typename StorageType::const_iterator oIter = other.d_data.begin();
142 while (iter != d_data.end()) {
144 while (oIter != other.d_data.end() && oIter->first < iter->first) {
147 if (oIter != other.d_data.end() && oIter->first == iter->first) {
149 if (oIter->second < iter->second) {
150 iter->second = oIter->second;
157 typename StorageType::iterator tmpIter = iter;
175 if (other.d_length != d_length) {
179 typename StorageType::iterator iter = d_data.begin();
180 typename StorageType::const_iterator oIter = other.d_data.begin();
181 while (iter != d_data.end()) {
183 while (oIter != other.d_data.end() && oIter->first < iter->first) {
184 d_data[oIter->first] = oIter->second;
187 if (oIter != other.d_data.end() && oIter->first == iter->first) {
189 if (oIter->second > iter->second) {
190 iter->second = oIter->second;
197 while (oIter != other.d_data.end()) {
198 d_data[oIter->first] = oIter->second;
210 if (other.d_length != d_length) {
213 typename StorageType::iterator iter = d_data.begin();
214 typename StorageType::const_iterator oIter = other.d_data.begin();
215 while (oIter != other.d_data.end()) {
216 while (iter != d_data.end() && iter->first < oIter->first) {
219 if (iter != d_data.end() && oIter->first == iter->first) {
221 iter->second += oIter->second;
223 typename StorageType::iterator tIter = iter;
231 d_data[oIter->first] = oIter->second;
244 if (other.d_length != d_length) {
247 typename StorageType::iterator iter = d_data.begin();
248 typename StorageType::const_iterator oIter = other.d_data.begin();
249 while (oIter != other.d_data.end()) {
250 while (iter != d_data.end() && iter->first < oIter->first) {
253 if (iter != d_data.end() && oIter->first == iter->first) {
255 iter->second -= oIter->second;
257 typename StorageType::iterator tIter = iter;
265 d_data[oIter->first] = -oIter->second;
277 typename StorageType::iterator iter = d_data.begin();
278 while (iter != d_data.end()) {
289 typename StorageType::iterator iter = d_data.begin();
290 while (iter != d_data.end()) {
301 typename StorageType::iterator iter = d_data.begin();
302 while (iter != d_data.end()) {
313 typename StorageType::iterator iter = d_data.begin();
314 while (iter != d_data.end()) {
326 if (d_length != v2.d_length) {
329 return d_data == v2.d_data;
332 return !(*
this == v2);
337 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
342 tInt =
sizeof(IndexType);
345 IndexType nEntries = d_data.size();
348 typename StorageType::const_iterator iter = d_data.begin();
349 while (iter != d_data.end()) {
351 std::int32_t tInt = iter->second;
359 initFromText(txt.c_str(), txt.length());
366 void initFromText(
const char *pkl,
const unsigned int len) {
368 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
374 if (vers == 0x0001) {
377 if (tInt >
sizeof(IndexType)) {
379 "IndexType cannot accommodate index size in SparseIntVect pickle");
383 readVals<unsigned char>(ss);
385 case sizeof(std::int32_t):
386 readVals<std::uint32_t>(ss);
388 case sizeof(boost::int64_t):
389 readVals<boost::uint64_t>(ss);
398 template <
typename T>
399 void readVals(std::stringstream &ss) {
400 PRECONDITION(
sizeof(T) <=
sizeof(IndexType),
"invalid size");
406 for (T i = 0; i < nEntries; ++i) {
415 template <
typename IndexType,
typename SequenceType>
417 const SequenceType &seq) {
418 typename SequenceType::const_iterator seqIt;
419 for (seqIt = seq.begin(); seqIt != seq.end(); ++seqIt) {
421 IndexType idx = *seqIt;
427 template <
typename IndexType>
428 void calcVectParams(
const SparseIntVect<IndexType> &v1,
429 const SparseIntVect<IndexType> &v2,
double &v1Sum,
430 double &v2Sum,
double &andSum) {
431 if (v1.getLength() != v2.getLength()) {
434 v1Sum = v2Sum = andSum = 0.0;
437 typename SparseIntVect<IndexType>::StorageType::const_iterator iter1, iter2;
438 iter1 = v1.getNonzeroElements().begin();
439 if (iter1 != v1.getNonzeroElements().end()) {
440 v1Sum += abs(iter1->second);
442 iter2 = v2.getNonzeroElements().begin();
443 if (iter2 != v2.getNonzeroElements().end()) {
444 v2Sum += abs(iter2->second);
446 while (iter1 != v1.getNonzeroElements().end()) {
447 while (iter2 != v2.getNonzeroElements().end() &&
448 iter2->first < iter1->first) {
450 if (iter2 != v2.getNonzeroElements().end()) {
451 v2Sum += abs(iter2->second);
454 if (iter2 != v2.getNonzeroElements().end()) {
455 if (iter2->first == iter1->first) {
456 if (abs(iter2->second) < abs(iter1->second)) {
457 andSum += abs(iter2->second);
459 andSum += abs(iter1->second);
462 if (iter2 != v2.getNonzeroElements().end()) {
463 v2Sum += abs(iter2->second);
467 if (iter1 != v1.getNonzeroElements().end()) {
468 v1Sum += abs(iter1->second);
474 if (iter1 != v1.getNonzeroElements().end()) {
476 while (iter1 != v1.getNonzeroElements().end()) {
477 v1Sum += abs(iter1->second);
481 if (iter2 != v2.getNonzeroElements().end()) {
483 while (iter2 != v2.getNonzeroElements().end()) {
484 v2Sum += abs(iter2->second);
491 template <
typename IndexType>
494 bool returnDistance =
false,
double bounds = 0.0) {
500 if (!returnDistance && bounds > 0.0) {
503 double denom = v1Sum + v2Sum;
504 if (fabs(denom) < 1e-6) {
508 double minV = v1Sum < v2Sum ? v1Sum : v2Sum;
509 if (2. * minV / denom < bounds) {
518 calcVectParams(v1, v2, v1Sum, v2Sum, numer);
520 double denom = v1Sum + v2Sum;
522 if (fabs(denom) < 1e-6) {
525 sim = 2. * numer / denom;
527 if (returnDistance) {
534 template <
typename IndexType>
537 bool returnDistance =
false,
double bounds = 0.0) {
546 calcVectParams(v1, v2, v1Sum, v2Sum, andSum);
548 double denom = a * v1Sum + b * v2Sum + (1 - a - b) * andSum;
551 if (fabs(denom) < 1e-6) {
554 sim = andSum / denom;
556 if (returnDistance) {
563 template <
typename IndexType>
566 bool returnDistance =
false,
double bounds = 0.0) {
#define RDUNUSED_PARAM(x)
#define PRECONDITION(expr, mess)
const int ci_SPARSEINTVECT_VERSION
version number to use in pickles
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
a class for efficiently storing sparse vectors of ints
SparseIntVect< IndexType > & operator+=(int v)
SparseIntVect< IndexType > & operator/(int v)
SparseIntVect(IndexType length)
initialize with a particular length
unsigned int size() const
returns the length
const SparseIntVect< IndexType > operator+(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator*(int v)
SparseIntVect< IndexType > & operator+=(const SparseIntVect< IndexType > &other)
bool operator==(const SparseIntVect< IndexType > &v2) const
SparseIntVect(const SparseIntVect< IndexType > &other)
Copy constructor.
~SparseIntVect()=default
destructor (doesn't need to do anything)
SparseIntVect< IndexType > & operator|=(const SparseIntVect< IndexType > &other)
const SparseIntVect< IndexType > operator-(const SparseIntVect< IndexType > &other) const
const SparseIntVect< IndexType > operator|(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator/=(int v)
const SparseIntVect< IndexType > operator&(const SparseIntVect< IndexType > &other) const
SparseIntVect(const char *pkl, const unsigned int len)
constructor from a pickle
int operator[](IndexType idx) const
support indexing using []
void fromString(const std::string &txt)
SparseIntVect< IndexType > & operator*=(int v)
void setVal(IndexType idx, int val)
set the value at an index
SparseIntVect< IndexType > & operator&=(const SparseIntVect< IndexType > &other)
SparseIntVect & operator=(const SparseIntVect< IndexType > &other)
std::string toString() const
returns a binary string representation (pickle)
int getTotalVal(bool doAbs=false) const
SparseIntVect< IndexType > & operator-(int v)
std::map< IndexType, int > StorageType
SparseIntVect< IndexType > & operator-=(const SparseIntVect< IndexType > &other)
bool operator!=(const SparseIntVect< IndexType > &v2) const
SparseIntVect< IndexType > & operator-=(int v)
SparseIntVect(const std::string &pkl)
constructor from a pickle
SparseIntVect< IndexType > & operator+(int v)
IndexType getLength() const
returns the length
int getVal(IndexType idx) const
return the value at an index
const StorageType & getNonzeroElements() const
returns our nonzero elements as a map(IndexType->int)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
double TverskySimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, double a, double b, bool returnDistance=false, double bounds=0.0)
void updateFromSequence(SparseIntVect< IndexType > &vect, const SequenceType &seq)
double TanimotoSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
double DiceSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream