From ccf7e10f8a553a2c33db0ee5b4b1dd971f2472db Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Mon, 16 Sep 2019 18:48:05 +0200
Subject: [PATCH 1/8] SDRClassifier: fix precision by using Real64 for PDF

Without this, the scores never correctly converge (learn).
Thanks @Thanh-Binh for finding and solving this bug.
Additionally, add some docs to the class.
---
 src/htm/algorithms/SDRClassifier.hpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index 63f2221ef1..30630dcc3a 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -21,6 +21,13 @@
 /** @file
  * Definitions for the SDR Classifier & Predictor.
+ *
+ * `Classifier` learns mapping from SDR->input value (encoder's output). This is used when you need to "explain" the HTM network back to real-world, ie. mapping SDRs
+ * back to digits in MNIST digit classification task.
+ *
+ * `Predictor` has similar functionality for time-sequences where you want to "predict" N-steps ahead and then return real-world value.
+ * Internally it uses (several) Classifiers, and in nupic.core this used to be part for SDRClassifier, for htm.core this is a separate class Predictor.
+ *
  */

 #ifndef NTA_SDR_CLASSIFIER_HPP
@@ -43,7 +50,7 @@ namespace htm {
  *
  * See also: https://en.wikipedia.org/wiki/Probability_distribution
  */
-using PDF = std::vector;
+using PDF = std::vector; //Real64 (not Real/float) must be used here, otherwise precision is lost and Predictor never reaches sufficient results.

 /**
  * Returns the category with the greatest probablility.
@@ -179,7 +186,8 @@ using Predictions = std::map;
  * This class handles missing datapoints.
  *
  * Compatibility Note: This class is the replacement for the old SDRClassifier.
- * It no longer provides estimates of the actual value.
+ * It no longer provides estimates of the actual value. Instead, users can get a rough estimate
+ * from bucket-index. If more precision is needed, use more buckets in the encoder.
  *
  * Example Usage:
  *   // Predict 1 and 2 time steps into the future.

From d437a6a68557db55ef89741ec4e05a74d1420dea Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Mon, 16 Sep 2019 20:15:13 +0200
Subject: [PATCH 2/8] Classifier: more fixes

Use Real64 for weights_ too, and make some methods const.
---
 src/htm/algorithms/SDRClassifier.cpp | 12 ++++++------
 src/htm/algorithms/SDRClassifier.hpp |  9 ++++++---
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp
index 9904735c84..e75ec0f1e6 100644
--- a/src/htm/algorithms/SDRClassifier.cpp
+++ b/src/htm/algorithms/SDRClassifier.cpp
@@ -44,14 +44,15 @@ void Classifier::initialize(const Real alpha)
 }


-PDF Classifier::infer(const SDR & pattern)
+PDF Classifier::infer(const SDR & pattern) //TODO could be const
 {
   // Check input dimensions, or if this is the first time the Classifier has
   // been used then initialize it with the given SDR's dimensions.
   if( dimensions_.empty() ) {
     dimensions_ = pattern.dimensions;
     while( weights_.size() < pattern.size ) {
-      weights_.push_back( vector( numCategories_, 0.0f ));
+      const auto initialEmptyWeights = PDF( numCategories_, 0.0f );
+      weights_.push_back( initialEmptyWeights );
     }
   } else if( pattern.dimensions != dimensions_ ) {
     stringstream err_msg;
@@ -93,7 +94,7 @@ void Classifier::learn(const SDR &pattern, const vector &categoryIdxList)
   }

   // Compute errors and update weights.
-  const vector error = calculateError_(categoryIdxList, pattern);
+  const auto& error = calculateError_(categoryIdxList, pattern);
   for( const auto& bit : pattern.getSparse() ) {
     for(size_t i = 0u; i < numCategories_; i++) {
       weights_[bit][i] += alpha_ * error[i];
     }
   }
 }


 // Helper function to compute the error signal in learning.
-std::vector Classifier::calculateError_(
-    const std::vector &categoryIdxList, const SDR &pattern)
-{
+std::vector Classifier::calculateError_(const std::vector &categoryIdxList,
+                                        const SDR &pattern) const {
   // compute predicted likelihoods
   auto likelihoods = infer(pattern);

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index 30630dcc3a..f694295bd1 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -154,12 +154,13 @@ class Classifier : public Serializable
   /**
    * 2D map used to store the data.
    * Use as: weights_[ input-bit ][ category-index ]
+   * Real64 (not just Real) so the computations do not lose precision.
    */
-  std::vector> weights_;
+  std::vector> weights_;

   // Helper function to compute the error signal for learning.
-  std::vector calculateError_(const std::vector &bucketIdxList,
-                              const SDR &pattern);
+  std::vector calculateError_(const std::vector &bucketIdxList,
+                              const SDR &pattern) const;
 };

 /**
@@ -190,6 +191,7 @@ using Predictions = std::map;
  * from bucket-index. If more precision is needed, use more buckets in the encoder.
  *
  * Example Usage:
+ * ```
  *   // Predict 1 and 2 time steps into the future.
  *   // Make a sequence of 4 random SDRs. Each SDR has 1000 bits and 2% sparsity.
  *   vector sequence( 4, { 1000 } );
@@ -216,6 +218,7 @@
  *   Predictions B = pred.infer( 1, sequence[1] );
  *   argmax( B[1] )  ->  labels[2]
  *   argmax( B[2] )  ->  labels[3]
+ * ```
  */
 class Predictor : public Serializable
 {

From d34d0cd9c624e4f3f2fa16e70025a280e2a75a4f Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Mon, 16 Sep 2019 20:55:38 +0200
Subject: [PATCH 3/8] Classifier: make infer const

As inference should never change internal state.
---
 src/htm/algorithms/SDRClassifier.cpp | 24 ++++++++++++++----------
 src/htm/algorithms/SDRClassifier.hpp |  2 +-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp
index e75ec0f1e6..3e0956671e 100644
--- a/src/htm/algorithms/SDRClassifier.cpp
+++ b/src/htm/algorithms/SDRClassifier.cpp
@@ -44,16 +44,11 @@ void Classifier::initialize(const Real alpha)
 }


-PDF Classifier::infer(const SDR & pattern) //TODO could be const
-{
-  // Check input dimensions, or if this is the first time the Classifier has
-  // been used then initialize it with the given SDR's dimensions.
+PDF Classifier::infer(const SDR & pattern) const {
+  // Check input dimensions, or if this is the first time the Classifier is used and dimensions
+  // are unset, return zeroes.
   if( dimensions_.empty() ) {
-    dimensions_ = pattern.dimensions;
-    while( weights_.size() < pattern.size ) {
-      const auto initialEmptyWeights = PDF( numCategories_, 0.0f );
-      weights_.push_back( initialEmptyWeights );
-    }
+    return PDF(numCategories_, 0.0f); //empty
   } else if( pattern.dimensions != dimensions_ ) {
     stringstream err_msg;
     err_msg << "Classifier input SDR.dimensions mismatch: previously given SDR with dimensions ( ";
@@ -82,6 +77,15 @@ PDF Classifier::infer(const SDR & pattern)

 void Classifier::learn(const SDR &pattern, const vector &categoryIdxList)
 {
+  // If this is the first time the Classifier is being used, weights are empty,
+  // so we set the dimensions to that of the input `pattern`
+  if( dimensions_.empty() ) {
+    dimensions_ = pattern.dimensions;
+    while( weights_.size() < pattern.size ) {
+      const auto initialEmptyWeights = PDF( numCategories_, 0.0f );
+      weights_.push_back( initialEmptyWeights );
+    }
+  }
   // Check if this is a new category & resize the weights table to hold it.
   const auto maxCategoryIdx = *max_element(categoryIdxList.begin(), categoryIdxList.end());
   if( maxCategoryIdx >= numCategories_ ) {
@@ -167,7 +171,7 @@ void Predictor::reset() {

 Predictions Predictor::infer(const UInt recordNum, const SDR &pattern)
 {
-  updateHistory_( recordNum, pattern );
+  updateHistory_( recordNum, pattern ); //TODO should we update here in inference, that changes state? Infer could be stateless, thus const.

   Predictions result;
   for( const auto step : steps_ ) {

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index f694295bd1..94dd24458e 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -122,7 +122,7 @@ class Classifier : public Serializable
    * @returns: The Probablility Distribution Function (PDF) of the categories.
    *           This is indexed by the category label.
    */
-  PDF infer(const SDR & pattern);
+  PDF infer(const SDR & pattern) const;

   /**
    * Learn from example data.

From bf5123b2f9d6fe9a29e0cee47ee2991707e501cb Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Mon, 16 Sep 2019 21:28:53 +0200
Subject: [PATCH 4/8] Predictor: make infer const

This removes updateHistory_() from inference; the updateHistory_ code was moved
directly into learn() and the rest was split into a new method checkMonotonic_().
---
 src/htm/algorithms/SDRClassifier.cpp | 40 +++++++++++++---------------
 src/htm/algorithms/SDRClassifier.hpp |  7 ++---
 2 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp
index 3e0956671e..f08db5e64f 100644
--- a/src/htm/algorithms/SDRClassifier.cpp
+++ b/src/htm/algorithms/SDRClassifier.cpp
@@ -169,13 +169,12 @@ void Predictor::reset() {
 }


-Predictions Predictor::infer(const UInt recordNum, const SDR &pattern)
-{
-  updateHistory_( recordNum, pattern ); //TODO should we update here in inference, that changes state? Infer could be stateless, thus const.
+Predictions Predictor::infer(const UInt recordNum, const SDR &pattern) const {
+  checkMonotonic_(recordNum);

   Predictions result;
   for( const auto step : steps_ ) {
-    result[step] = classifiers_[step].infer( pattern );
+    result[step] = classifiers_.at(step).infer( pattern );
   }
   return result;
 }
@@ -184,7 +183,18 @@ Predictions Predictor::infer(const UInt recordNum, const SDR &pattern)
 void Predictor::learn(const UInt recordNum, const SDR &pattern,
                       const std::vector &bucketIdxList)
 {
-  updateHistory_( recordNum, pattern );
+  checkMonotonic_(recordNum);
+
+  // Update pattern history if this is a new record.
+  const UInt lastRecordNum = recordNumHistory_.empty() ? -1 : recordNumHistory_.back();
+  if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) {
+    patternHistory_.emplace_back( pattern );
+    recordNumHistory_.push_back(recordNum);
+    if (patternHistory_.size() > steps_.back() + 1u) {
+      patternHistory_.pop_front();
+      recordNumHistory_.pop_front();
+    }
+  }

   // Iterate through all recently given inputs, starting from the furthest in the past.
   auto pastPattern = patternHistory_.begin();
@@ -201,24 +211,10 @@ void Predictor::learn(const UInt recordNum, const SDR &pattern,
 }


-void Predictor::updateHistory_(const UInt recordNum, const SDR & pattern)
-{
+void Predictor::checkMonotonic_(const UInt recordNum) const {
   // Ensure that recordNum increases monotonically.
-  UInt lastRecordNum = -1;
+  const UInt lastRecordNum = recordNumHistory_.empty() ? -1 : recordNumHistory_.back();
   if( not recordNumHistory_.empty() ) {
-    lastRecordNum = recordNumHistory_.back();
-    if (recordNum < lastRecordNum) {
-      NTA_THROW << "The record number must increase monotonically.";
-    }
+    NTA_CHECK(recordNum >= lastRecordNum) << "The record number must increase monotonically.";
   }
-
-  // Update pattern history if this is a new record.
-  if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) {
-    patternHistory_.emplace_back( pattern );
-    recordNumHistory_.push_back(recordNum);
-    if (patternHistory_.size() > steps_.back() + 1u) {
-      patternHistory_.pop_front();
-      recordNumHistory_.pop_front();
-    }
-  }
 }

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index 94dd24458e..6fdd496df1 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -253,7 +253,7 @@ class Predictor : public Serializable
    *
    * @returns: A mapping from prediction step to PDF.
    */
-  Predictions infer(UInt recordNum, const SDR &pattern);
+  Predictions infer(const UInt recordNum, const SDR &pattern) const;

   /**
    * Learn from example data.
@@ -263,7 +263,8 @@ class Predictor : public Serializable
    * @param pattern: The active input SDR.
    * @param bucketIdxList: Vector of the current value bucket indices or categories.
    */
-  void learn(UInt recordNum, const SDR &pattern,
+  void learn(const UInt recordNum,
+             const SDR &pattern,
              const std::vector &bucketIdxList);

   CerealAdapter;
@@ -287,7 +288,7 @@ class Predictor : public Serializable

   // Stores the input pattern history, starting with the previous input.
   std::deque patternHistory_;
   std::deque recordNumHistory_;
-  void updateHistory_(UInt recordNum, const SDR & pattern);
+  void checkMonotonic_(UInt recordNum) const;

   // One per prediction step
   std::map classifiers_;

From 22d6664c021164f565376233d7bdc801f36f8b1a Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Tue, 17 Sep 2019 19:03:01 +0200
Subject: [PATCH 5/8] review: formatting

---
 src/htm/algorithms/SDRClassifier.hpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index 6fdd496df1..ca2e53dc50 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -22,11 +22,14 @@
 /** @file
  * Definitions for the SDR Classifier & Predictor.
  *
- * `Classifier` learns mapping from SDR->input value (encoder's output). This is used when you need to "explain" the HTM network back to real-world, ie. mapping SDRs
- * back to digits in MNIST digit classification task.
+ * `Classifier` learns mapping from SDR->input value (encoder's output).
+ * This is used when you need to "explain" the HTM network back to real-world,
+ * ie. mapping SDRs back to digits in MNIST digit classification task.
  *
- * `Predictor` has similar functionality for time-sequences where you want to "predict" N-steps ahead and then return real-world value.
- * Internally it uses (several) Classifiers, and in nupic.core this used to be part for SDRClassifier, for htm.core this is a separate class Predictor.
+ * `Predictor` has similar functionality for time-sequences
+ * where you want to "predict" N-steps ahead and then return real-world value.
+ * Internally it uses (several) Classifiers, and in nupic.core this used to be
+ * a part for SDRClassifier, for `htm.core` this is a separate class `Predictor`.
  *
  */

@@ -50,7 +53,8 @@ namespace htm {
  *
  * See also: https://en.wikipedia.org/wiki/Probability_distribution
  */
-using PDF = std::vector; //Real64 (not Real/float) must be used here, otherwise precision is lost and Predictor never reaches sufficient results.
+using PDF = std::vector; //Real64 (not Real/float) must be used here,
+// ... otherwise precision is lost and Predictor never reaches sufficient results.

 /**
  * Returns the category with the greatest probablility.

From 2aedac20dd57a3a31eb32ccf9254880a229e04de Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Thu, 19 Sep 2019 23:09:45 +0200
Subject: [PATCH 6/8] Predictor: use hashmap

---
 src/htm/algorithms/SDRClassifier.hpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index ca2e53dc50..58d739ce01 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -37,7 +37,7 @@
 #define NTA_SDR_CLASSIFIER_HPP

 #include
-#include
+#include
 #include
 #include

@@ -109,8 +109,11 @@ class Classifier : public Serializable
   /**
    * Constructor.
    *
-   * @param alpha - The alpha used to adapt the weight matrix during learning. A
-   *                larger alpha results in faster adaptation to the data.
+   * @param alpha - The alpha used to adapt the weight matrix during learning.
+   *                A larger alpha results in faster adaptation to the data.
+   *                Note: when SDRs are formed correctly, the classification task
+   *                for this class is quite easy, so you likely will never need to
+   *                optimize this parameter.
    */
   Classifier(Real alpha = 0.001f );

@@ -182,7 +185,7 @@ void softmax(PDF::iterator begin, PDF::iterator end);
  * The value is a PDF (probability distribution function, of the result being in
  * each bucket or category).
  */
-using Predictions = std::map;
+using Predictions = std::unordered_map;

 /**
  * The Predictor class does N-Step ahead predictions.
@@ -231,8 +234,9 @@ class Predictor : public Serializable
    * Constructor.
    *
    * @param steps - The number of steps into the future to learn and predict.
-   * @param alpha - The alpha used to adapt the weight matrix during learning. A
-   *                larger alpha results in faster adaptation to the data.
+   * @param alpha - The alpha used to adapt the weight matrix during learning.
+   *                A larger alpha results in faster adaptation to the data.
+   *                (The default value will likely be OK in most cases.)
    */
   Predictor(const std::vector &steps, Real alpha = 0.001f );

@@ -295,7 +299,7 @@ class Predictor : public Serializable
   void checkMonotonic_(UInt recordNum) const;

   // One per prediction step
-  std::map classifiers_;
+  std::unordered_map classifiers_;
 };      // End of Predictor class

From 4d8708dc3808dfb5c115d87052f2429b3f9d1f8c Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Thu, 19 Sep 2019 23:51:02 +0200
Subject: [PATCH 7/8] Classifier: simplify asserts

---
 src/htm/algorithms/SDRClassifier.cpp | 26 +++++++++-----------------
 src/htm/algorithms/SDRClassifier.hpp |  2 +-
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp
index f08db5e64f..fc34e75556 100644
--- a/src/htm/algorithms/SDRClassifier.cpp
+++ b/src/htm/algorithms/SDRClassifier.cpp
@@ -38,7 +38,7 @@ void Classifier::initialize(const Real alpha)
 {
   NTA_CHECK(alpha > 0.0f);
   alpha_ = alpha;
-  dimensions_.clear();
+  dimensions_ = 0;
   numCategories_ = 0u;
   weights_.clear();
 }
@@ -47,19 +47,9 @@ void Classifier::initialize(const Real alpha)
 PDF Classifier::infer(const SDR & pattern) const {
   // Check input dimensions, or if this is the first time the Classifier is used and dimensions
   // are unset, return zeroes.
-  if( dimensions_.empty() ) {
-    return PDF(numCategories_, 0.0f); //empty
-  } else if( pattern.dimensions != dimensions_ ) {
-    stringstream err_msg;
-    err_msg << "Classifier input SDR.dimensions mismatch: previously given SDR with dimensions ( ";
-    for( auto dim : dimensions_ )
-      { err_msg << dim << " "; }
-    err_msg << "), now given SDR with dimensions ( ";
-    for( auto dim : pattern.dimensions )
-      { err_msg << dim << " "; }
-    err_msg << ").";
-    NTA_THROW << err_msg.str();
-  }
+  NTA_CHECK( dimensions_ != 0 )
+    << "Classifier: must call `learn` before `infer`.";
+  NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!";

   // Accumulate feed forward input.
   PDF probabilities( numCategories_, 0.0f );

 void Classifier::learn(const SDR &pattern, const vector &categoryIdxList)
 {
   // If this is the first time the Classifier is being used, weights are empty,
   // so we set the dimensions to that of the input `pattern`
-  if( dimensions_.empty() ) {
-    dimensions_ = pattern.dimensions;
+  if( dimensions_ == 0 ) {
+    dimensions_ = pattern.size;
     while( weights_.size() < pattern.size ) {
       const auto initialEmptyWeights = PDF( numCategories_, 0.0f );
       weights_.push_back( initialEmptyWeights );
     }
   }
+  NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!";
+
   // Check if this is a new category & resize the weights table to hold it.
-  const auto maxCategoryIdx = *max_element(categoryIdxList.begin(), categoryIdxList.end());
+  const auto maxCategoryIdx = *max_element(categoryIdxList.cbegin(), categoryIdxList.cend());
   if( maxCategoryIdx >= numCategories_ ) {
     numCategories_ = maxCategoryIdx + 1;
     for( auto & vec : weights_ ) {

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index 58d739ce01..66daba5f57 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -155,7 +155,7 @@ class Classifier : public Serializable

 private:
   Real alpha_;
-  std::vector dimensions_;
+  UInt dimensions_;
   UInt numCategories_;

   /**

From df7ae4ed5fd920b4d82801059dfa511211a6aca4 Mon Sep 17 00:00:00 2001
From: Marek Otahal
Date: Fri, 20 Sep 2019 01:13:53 +0200
Subject: [PATCH 8/8] Predictor: remove recordNum arg, fixes

As recordNum is no longer needed for inference. API changes to bindings and
tests to reflect the change. Several fixes from earlier commits in this PR;
this fixes the segfaults.
---
 .../bindings/algorithms/py_SDRClassifier.cpp  |  8 +---
 .../tests/algorithms/sdr_classifier_test.py   | 37 ++++++++++---------
 src/htm/algorithms/SDRClassifier.cpp          | 23 +++++-------
 src/htm/algorithms/SDRClassifier.hpp          |  9 ++---
 .../unit/algorithms/SDRClassifierTest.cpp     | 12 +++---
 5 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp b/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp
index 1613c8ba7b..27dfe5f29c 100644
--- a/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp
+++ b/bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp
@@ -138,11 +138,11 @@ Example Usage:
     # Give the predictor partial information, and make predictions
     # about the future.
     pred.reset()
-    A = pred.infer( 0, sequence[0] )
+    A = pred.infer( sequence[0] )
     numpy.argmax( A[1] )  ->  labels[1]
     numpy.argmax( A[2] )  ->  labels[2]

-    B = pred.infer( 1, sequence[1] )
+    B = pred.infer( sequence[1] )
     numpy.argmax( B[1] )  ->  labels[2]
     numpy.argmax( B[2] )  ->  labels[3]
)");

R"(For use with time series datasets.)");

 py_Predictor.def("infer", &Predictor::infer,
R"(Compute the likelihoods.

-Argument recordNum is an incrementing integer for each record.
-Gaps in numbers correspond to missing records.

Argument pattern is the SDR containing the active input bits.

Returns a dictionary whos keys are prediction steps, and values are PDFs.
See help(Classifier.infer) for details about PDFs.)",
-    py::arg("recordNum"),
     py::arg("pattern"));

 py_Predictor.def("learn", &Predictor::learn,

diff --git a/bindings/py/tests/algorithms/sdr_classifier_test.py b/bindings/py/tests/algorithms/sdr_classifier_test.py
index 3519793e75..15225629ed 100644
--- a/bindings/py/tests/algorithms/sdr_classifier_test.py
+++ b/bindings/py/tests/algorithms/sdr_classifier_test.py
@@ -69,11 +69,11 @@ def testExampleUsage(self):
     # Give the predictor partial information, and make predictions
     # about the future.
     pred.reset()
-    A = pred.infer( 0, sequence[0] )
+    A = pred.infer( sequence[0] )
     assert( numpy.argmax( A[1] ) == labels[1] )
     assert( numpy.argmax( A[2] ) == labels[2] )

-    B = pred.infer( 1, sequence[1] )
+    B = pred.infer( sequence[1] )
     assert( numpy.argmax( B[1] ) == labels[2] )
     assert( numpy.argmax( B[2] ) == labels[3] )

@@ -121,7 +121,7 @@ def testSingleValue0Steps(self):
     for recordNum in range(10):
         pred.learn(recordNum, inp, 2)

-    retval = pred.infer( 10, inp )
+    retval = pred.infer( inp )
     self.assertGreater(retval[0][2], 0.9)

@@ -131,15 +131,18 @@ def testComputeInferOrLearnOnly(self):
     inp.randomize( .3 )

     # learn only
-    c.infer(recordNum=0, pattern=inp) # Don't crash with not enough training data.
+    with self.assertRaises(RuntimeError):
+        c.infer(pattern=inp) # crash with not enough training data.
     c.learn(recordNum=0, pattern=inp, classification=4)
-    c.infer(recordNum=1, pattern=inp) # Don't crash with not enough training data.
+    with self.assertRaises(RuntimeError):
+        c.infer(pattern=inp) # crash with not enough training data.
     c.learn(recordNum=2, pattern=inp, classification=4)
     c.learn(recordNum=3, pattern=inp, classification=4)
+    c.infer(pattern=inp) # Don't crash with not enough training data.

     # infer only
-    retval1 = c.infer(recordNum=5, pattern=inp)
-    retval2 = c.infer(recordNum=6, pattern=inp)
+    retval1 = c.infer(pattern=inp)
+    retval2 = c.infer(pattern=inp)
     self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))

@@ -164,7 +167,7 @@ def testComputeComplex(self):
                 classification=4,)

     inp.sparse = [1, 5, 9]
-    result = c.infer(recordNum=4, pattern=inp)
+    result = c.infer(pattern=inp)

     self.assertSetEqual(set(result.keys()), set([1]))
     self.assertEqual(len(result[1]), 6)

@@ -206,7 +209,7 @@ def testMultistepSingleValue(self):
     for recordNum in range(10):
         classifier.learn(recordNum, inp, 0)

-    retval = classifier.infer(10, inp)
+    retval = classifier.infer(inp)

     # Should have a probability of 100% for that bucket.
     self.assertEqual(retval[1], [1.])

@@ -221,7 +224,7 @@ def testMultistepSimple(self):
         inp.sparse = [i % 10]
         classifier.learn(recordNum=i, pattern=inp, classification=(i % 10))

-    retval = classifier.infer(99, inp)
+    retval = classifier.infer(inp)

     self.assertGreater(retval[1][0], 0.99)
     for i in range(1, 10):

@@ -267,7 +270,7 @@ def testMissingRecords(self):
     # At this point, we should have learned [1,3,5] => bucket 1
     #                                       [2,4,6] => bucket 2
     inp.sparse = [1, 3, 5]
-    result = c.infer(recordNum=recordNum, pattern=inp)
+    result = c.infer(pattern=inp)
     c.learn(recordNum=recordNum, pattern=inp, classification=2)
     recordNum += 1
     self.assertLess(result[1][0], 0.1)
     self.assertGreater(result[1][1], 0.9)
     self.assertLess(result[1][2], 0.1)

     inp.sparse = [2, 4, 6]
-    result = c.infer(recordNum=recordNum, pattern=inp)
+    result = c.infer(pattern=inp)
     c.learn(recordNum=recordNum, pattern=inp, classification=1)
     recordNum += 1
     self.assertLess(result[1][0], 0.1)

@@ -289,7 +292,7 @@ def testMissingRecords(self):
     # the previous learn associates with bucket 0
     recordNum += 1
     inp.sparse = [1, 3, 5]
-    result = c.infer(recordNum=recordNum, pattern=inp)
+    result = c.infer(pattern=inp)
     c.learn(recordNum=recordNum, pattern=inp, classification=0)
     recordNum += 1
     self.assertLess(result[1][0], 0.1)

@@ -300,7 +303,7 @@ def testMissingRecords(self):
     # the previous learn associates with bucket 0
     recordNum += 1
     inp.sparse = [2, 4, 6]
-    result = c.infer(recordNum=recordNum, pattern=inp)
+    result = c.infer(pattern=inp)
     c.learn(recordNum=recordNum, pattern=inp, classification=0)
     recordNum += 1
     self.assertLess(result[1][0], 0.1)

@@ -311,7 +314,7 @@ def testMissingRecords(self):
     # the previous learn associates with bucket 0
     recordNum += 1
     inp.sparse = [1, 3, 5]
-    result = c.infer(recordNum=recordNum, pattern=inp)
+    result = c.infer(pattern=inp)
     c.learn(recordNum=recordNum, pattern=inp, classification=0)
     recordNum += 1
     self.assertLess(result[1][0], 0.1)

@@ -548,8 +551,8 @@ def testMultiStepPredictions(self):
         c.learn(recordNum, pattern=SDR2, classification=1)
         recordNum += 1

-    result1 = c.infer(recordNum, SDR1)
-    result2 = c.infer(recordNum, SDR2)
+    result1 = c.infer(SDR1)
+    result2 = c.infer(SDR2)

     self.assertAlmostEqual(result1[0][0], 1.0, places=1)
     self.assertAlmostEqual(result1[0][1], 0.0, places=1)

diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp
index fc34e75556..45d6573b6d 100644
--- a/src/htm/algorithms/SDRClassifier.cpp
+++ b/src/htm/algorithms/SDRClassifier.cpp
@@ -161,28 +161,27 @@ void Predictor::reset() {
 }


-Predictions Predictor::infer(const UInt recordNum, const SDR &pattern) const {
-  checkMonotonic_(recordNum);
-
+Predictions Predictor::infer(const SDR &pattern) const {
   Predictions result;
   for( const auto step : steps_ ) {
-    result[step] = classifiers_.at(step).infer( pattern );
+    result.insert({step, classifiers_.at(step).infer( pattern )});
   }
   return result;
 }


-void Predictor::learn(const UInt recordNum, const SDR &pattern,
+void Predictor::learn(const UInt recordNum, //TODO make recordNum optional, autoincrement as steps
+                      const SDR &pattern,
                       const std::vector &bucketIdxList)
 {
   checkMonotonic_(recordNum);

   // Update pattern history if this is a new record.
-  const UInt lastRecordNum = recordNumHistory_.empty() ? -1 : recordNumHistory_.back();
+  const UInt lastRecordNum = recordNumHistory_.empty() ? 0 : recordNumHistory_.back();
   if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) {
     patternHistory_.emplace_back( pattern );
     recordNumHistory_.push_back(recordNum);
-    if (patternHistory_.size() > steps_.back() + 1u) {
+    if (patternHistory_.size() > steps_.back() + 1u) { //steps_ are sorted, so steps_.back() is the "oldest/deepest" N-th step (ie 10 of [1,2,10])
       patternHistory_.pop_front();
       recordNumHistory_.pop_front();
     }
@@ -191,13 +190,13 @@ void Predictor::learn(const UInt recordNum, const SDR &pattern,

   // Iterate through all recently given inputs, starting from the furthest in the past.
   auto pastPattern = patternHistory_.begin();
   auto pastRecordNum = recordNumHistory_.begin();
-  for( ; pastRecordNum != recordNumHistory_.end(); pastPattern++, pastRecordNum++ )
+  for( ; pastRecordNum != recordNumHistory_.cend(); pastPattern++, pastRecordNum++ )
   {
     const UInt nSteps = recordNum - *pastRecordNum;

     // Update weights.
     if( binary_search( steps_.begin(), steps_.end(), nSteps )) {
-      classifiers_[nSteps].learn( *pastPattern, bucketIdxList );
+      classifiers_.at(nSteps).learn( *pastPattern, bucketIdxList );
     }
   }
 }
@@ -205,8 +204,6 @@ void Predictor::learn(const UInt recordNum, const SDR &pattern,

 void Predictor::checkMonotonic_(const UInt recordNum) const {
   // Ensure that recordNum increases monotonically.
-  const UInt lastRecordNum = recordNumHistory_.empty() ? -1 : recordNumHistory_.back();
-  if( not recordNumHistory_.empty() ) {
-    NTA_CHECK(recordNum >= lastRecordNum) << "The record number must increase monotonically.";
-  }
+  const UInt lastRecordNum = recordNumHistory_.empty() ? 0 : recordNumHistory_.back();
+  NTA_CHECK(recordNum >= lastRecordNum) << "The record number must increase monotonically.";
 }

diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp
index 66daba5f57..7b662925e3 100644
--- a/src/htm/algorithms/SDRClassifier.hpp
+++ b/src/htm/algorithms/SDRClassifier.hpp
@@ -218,11 +218,11 @@ using Predictions = std::unordered_map;
  *   // Give the predictor partial information, and make predictions
  *   // about the future.
  *   pred.reset();
- *   Predictions A = pred.infer( 0, sequence[0] );
+ *   Predictions A = pred.infer( sequence[0] );
  *   argmax( A[1] )  ->  labels[1]
  *   argmax( A[2] )  ->  labels[2]
  *
- *   Predictions B = pred.infer( 1, sequence[1] );
+ *   Predictions B = pred.infer( sequence[1] );
  *   argmax( B[1] )  ->  labels[2]
  *   argmax( B[2] )  ->  labels[3]
  * ```
  */
@@ -254,14 +254,11 @@ class Predictor : public Serializable
   /**
    * Compute the likelihoods.
    *
-   * @param recordNum: An incrementing integer for each record. Gaps in
-   *                   numbers correspond to missing records.
-   *
    * @param pattern: The active input SDR.
    *
    * @returns: A mapping from prediction step to PDF.
    */
-  Predictions infer(const UInt recordNum, const SDR &pattern) const;
+  Predictions infer(const SDR &pattern) const;

   /**
    * Learn from example data.

diff --git a/src/test/unit/algorithms/SDRClassifierTest.cpp b/src/test/unit/algorithms/SDRClassifierTest.cpp
index 735b2ea03e..6f935d940a 100644
--- a/src/test/unit/algorithms/SDRClassifierTest.cpp
+++ b/src/test/unit/algorithms/SDRClassifierTest.cpp
@@ -81,11 +81,11 @@ TEST(SDRClassifierTest, ExampleUsagePredictor)
   // Give the predictor partial information, and make predictions
   // about the future.
   pred.reset();
-  Predictions A = pred.infer( 0, sequence[0] );
+  Predictions A = pred.infer( sequence[0] );
   ASSERT_EQ( argmax( A[1] ), labels[1] );
   ASSERT_EQ( argmax( A[2] ), labels[2] );

-  Predictions B = pred.infer( 1, sequence[1] );
+  Predictions B = pred.infer( sequence[1] );
   ASSERT_EQ( argmax( B[1] ), labels[2] );
   ASSERT_EQ( argmax( B[2] ), labels[3] );
 }
@@ -103,7 +103,7 @@ TEST(SDRClassifierTest, SingleValue) {
   for (UInt i = 0u; i < 10u; ++i) {
     c.learn( i, input1, bucketIdxList );
   }
-  Predictions result1 = c.infer( 10u, input1 );
+  Predictions result1 = c.infer( input1 );
   ASSERT_EQ( argmax( result1[1u] ), 4u ) << "Incorrect prediction for bucket 4";
@@ -138,7 +138,7 @@ TEST(SDRClassifierTest, ComputeComplex) {
   c.learn(1, input2, bucketIdxList2);
   c.learn(2, input3, bucketIdxList3);
   c.learn(3, input1, bucketIdxList4);
-  auto result = c.infer(4, input1);
+  auto result = c.infer(input1);

   // Check the one-step prediction
   ASSERT_EQ(result.size(), 1u)
@@ -211,7 +211,7 @@ TEST(SDRClassifierTest, SaveLoad) {
   // Measure and save some output.
   A.addNoise( 0.20f );      // Change two bits.
   c1.reset();
-  const auto c1_out = c1.infer( 0u, A );
+  const auto c1_out = c1.infer( A );

   // Save and load.
   stringstream ss;
   EXPECT_NO_THROW(c1.save(ss));
   Predictor c2;
   EXPECT_NO_THROW(c2.load(ss));

   // Expect identical results.
-  const auto c2_out = c2.infer( 0u, A );
+  const auto c2_out = c2.infer( A );
   ASSERT_EQ(c1_out, c2_out);
 }
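For reference, a minimal usage sketch of the API as it stands after patch 8: `Predictor::infer()` is now const and takes only the SDR pattern, while `learn()` still takes a `recordNum`, and `infer()` before any `learn()` is an error. This is not part of the patches; it only mirrors the Example Usage in the updated SDRClassifier.hpp docstring, and the include paths and the `htm::argmax` helper are assumed from the repository layout.

```cpp
#include <vector>
#include <htm/algorithms/SDRClassifier.hpp>
#include <htm/types/Sdr.hpp>

using namespace htm;

int main() {
  // A sequence of 4 random SDRs, each 1000 bits with 2% sparsity,
  // and a category label for each step (values taken from the docstring example).
  std::vector<SDR> sequence( 4, SDR({ 1000 }) );
  for( auto &sdr : sequence ) { sdr.randomize( 0.02f ); }
  const std::vector<UInt> labels = { 4, 5, 6, 7 };

  // Train a Predictor for 1 and 2 steps ahead; learn() keeps the recordNum argument.
  Predictor pred( std::vector<UInt>{ 1, 2 } );
  for( UInt i = 0; i < 4u; i++ ) {
    pred.learn( i, sequence[i], { labels[i] } );
  }

  // After patch 8, inference is stateless: no recordNum, const method.
  pred.reset();
  const Predictions A = pred.infer( sequence[0] );
  const UInt oneStep = argmax( A.at(1) );  // expected to match labels[1]
  const UInt twoStep = argmax( A.at(2) );  // expected to match labels[2]
  return (oneStep == labels[1] && twoStep == labels[2]) ? 0 : 1;
}
```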