Skip to content

Commit

Permalink
Merge pull request #667 from htm-community/predictor_precision_fix
Browse files Browse the repository at this point in the history
SDRClassifier: fix precision by using Real64 for PDF
  • Loading branch information
breznak authored Sep 20, 2019
2 parents a9fbd62 + df7ae4e commit 1565a50
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 105 deletions.
8 changes: 2 additions & 6 deletions bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,11 @@ Example Usage:
# Give the predictor partial information, and make predictions
# about the future.
pred.reset()
A = pred.infer( 0, sequence[0] )
A = pred.infer( sequence[0] )
numpy.argmax( A[1] ) -> labels[1]
numpy.argmax( A[2] ) -> labels[2]
B = pred.infer( 1, sequence[1] )
B = pred.infer( sequence[1] )
numpy.argmax( B[1] ) -> labels[2]
numpy.argmax( B[2] ) -> labels[3]
)");
Expand All @@ -162,14 +162,10 @@ R"(For use with time series datasets.)");
py_Predictor.def("infer", &Predictor::infer,
R"(Compute the likelihoods.
Argument recordNum is an incrementing integer for each record.
Gaps in numbers correspond to missing records.
Argument pattern is the SDR containing the active input bits.
Returns a dictionary whose keys are prediction steps, and values are PDFs.
See help(Classifier.infer) for details about PDFs.)",
py::arg("recordNum"),
py::arg("pattern"));

py_Predictor.def("learn", &Predictor::learn,
Expand Down
37 changes: 20 additions & 17 deletions bindings/py/tests/algorithms/sdr_classifier_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ def testExampleUsage(self):
# Give the predictor partial information, and make predictions
# about the future.
pred.reset()
A = pred.infer( 0, sequence[0] )
A = pred.infer( sequence[0] )
assert( numpy.argmax( A[1] ) == labels[1] )
assert( numpy.argmax( A[2] ) == labels[2] )

B = pred.infer( 1, sequence[1] )
B = pred.infer( sequence[1] )
assert( numpy.argmax( B[1] ) == labels[2] )
assert( numpy.argmax( B[2] ) == labels[3] )

Expand Down Expand Up @@ -121,7 +121,7 @@ def testSingleValue0Steps(self):
for recordNum in range(10):
pred.learn(recordNum, inp, 2)

retval = pred.infer( 10, inp )
retval = pred.infer( inp )
self.assertGreater(retval[0][2], 0.9)


Expand All @@ -131,15 +131,18 @@ def testComputeInferOrLearnOnly(self):
inp.randomize( .3 )

# learn only
c.infer(recordNum=0, pattern=inp) # Don't crash with not enough training data.
with self.assertRaises(RuntimeError):
c.infer(pattern=inp) # crash with not enough training data.
c.learn(recordNum=0, pattern=inp, classification=4)
c.infer(recordNum=1, pattern=inp) # Don't crash with not enough training data.
with self.assertRaises(RuntimeError):
c.infer(pattern=inp) # crash with not enough training data.
c.learn(recordNum=2, pattern=inp, classification=4)
c.learn(recordNum=3, pattern=inp, classification=4)
c.infer(pattern=inp) # Don't crash with not enough training data.

# infer only
retval1 = c.infer(recordNum=5, pattern=inp)
retval2 = c.infer(recordNum=6, pattern=inp)
retval1 = c.infer(pattern=inp)
retval2 = c.infer(pattern=inp)
self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))


Expand All @@ -164,7 +167,7 @@ def testComputeComplex(self):
classification=4,)

inp.sparse = [1, 5, 9]
result = c.infer(recordNum=4, pattern=inp)
result = c.infer(pattern=inp)

self.assertSetEqual(set(result.keys()), set([1]))
self.assertEqual(len(result[1]), 6)
Expand Down Expand Up @@ -206,7 +209,7 @@ def testMultistepSingleValue(self):
for recordNum in range(10):
classifier.learn(recordNum, inp, 0)

retval = classifier.infer(10, inp)
retval = classifier.infer(inp)

# Should have a probability of 100% for that bucket.
self.assertEqual(retval[1], [1.])
Expand All @@ -221,7 +224,7 @@ def testMultistepSimple(self):
inp.sparse = [i % 10]
classifier.learn(recordNum=i, pattern=inp, classification=(i % 10))

retval = classifier.infer(99, inp)
retval = classifier.infer(inp)

self.assertGreater(retval[1][0], 0.99)
for i in range(1, 10):
Expand Down Expand Up @@ -267,15 +270,15 @@ def testMissingRecords(self):
# At this point, we should have learned [1,3,5] => bucket 1
# [2,4,6] => bucket 2
inp.sparse = [1, 3, 5]
result = c.infer(recordNum=recordNum, pattern=inp)
result = c.infer(pattern=inp)
c.learn(recordNum=recordNum, pattern=inp, classification=2)
recordNum += 1
self.assertLess(result[1][0], 0.1)
self.assertGreater(result[1][1], 0.9)
self.assertLess(result[1][2], 0.1)

inp.sparse = [2, 4, 6]
result = c.infer(recordNum=recordNum, pattern=inp)
result = c.infer(pattern=inp)
c.learn(recordNum=recordNum, pattern=inp, classification=1)
recordNum += 1
self.assertLess(result[1][0], 0.1)
Expand All @@ -289,7 +292,7 @@ def testMissingRecords(self):
# the previous learn associates with bucket 0
recordNum += 1
inp.sparse = [1, 3, 5]
result = c.infer(recordNum=recordNum, pattern=inp)
result = c.infer(pattern=inp)
c.learn(recordNum=recordNum, pattern=inp, classification=0)
recordNum += 1
self.assertLess(result[1][0], 0.1)
Expand All @@ -300,7 +303,7 @@ def testMissingRecords(self):
# the previous learn associates with bucket 0
recordNum += 1
inp.sparse = [2, 4, 6]
result = c.infer(recordNum=recordNum, pattern=inp)
result = c.infer(pattern=inp)
c.learn(recordNum=recordNum, pattern=inp, classification=0)
recordNum += 1
self.assertLess(result[1][0], 0.1)
Expand All @@ -311,7 +314,7 @@ def testMissingRecords(self):
# the previous learn associates with bucket 0
recordNum += 1
inp.sparse = [1, 3, 5]
result = c.infer(recordNum=recordNum, pattern=inp)
result = c.infer(pattern=inp)
c.learn(recordNum=recordNum, pattern=inp, classification=0)
recordNum += 1
self.assertLess(result[1][0], 0.1)
Expand Down Expand Up @@ -548,8 +551,8 @@ def testMultiStepPredictions(self):
c.learn(recordNum, pattern=SDR2, classification=1)
recordNum += 1

result1 = c.infer(recordNum, SDR1)
result2 = c.infer(recordNum, SDR2)
result1 = c.infer(SDR1)
result2 = c.infer(SDR2)

self.assertAlmostEqual(result1[0][0], 1.0, places=1)
self.assertAlmostEqual(result1[0][1], 0.0, places=1)
Expand Down
97 changes: 43 additions & 54 deletions src/htm/algorithms/SDRClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,32 +38,18 @@ void Classifier::initialize(const Real alpha)
{
NTA_CHECK(alpha > 0.0f);
alpha_ = alpha;
dimensions_.clear();
dimensions_ = 0;
numCategories_ = 0u;
weights_.clear();
}


PDF Classifier::infer(const SDR & pattern)
{
// Check input dimensions, or if this is the first time the Classifier has
// been used then initialize it with the given SDR's dimensions.
if( dimensions_.empty() ) {
dimensions_ = pattern.dimensions;
while( weights_.size() < pattern.size ) {
weights_.push_back( vector<Real>( numCategories_, 0.0f ));
}
} else if( pattern.dimensions != dimensions_ ) {
stringstream err_msg;
err_msg << "Classifier input SDR.dimensions mismatch: previously given SDR with dimensions ( ";
for( auto dim : dimensions_ )
{ err_msg << dim << " "; }
err_msg << "), now given SDR with dimensions ( ";
for( auto dim : pattern.dimensions )
{ err_msg << dim << " "; }
err_msg << ").";
NTA_THROW << err_msg.str();
}
PDF Classifier::infer(const SDR & pattern) const {
// Check input dimensions, or if this is the first time the Classifier is used and dimensions
// are unset, return zeroes.
NTA_CHECK( dimensions_ != 0 )
<< "Classifier: must call `learn` before `infer`.";
NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!";

// Accumulate feed forward input.
PDF probabilities( numCategories_, 0.0f );
Expand All @@ -81,8 +67,19 @@ PDF Classifier::infer(const SDR & pattern)

void Classifier::learn(const SDR &pattern, const vector<UInt> &categoryIdxList)
{
// If this is the first time the Classifier is being used, weights are empty,
// so we set the dimensions to that of the input `pattern`
if( dimensions_ == 0 ) {
dimensions_ = pattern.size;
while( weights_.size() < pattern.size ) {
const auto initialEmptyWeights = PDF( numCategories_, 0.0f );
weights_.push_back( initialEmptyWeights );
}
}
NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!";

// Check if this is a new category & resize the weights table to hold it.
const auto maxCategoryIdx = *max_element(categoryIdxList.begin(), categoryIdxList.end());
const auto maxCategoryIdx = *max_element(categoryIdxList.cbegin(), categoryIdxList.cend());
if( maxCategoryIdx >= numCategories_ ) {
numCategories_ = maxCategoryIdx + 1;
for( auto & vec : weights_ ) {
Expand All @@ -93,7 +90,7 @@ void Classifier::learn(const SDR &pattern, const vector<UInt> &categoryIdxList)
}

// Compute errors and update weights.
const vector<Real> error = calculateError_(categoryIdxList, pattern);
const auto& error = calculateError_(categoryIdxList, pattern);
for( const auto& bit : pattern.getSparse() ) {
for(size_t i = 0u; i < numCategories_; i++) {
weights_[bit][i] += alpha_ * error[i];
Expand All @@ -103,9 +100,8 @@ void Classifier::learn(const SDR &pattern, const vector<UInt> &categoryIdxList)


// Helper function to compute the error signal in learning.
std::vector<Real> Classifier::calculateError_(
const std::vector<UInt> &categoryIdxList, const SDR &pattern)
{
std::vector<Real64> Classifier::calculateError_(const std::vector<UInt> &categoryIdxList,
const SDR &pattern) const {
// compute predicted likelihoods
auto likelihoods = infer(pattern);

Expand Down Expand Up @@ -165,56 +161,49 @@ void Predictor::reset() {
}


/**
 * Compute the predicted likelihoods for every configured prediction step.
 *
 * @param pattern  SDR containing the currently active input bits.
 * @return Predictions: map from step -> PDF produced by that step's classifier.
 *
 * NOTE(review): this span of the scraped diff contained both the pre- and
 * post-commit lines interleaved; this is the post-commit (const) version,
 * reconstructed so the span is valid C++.
 */
Predictions Predictor::infer(const SDR &pattern) const {
  Predictions result;
  for( const auto step : steps_ ) {
    // Use .at() + insert: keeps this method const-correct, since map::operator[]
    // is non-const and would default-construct missing entries.
    result.insert({step, classifiers_.at(step).infer( pattern )});
  }
  return result;
}


void Predictor::learn(const UInt recordNum, const SDR &pattern,
void Predictor::learn(const UInt recordNum, //TODO make recordNum optional, autoincrement as steps
const SDR &pattern,
const std::vector<UInt> &bucketIdxList)
{
updateHistory_( recordNum, pattern );
checkMonotonic_(recordNum);

// Update pattern history if this is a new record.
const UInt lastRecordNum = recordNumHistory_.empty() ? 0 : recordNumHistory_.back();
if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) {
patternHistory_.emplace_back( pattern );
recordNumHistory_.push_back(recordNum);
if (patternHistory_.size() > steps_.back() + 1u) { //steps_ are sorted, so steps_.back() is the "oldest/deepest" N-th step (ie 10 of [1,2,10])
patternHistory_.pop_front();
recordNumHistory_.pop_front();
}
}

// Iterate through all recently given inputs, starting from the furthest in the past.
auto pastPattern = patternHistory_.begin();
auto pastRecordNum = recordNumHistory_.begin();
for( ; pastRecordNum != recordNumHistory_.end(); pastPattern++, pastRecordNum++ )
for( ; pastRecordNum != recordNumHistory_.cend(); pastPattern++, pastRecordNum++ )
{
const UInt nSteps = recordNum - *pastRecordNum;

// Update weights.
if( binary_search( steps_.begin(), steps_.end(), nSteps )) {
classifiers_[nSteps].learn( *pastPattern, bucketIdxList );
classifiers_.at(nSteps).learn( *pastPattern, bucketIdxList );
}
}
}


void Predictor::updateHistory_(const UInt recordNum, const SDR & pattern)
{
void Predictor::checkMonotonic_(const UInt recordNum) const {
// Ensure that recordNum increases monotonically.
UInt lastRecordNum = -1;
if( not recordNumHistory_.empty() ) {
lastRecordNum = recordNumHistory_.back();
if (recordNum < lastRecordNum) {
NTA_THROW << "The record number must increase monotonically.";
}
}

// Update pattern history if this is a new record.
if (recordNumHistory_.size() == 0u || recordNum > lastRecordNum) {
patternHistory_.emplace_back( pattern );
recordNumHistory_.push_back(recordNum);
if (patternHistory_.size() > steps_.back() + 1u) {
patternHistory_.pop_front();
recordNumHistory_.pop_front();
}
}
const UInt lastRecordNum = recordNumHistory_.empty() ? 0 : recordNumHistory_.back();
NTA_CHECK(recordNum >= lastRecordNum) << "The record number must increase monotonically.";
}
Loading

0 comments on commit 1565a50

Please sign in to comment.