Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotgym predictor, anomaly tests #675

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bindings/py/cpp_src/bindings/algorithms/py_SDRClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ Example Usage:
numpy.argmax( B[2] ) -> labels[3]
)");

py_Predictor.def(py::init<const std::vector<UInt> &, Real>(),
py_Predictor.def(py::init<const std::vector<StepsAheadT> &, Real>(),
R"(Argument steps is the number of steps into the future to learn and predict.
The Predictor accepts a list of steps.

Expand Down Expand Up @@ -182,7 +182,7 @@ This may also be a list for when the input has multiple categories.)",
py::arg("pattern"),
py::arg("classification"));

py_Predictor.def("learn", [](Predictor &self, UInt recordNum, const SDR &pattern, UInt categoryIdx)
py_Predictor.def("learn", [](Predictor &self, UInt recordNum, const SDR &pattern, UInt categoryIdx) //override for using UInt category, instead of {UInt}
{ self.learn( recordNum, pattern, {categoryIdx} ); },
py::arg("recordNum"),
py::arg("pattern"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Example usage:
TODO
)");

py::enum_<TemporalMemory::ANMode>(m, "ANMode")
py::enum_<TemporalMemory::ANMode>(m, "ANMode") //TODO currently htm.bindings.algorithms.ANMode, make ANMode part of algorithms.TemporalMemory
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: how could the enum ANMode be exposed as part of htm.bindings.algorithms.TemporalMemory.ANMode, instead of the current htm.bindings.algorithms.ANMode?

.value("DISABLED", TemporalMemory::ANMode::DISABLED)
.value("RAW", TemporalMemory::ANMode::RAW)
.value("LIKELIHOOD", TemporalMemory::ANMode::LIKELIHOOD)
Expand Down Expand Up @@ -372,6 +372,13 @@ R"()");
"Anomaly score updated with each TM::compute() call. "
);

py_HTM.def("setAnomalyMode", [](HTM_t &self, TemporalMemory::ANMode mode){
self.setAnomalyMode(mode);
},
"set anomaly mode used by TM.anomaly",
py::arg("mode") = TemporalMemory::ANMode::RAW
);

py_HTM.def("__str__",
[](HTM_t &self) {
std::stringstream buf;
Expand Down
21 changes: 20 additions & 1 deletion bindings/py/tests/algorithms/temporal_memory_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,30 @@ def testPredictiveCells(self):
_print("activeCols:"+str(len(activeColumnsA.sparse)))
_print("activeCells:"+str(len(tm.getActiveCells().sparse)))
_print("predictiveCells:"+str(len(predictiveCellsSDR.sparse)))


def testAnomaly(self):
"""test convergence of TM and quality of anomaly methods"""
from htm.bindings.algorithms import ANMode
tm = TM(columnDimensions=[2048], anomalyMode=ANMode.RAW) #FIXME likelihood is always 0.5?! .LIKELIHOOD)

modes = [ANMode.RAW, ANMode.LIKELIHOOD, ANMode.LOGLIKELIHOOD]
for mod in modes: #this block test convergence of TM and anomaly score for select mode
tm.setAnomalyMode(mod)
print("testing {}".format(mod))
inp = SDR([2048]).randomize(0.05) #starting SDR with 5% bits ON

#learn TM a bit, anomaly should be low
for _ in range(200):
inp.addNoise(0.02) #change 2% bits -> 98% overlap => anomaly should ideally be 2%
tm.compute(inp, learn=True)
self.assertLess(tm.anomaly, 0.08)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New test verifying that the TM actually learns and that the anomaly scores converge.




def _print(txt):
if debugPrint:
print(txt)

if __name__ == "__main__":
unittest.main()
unittest.main()
52 changes: 37 additions & 15 deletions src/examples/hotgym/HelloSPTP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "htm/algorithms/TemporalMemory.hpp"
#include "htm/algorithms/SpatialPooler.hpp"
#include "htm/encoders/RandomDistributedScalarEncoder.hpp"
#include "htm/algorithms/AnomalyLikelihood.hpp"
#include "htm/algorithms/SDRClassifier.hpp" // Classifier, Predictor

#include "htm/types/Sdr.hpp"
#include "htm/utils/Random.hpp"
Expand All @@ -36,12 +36,23 @@ namespace examples {
using namespace std;
using namespace htm;

/**
* helper to transform (Real) data to categories (UInt) for Classifier/Predictor
**/
UInt realToCategory_(const Real r) {
return static_cast<UInt>((r+1.0f /*map sin(x):[-1,1] map it to [0,2]*/)*100); //precision on 2 dec places
}
Real categoryToReal_(const UInt bin) {
const Real rl = static_cast<Real>((bin/100.0f)-1.0f);
std::cout << "RL =" << bin << std::endl;
return rl;
}

// work-load
Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool useTM, const UInt COLS, const UInt DIM_INPUT, const UInt CELLS)
{
#ifndef NDEBUG
EPOCHS = 2; // make test faster in Debug
EPOCHS = 10; // make test faster in Debug
#endif

#if defined __aarch64__ || defined __arm__
Expand Down Expand Up @@ -75,8 +86,10 @@ EPOCHS = 2; // make test faster in Debug
Random rnd(42); //uses fixed seed for deterministic output checks

TemporalMemory tm(vector<UInt>{COLS}, CELLS);
tm.setAnomalyMode(TemporalMemory::ANMode::RAW); //set other modes here

Predictor pred( {0,100}); //predict 0 (=classify current), 100 steps ahead

AnomalyLikelihood anLikelihood;
tInit.stop();

// data for processing input
Expand All @@ -85,7 +98,7 @@ EPOCHS = 2; // make test faster in Debug
SDR outSPlocal(spLocal.getColumnDimensions()); //for SPlocal
SDR outSP(vector<UInt>{COLS});
SDR outTM(spGlobal.getColumnDimensions());
Real an = 0.0f, anLikely = 0.0f; //for anomaly:
Real an = 0.0f; //for anomaly:
MovingAverage avgAnom10(1000); //chose the window large enough so there's (some) periodicity in the patter, so TM can learn something

//metrics
Expand All @@ -98,9 +111,6 @@ EPOCHS = 2; // make test faster in Debug
* For example: fn = sin(x) -> periodic >= 2Pi ~ 6.3 && x+=0.01 -> 630 steps to 1st period -> window >= 630
*/
Real avgAnomOld_ = 1.0;
NTA_CHECK(avgAnomOld_ >= avgAnom10.getCurrentAvg()) << "TM should learn and avg anomalies improve, but we got: "
<< avgAnomOld_ << " and now: " << avgAnom10.getCurrentAvg(); //invariant


// Start a stopwatch timer
printf("starting: %d iterations.", EPOCHS);
Expand All @@ -113,11 +123,13 @@ EPOCHS = 2; // make test faster in Debug
//Encode
tEnc.start();
x+=0.01f; //step size for fn(x)
enc.encode(sin(x), input); //model sin(x) function //TODO replace with CSV data
// cout << x << "\n" << sin(x) << "\n" << input << "\n\n";
const Real data = sin(x);
enc.encode(data, input); //model sin(x) function //TODO replace with CSV data
// cout << x << "\n" << data << "\n" << input << "\n\n";
tEnc.stop();

tRng.start();
//TODO this is dropout:
input.addNoise(0.01f, rnd); //change 1% of the SDR for each iteration, this makes a random sequence, but seemingly stable
tRng.stop();

Expand Down Expand Up @@ -148,19 +160,25 @@ EPOCHS = 2; // make test faster in Debug
//Anomaly (pure x likelihood)
an = tm.anomaly;
avgAnom10.compute(an); //moving average
if(e % 1000 == 0) {
if(e % (500 + avgAnom10.getData().size() /*size of avg window for an*/) == 0) {
NTA_CHECK(avgAnomOld_ >= avgAnom10.getCurrentAvg()) << "TM should learn and avg anomalies improve, but we got: "
<< avgAnomOld_ << " and now: " << avgAnom10.getCurrentAvg(); //invariant
avgAnomOld_ = avgAnom10.getCurrentAvg(); //update
}
tAnLikelihood.start();
anLikelihood.anomalyProbability(an); //FIXME AnLikelihood is 0.0, probably not working correctly
tAnLikelihood.stop();

//Classifier, Predictor
tCls.start();
const auto label = realToCategory_(data);
pred.learn(e, outTM, { label }); //FIXME fails with bad_alloc if label is too large! PDF should use map, instead of a vector
const auto recovered = categoryToReal_(argmax(pred.infer(outTM)[0]));
NTA_CHECK(label == recovered);
tCls.stop();


// print
if (e == EPOCHS - 1) {
tAll.stop();
pred.reset();

//print connections stats
cout << "\nInput :\n" << statsInput
Expand All @@ -176,10 +194,14 @@ EPOCHS = 2; // make test faster in Debug
cout << "Epoch = " << e << endl;
cout << "Anomaly = " << an << endl;
cout << "Anomaly (avg) = " << avgAnom10.getCurrentAvg() << endl;
cout << "Anomaly (Likelihood) = " << anLikely << endl;
cout << "SP (g)= " << outSP << endl;
cout << "SP (l)= " << outSPlocal <<endl;
cout << "TM= " << outTM << endl;
cout << "Cls[0]= " << categoryToReal_(argmax(pred.infer(outTM)[0])) << endl;
cout << "Cls[100]= " << categoryToReal_(argmax(pred.infer(outTM)[100])) << endl;

NTA_CHECK( categoryToReal_(argmax(pred.infer(outTM)[0])) != -1) << "Classifier did not learn"; //FIXME Predictor is not learning, this should be ~ sin(49.99)
Copy link
Member Author

@breznak breznak Sep 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FIXME: this is always 0 — the predictor/classifier is not learning, although the unit tests are passing.
@Thanh-Binh PR #667 should have fixed the symptoms you described, but my predictor is still failing here.
Is this the same issue as the one you described? Does it replicate your (failing) experiments?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Thanh-Binh ping — could you please have a look at this PR and the test here? I'm getting 0s from the Classifier inference and I'm not sure why.



//timers
cout << "==============TIMERS============" << endl;
Expand All @@ -189,7 +211,7 @@ EPOCHS = 2; // make test faster in Debug
if(useSPlocal) cout << "SP (l):\t" << tSPloc.getElapsed()*1.0f << endl;
if(useSPglobal) cout << "SP (g):\t" << tSPglob.getElapsed() << endl;
if(useTM) cout << "TM:\t" << tTM.getElapsed() << endl;
cout << "AN:\t" << tAnLikelihood.getElapsed() << endl;
cout << "Cls:\t" << tCls.getElapsed() << endl;

// check deterministic SP, TM output
SDR goldEnc({DIM_INPUT});
Expand Down
2 changes: 1 addition & 1 deletion src/examples/hotgym/HelloSPTP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class BenchmarkHotgym {
);

//timers
Timer tInit, tAll, tRng, tEnc, tSPloc, tSPglob, tTM, tAnLikelihood;
Timer tInit, tAll, tRng, tEnc, tSPloc, tSPglob, tTM, tCls;
};

} //-ns
Expand Down
9 changes: 6 additions & 3 deletions src/htm/algorithms/SDRClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ void Classifier::learn(const SDR &pattern, const vector<UInt> &categoryIdxList)
NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!";

// Check if this is a new category & resize the weights table to hold it.
const auto maxCategoryIdx = *max_element(categoryIdxList.cbegin(), categoryIdxList.cend());
const size_t maxCategoryIdx = *max_element(categoryIdxList.cbegin(), categoryIdxList.cend());
NTA_CHECK(maxCategoryIdx < 1000) << "TODO for now we only support limited number of labels (<1000).";

if( maxCategoryIdx >= numCategories_ ) {
numCategories_ = maxCategoryIdx + 1;
for( auto & vec : weights_ ) {
Expand Down Expand Up @@ -140,10 +142,11 @@ void htm::softmax(PDF::iterator begin, PDF::iterator end) {
/******************************************************************************/


Predictor::Predictor(const vector<UInt> &steps, const Real alpha)
Predictor::Predictor(const vector<StepsAheadT> &steps, const Real alpha)
{ initialize(steps, alpha); }

void Predictor::initialize(const vector<UInt> &steps, const Real alpha)

void Predictor::initialize(const vector<StepsAheadT> &steps, const Real alpha)
{
NTA_CHECK( not steps.empty() ) << "Required argument steps is empty!";
steps_ = steps;
Expand Down
18 changes: 10 additions & 8 deletions src/htm/algorithms/SDRClassifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ class Classifier : public Serializable
* Learn from example data.
*
* @param pattern: The active input bit SDR.
* @param categoryIdxList: The current categories or bucket indices.
* @param categoryIdxList: The current categories (or bucket indices)
* that should be associated with this pattern.
*/
void learn(const SDR & pattern, const std::vector<UInt> & categoryIdxList);

Expand All @@ -157,7 +158,7 @@ class Classifier : public Serializable
private:
Real alpha_;
UInt dimensions_;
UInt numCategories_;
size_t numCategories_;

/**
* 2D map used to store the data.
Expand All @@ -181,12 +182,13 @@ void softmax(PDF::iterator begin, PDF::iterator end);
/******************************************************************************/


using StepsAheadT = unsigned short;
/**
* The key is the step, for predicting multiple time steps into the future.
* The value is a PDF (probability distribution function, of the result being in
* each bucket or category).
*/
using Predictions = std::unordered_map<UInt, PDF>;
using Predictions = std::unordered_map<StepsAheadT, PDF>;

/**
* The Predictor class does N-Step ahead predictions.
Expand All @@ -210,7 +212,7 @@ using Predictions = std::unordered_map<UInt, PDF>;
* vector<UInt> labels = { 4, 5, 6, 7 };
*
* // Make a Predictor and train it.
* Predictor pred( vector<UInt>{ 1, 2 } );
* Predictor pred( vector<StepsAheadT>{ 1, 2 } );
* pred.learn( 0, sequence[0], { labels[0] } );
* pred.learn( 1, sequence[1], { labels[1] } );
* pred.learn( 2, sequence[2], { labels[2] } );
Expand Down Expand Up @@ -239,13 +241,13 @@ class Predictor : public Serializable
* A larger alpha results in faster adaptation to the data.
* (The default value will likely be OK in most cases.)
*/
Predictor(const std::vector<UInt> &steps, Real alpha = 0.001f );
Predictor(const std::vector<StepsAheadT> &steps, Real alpha = 0.001f );

/**
* Constructor for use when deserializing.
*/
Predictor() {}
void initialize(const std::vector<UInt> &steps, Real alpha = 0.001f );
void initialize(const std::vector<StepsAheadT> &steps, Real alpha = 0.001f );

/**
* For use with time series datasets.
Expand Down Expand Up @@ -289,15 +291,15 @@ class Predictor : public Serializable

private:
// The list of prediction steps to learn and infer.
std::vector<UInt> steps_;
std::vector<StepsAheadT> steps_;

// Stores the input pattern history, starting with the previous input.
std::deque<SDR> patternHistory_;
std::deque<UInt> recordNumHistory_;
void checkMonotonic_(UInt recordNum) const;

// One per prediction step
std::unordered_map<UInt, Classifier> classifiers_;
std::unordered_map<StepsAheadT, Classifier> classifiers_;

}; // End of Predictor class

Expand Down
6 changes: 5 additions & 1 deletion src/htm/algorithms/TemporalMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ void TemporalMemory::reset(void) {
activeSegments_.clear();
matchingSegments_.clear();
segmentsValid_ = false;
tmAnomaly_.anomaly_ = -1.0f; //TODO reset rather to 0.5 as default (undecided) anomaly
tmAnomaly_.reset();
}

// ==============================
Expand Down Expand Up @@ -718,6 +718,10 @@ SynapseIdx TemporalMemory::getMaxSynapsesPerSegment() const {

UInt TemporalMemory::version() const { return TM_VERSION; }

void TemporalMemory::setAnomalyMode(ANMode mode) {
tmAnomaly_.reset();
tmAnomaly_.mode_ = mode;
}

static set<pair<CellIdx, SynapseIdx>>
getComparableSegmentSet(const Connections &connections,
Expand Down
Loading