htm-community · breznak · Oct 22, 2019 · Oct 22, 2019 · Oct 22, 2019 · Oct 22, 2019
diff --git a/py/htm/encoders/eye.py b/py/htm/encoders/eye.py
@@ -158,7 +158,7 @@ def __init__(self, input_shape, num_samples, sparsity,
 
     def encode(self, img):
         """Returns a dense boolean np.ndarray."""
-        assert(img.shape == self.input_shape)
+        assert(img.shape == self.input_shape),print("Channel: img must have same dims as input_shape:", img.shape, self.input_shape)
         assert(img.dtype == self.dtype)
         if self.wrap:
             img += self.offsets
@@ -177,6 +177,11 @@ class Eye:
     Simulates functionality of eye's retinal parvocellular(P-cells),
     and magnocellular(M-cells) pathways, at the saccadic steps. 
 
+    Based on OpenCV's cv2.bioinspired.Retina model: 
+    https://docs.opencv.org/3.4/d2/d94/bioinspired_retina.html
+    http://web.iitd.ac.in/~sumeet/Modelling_Vision.pdf
+
+
     On high level, 
     magno cells: 
       - detect change in temporal information in the image, ie motion 
@@ -218,7 +223,7 @@ class Eye:
 
 
     def __init__(self,
-        output_diameter   = 200, # output SDR size is diameter^2
+        output_diameter   = 200, # fovea image size, also approximately output SDR size (= diameter^2)
         sparsityParvo     = 0.2,
         sparsityMagno     = 0.025,
         color             = True,):
@@ -238,45 +243,44 @@ def __init__(self,
             motion detection and motion tracking, video processing.
             For details see @param `sparsityParvo`.
             TODO: output of M-cells should be processed on a fast TM.
-        Argument color: use color vision (requires P-cells > 0), default true.
+        Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster)
         """
         self.output_diameter   = output_diameter
         # Argument resolution_factor is used to expand the sensor array so that
         # the fovea has adequate resolution.  After log-polar transform image
         # is reduced by this factor back to the output_diameter.
-        self.resolution_factor = 3
-        self.retina_diameter   = int(self.resolution_factor * output_diameter)
-        # Argument fovea_scale  ... represents "zoom" aka distance from the object/image.
-        self.fovea_scale       = 0.177
+        resolution_factor = 2
         assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number.
-        assert(self.retina_diameter // 2 * 2 == self.retina_diameter) # (Resolution Factor X Diameter) must be an even number.
         assert(sparsityParvo >= 0 and sparsityParvo <= 1.0)
-        if sparsityParvo > 0:
-          assert(sparsityParvo * (self.retina_diameter **2) > 0)
         self.sparsityParvo = sparsityParvo
         assert(sparsityMagno >= 0 and sparsityMagno <= 1.0)
-        if sparsityMagno > 0:
-          assert(sparsityMagno * (self.retina_diameter **2) > 0)
         self.sparsityMagno = sparsityMagno
         if color is True:
           assert(sparsityParvo > 0)
         self.color = color
 
-
         self.retina = cv2.bioinspired.Retina_create(
-            inputSize            = (self.retina_diameter, self.retina_diameter),
+            inputSize            = (resolution_factor*output_diameter, resolution_factor*output_diameter),
             colorMode            = color,
-            colorSamplingMethod  = cv2.bioinspired.RETINA_COLOR_BAYER,)
+            colorSamplingMethod  = cv2.bioinspired.RETINA_COLOR_BAYER,
+            useRetinaLogSampling = True,
+	    reductionFactor      = 1.2, #!reductionFactor_, # how much is the image under-sampled #TODO tune these params
+	    samplingStrenght     = 4.0, # how strongly the corners are blurred/forgotten
+            )
+
+        # Activate Parvo/Magno vision based on whether sparsityXXX is set.
+        self.retina.activateContoursProcessing(sparsityParvo > 0) # Parvo
+        self.retina.activateMovingContoursProcessing(sparsityMagno > 0) # Magno
 
         print(self.retina.printSetup())
         print()
 
         if sparsityParvo > 0:
-          dims = (output_diameter, output_diameter)
+          dims = self.retina.getOutputSize()
 
           sparsityP_ = sparsityParvo
-          if color is True: 
-            dims = (output_diameter, output_diameter, 3,) #3 for RGB color channels
+          if color is True:
+            dims = dims +(3,) #append 3rd dim with value '3' for RGB color channels
 
             # The reason the parvo-cellular has `3rd-root of the sparsity` is that there are three color channels (RGB), 
             # each of which is encoded separately and then combined. The color channels are combined with a logical AND, 
@@ -295,77 +299,88 @@ def __init__(self,
 
         if sparsityMagno > 0:
           self.magno_enc = ChannelEncoder(
-                            input_shape = (output_diameter, output_diameter),
+                            input_shape = self.retina.getOutputSize(),
                             num_samples = 1, 
                             sparsity = sparsityMagno,
                             dtype=np.uint8, drange=[0, 255],)
         else:
           self.magno_enc = None
 
         # output variables:
-        self.image = None # the current input RGB image
+        self.image = np.zeros(self.retina.getInputSize()) # the current input RGB image
         self.roi   = None # self.image cropped to region of interest
-        self.parvo_img = None # output visualization of parvo/magno cells
-        self.magno_img = None
-        self.parvo_sdr  = SDR((output_diameter, output_diameter,)) # parvo/magno cellular representation (SDR)
-        self.magno_sdr  = SDR((output_diameter, output_diameter,))
-
-
-    def new_image(self, image):
+        #self.retina.getParvo() # output visualization of parvo/magno cells
+        #self.retina.getMagno()
+        self.parvo_sdr  = SDR(self.retina.getOutputSize()) # parvo/magno cellular representation (SDR)
+        self.magno_sdr  = SDR(self.retina.getOutputSize())
+
+        # Motor-control variables (to be set by user):
+        self.orientation = 0 #in degrees
+        self.position    = (0,0)
+        self.scale       = 1.0
+
+
+    def new_image_(image):
         """
         Argument image ...
             If String, will load image from file path.
             If numpy.ndarray, will attempt to cast to correct data type and
                 dimensions.
+
+        Return: the image as a 3-channel uint8 numpy.ndarray (loaded from file if a string was passed in)
         """
         # Load image if needed.
         if isinstance(image, str):
-            self.image = cv2.imread(image)
-            self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
+            image = cv2.imread(image)
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         else:
-            self.image = image
+            image = image
         # Get the image into the right format.
-        assert(isinstance(self.image, np.ndarray))
-        if self.image.dtype != np.uint8:
-            raise TypeError('Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%(
-                self.image.dtype))
+        assert(isinstance(image, np.ndarray))
+        assert(image.dtype == np.uint8), print(
+                'Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%(image.dtype))
         # Ensure there are three color channels.
-        if len(self.image.shape) == 2 or self.image.shape[2] == 1:
-            self.image = np.dstack([self.image] * 3)
+        if len(image.shape) == 2 or image.shape[2] == 1:
+            image = np.dstack([image] * 3)
         # Drop the alpha channel if present.
-        elif self.image.shape[2] == 4:
-            self.image = self.image[:,:,:3]
+        elif image.shape[2] == 4:
+            image = image[:,:,:3]
         # Sanity checks.
-        assert(len(self.image.shape) == 3)
-        assert(self.image.shape[2] == 3) # Color images only.
-        self.reset()
-        self.center_view()
+        assert(len(image.shape) == 3)
+        assert(image.shape[2] == 3) # Color images only.
+        return image
+
 
     def center_view(self):
         """Center the view over the image"""
         self.orientation = 0
         self.position    = (self.image.shape[0]/2., self.image.shape[1]/2.)
-        self.scale       = np.min(np.divide(self.image.shape[:2], self.retina_diameter))
+        self.scale       = np.min(np.divide(self.image.shape[:2], self.retina.getInputSize()[0]))
 
     def randomize_view(self, scale_range=None):
         """Set the eye's view point to a random location"""
         if scale_range is None:
-            scale_range = [2, min(self.image.shape[:2]) / self.retina_diameter]
+            scale_range = [2, min(self.image.shape[:2]) / self.retina.getInputSize()[0]]
         assert(len(scale_range) == 2)
         self.orientation = random.uniform(0, 2 * math.pi)
         self.scale       = random.uniform(min(scale_range), max(scale_range))
-        roi_radius       = self.scale * self.retina_diameter / 2
+        roi_radius       = self.scale * self.retina.getInputSize()[0] / 2
         self.position    = [random.uniform(roi_radius, dim - roi_radius)
                                  for dim in self.image.shape[:2]]
 
+    def rotate_(self, image, angle):
+      image_center = tuple(np.array(image.shape[1::-1]) / 2)
+      rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
+      result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
+      return result
+
+
     def _crop_roi(self):
         """
         Crop to Region Of Interest (ROI) which contains the whole field of view.
         Adds a black circular boarder to mask out areas which the eye can't see.
 
-        Note: size of the ROI is (eye.output_diameter * eye.resolution_factor).
-        Note: the circular boarder is actually a bit too far out, playing with
-          eye.fovea_scale can hide areas which this ROI image will show.
+        Note: the ROI is resized to eye.retina.getInputSize(), plus a 3rd dimension of 3 for RGB.
 
         Arguments: eye.scale, eye.position, eye.image
 
@@ -376,7 +391,7 @@ def _crop_roi(self):
         """
         assert(self.image is not None)
 
-        r     = int(round(self.scale * self.retina_diameter / 2))
+        r     = int(round(self.scale * self.retina.getInputSize()[0] / 2))
         x, y  = self.position
         x     = int(round(x))
         y     = int(round(y))
@@ -403,18 +418,16 @@ def _crop_roi(self):
         roi[x_offset:x_offset+x_shape, y_offset:y_offset+y_shape] = image_slice
 
         # Rescale the ROI to remove the scaling effect.
-        roi.resize( (self.retina_diameter, self.retina_diameter, 3))
+        inDims_ = self.retina.getInputSize()
+        inDims_ = inDims_ + (3,) #add 3rd dim '3' for RGB
+        roi.resize( inDims_ )
 
-        # Mask out areas the eye can't see by drawing a circle boarder.
-        center = int(roi.shape[0] / 2)
-        circle_mask = np.zeros(roi.shape, dtype=np.uint8)
-        cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255))
-        roi = np.minimum(roi, circle_mask)
         return roi
 
 
-    def compute(self, position=None, rotation=None, scale=None):
+    def compute(self, image, position=None, rotation=None, scale=None):
         """
+        Argument image: string (file path of the image to load) or numpy.ndarray with image data.
         Arguments position, rotation, scale: optional, if not None, the self.xxx is overriden
           with the provided value.
         Returns tuple (SDR parvo, SDR magno) 
@@ -427,55 +440,49 @@ def compute(self, position=None, rotation=None, scale=None):
         if scale is not None:
           self.scale=scale
 
-        # apply field of view (FOV)
+        # apply field of view (FOV) & rotation
+        self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform
+        self.image = self.rotate_(self.image, rotation)
         self.roi = self._crop_roi()
 
         # Retina image transforms (Parvo & Magnocellular).
         self.retina.run(self.roi)
+
         if self.parvo_enc is not None:
           parvo = self.retina.getParvo()
         if self.magno_enc is not None:
           magno = self.retina.getMagno()
 
         # Log Polar Transform.
-        center = self.retina_diameter / 2
-        M      = self.retina_diameter * self.fovea_scale
+        center = self.retina.getInputSize()[0] / 2
+        M      = self.retina.getInputSize()[0] * self.scale
         if self.parvo_enc is not None:
           parvo = cv2.logPolar(parvo,
                                center = (center, center),
                                M = M,
                                flags = cv2.WARP_FILL_OUTLIERS)
-          parvo = cv2.resize(parvo,  dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC)
 
         if self.magno_enc is not None:
           magno = cv2.logPolar(magno,
                                center = (center, center),
                                M = M,
                                flags = cv2.WARP_FILL_OUTLIERS)
-          magno = cv2.resize(magno, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC)
 
-        # Apply rotation by rolling the images around axis 1.
-        rotation = self.output_diameter * self.orientation / (2 * math.pi)
-        rotation = int(round(rotation))
-        if self.parvo_enc is not None:
-          self.parvo_img = np.roll(parvo, rotation, axis=0)
-        if self.magno_enc is not None:
-          self.magno_img = np.roll(magno, rotation, axis=0)
 
         # Encode images into SDRs.
-        p = []
-        m = []
         if self.parvo_enc is not None:
           p   = self.parvo_enc.encode(parvo)
           if self.color:
             pr, pg, pb = np.dsplit(p, 3)
             p   = np.logical_and(np.logical_and(pr, pg), pb)
           p   = np.expand_dims(np.squeeze(p), axis=2)
+          self.parvo_sdr.dense = p.flatten()
         if self.magno_enc is not None:
           m   = self.magno_enc.encode(magno)
+          self.magno_sdr.dense = m.flatten()
+
 
-        self.magno_sdr.dense = m.flatten()
-        self.parvo_sdr.dense = p.flatten()
+        self.parvo_img = parvo
         assert(len(self.magno_sdr.sparse) > 0)
         assert(len(self.parvo_sdr.sparse) > 0)
 
@@ -493,34 +500,33 @@ def make_roi_pretty(self, roi=None):
         if roi is None:
             roi = self.roi
 
-        # Show the ROI, first rotate it like the eye is rotated.
-        angle = self.orientation * 360 / (2 * math.pi)
-        roi = self.roi[:,:,::-1]
-        rows, cols, color_depth = roi.shape
-        M   = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
-        roi = cv2.warpAffine(roi, M, (cols,rows))
-
         # Invert 5 pixels in the center to show where the fovea is located.
         center = int(roi.shape[0] / 2)
         roi[center, center]     = np.full(3, 255) - roi[center, center]
         roi[center+2, center+2] = np.full(3, 255) - roi[center+2, center+2]
         roi[center-2, center+2] = np.full(3, 255) - roi[center-2, center+2]
         roi[center-2, center-2] = np.full(3, 255) - roi[center-2, center-2]
         roi[center+2, center-2] = np.full(3, 255) - roi[center+2, center-2]
+
+        # Mask out areas the eye can't see by drawing a circular border.
+        center = int(roi.shape[0] / 2)
+        circle_mask = np.zeros(roi.shape, dtype=np.uint8)
+        cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255))
+        roi = np.minimum(roi, circle_mask)
         return roi
 
 
     def plot(self, window_name='Eye', delay=1000):
         roi = self.make_roi_pretty()
         cv2.imshow('Region Of Interest', roi)
         if self.color:
-          cv2.imshow('Parvocellular', self.parvo_img[:,:,::-1])
+          cv2.imshow('Parvocellular', self.retina.getParvo()[:,:,::-1])
         else:
-          cv2.imshow('Parvocellular', self.parvo_img)
-        cv2.imshow('Magnocellular', self.magno_img)
-        idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255
+          cv2.imshow('Parvocellular', self.retina.getParvo())
+        cv2.imshow('Magnocellular', self.retina.getMagno())
+        idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255
         cv2.imshow('Parvo SDR', idx)
-        idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255
+        idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255
         cv2.imshow('Magno SDR', idx)
         cv2.waitKey(delay)
 
@@ -529,7 +535,7 @@ def small_random_movement(self):
         """returns small difference in position, rotation, scale.
            This is naive "saccadic" movements.
         """
-        max_change_angle = (2*3.14159) / 500
+        max_change_angle = (2*math.pi) / 100
         self.position = (
             self.position[0] + random.gauss(1, .75),
             self.position[1] + random.gauss(1, .75),)
@@ -581,16 +587,18 @@ def _get_images(path):
         print('No images found at file path "%s"!'%args.IMAGE)
     else:
         eye = Eye()
+
         for img_path in images:
             eye.reset()
             print("Loading image %s"%img_path)
-            eye.new_image(img_path)
-            eye.scale = 1
-            eye.center_view()
+            #eye.center_view()
+            #manually set position to look at head:
+            eye.position = (400, 400)
             for i in range(10):
                 pos,rot,sc = eye.small_random_movement()
-                (sdrParvo, sdrMagno) = eye.compute(pos,rot,sc) #TODO derive from Encoder
-                eye.plot(500)
+                sc = 1.0 #FIXME scaling with any other than 1.0 breaks plots
+                (sdrParvo, sdrMagno) = eye.compute(img_path, pos,rot,sc) #TODO derive from Encoder
+                eye.plot(delay=1500)
             print("Sparsity parvo: {}".format(len(eye.parvo_sdr.sparse)/np.product(eye.parvo_sdr.dimensions)))
             print("Sparsity magno: {}".format(len(eye.magno_sdr.sparse)/np.product(eye.magno_sdr.dimensions)))
         print("All images seen.")