Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retina broken log polar WIP #721

Open
wants to merge 10 commits into
base: retina_encoder
Choose a base branch
from
188 changes: 98 additions & 90 deletions py/htm/encoders/eye.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def __init__(self, input_shape, num_samples, sparsity,

def encode(self, img):
"""Returns a dense boolean np.ndarray."""
assert(img.shape == self.input_shape)
assert(img.shape == self.input_shape),print("Channel: img must have same dims as input_shape:", img.shape, self.input_shape)
assert(img.dtype == self.dtype)
if self.wrap:
img += self.offsets
Expand All @@ -177,6 +177,11 @@ class Eye:
Simulates functionality of eye's retinal parvocellular(P-cells),
and magnocellular(M-cells) pathways, at the saccadic steps.

Based on OpenCV's cv2.bioinspired.Retina model:
https://docs.opencv.org/3.4/d2/d94/bioinspired_retina.html
http://web.iitd.ac.in/~sumeet/Modelling_Vision.pdf


On high level,
magno cells:
- detect change in temporal information in the image, ie motion
Expand Down Expand Up @@ -218,7 +223,7 @@ class Eye:


def __init__(self,
output_diameter = 200, # output SDR size is diameter^2
output_diameter = 200, # fovea image size, also approximately output SDR size (= diameter^2)
sparsityParvo = 0.2,
sparsityMagno = 0.025,
color = True,):
Expand All @@ -238,45 +243,44 @@ def __init__(self,
motion detection and motion tracking, video processing.
For details see @param `sparsityParvo`.
TODO: output of M-cells should be processed on a fast TM.
Argument color: use color vision (requires P-cells > 0), default true.
Argument color: use color vision (requires P-cells > 0), default true. (Grayscale is faster)
"""
self.output_diameter = output_diameter
# Argument resolution_factor is used to expand the sensor array so that
# the fovea has adequate resolution. After log-polar transform image
# is reduced by this factor back to the output_diameter.
self.resolution_factor = 3
self.retina_diameter = int(self.resolution_factor * output_diameter)
# Argument fovea_scale ... represents "zoom" aka distance from the object/image.
self.fovea_scale = 0.177
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`fovea_scale` vs. `self.scale`: which of the two should be used here?

resolution_factor = 2
assert(output_diameter // 2 * 2 == output_diameter) # Diameter must be an even number.
assert(self.retina_diameter // 2 * 2 == self.retina_diameter) # (Resolution Factor X Diameter) must be an even number.
assert(sparsityParvo >= 0 and sparsityParvo <= 1.0)
if sparsityParvo > 0:
assert(sparsityParvo * (self.retina_diameter **2) > 0)
self.sparsityParvo = sparsityParvo
assert(sparsityMagno >= 0 and sparsityMagno <= 1.0)
if sparsityMagno > 0:
assert(sparsityMagno * (self.retina_diameter **2) > 0)
self.sparsityMagno = sparsityMagno
if color is True:
assert(sparsityParvo > 0)
self.color = color


self.retina = cv2.bioinspired.Retina_create(
inputSize = (self.retina_diameter, self.retina_diameter),
inputSize = (resolution_factor*output_diameter, resolution_factor*output_diameter),
colorMode = color,
colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER,)
colorSamplingMethod = cv2.bioinspired.RETINA_COLOR_BAYER,
useRetinaLogSampling = True,
reductionFactor = 1.2, #!reductionFactor_, # how much is the image under-sampled #TODO tune these params
samplingStrenght = 4.0, # how much are the corners blured/forgotten
)

# Activate Parvo/Magno vision based on whether sparsityXXX is set.
self.retina.activateContoursProcessing(sparsityParvo > 0) # Parvo
self.retina.activateMovingContoursProcessing(sparsityMagno > 0) # Magno

print(self.retina.printSetup())
print()

if sparsityParvo > 0:
dims = (output_diameter, output_diameter)
dims = self.retina.getOutputSize()

sparsityP_ = sparsityParvo
if color is True:
dims = (output_diameter, output_diameter, 3,) #3 for RGB color channels
if color is True:
dims = dims +(3,) #append 3rd dim with value '3' for RGB color channels

# The reason the parvo-cellular has `3rd-root of the sparsity` is that there are three color channels (RGB),
# each of which is encoded separately and then combined. The color channels are combined with a logical AND,
Expand All @@ -295,77 +299,88 @@ def __init__(self,

if sparsityMagno > 0:
self.magno_enc = ChannelEncoder(
input_shape = (output_diameter, output_diameter),
input_shape = self.retina.getOutputSize(),
num_samples = 1,
sparsity = sparsityMagno,
dtype=np.uint8, drange=[0, 255],)
else:
self.magno_enc = None

# output variables:
self.image = None # the current input RGB image
self.image = np.zeros(self.retina.getInputSize()) # the current input RGB image
self.roi = None # self.image cropped to region of interest
self.parvo_img = None # output visualization of parvo/magno cells
self.magno_img = None
self.parvo_sdr = SDR((output_diameter, output_diameter,)) # parvo/magno cellular representation (SDR)
self.magno_sdr = SDR((output_diameter, output_diameter,))


def new_image(self, image):
#self.retina.getParvo() # output visualization of parvo/magno cells
#self.retina.getMagno()
self.parvo_sdr = SDR(self.retina.getOutputSize()) # parvo/magno cellular representation (SDR)
self.magno_sdr = SDR(self.retina.getOutputSize())

# Motor-control variables (to be set by user):
self.orientation = 0 #in degrees
self.position = (0,0)
self.scale = 1.0


def new_image_(image):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is now static; should it be renamed to `read_image`?

"""
Argument image ...
If String, will load image from file path.
If numpy.ndarray, will attempt to cast to correct data type and
dimensions.

Return: the new image ndarray (only useful if string is passed in)
"""
# Load image if needed.
if isinstance(image, str):
self.image = cv2.imread(image)
self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
image = cv2.imread(image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
else:
self.image = image
image = image
# Get the image into the right format.
assert(isinstance(self.image, np.ndarray))
if self.image.dtype != np.uint8:
raise TypeError('Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%(
self.image.dtype))
assert(isinstance(image, np.ndarray))
assert(image.dtype == np.uint8), print(
'Image "%s" dtype is not unsigned 8 bit integer, image.dtype is %s.'%(image.dtype))
# Ensure there are three color channels.
if len(self.image.shape) == 2 or self.image.shape[2] == 1:
self.image = np.dstack([self.image] * 3)
if len(image.shape) == 2 or image.shape[2] == 1:
image = np.dstack([image] * 3)
# Drop the alpha channel if present.
elif self.image.shape[2] == 4:
self.image = self.image[:,:,:3]
elif image.shape[2] == 4:
image = image[:,:,:3]
# Sanity checks.
assert(len(self.image.shape) == 3)
assert(self.image.shape[2] == 3) # Color images only.
self.reset()
self.center_view()
assert(len(image.shape) == 3)
assert(image.shape[2] == 3) # Color images only.
return image


def center_view(self):
"""Center the view over the image"""
self.orientation = 0
self.position = (self.image.shape[0]/2., self.image.shape[1]/2.)
self.scale = np.min(np.divide(self.image.shape[:2], self.retina_diameter))
self.scale = np.min(np.divide(self.image.shape[:2], self.retina.getInputSize()[0]))

def randomize_view(self, scale_range=None):
"""Set the eye's view point to a random location"""
if scale_range is None:
scale_range = [2, min(self.image.shape[:2]) / self.retina_diameter]
scale_range = [2, min(self.image.shape[:2]) / self.retina.getInputSize()[0]]
assert(len(scale_range) == 2)
self.orientation = random.uniform(0, 2 * math.pi)
self.scale = random.uniform(min(scale_range), max(scale_range))
roi_radius = self.scale * self.retina_diameter / 2
roi_radius = self.scale * self.retina.getInputSize()[0] / 2
self.position = [random.uniform(roi_radius, dim - roi_radius)
for dim in self.image.shape[:2]]

def rotate_(self, image, angle):
    """
    Rotate an image around its center, keeping the original width/height.

    Argument image: numpy.ndarray, either 2-D (rows, cols) or 3-D
             (rows, cols, channels).
    Argument angle: rotation angle in degrees, as expected by
             cv2.getRotationMatrix2D. If None, no rotation is applied and
             the input image is returned unchanged.

    Returns the rotated image (same size as the input); pixels are
    resampled with bilinear interpolation.
    """
    # compute() forwards its optional `rotation` argument here, and that
    # argument defaults to None. cv2.getRotationMatrix2D cannot accept None,
    # so treat it as "do not rotate" instead of crashing.
    if angle is None:
        return image

    # shape is (rows, cols[, channels]); OpenCV wants sizes/points as (x, y),
    # i.e. (cols, rows), hence the reversed slice of the first two dims.
    width_height = image.shape[1::-1]
    image_center = tuple(np.array(width_height) / 2)
    rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
    result = cv2.warpAffine(image, rot_mat, width_height, flags=cv2.INTER_LINEAR)
    return result


def _crop_roi(self):
"""
Crop to Region Of Interest (ROI) which contains the whole field of view.
Adds a black circular boarder to mask out areas which the eye can't see.

Note: size of the ROI is (eye.output_diameter * eye.resolution_factor).
Note: the circular boarder is actually a bit too far out, playing with
eye.fovea_scale can hide areas which this ROI image will show.
Note: size of the ROI is (eye.retina.getOutputSize()[0] * resolution_factor).

Arguments: eye.scale, eye.position, eye.image

Expand All @@ -376,7 +391,7 @@ def _crop_roi(self):
"""
assert(self.image is not None)

r = int(round(self.scale * self.retina_diameter / 2))
r = int(round(self.scale * self.retina.getInputSize()[0] / 2))
x, y = self.position
x = int(round(x))
y = int(round(y))
Expand All @@ -403,18 +418,16 @@ def _crop_roi(self):
roi[x_offset:x_offset+x_shape, y_offset:y_offset+y_shape] = image_slice

# Rescale the ROI to remove the scaling effect.
roi.resize( (self.retina_diameter, self.retina_diameter, 3))
inDims_ = self.retina.getInputSize()
inDims_ = inDims_ + (3,) #add 3rd dim '3' for RGB
roi.resize( inDims_ )

# Mask out areas the eye can't see by drawing a circle boarder.
center = int(roi.shape[0] / 2)
circle_mask = np.zeros(roi.shape, dtype=np.uint8)
cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255))
roi = np.minimum(roi, circle_mask)
return roi


def compute(self, position=None, rotation=None, scale=None):
def compute(self, image, position=None, rotation=None, scale=None):
"""
Argument image - string (to load) or numpy.ndarray with image data
Arguments position, rotation, scale: optional, if not None, the self.xxx is overriden
with the provided value.
Returns tuple (SDR parvo, SDR magno)
Expand All @@ -427,55 +440,49 @@ def compute(self, position=None, rotation=None, scale=None):
if scale is not None:
self.scale=scale

# apply field of view (FOV)
# apply field of view (FOV) & rotation
self.image = Eye.new_image_(image) #TODO remove the FOV, already done in retina's logPolar transform
self.image = self.rotate_(self.image, rotation)
self.roi = self._crop_roi()

# Retina image transforms (Parvo & Magnocellular).
self.retina.run(self.roi)

if self.parvo_enc is not None:
parvo = self.retina.getParvo()
if self.magno_enc is not None:
magno = self.retina.getMagno()

# Log Polar Transform.
center = self.retina_diameter / 2
M = self.retina_diameter * self.fovea_scale
center = self.retina.getInputSize()[0] / 2
M = self.retina.getInputSize()[0] * self.scale
if self.parvo_enc is not None:
parvo = cv2.logPolar(parvo,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we apply the logPolar transform only in the ChannelEncoder? That encoder is the one that expects its data in that form.

center = (center, center),
M = M,
flags = cv2.WARP_FILL_OUTLIERS)
parvo = cv2.resize(parvo, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC)

if self.magno_enc is not None:
magno = cv2.logPolar(magno,
center = (center, center),
M = M,
flags = cv2.WARP_FILL_OUTLIERS)
magno = cv2.resize(magno, dsize=(self.output_diameter, self.output_diameter), interpolation = cv2.INTER_CUBIC)

# Apply rotation by rolling the images around axis 1.
rotation = self.output_diameter * self.orientation / (2 * math.pi)
rotation = int(round(rotation))
if self.parvo_enc is not None:
self.parvo_img = np.roll(parvo, rotation, axis=0)
if self.magno_enc is not None:
self.magno_img = np.roll(magno, rotation, axis=0)

# Encode images into SDRs.
p = []
m = []
if self.parvo_enc is not None:
p = self.parvo_enc.encode(parvo)
if self.color:
pr, pg, pb = np.dsplit(p, 3)
p = np.logical_and(np.logical_and(pr, pg), pb)
p = np.expand_dims(np.squeeze(p), axis=2)
self.parvo_sdr.dense = p.flatten()
if self.magno_enc is not None:
m = self.magno_enc.encode(magno)
self.magno_sdr.dense = m.flatten()


self.magno_sdr.dense = m.flatten()
self.parvo_sdr.dense = p.flatten()
self.parvo_img = parvo
assert(len(self.magno_sdr.sparse) > 0)
assert(len(self.parvo_sdr.sparse) > 0)

Expand All @@ -493,34 +500,33 @@ def make_roi_pretty(self, roi=None):
if roi is None:
roi = self.roi

# Show the ROI, first rotate it like the eye is rotated.
angle = self.orientation * 360 / (2 * math.pi)
roi = self.roi[:,:,::-1]
rows, cols, color_depth = roi.shape
M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
roi = cv2.warpAffine(roi, M, (cols,rows))

# Invert 5 pixels in the center to show where the fovea is located.
center = int(roi.shape[0] / 2)
roi[center, center] = np.full(3, 255) - roi[center, center]
roi[center+2, center+2] = np.full(3, 255) - roi[center+2, center+2]
roi[center-2, center+2] = np.full(3, 255) - roi[center-2, center+2]
roi[center-2, center-2] = np.full(3, 255) - roi[center-2, center-2]
roi[center+2, center-2] = np.full(3, 255) - roi[center+2, center-2]

# Mask out areas the eye can't see by drawing a circle boarder.
center = int(roi.shape[0] / 2)
circle_mask = np.zeros(roi.shape, dtype=np.uint8)
cv2.circle(circle_mask, (center, center), center, thickness = -1, color=(255,255,255))
roi = np.minimum(roi, circle_mask)
return roi


def plot(self, window_name='Eye', delay=1000):
roi = self.make_roi_pretty()
cv2.imshow('Region Of Interest', roi)
if self.color:
cv2.imshow('Parvocellular', self.parvo_img[:,:,::-1])
cv2.imshow('Parvocellular', self.retina.getParvo()[:,:,::-1])
else:
cv2.imshow('Parvocellular', self.parvo_img)
cv2.imshow('Magnocellular', self.magno_img)
idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255
cv2.imshow('Parvocellular', self.retina.getParvo())
cv2.imshow('Magnocellular', self.retina.getMagno())
idx = self.parvo_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255
cv2.imshow('Parvo SDR', idx)
idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.output_diameter, self.output_diameter)*255
idx = self.magno_sdr.dense.astype(np.uint8).reshape(self.retina.getOutputSize())*255
cv2.imshow('Magno SDR', idx)
cv2.waitKey(delay)

Expand All @@ -529,7 +535,7 @@ def small_random_movement(self):
"""returns small difference in position, rotation, scale.
This is naive "saccadic" movements.
"""
max_change_angle = (2*3.14159) / 500
max_change_angle = (2*math.pi) / 100
self.position = (
self.position[0] + random.gauss(1, .75),
self.position[1] + random.gauss(1, .75),)
Expand Down Expand Up @@ -581,16 +587,18 @@ def _get_images(path):
print('No images found at file path "%s"!'%args.IMAGE)
else:
eye = Eye()

for img_path in images:
eye.reset()
print("Loading image %s"%img_path)
eye.new_image(img_path)
eye.scale = 1
eye.center_view()
#eye.center_view()
#manually set position to look at head:
eye.position = (400, 400)
for i in range(10):
pos,rot,sc = eye.small_random_movement()
(sdrParvo, sdrMagno) = eye.compute(pos,rot,sc) #TODO derive from Encoder
eye.plot(500)
sc = 1.0 #FIXME scaling with any other than 1.0 breaks plots
(sdrParvo, sdrMagno) = eye.compute(img_path, pos,rot,sc) #TODO derive from Encoder
eye.plot(delay=1500)
print("Sparsity parvo: {}".format(len(eye.parvo_sdr.sparse)/np.product(eye.parvo_sdr.dimensions)))
print("Sparsity magno: {}".format(len(eye.magno_sdr.sparse)/np.product(eye.magno_sdr.dimensions)))
print("All images seen.")