Merge pull request #37 from googleinterns/nice-to-have
Final Code Documentation and Upgrades
marilynzhang authored Sep 11, 2020
2 parents cd27710 + 05f73f7 commit ed0fd11
Showing 11 changed files with 70 additions and 42 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -35,7 +35,7 @@ Under ```datasets/```:
The end-to-end pipeline can be run from the command-line as such:
```python -m modules.benchmark_pipeline --tfrecord_path=datasets/small_single_instance_v2.tfrecord --output_path=small_single_instance.txt --multi_instance_icon=False --visualize=True --iou_threshold=0.6```.

-The results (accuracy, precision, recall, latency average/median, memory average/median) will then be printed to the output txt file as well as to stdout like so:
+The results (accuracy, precision, recall, latency average/median, memory average/median) will then be printed to the output txt file as well as to logging.info like so:
```
Average seconds per image: 1.439400
Median seconds of images: 1.544500
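Because the results now go through logging.info rather than stdout, they are only visible on the console when logging is configured at INFO level. A minimal sketch of that setup, assuming only the standard library (the logged value is illustrative, taken from the README example above):

```python
import logging
import sys

# Route INFO-level records to stdout so the benchmark summary shows on the
# console in addition to the output txt file.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
logging.info("Average seconds per image: %f", 1.439400)  # illustrative value
```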
Binary file added datasets/small_multi_instance_v2.tfrecord
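The new dataset is a TFRecord file. A quick, hedged way to confirm it is readable, assuming TensorFlow is installed (the feature schema is not shown in this diff, so records are only counted, not parsed):

```python
import tensorflow as tf

# Count records in the newly added dataset without parsing them; the
# feature schema is not part of this diff.
dataset = tf.data.TFRecordDataset("datasets/small_multi_instance_v2.tfrecord")
print("records:", sum(1 for _ in dataset))
```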
5 changes: 3 additions & 2 deletions modules/algorithms.py
@@ -9,6 +9,7 @@
- Distance thresholding
- Suppress overlapping bounding boxes
"""
+import logging
from typing import List, Tuple

import cv2
@@ -87,8 +88,8 @@ def cluster_contours(clusterer: sklearn.base.ClusterMixin,
# a label of -1 means the point was not clustered - a "noise" point
n_clusters = len([label for label in set(clusters.labels_) if label != -1])
n_noise = list(clusters.labels_).count(-1)
-print("Estimated number of clusters: %d" % n_clusters)
-print("Estimated number of noise points: %d" % n_noise)
+logging.debug("Estimated number of clusters: %d", n_clusters)
+logging.debug("Estimated number of noise points: %d", n_noise)
contour_groups = []
for i in range(0, n_clusters):
contour_group = image_contours[np.argwhere(clusters.labels_ == i)]
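Besides swapping print for logging.debug, the new calls pass n_clusters and n_noise as separate arguments rather than pre-formatting with %, so the message string is only built when DEBUG output is actually enabled. A short sketch of the difference, with a hypothetical value:

```python
import logging

n_clusters = 4  # hypothetical value, for illustration only

# Lazy: logging interpolates the message only if DEBUG logging is enabled.
logging.debug("Estimated number of clusters: %d", n_clusters)

# Eager: the string is built even when DEBUG output is discarded.
logging.debug("Estimated number of clusters: %d" % n_clusters)
```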
3 changes: 3 additions & 0 deletions modules/analysis_util.py
@@ -3,6 +3,9 @@
This contains:
- labeling the number of points in a cluster on the image
- plotting the number of points as a histogram
+- saving an icon and image within the same image
+- generating a custom scatterplot
+- scaling images and icons in a dataset
"""
from typing import List, Tuple

29 changes: 16 additions & 13 deletions modules/benchmark_pipeline.py
@@ -1,6 +1,7 @@
"""BenchmarkPipeline class and tfRecord utility functions."""

import argparse
+import logging
from typing import Optional, Tuple

import cv2
@@ -16,8 +17,10 @@ class BenchmarkPipeline:
"""Represents a pipeline to test generated Bounding Boxes.
Usage example:
-benchmark = BenchmarkPipeline("benchmark.tfrecord")
-benchmark.evaluate()
+benchmark = BenchmarkPipeline(tfrecord_path="datasets/
+small_multi_instance_v2.tfrecord")
+benchmark.evaluate(icon_finder_object=icon_finder_shape_context.
+IconFinderShapeContext(clusterer=clustering_algorithms.DBSCANClusterer()))
"""

def __init__(self, tfrecord_path: str = defaults.TFRECORD_PATH):
@@ -83,7 +86,7 @@ def visualize_bounding_boxes(self,
cv2.rectangle(image_bgr_copy, (box.min_x, box.min_y),
(box.max_x, box.max_y), (0, 0, 255), 2)

-if draw_contours:
+if draw_contours and self.image_clusters[i] and self.icon_contours[i]:
# draw each contour cluster in the image with a distinct color
# each contour cluster will alternate between these colors
colors = [(128, 0, 128), (255, 192, 203), (255, 0, 255)]
@@ -95,7 +98,7 @@ def visualize_bounding_boxes(self,
image_rgb = cv2.cvtColor(image_bgr_copy, cv2.COLOR_BGR2RGB)
icon_rgb = cv2.cvtColor(icon_bgr_copy, cv2.COLOR_BGR2RGB)
if image_rgb is None:
-print("Could not read the image.")
+logging.error("Could not read the image.")

analysis_util.save_icon_with_image(icon_rgb, image_rgb,
output_name + str(i) + ".png")
@@ -133,12 +136,12 @@ def calculate_latency(self, icon_finder_object, output_path: str) -> float:
self.image_clusters.append(image_contour_clusters)
self.icon_contours.append(icon_contour)
times.append(timer.calculate_latency_info(output_path))
-time_info = "Average time per image: %f\n" % np.mean(times)
-time_info += "Median time of images: %f" % np.median(times)
+time_info = "Average seconds per image: %f\n" % np.mean(times)
+time_info += "Median seconds of images: %f\n" % np.median(times)
if output_path:
with open(output_path, "a") as output_file:
output_file.write(time_info)
-print(time_info)
+logging.info(time_info)
return np.mean(times)

def calculate_memory(self, icon_finder_object, output_path: str) -> float:
@@ -172,12 +175,12 @@ def calculate_memory(self, icon_finder_object, output_path: str) -> float:
self.proposed_boxes.append(bboxes)
self.image_clusters.append(image_contour_clusters)
self.icon_contours.append(icon_contour)
-memory_info = "Average MiBs per image: %f" % np.mean(mems)
-memory_info += "Median MiBs per image: %f" % np.median(mems)
+memory_info = "Average MiBs per image: %f\n" % np.mean(mems)
+memory_info += "Median MiBs per image: %f\n" % np.median(mems)
if output_path:
with open(output_path, "a") as output_file:
output_file.write(memory_info)
-print(memory_info)
+logging.info(memory_info)
return np.mean(mems)

def find_icons(
@@ -351,6 +354,6 @@ def evaluate(

benchmark = BenchmarkPipeline(tfrecord_path=args.tfrecord_path)
benchmark.evaluate(visualize=args.visualize,
-iou_threshold=args.threshold,
-output_path=args.output_path,
-multi_instance_icon=args.multi_instance_icon)
+iou_threshold=args.threshold,
+output_path=args.output_path,
+multi_instance_icon=args.multi_instance_icon)
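Taken together with the updated docstring, a usage sketch of the revised pipeline API, assuming the module paths and class names as they appear elsewhere in this diff:

```python
from modules import clustering_algorithms
from modules import icon_finder_shape_context
from modules.benchmark_pipeline import BenchmarkPipeline

# Mirrors the usage example in the updated class docstring.
benchmark = BenchmarkPipeline(
    tfrecord_path="datasets/small_multi_instance_v2.tfrecord")
benchmark.evaluate(
    icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
        clusterer=clustering_algorithms.DBSCANClusterer()))
```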
8 changes: 5 additions & 3 deletions modules/confusion_matrix.py
@@ -1,4 +1,6 @@
"""This module contains a ConfusionMatrix class."""
+import logging
+
import dataclasses
from modules import defaults
from modules.correctness_metrics import CorrectnessMetrics
@@ -57,7 +59,7 @@ def calculate_correctness_metrics(self,
output_file.write("Precision: %f\n" % precision)
output_file.write("Recall: %f\n" % recall)

-print("Accuracy: %f\n" % accuracy)
-print("Precision: %f\n" % precision)
-print("Recall: %f\n" % recall)
+logging.info("Accuracy: %f\n", accuracy)
+logging.info("Precision: %f\n", precision)
+logging.info("Recall: %f\n\n", recall)
return CorrectnessMetrics(accuracy, precision, recall)
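For reference, the standard formulas behind the three logged metrics, as a sketch with illustrative parameter names (the actual ConfusionMatrix fields may be named differently):

```python
# Standard definitions, assuming the usual counts of true/false positives
# and negatives, and nonzero denominators.
def correctness_metrics(tp: int, fp: int, tn: int, fn: int):
  accuracy = (tp + tn) / (tp + fp + tn + fn)
  precision = tp / (tp + fp)
  recall = tp / (tp + fn)
  return accuracy, precision, recall
```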
12 changes: 8 additions & 4 deletions modules/icon_finder.py
@@ -1,7 +1,7 @@
"""This module contains the IconFinder base class.
"""
import abc
-from typing import List
+from typing import List, Optional, Tuple

from modules.bounding_box import BoundingBox
import numpy as np
@@ -12,15 +12,19 @@ class IconFinder(abc.ABC):
"""

@abc.abstractmethod
-def find_icons(self, image: np.ndarray,
-icon: np.ndarray) -> List[BoundingBox]:
+def find_icons(
+self, image: np.ndarray, icon: np.ndarray
+) -> Tuple[List[BoundingBox], Optional[List[np.ndarray]],
+Optional[List[np.ndarray]]]:
"""Find instances of icon in image.
Arguments:
image: Numpy array representing image
icon: Numpy array representing icon
Returns:
-List[BoundingBox] -- Bounding Box for each instance of icon in image.
+(List[BoundingBox] -- Bounding Box for each instance of icon in image,
+optionally the contours found in the image, and optionally the contours
+found in the icon)
"""
pass
18 changes: 13 additions & 5 deletions modules/icon_finder_random.py
@@ -1,7 +1,9 @@
"""This module has an IconFinderRandom class for randomly finding bounding boxes.
"""
import random
-from typing import List
+from typing import List, Optional, Tuple

+import memory_profiler
+
from modules.bounding_box import BoundingBox
import modules.icon_finder
@@ -10,17 +12,23 @@

class IconFinderRandom(modules.icon_finder.IconFinder): # pytype: disable=module-attr
"""This class generates bounding boxes randomly."""
+@memory_profiler.profile

-def find_icons(self, image: np.ndarray,
-icon: np.ndarray) -> List[BoundingBox]:
+def find_icons(
+self, image: np.ndarray, icon: np.ndarray
+) -> Tuple[List[BoundingBox], Optional[List[np.ndarray]],
+Optional[List[np.ndarray]]]:
"""Find instances of icon in a given image randomly.
Arguments:
image: Numpy array representing image
icon: Numpy array representing icon
Returns:
-List[BoundingBox] -- Bounding Box for each instance of icon in image.
+Tuple[Bounding Box for each instance of icon in image,
+None, None]. The Nones are just to satisfy the overall
+IconFinder API but would normally contain icon and image
+cluster contours.
"""
height = image.shape[0]
width = image.shape[1]
@@ -29,4 +37,4 @@ def find_icons(self, image: np.ndarray,
min_x = random.randint(0, width - 1)
max_x = random.randint(min_x, width - 1)
max_y = random.randint(min_y, height - 1)
-return [BoundingBox(min_x, min_y, max_x, max_y)]
+return [BoundingBox(min_x, min_y, max_x, max_y)], None, None
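The @memory_profiler.profile decorator added above emits a line-by-line memory report whenever the decorated function runs. A minimal standalone sketch, assuming the memory_profiler package is installed (the function here is hypothetical, for illustration only):

```python
import memory_profiler
import numpy as np


@memory_profiler.profile
def allocate_example():
  # memory_profiler prints per-line memory usage for this function.
  data = np.zeros((1000, 1000))
  return data.sum()


allocate_example()
```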
21 changes: 12 additions & 9 deletions modules/icon_finder_shape_context.py
@@ -1,5 +1,6 @@
"""This module has an IconFinderShapeContext class for finding bounding boxes.
"""
+import logging
import multiprocessing # pytype: disable=pyi-error
from typing import List, Optional, Tuple

@@ -53,8 +54,9 @@ def __init__(self,
self.sc_distance_threshold = sc_distance_threshold
self.nms_iou_threshold = nms_iou_threshold

-def _get_distance(self, icon_contour_3d: np.ndarray,
-image_contour_3d: np.ndarray) -> Optional[Tuple]:
+def _get_distance(
+self, icon_contour_3d: np.ndarray,
+image_contour_3d: np.ndarray) -> Optional[Tuple[np.ndarray, float]]:
"""Calculate distance between icon and image contour.
Arguments:
@@ -71,9 +73,9 @@ def _get_distance(self, icon_contour_3d: np.ndarray,
if distance < self.sc_distance_threshold:
return (image_contour_3d, distance)
except cv2.error as e:
-print(e)
-print("These were the icon and image shapes: %s %s" %
-(str(icon_contour_3d.shape), str(image_contour_3d.shape)))
+logging.debug(e)
+logging.debug("These were the icon and image shapes: %s %s",
+str(icon_contour_3d.shape), str(image_contour_3d.shape))

def _get_similar_contours(
self, icon_contour_keypoints: np.ndarray,
@@ -137,7 +139,8 @@ def _get_similar_contours(

def find_icons(
self, image: np.ndarray, icon: np.ndarray
-) -> Tuple[List[BoundingBox], List[np.ndarray], List[np.ndarray]]:
+) -> Tuple[List[BoundingBox], Optional[List[np.ndarray]],
+Optional[List[np.ndarray]]]:
"""Find instances of icon in a given image via shape context descriptor.
Arguments:
@@ -147,8 +150,8 @@ def find_icons(
Returns:
Tuple(list of Bounding Box for each instance of icon in image,
list of clusters of contours detected in the image to visually evaluate
-how well contour clustering worked, list of booleans representing
-whether each image had zero false positives and false negatives)
+how well contour clustering worked, list of contours detected in the
+icon, also for visualization purposes)
"""
# get icon keypoints and nonkeypoints (using all points will hurt accuracy)
icon_contour_keypoints = np.vstack(
@@ -199,7 +202,7 @@ def find_icons(
sorted_indices = nearby_distances.argsort()
sorted_contours = nearby_contours[sorted_indices]
sorted_distances = nearby_distances[sorted_indices]
-print("Minimum distance achieved: %f" % sorted_distances[0])
+logging.debug("Minimum distance achieved: %f", sorted_distances[0])
distance_threshold = algorithms.get_distance_threshold(
sorted_distances, desired_confidence=self.desired_confidence)
end_index = np.searchsorted(sorted_distances,
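_get_distance now logs cv2.error failures at DEBUG level rather than printing them. A hedged sketch of what one shape-context distance call with this error handling might look like; the extractor construction is an assumption, since the module's actual setup is outside the hunks shown here:

```python
import logging

import cv2
import numpy as np


def shape_context_distance(icon_contour_3d: np.ndarray,
                           image_contour_3d: np.ndarray) -> float:
  """Sketch: one distance computation with the diff's error handling."""
  extractor = cv2.createShapeContextDistanceExtractor()
  try:
    return extractor.computeDistance(icon_contour_3d, image_contour_3d)
  except cv2.error as e:
    logging.debug(e)
    logging.debug("These were the icon and image shapes: %s %s",
                  str(icon_contour_3d.shape), str(image_contour_3d.shape))
    return float("inf")
```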
12 changes: 8 additions & 4 deletions tests/integration_test.py
@@ -1,3 +1,4 @@
+from modules import clustering_algorithms
from modules import icon_finder_shape_context
import modules.benchmark_pipeline

@@ -18,9 +19,10 @@ def test_benchmark():

def test_single_instance_benchmark():
find_icon_single_instance = modules.benchmark_pipeline.BenchmarkPipeline(
-tfrecord_path="datasets/benchmark_single_instance.tfrecord")
+tfrecord_path="datasets/small_single_instance_v2.tfrecord")
correctness, avg_time_secs, avg_memory_mibs = find_icon_single_instance.evaluate(
-icon_finder_object=icon_finder_shape_context.IconFinderShapeContext())
+icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
+clusterer=clustering_algorithms.DBSCANClusterer()))
# current results to prevent any regressions due to algorithm changes
assert avg_memory_mibs <= 1000
assert avg_time_secs <= 5
@@ -31,18 +33,20 @@ def test_single_instance_benchmark():

def test_multi_instance():
find_icon_multi_instance = modules.benchmark_pipeline.BenchmarkPipeline(
-tfrecord_path="datasets/benchmark_multi_instance.tfrecord")
+tfrecord_path="datasets/small_multi_instance_v2.tfrecord")
# test responsiveness to different desired levels of confidence (from 0 to 1)
correctness, _, _ = find_icon_multi_instance.evaluate(
icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
+clusterer=clustering_algorithms.DBSCANClusterer(),
desired_confidence=0.9),
multi_instance_icon=True)
assert correctness.precision >= 0.7

find_icon_multi_instance = modules.benchmark_pipeline.BenchmarkPipeline(
-tfrecord_path="datasets/benchmark_multi_instance.tfrecord")
+tfrecord_path="datasets/small_multi_instance_v2.tfrecord")
correctness, _, _ = find_icon_multi_instance.evaluate(
icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
+clusterer=clustering_algorithms.DBSCANClusterer(),
desired_confidence=0.1),
multi_instance_icon=True)
assert correctness.recall >= 0.8
2 changes: 1 addition & 1 deletion tests/unit_test.py
@@ -207,7 +207,7 @@ def test_get_nms_bounding_boxes(bboxes, rects, confidences,

@pytest.mark.parametrize(
"keypoints,min_points,max_points,nonkeypoints,expected", pointset_tests)
-def test_create_pointset(keypoints, min_points, max_points, nonkeypoints,
+def test_resize_pointset(keypoints, min_points, max_points, nonkeypoints,
expected):
assert len(
algorithms.resize_pointset(keypoints,
