Merge pull request #37 from googleinterns/nice-to-have
Final Code Documentation and Upgrades
marilynzhang authored Sep 11, 2020
2 parents cd27710 + 05f73f7 commit ed0fd11
Showing 11 changed files with 70 additions and 42 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -35,7 +35,7 @@ Under ```datasets/```:
The end-to-end pipeline can be run from the command-line as such:
```python -m modules.benchmark_pipeline --tfrecord_path=datasets/small_single_instance_v2.tfrecord --output_path=small_single_instance.txt --multi_instance_icon=False --visualize=True --iou_threshold=0.6```.

-The results (accuracy, precision, recall, latency average/median, memory average/median) will then be printed to the output txt file as well as to stdout like so:
+The results (accuracy, precision, recall, latency average/median, memory average/median) will then be printed to the output txt file as well as to logging.info like so:
```
Average seconds per image: 1.439400
Median seconds of images: 1.544500
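Because the results now go through logging.info rather than stdout, they are only visible on the console when logging is configured at INFO level. A minimal sketch of that setup, assuming only the standard library (the logged value is illustrative, taken from the README example above):

```python
import logging
import sys

# Route INFO-level records to stdout so the benchmark summary shows on the
# console in addition to the output txt file.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
logging.info("Average seconds per image: %f", 1.439400)  # illustrative value
```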
Binary file added datasets/small_multi_instance_v2.tfrecord
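The new dataset is a TFRecord file. A quick, hedged way to confirm it is readable, assuming TensorFlow is installed (the feature schema is not shown in this diff, so records are only counted, not parsed):

```python
import tensorflow as tf

# Count records in the newly added dataset without parsing them; the
# feature schema is not part of this diff.
dataset = tf.data.TFRecordDataset("datasets/small_multi_instance_v2.tfrecord")
print("records:", sum(1 for _ in dataset))
```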
5 changes: 3 additions & 2 deletions modules/algorithms.py
@@ -9,6 +9,7 @@
- Distance thresholding
- Suppress overlapping bounding boxes
"""
+import logging
from typing import List, Tuple

import cv2
@@ -87,8 +88,8 @@ def cluster_contours(clusterer: sklearn.base.ClusterMixin,
# a label of -1 means the point was not clustered - a "noise" point
n_clusters = len([label for label in set(clusters.labels_) if label != -1])
n_noise = list(clusters.labels_).count(-1)
-print("Estimated number of clusters: %d" % n_clusters)
-print("Estimated number of noise points: %d" % n_noise)
+logging.debug("Estimated number of clusters: %d", n_clusters)
+logging.debug("Estimated number of noise points: %d", n_noise)
contour_groups = []
for i in range(0, n_clusters):
contour_group = image_contours[np.argwhere(clusters.labels_ == i)]
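Besides swapping print for logging.debug, the new calls pass n_clusters and n_noise as separate arguments rather than pre-formatting with %, so the message string is only built when DEBUG output is actually enabled. A short sketch of the difference, with a hypothetical value:

```python
import logging

n_clusters = 4  # hypothetical value, for illustration only

# Lazy: logging interpolates the message only if DEBUG logging is enabled.
logging.debug("Estimated number of clusters: %d", n_clusters)

# Eager: the string is built even when DEBUG output is discarded.
logging.debug("Estimated number of clusters: %d" % n_clusters)
```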
3 changes: 3 additions & 0 deletions modules/analysis_util.py
@@ -3,6 +3,9 @@
This contains:
- labeling the number of points in a cluster on the image
- plotting the number of points as a histogram
+- saving an icon and image within the same image
+- generating a custom scatterplot
+- scaling images and icons in a dataset
"""
from typing import List, Tuple

29 changes: 16 additions & 13 deletions modules/benchmark_pipeline.py
@@ -1,6 +1,7 @@
"""BenchmarkPipeline class and tfRecord utility functions."""

import argparse
+import logging
from typing import Optional, Tuple

import cv2
@@ -16,8 +17,10 @@ class BenchmarkPipeline:
"""Represents a pipeline to test generated Bounding Boxes.
Usage example:
-benchmark = BenchmarkPipeline("benchmark.tfrecord")
-benchmark.evaluate()
+benchmark = BenchmarkPipeline(tfrecord_path="datasets/
+small_multi_instance_v2.tfrecord")
+benchmark.evaluate(icon_finder_object=icon_finder_shape_context.
+IconFinderShapeContext(clusterer=clustering_algorithms.DBSCANClusterer()))
"""

def __init__(self, tfrecord_path: str = defaults.TFRECORD_PATH):
@@ -83,7 +86,7 @@ def visualize_bounding_boxes(self,
cv2.rectangle(image_bgr_copy, (box.min_x, box.min_y),
(box.max_x, box.max_y), (0, 0, 255), 2)

-if draw_contours:
+if draw_contours and self.image_clusters[i] and self.icon_contours[i]:
# draw each contour cluster in the image with a distinct color
# each contour cluster will alternate between these colors
colors = [(128, 0, 128), (255, 192, 203), (255, 0, 255)]
@@ -95,7 +98,7 @@ def visualize_bounding_boxes(self,
image_rgb = cv2.cvtColor(image_bgr_copy, cv2.COLOR_BGR2RGB)
icon_rgb = cv2.cvtColor(icon_bgr_copy, cv2.COLOR_BGR2RGB)
if image_rgb is None:
-print("Could not read the image.")
+logging.error("Could not read the image.")

analysis_util.save_icon_with_image(icon_rgb, image_rgb,
output_name + str(i) + ".png")
@@ -133,12 +136,12 @@ def calculate_latency(self, icon_finder_object, output_path: str) -> float:
self.image_clusters.append(image_contour_clusters)
self.icon_contours.append(icon_contour)
times.append(timer.calculate_latency_info(output_path))
-time_info = "Average time per image: %f\n" % np.mean(times)
-time_info += "Median time of images: %f" % np.median(times)
+time_info = "Average seconds per image: %f\n" % np.mean(times)
+time_info += "Median seconds of images: %f\n" % np.median(times)
if output_path:
with open(output_path, "a") as output_file:
output_file.write(time_info)
-print(time_info)
+logging.info(time_info)
return np.mean(times)

def calculate_memory(self, icon_finder_object, output_path: str) -> float:
@@ -172,12 +175,12 @@ def calculate_memory(self, icon_finder_object, output_path: str) -> float:
self.proposed_boxes.append(bboxes)
self.image_clusters.append(image_contour_clusters)
self.icon_contours.append(icon_contour)
-memory_info = "Average MiBs per image: %f" % np.mean(mems)
-memory_info += "Median MiBs per image: %f" % np.median(mems)
+memory_info = "Average MiBs per image: %f\n" % np.mean(mems)
+memory_info += "Median MiBs per image: %f\n" % np.median(mems)
if output_path:
with open(output_path, "a") as output_file:
output_file.write(memory_info)
-print(memory_info)
+logging.info(memory_info)
return np.mean(mems)

def find_icons(
@@ -351,6 +354,6 @@ def evaluate(

benchmark = BenchmarkPipeline(tfrecord_path=args.tfrecord_path)
benchmark.evaluate(visualize=args.visualize,
-iou_threshold=args.threshold,
-output_path=args.output_path,
-multi_instance_icon=args.multi_instance_icon)
+iou_threshold=args.threshold,
+output_path=args.output_path,
+multi_instance_icon=args.multi_instance_icon)
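Taken together with the updated docstring, a usage sketch of the revised pipeline API, assuming the module paths and class names as they appear elsewhere in this diff:

```python
from modules import clustering_algorithms
from modules import icon_finder_shape_context
from modules.benchmark_pipeline import BenchmarkPipeline

# Mirrors the usage example in the updated class docstring.
benchmark = BenchmarkPipeline(
    tfrecord_path="datasets/small_multi_instance_v2.tfrecord")
benchmark.evaluate(
    icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
        clusterer=clustering_algorithms.DBSCANClusterer()))
```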
8 changes: 5 additions & 3 deletions modules/confusion_matrix.py
@@ -1,4 +1,6 @@
"""This module contains a ConfusionMatrix class."""
+import logging
+
import dataclasses
from modules import defaults
from modules.correctness_metrics import CorrectnessMetrics
@@ -57,7 +59,7 @@ def calculate_correctness_metrics(self,
output_file.write("Precision: %f\n" % precision)
output_file.write("Recall: %f\n" % recall)

-print("Accuracy: %f\n" % accuracy)
-print("Precision: %f\n" % precision)
-print("Recall: %f\n" % recall)
+logging.info("Accuracy: %f\n", accuracy)
+logging.info("Precision: %f\n", precision)
+logging.info("Recall: %f\n\n", recall)
return CorrectnessMetrics(accuracy, precision, recall)
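For reference, the standard formulas behind the three logged metrics, as a sketch with illustrative parameter names (the actual ConfusionMatrix fields may be named differently):

```python
# Standard definitions, assuming the usual counts of true/false positives
# and negatives, and nonzero denominators.
def correctness_metrics(tp: int, fp: int, tn: int, fn: int):
  accuracy = (tp + tn) / (tp + fp + tn + fn)
  precision = tp / (tp + fp)
  recall = tp / (tp + fn)
  return accuracy, precision, recall
```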
12 changes: 8 additions & 4 deletions modules/icon_finder.py
@@ -1,7 +1,7 @@
"""This module contains the IconFinder base class.
"""
import abc
-from typing import List
+from typing import List, Optional, Tuple

from modules.bounding_box import BoundingBox
import numpy as np
@@ -12,15 +12,19 @@ class IconFinder(abc.ABC):
"""

@abc.abstractmethod
-def find_icons(self, image: np.ndarray,
-icon: np.ndarray) -> List[BoundingBox]:
+def find_icons(
+self, image: np.ndarray, icon: np.ndarray
+) -> Tuple[List[BoundingBox], Optional[List[np.ndarray]],
+Optional[List[np.ndarray]]]:
"""Find instances of icon in image.
Arguments:
image: Numpy array representing image
icon: Numpy array representing icon
Returns:
-List[BoundingBox] -- Bounding Box for each instance of icon in image.
+(List[BoundingBox] -- Bounding Box for each instance of icon in image,
+optionally the contours found in the image, and optionally the contours
+found in the icon)
"""
pass
18 changes: 13 additions & 5 deletions modules/icon_finder_random.py
@@ -1,7 +1,9 @@
"""This module has an IconFinderRandom class for randomly finding bounding boxes.
"""
import random
-from typing import List
+from typing import List, Optional, Tuple

+import memory_profiler
+
from modules.bounding_box import BoundingBox
import modules.icon_finder
@@ -10,17 +12,23 @@

class IconFinderRandom(modules.icon_finder.IconFinder): # pytype: disable=module-attr
"""This class generates bounding boxes randomly."""
+@memory_profiler.profile

-def find_icons(self, image: np.ndarray,
-icon: np.ndarray) -> List[BoundingBox]:
+def find_icons(
+self, image: np.ndarray, icon: np.ndarray
+) -> Tuple[List[BoundingBox], Optional[List[np.ndarray]],
+Optional[List[np.ndarray]]]:
"""Find instances of icon in a given image randomly.
Arguments:
image: Numpy array representing image
icon: Numpy array representing icon
Returns:
-List[BoundingBox] -- Bounding Box for each instance of icon in image.
+Tuple[Bounding Box for each instance of icon in image,
+None, None]. The Nones are just to satisfy the overall
+IconFinder API but would normally contain icon and image
+cluster contours.
"""
height = image.shape[0]
width = image.shape[1]
@@ -29,4 +37,4 @@ def find_icons(self, image: np.ndarray,
min_x = random.randint(0, width - 1)
max_x = random.randint(min_x, width - 1)
max_y = random.randint(min_y, height - 1)
-return [BoundingBox(min_x, min_y, max_x, max_y)]
+return [BoundingBox(min_x, min_y, max_x, max_y)], None, None
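The @memory_profiler.profile decorator added above emits a line-by-line memory report whenever the decorated function runs. A minimal standalone sketch, assuming the memory_profiler package is installed (the function here is hypothetical, for illustration only):

```python
import memory_profiler
import numpy as np


@memory_profiler.profile
def allocate_example():
  # memory_profiler prints per-line memory usage for this function.
  data = np.zeros((1000, 1000))
  return data.sum()


allocate_example()
```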
21 changes: 12 additions & 9 deletions modules/icon_finder_shape_context.py
@@ -1,5 +1,6 @@
"""This module has an IconFinderShapeContext class for finding bounding boxes.
"""
+import logging
import multiprocessing # pytype: disable=pyi-error
from typing import List, Optional, Tuple

@@ -53,8 +54,9 @@ def __init__(self,
self.sc_distance_threshold = sc_distance_threshold
self.nms_iou_threshold = nms_iou_threshold

-def _get_distance(self, icon_contour_3d: np.ndarray,
-image_contour_3d: np.ndarray) -> Optional[Tuple]:
+def _get_distance(
+self, icon_contour_3d: np.ndarray,
+image_contour_3d: np.ndarray) -> Optional[Tuple[np.ndarray, float]]:
"""Calculate distance between icon and image contour.
Arguments:
@@ -71,9 +73,9 @@ def _get_distance(self, icon_contour_3d: np.ndarray,
if distance < self.sc_distance_threshold:
return (image_contour_3d, distance)
except cv2.error as e:
-print(e)
-print("These were the icon and image shapes: %s %s" %
-(str(icon_contour_3d.shape), str(image_contour_3d.shape)))
+logging.debug(e)
+logging.debug("These were the icon and image shapes: %s %s",
+str(icon_contour_3d.shape), str(image_contour_3d.shape))

def _get_similar_contours(
self, icon_contour_keypoints: np.ndarray,
@@ -137,7 +139,8 @@ def _get_similar_contours(

def find_icons(
self, image: np.ndarray, icon: np.ndarray
-) -> Tuple[List[BoundingBox], List[np.ndarray], List[np.ndarray]]:
+) -> Tuple[List[BoundingBox], Optional[List[np.ndarray]],
+Optional[List[np.ndarray]]]:
"""Find instances of icon in a given image via shape context descriptor.
Arguments:
@@ -147,8 +150,8 @@ def find_icons(
Returns:
Tuple(list of Bounding Box for each instance of icon in image,
list of clusters of contours detected in the image to visually evaluate
-how well contour clustering worked, list of booleans representing
-whether each image had zero false positives and false negatives)
+how well contour clustering worked, list of contours detected in the
+icon, also for visualization purposes)
"""
# get icon keypoints and nonkeypoints (using all points will hurt accuracy)
icon_contour_keypoints = np.vstack(
@@ -199,7 +202,7 @@ def find_icons(
sorted_indices = nearby_distances.argsort()
sorted_contours = nearby_contours[sorted_indices]
sorted_distances = nearby_distances[sorted_indices]
-print("Minimum distance achieved: %f" % sorted_distances[0])
+logging.debug("Minimum distance achieved: %f", sorted_distances[0])
distance_threshold = algorithms.get_distance_threshold(
sorted_distances, desired_confidence=self.desired_confidence)
end_index = np.searchsorted(sorted_distances,
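_get_distance now logs cv2.error failures at DEBUG level rather than printing them. A hedged sketch of what one shape-context distance call with this error handling might look like; the extractor construction is an assumption, since the module's actual setup is outside the hunks shown here:

```python
import logging

import cv2
import numpy as np


def shape_context_distance(icon_contour_3d: np.ndarray,
                           image_contour_3d: np.ndarray) -> float:
  """Sketch: one distance computation with the diff's error handling."""
  extractor = cv2.createShapeContextDistanceExtractor()
  try:
    return extractor.computeDistance(icon_contour_3d, image_contour_3d)
  except cv2.error as e:
    logging.debug(e)
    logging.debug("These were the icon and image shapes: %s %s",
                  str(icon_contour_3d.shape), str(image_contour_3d.shape))
    return float("inf")
```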
12 changes: 8 additions & 4 deletions tests/integration_test.py
@@ -1,3 +1,4 @@
+from modules import clustering_algorithms
from modules import icon_finder_shape_context
import modules.benchmark_pipeline

@@ -18,9 +19,10 @@ def test_benchmark():

def test_single_instance_benchmark():
find_icon_single_instance = modules.benchmark_pipeline.BenchmarkPipeline(
-tfrecord_path="datasets/benchmark_single_instance.tfrecord")
+tfrecord_path="datasets/small_single_instance_v2.tfrecord")
correctness, avg_time_secs, avg_memory_mibs = find_icon_single_instance.evaluate(
-icon_finder_object=icon_finder_shape_context.IconFinderShapeContext())
+icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
+clusterer=clustering_algorithms.DBSCANClusterer()))
# current results to prevent any regressions due to algorithm changes
assert avg_memory_mibs <= 1000
assert avg_time_secs <= 5
@@ -31,18 +33,20 @@ def test_single_instance_benchmark():

def test_multi_instance():
find_icon_multi_instance = modules.benchmark_pipeline.BenchmarkPipeline(
-tfrecord_path="datasets/benchmark_multi_instance.tfrecord")
+tfrecord_path="datasets/small_multi_instance_v2.tfrecord")
# test responsiveness to different desired levels of confidence (from 0 to 1)
correctness, _, _ = find_icon_multi_instance.evaluate(
icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
+clusterer=clustering_algorithms.DBSCANClusterer(),
desired_confidence=0.9),
multi_instance_icon=True)
assert correctness.precision >= 0.7

find_icon_multi_instance = modules.benchmark_pipeline.BenchmarkPipeline(
-tfrecord_path="datasets/benchmark_multi_instance.tfrecord")
+tfrecord_path="datasets/small_multi_instance_v2.tfrecord")
correctness, _, _ = find_icon_multi_instance.evaluate(
icon_finder_object=icon_finder_shape_context.IconFinderShapeContext(
+clusterer=clustering_algorithms.DBSCANClusterer(),
desired_confidence=0.1),
multi_instance_icon=True)
assert correctness.recall >= 0.8
2 changes: 1 addition & 1 deletion tests/unit_test.py
@@ -207,7 +207,7 @@ def test_get_nms_bounding_boxes(bboxes, rects, confidences,

@pytest.mark.parametrize(
"keypoints,min_points,max_points,nonkeypoints,expected", pointset_tests)
-def test_create_pointset(keypoints, min_points, max_points, nonkeypoints,
+def test_resize_pointset(keypoints, min_points, max_points, nonkeypoints,
expected):
assert len(
algorithms.resize_pointset(keypoints,
