diff --git a/lib/fast_rcnn/config.py b/lib/fast_rcnn/config.py index 12210d1e..37584fcd 100644 --- a/lib/fast_rcnn/config.py +++ b/lib/fast_rcnn/config.py @@ -262,9 +262,9 @@ def _merge_a_into_b(a, b): if type(a) is not edict: return - for k, v in a.iteritems(): + for k, v in a.items(): # a must specify keys that are in b - if not b.has_key(k): + if k not in b: raise KeyError('{} is not a valid config key'.format(k)) # the types must match, too @@ -282,7 +282,7 @@ def _merge_a_into_b(a, b): try: _merge_a_into_b(a[k], b[k]) except: - print('Error under config key: {}'.format(k)) + print(('Error under config key: {}'.format(k))) raise else: b[k] = v @@ -303,10 +303,10 @@ def cfg_from_list(cfg_list): key_list = k.split('.') d = __C for subkey in key_list[:-1]: - assert d.has_key(subkey) + assert subkey in d d = d[subkey] subkey = key_list[-1] - assert d.has_key(subkey) + assert subkey in d try: value = literal_eval(v) except: diff --git a/lib/fast_rcnn/config.py.bak b/lib/fast_rcnn/config.py.bak new file mode 100644 index 00000000..12210d1e --- /dev/null +++ b/lib/fast_rcnn/config.py.bak @@ -0,0 +1,318 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Fast R-CNN config system. + +This file specifies default config options for Fast R-CNN. You should not +change values in this file. Instead, you should write a config file (in yaml) +and use cfg_from_file(yaml_file) to load it and override the default options. + +Most tools in $ROOT/tools take a --cfg option to specify an override file. + - See tools/{train,test}_net.py for example code that uses cfg_from_file() + - See experiments/cfgs/*.yml for example YAML config override files +""" + +import os +import os.path as osp +import numpy as np +from distutils import spawn +# `pip install easydict` if you don't have it +from easydict import EasyDict as edict + +__C = edict() +# Consumers can get config by: +# from fast_rcnn_config import cfg +cfg = __C + +# +# Training options +# + +__C.TRAIN = edict() +#__C.NET_NAME = 'VGGnet' +# learning rate +__C.TRAIN.LEARNING_RATE = 0.001 +__C.TRAIN.MOMENTUM = 0.9 +__C.TRAIN.GAMMA = 0.1 +__C.TRAIN.STEPSIZE = 50000 +__C.TRAIN.DISPLAY = 10 +__C.IS_MULTISCALE = False + +# Scales to compute real features +#__C.TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0) +#__C.TRAIN.SCALES_BASE = (1.0,) + +# parameters for ROI generating +#__C.TRAIN.SPATIAL_SCALE = 0.0625 +#__C.TRAIN.KERNEL_SIZE = 5 + +# Aspect ratio to use during training +#__C.TRAIN.ASPECTS = (1, 0.75, 0.5, 0.25) +#__C.TRAIN.ASPECTS= (1,) + + +# Scales to use during training (can list multiple scales) +# Each scale is the pixel size of an image's shortest side +__C.TRAIN.SCALES = (600,) + +# Max pixel size of the longest side of a scaled input image +__C.TRAIN.MAX_SIZE = 1000 + +# Images to use per minibatch +__C.TRAIN.IMS_PER_BATCH = 2 + +# Minibatch size (number of regions of interest [ROIs]) +__C.TRAIN.BATCH_SIZE = 128 + +# Fraction of minibatch that is labeled foreground (i.e. class > 0) +__C.TRAIN.FG_FRACTION = 0.25 + +# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) +__C.TRAIN.FG_THRESH = 0.5 + +# Overlap threshold for a ROI to be considered background (class = 0 if +# overlap in [LO, HI)) +__C.TRAIN.BG_THRESH_HI = 0.5 +__C.TRAIN.BG_THRESH_LO = 0.1 + +# Use horizontally-flipped images during training? 
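The `_merge_a_into_b` hunks above are the heart of this file's migration: `iteritems()` and `has_key()` no longer exist in Python 3, so iteration becomes `a.items()` and membership becomes `k in b`. A minimal sketch of the same recursive merge pattern, using a plain `dict` in place of `easydict.EasyDict` (an assumption made for brevity):

```python
import numpy as np

def merge_a_into_b(a, b):
    """Recursively merge dict a into dict b; a may only override existing keys."""
    for k, v in a.items():              # Py3: items() replaces iteritems()
        if k not in b:                  # Py3: "in" replaces has_key()
            raise KeyError('{} is not a valid config key'.format(k))
        if isinstance(v, dict):
            merge_a_into_b(v, b[k])     # recurse into nested sections
        else:
            # coerce overrides into the default's dtype for ndarray defaults
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            b[k] = v

defaults = {'TRAIN': {'LEARNING_RATE': 0.001, 'SCALES': (600,)}}
merge_a_into_b({'TRAIN': {'LEARNING_RATE': 0.01}}, defaults)
assert defaults['TRAIN']['LEARNING_RATE'] == 0.01
```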
+__C.TRAIN.USE_FLIPPED = True + +# Train bounding-box regressors +__C.TRAIN.BBOX_REG = True + +# Overlap required between a ROI and ground-truth box in order for that ROI to +# be used as a bounding-box regression training example +__C.TRAIN.BBOX_THRESH = 0.5 + +# Iterations between snapshots +__C.TRAIN.SNAPSHOT_ITERS = 5000 + +# solver.prototxt specifies the snapshot path prefix, this adds an optional +# infix to yield the path: [_]_iters_XYZ.caffemodel +__C.TRAIN.SNAPSHOT_PREFIX = 'VGGnet_fast_rcnn' +__C.TRAIN.SNAPSHOT_INFIX = '' + +# Use a prefetch thread in roi_data_layer.layer +# So far I haven't found this useful; likely more engineering work is required +__C.TRAIN.USE_PREFETCH = False + +# Normalize the targets (subtract empirical mean, divide by empirical stddev) +__C.TRAIN.BBOX_NORMALIZE_TARGETS = True +# Deprecated (inside weights) +__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# Normalize the targets using "precomputed" (or made up) means and stdevs +# (BBOX_NORMALIZE_TARGETS must also be True) +__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False +__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) +__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) + +# Train using these proposals +__C.TRAIN.PROPOSAL_METHOD = 'selective_search' + +# Make minibatches from images that have similar aspect ratios (i.e. both +# tall and thin or both short and wide) in order to avoid wasting computation +# on zero-padding. +__C.TRAIN.ASPECT_GROUPING = True + +# Use RPN to detect objects +__C.TRAIN.HAS_RPN = False +# IOU >= thresh: positive example +__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 +# IOU < thresh: negative example +__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 +# If an anchor statisfied by positive and negative conditions set to negative +__C.TRAIN.RPN_CLOBBER_POSITIVES = False +# Max number of foreground examples +__C.TRAIN.RPN_FG_FRACTION = 0.5 +# Total number of examples +__C.TRAIN.RPN_BATCHSIZE = 256 +# NMS threshold used on RPN proposals +__C.TRAIN.RPN_NMS_THRESH = 0.7 +# Number of top scoring boxes to keep before apply NMS to RPN proposals +__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 +# Number of top scoring boxes to keep after applying NMS to RPN proposals +__C.TRAIN.RPN_POST_NMS_TOP_N = 2000 +# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) +__C.TRAIN.RPN_MIN_SIZE = 16 +# Deprecated (outside weights) +__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# Give the positive RPN examples weight of p * 1 / {num positives} +# and give negatives a weight of (1 - p) +# Set to -1.0 to use uniform example weighting +__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 + +# Enable timeline generation +__C.TRAIN.DEBUG_TIMELINE = False + +# +# Testing options +# + +__C.TEST = edict() + +# Scales to use during testing (can list multiple scales) +# Each scale is the pixel size of an image's shortest side +__C.TEST.SCALES = (600,) + +# Max pixel size of the longest side of a scaled input image +__C.TEST.MAX_SIZE = 1000 + +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +__C.TEST.NMS = 0.3 + +# Experimental: treat the (K+1) units in the cls_score layer as linear +# predictors (trained, eg, with one-vs-rest SVMs). 
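`TRAIN.BBOX_NORMALIZE_MEANS` and `TRAIN.BBOX_NORMALIZE_STDS` above drive the target whitening that `snapshot()` in `train.py` later folds back into the weights. A hedged NumPy sketch of the roundtrip, with illustrative target values:

```python
import numpy as np

means = np.array([0.0, 0.0, 0.0, 0.0])
stds  = np.array([0.1, 0.1, 0.2, 0.2])   # the defaults set above

targets = np.array([[0.05, -0.02, 0.30, 0.10]])  # illustrative (dx, dy, dw, dh)

normalized = (targets - means) / stds      # what training consumes
restored   = normalized * stds + means     # what snapshot() folds into the weights
assert np.allclose(restored, targets)
```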
+__C.TEST.SVM = False + +# Test using bounding-box regressors +__C.TEST.BBOX_REG = True + +# Propose boxes +__C.TEST.HAS_RPN = True + +# Test using these proposals +__C.TEST.PROPOSAL_METHOD = 'selective_search' + +## NMS threshold used on RPN proposals +__C.TEST.RPN_NMS_THRESH = 0.7 +## Number of top scoring boxes to keep before apply NMS to RPN proposals +__C.TEST.RPN_PRE_NMS_TOP_N = 6000 +#__C.TEST.RPN_PRE_NMS_TOP_N = 12000 +## Number of top scoring boxes to keep after applying NMS to RPN proposals +__C.TEST.RPN_POST_NMS_TOP_N = 300 +#__C.TEST.RPN_POST_NMS_TOP_N = 2000 +# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) +__C.TEST.RPN_MIN_SIZE = 16 + +# Enable timeline generation +__C.TEST.DEBUG_TIMELINE = False + +# +# MISC +# + +# The mapping from image coordinates to feature map coordinates might cause +# some boxes that are distinct in image space to become identical in feature +# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor +# for identifying duplicate boxes. +# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 +__C.DEDUP_BOXES = 1./16. + +# Pixel mean values (BGR order) as a (1, 1, 3) array +# We use the same pixel mean for all networks even though it's not exactly what +# they were trained with +__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) + +# For reproducibility +__C.RNG_SEED = 3 + +# A small number that's used many times +__C.EPS = 1e-14 + +# Root directory of project +__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) + +# Data directory +__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) + +# Model directory +__C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) + +# Name (or path to) the matlab executable +__C.MATLAB = 'matlab' + +# Place outputs under an experiments directory +__C.EXP_DIR = 'default' + + +if spawn.find_executable("nvcc"): + # Use GPU implementation of non-maximum suppression + __C.USE_GPU_NMS = True + + # Default GPU device id + __C.GPU_ID = 0 +else: + __C.USE_GPU_NMS = False + + +def get_output_dir(imdb, weights_filename): + """Return the directory where experimental artifacts are placed. + If the directory does not exist, it is created. + + A canonical path is built using the name from an imdb and a network + (if not None). + """ + outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) + if weights_filename is not None: + outdir = osp.join(outdir, weights_filename) + if not os.path.exists(outdir): + os.makedirs(outdir) + return outdir + +def _merge_a_into_b(a, b): + """Merge config dictionary a into config dictionary b, clobbering the + options in b whenever they are also specified in a. + """ + if type(a) is not edict: + return + + for k, v in a.iteritems(): + # a must specify keys that are in b + if not b.has_key(k): + raise KeyError('{} is not a valid config key'.format(k)) + + # the types must match, too + old_type = type(b[k]) + if old_type is not type(v): + if isinstance(b[k], np.ndarray): + v = np.array(v, dtype=b[k].dtype) + else: + raise ValueError(('Type mismatch ({} vs. 
{}) ' + 'for config key: {}').format(type(b[k]), + type(v), k)) + + # recursively merge dicts + if type(v) is edict: + try: + _merge_a_into_b(a[k], b[k]) + except: + print('Error under config key: {}'.format(k)) + raise + else: + b[k] = v + +def cfg_from_file(filename): + """Load a config file and merge it into the default options.""" + import yaml + with open(filename, 'r') as f: + yaml_cfg = edict(yaml.load(f)) + + _merge_a_into_b(yaml_cfg, __C) + +def cfg_from_list(cfg_list): + """Set config keys via list (e.g., from command line).""" + from ast import literal_eval + assert len(cfg_list) % 2 == 0 + for k, v in zip(cfg_list[0::2], cfg_list[1::2]): + key_list = k.split('.') + d = __C + for subkey in key_list[:-1]: + assert d.has_key(subkey) + d = d[subkey] + subkey = key_list[-1] + assert d.has_key(subkey) + try: + value = literal_eval(v) + except: + # handle the case when v is a string literal + value = v + assert type(value) == type(d[subkey]), \ + 'type {} does not match original type {}'.format( + type(value), type(d[subkey])) + d[subkey] = value diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py index 41cd12c5..55ec35db 100644 --- a/lib/fast_rcnn/test.py +++ b/lib/fast_rcnn/test.py @@ -5,7 +5,7 @@ import cv2 from utils.cython_nms import nms, nms_new from utils.boxes_grid import get_boxes_grid -import cPickle +import pickle import heapq from utils.blob import im_list_to_blob import os @@ -124,7 +124,7 @@ def _clip_boxes(boxes, im_shape): def _rescale_boxes(boxes, inds, scales): """Rescale boxes according to image rescaling.""" - for i in xrange(boxes.shape[0]): + for i in range(boxes.shape[0]): boxes[i,:] = boxes[i,:] / scales[int(inds[i])] return boxes @@ -207,7 +207,7 @@ def im_detect(sess, net, im, boxes=None): if cfg.TEST.DEBUG_TIMELINE: trace = timeline.Timeline(step_stats=run_metadata.step_stats) - trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') + trace_file = open(str(int(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() @@ -218,7 +218,7 @@ def vis_detections(im, class_name, dets, thresh=0.8): """Visual debugging of detections.""" import matplotlib.pyplot as plt #im = im[:, :, (2, 1, 0)] - for i in xrange(np.minimum(10, dets.shape[0])): + for i in range(np.minimum(10, dets.shape[0])): bbox = dets[i, :4] score = dets[i, -1] if score > thresh: @@ -244,10 +244,10 @@ def apply_nms(all_boxes, thresh): """ num_classes = len(all_boxes) num_images = len(all_boxes[0]) - nms_boxes = [[[] for _ in xrange(num_images)] - for _ in xrange(num_classes)] - for cls_ind in xrange(num_classes): - for im_ind in xrange(num_images): + nms_boxes = [[[] for _ in range(num_images)] + for _ in range(num_classes)] + for cls_ind in range(num_classes): + for im_ind in range(num_images): dets = all_boxes[cls_ind][im_ind] if dets == []: continue @@ -275,8 +275,8 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) - all_boxes = [[[] for _ in xrange(num_images)] - for _ in xrange(imdb.num_classes)] + all_boxes = [[[] for _ in range(num_images)] + for _ in range(imdb.num_classes)] output_dir = get_output_dir(imdb, weights_filename) # timers @@ -285,7 +285,7 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, if not cfg.TEST.HAS_RPN: roidb = imdb.roidb - for i in xrange(num_images): + for i in 
range(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None @@ -309,7 +309,7 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, plt.imshow(image) # skip j = 0, because it's the background class - for j in xrange(1, imdb.num_classes): + for j in range(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] @@ -325,22 +325,22 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] - for j in xrange(1, imdb.num_classes)]) + for j in range(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] - for j in xrange(1, imdb.num_classes): + for j in range(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() - print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ + print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, - _t['misc'].average_time) + _t['misc'].average_time)) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: - cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) + pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) - print 'Evaluating detections' + print('Evaluating detections') imdb.evaluate_detections(all_boxes, output_dir) diff --git a/lib/fast_rcnn/test.py.bak b/lib/fast_rcnn/test.py.bak new file mode 100644 index 00000000..41cd12c5 --- /dev/null +++ b/lib/fast_rcnn/test.py.bak @@ -0,0 +1,346 @@ +from fast_rcnn.config import cfg, get_output_dir +import argparse +from utils.timer import Timer +import numpy as np +import cv2 +from utils.cython_nms import nms, nms_new +from utils.boxes_grid import get_boxes_grid +import cPickle +import heapq +from utils.blob import im_list_to_blob +import os +import math +from rpn_msr.generate import imdb_proposals_det +import tensorflow as tf +from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv +import matplotlib.pyplot as plt +from tensorflow.python.client import timeline +import time + +def _get_image_blob(im): + """Converts an image into a network input. + Arguments: + im (ndarray): a color image in BGR order + Returns: + blob (ndarray): a data blob holding an image pyramid + im_scale_factors (list): list of image scales (relative to im) used + in the image pyramid + """ + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + im_shape = im_orig.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + + processed_ims = [] + im_scale_factors = [] + + for target_size in cfg.TEST.SCALES: + im_scale = float(target_size) / float(im_size_min) + # Prevent the biggest axis from being more than MAX_SIZE + if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: + im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR) + im_scale_factors.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, np.array(im_scale_factors) + +def _get_rois_blob(im_rois, im_scale_factors): + """Converts RoIs into network inputs. 
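`_get_image_blob` above implements the test-time scaling rule: the shortest side is resized toward `TEST.SCALES` while the longest side is capped at `TEST.MAX_SIZE`. The rule in isolation, as a small sketch using the default constants:

```python
import numpy as np

def compute_im_scale(im_shape, target_size=600, max_size=1000):
    """Scale so the short side reaches target_size, unless that pushes the
    long side past max_size, in which case fit the long side instead."""
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

print(compute_im_scale((375, 500)))    # 1.6   -> short side becomes 600
print(compute_im_scale((400, 1200)))   # 0.833 -> long side capped at 1000
```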
+ Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + im_scale_factors (list): scale factors as returned by _get_image_blob + Returns: + blob (ndarray): R x 5 matrix of RoIs in the image pyramid + """ + rois, levels = _project_im_rois(im_rois, im_scale_factors) + rois_blob = np.hstack((levels, rois)) + return rois_blob.astype(np.float32, copy=False) + +def _project_im_rois(im_rois, scales): + """Project image RoIs into the image pyramid built by _get_image_blob. + Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + scales (list): scale factors as returned by _get_image_blob + Returns: + rois (ndarray): R x 4 matrix of projected RoI coordinates + levels (list): image pyramid levels used by each projected RoI + """ + im_rois = im_rois.astype(np.float, copy=False) + scales = np.array(scales) + + if len(scales) > 1: + widths = im_rois[:, 2] - im_rois[:, 0] + 1 + heights = im_rois[:, 3] - im_rois[:, 1] + 1 + + areas = widths * heights + scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) + diff_areas = np.abs(scaled_areas - 224 * 224) + levels = diff_areas.argmin(axis=1)[:, np.newaxis] + else: + levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) + + rois = im_rois * scales[levels] + + return rois, levels + +def _get_blobs(im, rois): + """Convert an image and RoIs within that image into network inputs.""" + if cfg.TEST.HAS_RPN: + blobs = {'data' : None, 'rois' : None} + blobs['data'], im_scale_factors = _get_image_blob(im) + else: + blobs = {'data' : None, 'rois' : None} + blobs['data'], im_scale_factors = _get_image_blob(im) + if cfg.IS_MULTISCALE: + if cfg.IS_EXTRAPOLATING: + blobs['rois'] = _get_rois_blob(rois, cfg.TEST.SCALES) + else: + blobs['rois'] = _get_rois_blob(rois, cfg.TEST.SCALES_BASE) + else: + blobs['rois'] = _get_rois_blob(rois, cfg.TEST.SCALES_BASE) + + return blobs, im_scale_factors + +def _clip_boxes(boxes, im_shape): + """Clip boxes to image boundaries.""" + # x1 >= 0 + boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) + # y2 < im_shape[0] + boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) + return boxes + + +def _rescale_boxes(boxes, inds, scales): + """Rescale boxes according to image rescaling.""" + + for i in xrange(boxes.shape[0]): + boxes[i,:] = boxes[i,:] / scales[int(inds[i])] + + return boxes + + +def im_detect(sess, net, im, boxes=None): + """Detect object classes in an image given object proposals. + Arguments: + net (caffe.Net): Fast R-CNN network to use + im (ndarray): color image to test (in BGR order) + boxes (ndarray): R x 4 array of object proposals + Returns: + scores (ndarray): R x K array of object class scores (K includes + background as object category 0) + boxes (ndarray): R x (4*K) array of predicted bounding boxes + """ + + blobs, im_scales = _get_blobs(im, boxes) + + # When mapping from image ROIs to feature map ROIs, there's some aliasing + # (some distinct image ROIs get mapped to the same feature ROI). + # Here, we identify duplicate feature ROIs, so we only compute features + # on the unique subset. 
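The dedup step that follows hashes each scaled ROI into one scalar so `np.unique` can spot duplicates cheaply. A standalone demo of the trick, with made-up boxes chosen to collide after the 1/16 quantization:

```python
import numpy as np

DEDUP_BOXES = 1. / 16.
v = np.array([1, 1e3, 1e6, 1e9, 1e12])

rois = np.array([[0, 10., 10., 39., 39.],     # (level, x1, y1, x2, y2)
                 [0, 11., 11., 38., 38.],     # collides with row 0 after /16 rounding
                 [0, 100., 80., 200., 160.]])

hashes = np.round(rois * DEDUP_BOXES).dot(v)  # one scalar key per ROI
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
unique_rois = rois[index, :]                  # the forward pass runs on these only
restored = unique_rois[inv_index, :]          # maps results back onto all ROIs
print(len(rois), '->', len(unique_rois))      # 3 -> 2
```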
+ if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: + v = np.array([1, 1e3, 1e6, 1e9, 1e12]) + hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) + _, index, inv_index = np.unique(hashes, return_index=True, + return_inverse=True) + blobs['rois'] = blobs['rois'][index, :] + boxes = boxes[index, :] + + if cfg.TEST.HAS_RPN: + im_blob = blobs['data'] + blobs['im_info'] = np.array( + [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], + dtype=np.float32) + # forward pass + if cfg.TEST.HAS_RPN: + feed_dict={net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0} + else: + feed_dict={net.data: blobs['data'], net.rois: blobs['rois'], net.keep_prob: 1.0} + + run_options = None + run_metadata = None + if cfg.TEST.DEBUG_TIMELINE: + run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + run_metadata = tf.RunMetadata() + + cls_score, cls_prob, bbox_pred, rois = sess.run([net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'),net.get_output('rois')], + feed_dict=feed_dict, + options=run_options, + run_metadata=run_metadata) + + if cfg.TEST.HAS_RPN: + assert len(im_scales) == 1, "Only single-image batch implemented" + boxes = rois[:, 1:5] / im_scales[0] + + + if cfg.TEST.SVM: + # use the raw scores before softmax under the assumption they + # were trained as linear SVMs + scores = cls_score + else: + # use softmax estimated probabilities + scores = cls_prob + + if cfg.TEST.BBOX_REG: + # Apply bounding-box regression deltas + box_deltas = bbox_pred + pred_boxes = bbox_transform_inv(boxes, box_deltas) + pred_boxes = _clip_boxes(pred_boxes, im.shape) + else: + # Simply repeat the boxes, once for each class + pred_boxes = np.tile(boxes, (1, scores.shape[1])) + + if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: + # Map scores and predictions back to the original set of boxes + scores = scores[inv_index, :] + pred_boxes = pred_boxes[inv_index, :] + + if cfg.TEST.DEBUG_TIMELINE: + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') + trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) + trace_file.close() + + return scores, pred_boxes + + +def vis_detections(im, class_name, dets, thresh=0.8): + """Visual debugging of detections.""" + import matplotlib.pyplot as plt + #im = im[:, :, (2, 1, 0)] + for i in xrange(np.minimum(10, dets.shape[0])): + bbox = dets[i, :4] + score = dets[i, -1] + if score > thresh: + #plt.cla() + #plt.imshow(im) + plt.gca().add_patch( + plt.Rectangle((bbox[0], bbox[1]), + bbox[2] - bbox[0], + bbox[3] - bbox[1], fill=False, + edgecolor='g', linewidth=3) + ) + plt.gca().text(bbox[0], bbox[1] - 2, + '{:s} {:.3f}'.format(class_name, score), + bbox=dict(facecolor='blue', alpha=0.5), + fontsize=14, color='white') + + plt.title('{} {:.3f}'.format(class_name, score)) + #plt.show() + +def apply_nms(all_boxes, thresh): + """Apply non-maximum suppression to all predicted boxes output by the + test_net method. 
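`apply_nms` here and `test_net` below both call the Cython `nms`, whose source is not part of this diff. A reference NumPy version under the usual greedy-IoU semantics (an assumption about the kernel's exact behavior): higher-scoring boxes suppress later ones whose IoU exceeds `thresh`.

```python
import numpy as np

def py_nms(dets, thresh):
    """dets: (N, 5) array of (x1, y1, x2, y2, score); returns kept indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with the remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        order = order[1:][iou <= thresh]    # drop heavily overlapping candidates
    return keep

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],     # heavy overlap with the first box
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)
print(py_nms(dets, thresh=0.3))             # [0, 2]
```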
+ """ + num_classes = len(all_boxes) + num_images = len(all_boxes[0]) + nms_boxes = [[[] for _ in xrange(num_images)] + for _ in xrange(num_classes)] + for cls_ind in xrange(num_classes): + for im_ind in xrange(num_images): + dets = all_boxes[cls_ind][im_ind] + if dets == []: + continue + + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + inds = np.where((x2 > x1) & (y2 > y1) & (scores > cfg.TEST.DET_THRESHOLD))[0] + dets = dets[inds,:] + if dets == []: + continue + + keep = nms(dets, thresh) + if len(keep) == 0: + continue + nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() + return nms_boxes + + +def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, vis=False): + """Test a Fast R-CNN network on an image database.""" + num_images = len(imdb.image_index) + # all detections are collected into: + # all_boxes[cls][image] = N x 5 array of detections in + # (x1, y1, x2, y2, score) + all_boxes = [[[] for _ in xrange(num_images)] + for _ in xrange(imdb.num_classes)] + + output_dir = get_output_dir(imdb, weights_filename) + # timers + _t = {'im_detect' : Timer(), 'misc' : Timer()} + + if not cfg.TEST.HAS_RPN: + roidb = imdb.roidb + + for i in xrange(num_images): + # filter out any ground truth boxes + if cfg.TEST.HAS_RPN: + box_proposals = None + else: + # The roidb may contain ground-truth rois (for example, if the roidb + # comes from the training or val split). We only want to evaluate + # detection on the *non*-ground-truth rois. We select those the rois + # that have the gt_classes field set to 0, which means there's no + # ground truth. + box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] + + im = cv2.imread(imdb.image_path_at(i)) + _t['im_detect'].tic() + scores, boxes = im_detect(sess, net, im, box_proposals) + _t['im_detect'].toc() + + _t['misc'].tic() + if vis: + image = im[:, :, (2, 1, 0)] + plt.cla() + plt.imshow(image) + + # skip j = 0, because it's the background class + for j in xrange(1, imdb.num_classes): + inds = np.where(scores[:, j] > thresh)[0] + cls_scores = scores[inds, j] + cls_boxes = boxes[inds, j*4:(j+1)*4] + cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ + .astype(np.float32, copy=False) + keep = nms(cls_dets, cfg.TEST.NMS) + cls_dets = cls_dets[keep, :] + if vis: + vis_detections(image, imdb.classes[j], cls_dets) + all_boxes[j][i] = cls_dets + if vis: + plt.show() + # Limit to max_per_image detections *over all classes* + if max_per_image > 0: + image_scores = np.hstack([all_boxes[j][i][:, -1] + for j in xrange(1, imdb.num_classes)]) + if len(image_scores) > max_per_image: + image_thresh = np.sort(image_scores)[-max_per_image] + for j in xrange(1, imdb.num_classes): + keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] + all_boxes[j][i] = all_boxes[j][i][keep, :] + _t['misc'].toc() + + print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ + .format(i + 1, num_images, _t['im_detect'].average_time, + _t['misc'].average_time) + + det_file = os.path.join(output_dir, 'detections.pkl') + with open(det_file, 'wb') as f: + cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) + + print 'Evaluating detections' + imdb.evaluate_detections(all_boxes, output_dir) + diff --git a/lib/fast_rcnn/train.py b/lib/fast_rcnn/train.py index d7633ee5..680e43bf 100644 --- a/lib/fast_rcnn/train.py +++ b/lib/fast_rcnn/train.py @@ -33,10 +33,10 @@ def __init__(self, sess, saver, network, imdb, roidb, output_dir, pretrained_mod self.output_dir = output_dir self.pretrained_model = pretrained_model - 
print 'Computing bounding-box regression targets...' + print('Computing bounding-box regression targets...') if cfg.TRAIN.BBOX_REG: self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) - print 'done' + print('done') # For checkpoint self.saver = saver @@ -47,7 +47,7 @@ def snapshot(self, sess, iter): """ net = self.net - if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: # save original values with tf.variable_scope('bbox_pred', reuse=True): weights = tf.get_variable("weights") @@ -71,9 +71,9 @@ def snapshot(self, sess, iter): filename = os.path.join(self.output_dir, filename) self.saver.save(sess, filename) - print 'Wrote snapshot to: {:s}'.format(filename) + print('Wrote snapshot to: {:s}'.format(filename)) - if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: with tf.variable_scope('bbox_pred', reuse=True): # restore net to original state sess.run(net.bbox_weights_assign, feed_dict={net.bbox_weights: orig_0}) @@ -150,8 +150,8 @@ def train_model(self, sess, max_iters): # iintialize variables sess.run(tf.global_variables_initializer()) if self.pretrained_model is not None: - print ('Loading pretrained model ' - 'weights from {:s}').format(self.pretrained_model) + print(('Loading pretrained model ' + 'weights from {:s}').format(self.pretrained_model)) self.net.load(self.pretrained_model, sess, self.saver, True) last_snapshot_iter = -1 @@ -181,14 +181,14 @@ def train_model(self, sess, max_iters): if cfg.TRAIN.DEBUG_TIMELINE: trace = timeline.Timeline(step_stats=run_metadata.step_stats) - trace_file = open(str(long(time.time() * 1000)) + '-train-timeline.ctf.json', 'w') + trace_file = open(str(int(time.time() * 1000)) + '-train-timeline.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if (iter+1) % (cfg.TRAIN.DISPLAY) == 0: - print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\ - (iter+1, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval()) - print 'speed: {:.3f}s / iter'.format(timer.average_time) + print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\ + (iter+1, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval())) + print('speed: {:.3f}s / iter'.format(timer.average_time)) if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0: last_snapshot_iter = iter @@ -200,11 +200,11 @@ def train_model(self, sess, max_iters): def get_training_roidb(imdb): """Returns a roidb (Region of Interest database) for use in training.""" if cfg.TRAIN.USE_FLIPPED: - print 'Appending horizontally-flipped training examples...' + print('Appending horizontally-flipped training examples...') imdb.append_flipped_images() - print 'done' + print('done') - print 'Preparing training data...' 
+ print('Preparing training data...') if cfg.TRAIN.HAS_RPN: if cfg.IS_MULTISCALE: gdl_roidb.prepare_roidb(imdb) @@ -212,7 +212,7 @@ def get_training_roidb(imdb): rdl_roidb.prepare_roidb(imdb) else: rdl_roidb.prepare_roidb(imdb) - print 'done' + print('done') return imdb.roidb @@ -249,8 +249,8 @@ def is_valid(entry): num = len(roidb) filtered_roidb = [entry for entry in roidb if is_valid(entry)] num_after = len(filtered_roidb) - print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, - num, num_after) + print('Filtered {} roidb entries: {} -> {}'.format(num - num_after, + num, num_after)) return filtered_roidb @@ -260,6 +260,6 @@ def train_net(network, imdb, roidb, output_dir, pretrained_model=None, max_iters saver = tf.train.Saver(max_to_keep=100) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: sw = SolverWrapper(sess, saver, network, imdb, roidb, output_dir, pretrained_model=pretrained_model) - print 'Solving...' + print('Solving...') sw.train_model(sess, max_iters) - print 'done solving' + print('done solving') diff --git a/lib/fast_rcnn/train.py.bak b/lib/fast_rcnn/train.py.bak new file mode 100644 index 00000000..d7633ee5 --- /dev/null +++ b/lib/fast_rcnn/train.py.bak @@ -0,0 +1,265 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Train a Fast R-CNN network.""" + +from fast_rcnn.config import cfg +import gt_data_layer.roidb as gdl_roidb +import roi_data_layer.roidb as rdl_roidb +from roi_data_layer.layer import RoIDataLayer +from utils.timer import Timer +import numpy as np +import os +import tensorflow as tf +import sys +from tensorflow.python.client import timeline +import time + +class SolverWrapper(object): + """A simple wrapper around Caffe's solver. + This wrapper gives us control over he snapshotting process, which we + use to unnormalize the learned bounding-box regression weights. + """ + + def __init__(self, sess, saver, network, imdb, roidb, output_dir, pretrained_model=None): + """Initialize the SolverWrapper.""" + self.net = network + self.imdb = imdb + self.roidb = roidb + self.output_dir = output_dir + self.pretrained_model = pretrained_model + + print 'Computing bounding-box regression targets...' + if cfg.TRAIN.BBOX_REG: + self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) + print 'done' + + # For checkpoint + self.saver = saver + + def snapshot(self, sess, iter): + """Take a snapshot of the network after unnormalizing the learned + bounding-box regression weights. This enables easy use at test-time. 
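The unnormalization that `snapshot` performs (its body follows) folds the target whitening into the final `bbox_pred` layer so the saved checkpoint emits raw deltas. The algebra in NumPy, with illustrative shapes:

```python
import numpy as np

num_classes = 21
stds  = np.tile([0.1, 0.1, 0.2, 0.2], num_classes)   # one std per output, 4 per class
means = np.tile([0.0, 0.0, 0.0, 0.0], num_classes)

weights = np.random.randn(4096, 4 * num_classes)     # fc weights feeding bbox_pred
biases  = np.random.randn(4 * num_classes)

# fold normalization into the layer: (xW + b) * std + mean == xW' + b'
weights_out = weights * np.tile(stds, (weights.shape[0], 1))
biases_out  = biases * stds + means

x = np.random.randn(4096)
assert np.allclose((x @ weights + biases) * stds + means,
                   x @ weights_out + biases_out)
```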
+ """ + net = self.net + + if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + # save original values + with tf.variable_scope('bbox_pred', reuse=True): + weights = tf.get_variable("weights") + biases = tf.get_variable("biases") + + orig_0 = weights.eval() + orig_1 = biases.eval() + + # scale and shift with bbox reg unnormalization; then save snapshot + weights_shape = weights.get_shape().as_list() + sess.run(net.bbox_weights_assign, feed_dict={net.bbox_weights: orig_0 * np.tile(self.bbox_stds, (weights_shape[0], 1))}) + sess.run(net.bbox_bias_assign, feed_dict={net.bbox_biases: orig_1 * self.bbox_stds + self.bbox_means}) + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + + infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX + if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') + filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix + + '_iter_{:d}'.format(iter+1) + '.ckpt') + filename = os.path.join(self.output_dir, filename) + + self.saver.save(sess, filename) + print 'Wrote snapshot to: {:s}'.format(filename) + + if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + with tf.variable_scope('bbox_pred', reuse=True): + # restore net to original state + sess.run(net.bbox_weights_assign, feed_dict={net.bbox_weights: orig_0}) + sess.run(net.bbox_bias_assign, feed_dict={net.bbox_biases: orig_1}) + + def _modified_smooth_l1(self, sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights): + """ + ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets)) + SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2 + |x| - 0.5 / sigma^2, otherwise + """ + sigma2 = sigma * sigma + + inside_mul = tf.multiply(bbox_inside_weights, tf.subtract(bbox_pred, bbox_targets)) + + smooth_l1_sign = tf.cast(tf.less(tf.abs(inside_mul), 1.0 / sigma2), tf.float32) + smooth_l1_option1 = tf.multiply(tf.multiply(inside_mul, inside_mul), 0.5 * sigma2) + smooth_l1_option2 = tf.subtract(tf.abs(inside_mul), 0.5 / sigma2) + smooth_l1_result = tf.add(tf.multiply(smooth_l1_option1, smooth_l1_sign), + tf.multiply(smooth_l1_option2, tf.abs(tf.subtract(smooth_l1_sign, 1.0)))) + + outside_mul = tf.multiply(bbox_outside_weights, smooth_l1_result) + + return outside_mul + + + def train_model(self, sess, max_iters): + """Network training loop.""" + + data_layer = get_data_layer(self.roidb, self.imdb.num_classes) + + # RPN + # classification loss + rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score_reshape'),[-1,2]) + rpn_label = tf.reshape(self.net.get_output('rpn-data')[0],[-1]) + rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score,tf.where(tf.not_equal(rpn_label,-1))),[-1,2]) + rpn_label = tf.reshape(tf.gather(rpn_label,tf.where(tf.not_equal(rpn_label,-1))),[-1]) + rpn_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label)) + + # bounding box regression L1 loss + rpn_bbox_pred = self.net.get_output('rpn_bbox_pred') + rpn_bbox_targets = tf.transpose(self.net.get_output('rpn-data')[1],[0,2,3,1]) + rpn_bbox_inside_weights = tf.transpose(self.net.get_output('rpn-data')[2],[0,2,3,1]) + rpn_bbox_outside_weights = tf.transpose(self.net.get_output('rpn-data')[3],[0,2,3,1]) + + rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights) + rpn_loss_box = tf.reduce_mean(tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3])) + + # R-CNN + # classification loss + cls_score = self.net.get_output('cls_score') + label = 
tf.reshape(self.net.get_output('roi-data')[1],[-1]) + cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label)) + + # bounding box regression L1 loss + bbox_pred = self.net.get_output('bbox_pred') + bbox_targets = self.net.get_output('roi-data')[2] + bbox_inside_weights = self.net.get_output('roi-data')[3] + bbox_outside_weights = self.net.get_output('roi-data')[4] + + smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) + loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1])) + + # final loss + loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box + + # optimizer and learning rate + global_step = tf.Variable(0, trainable=False) + lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, + cfg.TRAIN.STEPSIZE, 0.1, staircase=True) + momentum = cfg.TRAIN.MOMENTUM + train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss, global_step=global_step) + + # iintialize variables + sess.run(tf.global_variables_initializer()) + if self.pretrained_model is not None: + print ('Loading pretrained model ' + 'weights from {:s}').format(self.pretrained_model) + self.net.load(self.pretrained_model, sess, self.saver, True) + + last_snapshot_iter = -1 + timer = Timer() + for iter in range(max_iters): + # get one batch + blobs = data_layer.forward() + + # Make one SGD update + feed_dict={self.net.data: blobs['data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, \ + self.net.gt_boxes: blobs['gt_boxes']} + + run_options = None + run_metadata = None + if cfg.TRAIN.DEBUG_TIMELINE: + run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + run_metadata = tf.RunMetadata() + + timer.tic() + + rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, _ = sess.run([rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box, train_op], + feed_dict=feed_dict, + options=run_options, + run_metadata=run_metadata) + + timer.toc() + + if cfg.TRAIN.DEBUG_TIMELINE: + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + trace_file = open(str(long(time.time() * 1000)) + '-train-timeline.ctf.json', 'w') + trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) + trace_file.close() + + if (iter+1) % (cfg.TRAIN.DISPLAY) == 0: + print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\ + (iter+1, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval()) + print 'speed: {:.3f}s / iter'.format(timer.average_time) + + if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0: + last_snapshot_iter = iter + self.snapshot(sess, iter) + + if last_snapshot_iter != iter: + self.snapshot(sess, iter) + +def get_training_roidb(imdb): + """Returns a roidb (Region of Interest database) for use in training.""" + if cfg.TRAIN.USE_FLIPPED: + print 'Appending horizontally-flipped training examples...' + imdb.append_flipped_images() + print 'done' + + print 'Preparing training data...' 
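`_modified_smooth_l1` above builds the piecewise loss from its docstring out of `tf.less`/`tf.cast` masks. The same function in plain NumPy, handy for spot-checking values:

```python
import numpy as np

def smooth_l1(x, sigma):
    """0.5 * (sigma * x)^2 where |x| < 1 / sigma^2, else |x| - 0.5 / sigma^2."""
    sigma2 = sigma * sigma
    small = np.abs(x) < 1.0 / sigma2
    return np.where(small, 0.5 * sigma2 * x * x, np.abs(x) - 0.5 / sigma2)

x = np.array([-2.0, -0.05, 0.0, 0.05, 2.0])
print(smooth_l1(x, sigma=3.0))   # the RPN branch uses sigma=3.0, the R-CNN head 1.0
```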
+ if cfg.TRAIN.HAS_RPN: + if cfg.IS_MULTISCALE: + gdl_roidb.prepare_roidb(imdb) + else: + rdl_roidb.prepare_roidb(imdb) + else: + rdl_roidb.prepare_roidb(imdb) + print 'done' + + return imdb.roidb + + +def get_data_layer(roidb, num_classes): + """return a data layer.""" + if cfg.TRAIN.HAS_RPN: + if cfg.IS_MULTISCALE: + layer = GtDataLayer(roidb) + else: + layer = RoIDataLayer(roidb, num_classes) + else: + layer = RoIDataLayer(roidb, num_classes) + + return layer + +def filter_roidb(roidb): + """Remove roidb entries that have no usable RoIs.""" + + def is_valid(entry): + # Valid images have: + # (1) At least one foreground RoI OR + # (2) At least one background RoI + overlaps = entry['max_overlaps'] + # find boxes with sufficient overlap + fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # image is only valid if such boxes exist + valid = len(fg_inds) > 0 or len(bg_inds) > 0 + return valid + + num = len(roidb) + filtered_roidb = [entry for entry in roidb if is_valid(entry)] + num_after = len(filtered_roidb) + print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, + num, num_after) + return filtered_roidb + + +def train_net(network, imdb, roidb, output_dir, pretrained_model=None, max_iters=40000): + """Train a Fast R-CNN network.""" + roidb = filter_roidb(roidb) + saver = tf.train.Saver(max_to_keep=100) + with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: + sw = SolverWrapper(sess, saver, network, imdb, roidb, output_dir, pretrained_model=pretrained_model) + print 'Solving...' + sw.train_model(sess, max_iters) + print 'done solving' diff --git a/lib/gt_data_layer/layer.py b/lib/gt_data_layer/layer.py index 9c93c9e6..4e041546 100644 --- a/lib/gt_data_layer/layer.py +++ b/lib/gt_data_layer/layer.py @@ -93,7 +93,7 @@ def forward(self, bottom, top): """Get blobs and copy them into this layer's top blob vector.""" blobs = self._get_next_minibatch() - for blob_name, blob in blobs.iteritems(): + for blob_name, blob in blobs.items(): top_ind = self._name_to_top_map[blob_name] # Reshape net's input blobs top[top_ind].reshape(*(blob.shape)) diff --git a/lib/gt_data_layer/layer.py.bak b/lib/gt_data_layer/layer.py.bak new file mode 100644 index 00000000..9c93c9e6 --- /dev/null +++ b/lib/gt_data_layer/layer.py.bak @@ -0,0 +1,109 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""The data layer used during training to train a Fast R-CNN network. + +GtDataLayer implements a Caffe Python layer. 
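The shuffle-and-cursor minibatch sampling that `GtDataLayer` implements below is self-contained enough to sketch on its own; `num_items` and `ims_per_batch` stand in for the roidb length and `cfg.TRAIN.IMS_PER_BATCH`:

```python
import numpy as np

class MinibatchSampler(object):
    """Epoch-style sampler mirroring _shuffle_roidb_inds / _get_next_minibatch_inds."""
    def __init__(self, num_items, ims_per_batch=2):
        self.num_items = num_items
        self.ims_per_batch = ims_per_batch
        self._shuffle()

    def _shuffle(self):
        self._perm = np.random.permutation(np.arange(self.num_items))
        self._cur = 0

    def next_inds(self):
        # reshuffle once the current permutation is (nearly) exhausted
        if self._cur + self.ims_per_batch >= self.num_items:
            self._shuffle()
        inds = self._perm[self._cur:self._cur + self.ims_per_batch]
        self._cur += self.ims_per_batch
        return inds

sampler = MinibatchSampler(10)
print(sampler.next_inds())   # e.g. [7 2]
```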
+""" + +import caffe +from fast_rcnn.config import cfg +from gt_data_layer.minibatch import get_minibatch +import numpy as np +import yaml +from multiprocessing import Process, Queue + +class GtDataLayer(caffe.Layer): + """Fast R-CNN data layer used for training.""" + + def _shuffle_roidb_inds(self): + """Randomly permute the training roidb.""" + self._perm = np.random.permutation(np.arange(len(self._roidb))) + self._cur = 0 + + def _get_next_minibatch_inds(self): + """Return the roidb indices for the next minibatch.""" + if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): + self._shuffle_roidb_inds() + + db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] + self._cur += cfg.TRAIN.IMS_PER_BATCH + + """ + # sample images with gt objects + db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) + i = 0 + while (i < cfg.TRAIN.IMS_PER_BATCH): + ind = self._perm[self._cur] + num_objs = self._roidb[ind]['boxes'].shape[0] + if num_objs != 0: + db_inds[i] = ind + i += 1 + + self._cur += 1 + if self._cur >= len(self._roidb): + self._shuffle_roidb_inds() + """ + + return db_inds + + def _get_next_minibatch(self): + """Return the blobs to be used for the next minibatch.""" + db_inds = self._get_next_minibatch_inds() + minibatch_db = [self._roidb[i] for i in db_inds] + return get_minibatch(minibatch_db, self._num_classes) + + # this function is called in training the net + def set_roidb(self, roidb): + """Set the roidb to be used by this layer during training.""" + self._roidb = roidb + self._shuffle_roidb_inds() + + def setup(self, bottom, top): + """Setup the GtDataLayer.""" + + # parse the layer parameter string, which must be valid YAML + layer_params = yaml.load(self.param_str_) + + self._num_classes = layer_params['num_classes'] + + self._name_to_top_map = { + 'data': 0, + 'info_boxes': 1, + 'parameters': 2} + + # data blob: holds a batch of N images, each with 3 channels + # The height and width (100 x 100) are dummy values + num_scale_base = len(cfg.TRAIN.SCALES_BASE) + top[0].reshape(num_scale_base, 3, 100, 100) + + # info boxes blob + top[1].reshape(1, 18) + + # parameters blob + num_scale = len(cfg.TRAIN.SCALES) + num_aspect = len(cfg.TRAIN.ASPECTS) + top[2].reshape(2 + 2*num_scale + 2*num_aspect) + + def forward(self, bottom, top): + """Get blobs and copy them into this layer's top blob vector.""" + blobs = self._get_next_minibatch() + + for blob_name, blob in blobs.iteritems(): + top_ind = self._name_to_top_map[blob_name] + # Reshape net's input blobs + top[top_ind].reshape(*(blob.shape)) + # Copy data into net's input blobs + top[top_ind].data[...] 
= blob.astype(np.float32, copy=False) + + def backward(self, top, propagate_down, bottom): + """This layer does not propagate gradients.""" + pass + + def reshape(self, bottom, top): + """Reshaping happens during the call to forward.""" + pass diff --git a/lib/gt_data_layer/minibatch.py b/lib/gt_data_layer/minibatch.py index 1ee74ce7..b6e15721 100644 --- a/lib/gt_data_layer/minibatch.py +++ b/lib/gt_data_layer/minibatch.py @@ -26,7 +26,7 @@ def get_minibatch(roidb, num_classes): # build the box information blob info_boxes_blob = np.zeros((0, 18), dtype=np.float32) num_scale = len(cfg.TRAIN.SCALES) - for i in xrange(num_images): + for i in range(num_images): info_boxes = roidb[i]['info_boxes'] # change the batch index @@ -61,7 +61,7 @@ def _get_image_blob(roidb): num_images = len(roidb) processed_ims = [] - for i in xrange(num_images): + for i in range(num_images): # read image im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: @@ -115,7 +115,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): """Visualize a mini-batch for debugging.""" import matplotlib.pyplot as plt - for i in xrange(rois_blob.shape[0]): + for i in range(rois_blob.shape[0]): rois = rois_blob[i, :] im_ind = rois[0] roi = rois[2:] @@ -126,7 +126,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): cls = labels_blob[i] subcls = sublabels_blob[i] plt.imshow(im) - print 'class: ', cls, ' subclass: ', subcls + print('class: ', cls, ' subclass: ', subcls) plt.gca().add_patch( plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], roi[3] - roi[1], fill=False, diff --git a/lib/gt_data_layer/minibatch.py.bak b/lib/gt_data_layer/minibatch.py.bak new file mode 100644 index 00000000..1ee74ce7 --- /dev/null +++ b/lib/gt_data_layer/minibatch.py.bak @@ -0,0 +1,135 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Compute minibatch blobs for training a Fast R-CNN network.""" + +import numpy as np +import numpy.random as npr +import cv2 +from fast_rcnn.config import cfg +from utils.blob import prep_im_for_blob, im_list_to_blob + +def get_minibatch(roidb, num_classes): + """Given a roidb, construct a minibatch sampled from it.""" + num_images = len(roidb) + assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ + 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ + format(num_images, cfg.TRAIN.BATCH_SIZE) + + # Get the input image blob, formatted for caffe + im_blob = _get_image_blob(roidb) + + # build the box information blob + info_boxes_blob = np.zeros((0, 18), dtype=np.float32) + num_scale = len(cfg.TRAIN.SCALES) + for i in xrange(num_images): + info_boxes = roidb[i]['info_boxes'] + + # change the batch index + info_boxes[:,2] += i * num_scale + info_boxes[:,7] += i * num_scale + + info_boxes_blob = np.vstack((info_boxes_blob, info_boxes)) + + # build the parameter blob + num_aspect = len(cfg.TRAIN.ASPECTS) + num = 2 + 2 * num_scale + 2 * num_aspect + parameters_blob = np.zeros((num), dtype=np.float32) + parameters_blob[0] = num_scale + parameters_blob[1] = num_aspect + parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES + parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING + parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS + parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS + + # For debug visualizations + # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob) + + blobs = {'data': im_blob, + 'info_boxes': info_boxes_blob, + 'parameters': parameters_blob} + + return blobs + +def _get_image_blob(roidb): + """Builds an input blob from the images in the roidb at the different scales. + """ + num_images = len(roidb) + processed_ims = [] + + for i in xrange(num_images): + # read image + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + # build image pyramid + for im_scale in cfg.TRAIN.SCALES_BASE: + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR) + + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob + +def _project_im_rois(im_rois, im_scale_factor): + """Project image RoIs into the rescaled training image.""" + rois = im_rois * im_scale_factor + return rois + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. + + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_loss_weights (ndarray): N x 4K blob of loss weights + """ + clss = bbox_target_data[:, 0] + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 
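`_get_bbox_regression_labels` here scatters each compact `(cls, tx, ty, tw, th)` row into a 4*K-wide row that is zero outside the ground-truth class's slot. A small numeric demo (casting `cls` to `int` for indexing, a NumPy-required adjustment):

```python
import numpy as np

num_classes = 3                                          # background + 2 classes
bbox_target_data = np.array([[1, 0.1, 0.2, 0.3, 0.4],    # class 1
                             [0, 0.0, 0.0, 0.0, 0.0]])   # background: no target

bbox_targets = np.zeros((2, 4 * num_classes), dtype=np.float32)
bbox_loss_weights = np.zeros_like(bbox_targets)
for ind in np.where(bbox_target_data[:, 0] > 0)[0]:
    cls = int(bbox_target_data[ind, 0])
    bbox_targets[ind, 4 * cls:4 * cls + 4] = bbox_target_data[ind, 1:]
    bbox_loss_weights[ind, 4 * cls:4 * cls + 4] = 1.0

print(bbox_targets[0])   # zeros in cols 0-3 and 8-11, targets in cols 4-7
```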
+ return bbox_targets, bbox_loss_weights + + +def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): + """Visualize a mini-batch for debugging.""" + import matplotlib.pyplot as plt + for i in xrange(rois_blob.shape[0]): + rois = rois_blob[i, :] + im_ind = rois[0] + roi = rois[2:] + im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() + im += cfg.PIXEL_MEANS + im = im[:, :, (2, 1, 0)] + im = im.astype(np.uint8) + cls = labels_blob[i] + subcls = sublabels_blob[i] + plt.imshow(im) + print 'class: ', cls, ' subclass: ', subcls + plt.gca().add_patch( + plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], + roi[3] - roi[1], fill=False, + edgecolor='r', linewidth=3) + ) + plt.show() diff --git a/lib/gt_data_layer/roidb.py b/lib/gt_data_layer/roidb.py index 2f3a87e9..90ee9197 100644 --- a/lib/gt_data_layer/roidb.py +++ b/lib/gt_data_layer/roidb.py @@ -15,7 +15,7 @@ import PIL import math import os -import cPickle +import pickle import pdb @@ -29,12 +29,12 @@ def prepare_roidb(imdb): cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: - imdb._roidb = cPickle.load(fid) - print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file) + imdb._roidb = pickle.load(fid) + print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)) return roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) boxes = roidb[i]['boxes'] labels = roidb[i]['gt_classes'] @@ -62,7 +62,7 @@ def prepare_roidb(imdb): # select positive boxes fg_inds = [] - for k in xrange(1, imdb.num_classes): + for k in range(1, imdb.num_classes): fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) if len(fg_inds) > 0: @@ -88,8 +88,8 @@ def prepare_roidb(imdb): roidb[i]['info_boxes'] = info_boxes with open(cache_file, 'wb') as fid: - cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) - print 'wrote gt roidb prepared to {}'.format(cache_file) + pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) + print('wrote gt roidb prepared to {}'.format(cache_file)) def add_bbox_regression_targets(roidb): """Add information needed to train bounding-box regressors.""" @@ -105,9 +105,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['info_boxes'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 12] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -118,9 +118,9 @@ def add_bbox_regression_targets(roidb): stds = np.sqrt(squared_sums / class_counts - means ** 2) # Normalize targets - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['info_boxes'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 12] == cls)[0] roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :] if stds[cls, 0] != 0: diff --git a/lib/gt_data_layer/roidb.py.bak b/lib/gt_data_layer/roidb.py.bak new file mode 100644 index 00000000..2f3a87e9 --- /dev/null +++ b/lib/gt_data_layer/roidb.py.bak @@ -0,0 +1,156 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] 
+# Written by Ross Girshick +# -------------------------------------------------------- + +"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" + +import numpy as np +from fast_rcnn.config import cfg +from utils.cython_bbox import bbox_overlaps +from utils.boxes_grid import get_boxes_grid +import scipy.sparse +import PIL +import math +import os +import cPickle +import pdb + + +def prepare_roidb(imdb): + """Enrich the imdb's roidb by adding some derived quantities that + are useful for training. This function precomputes the maximum + overlap, taken over ground-truth boxes, between each ROI and + each ground-truth box. The class with maximum overlap is also + recorded. + """ + cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fid: + imdb._roidb = cPickle.load(fid) + print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file) + return + + roidb = imdb.roidb + for i in xrange(len(imdb.image_index)): + roidb[i]['image'] = imdb.image_path_at(i) + boxes = roidb[i]['boxes'] + labels = roidb[i]['gt_classes'] + info_boxes = np.zeros((0, 18), dtype=np.float32) + + if boxes.shape[0] == 0: + roidb[i]['info_boxes'] = info_boxes + continue + + # compute grid boxes + s = PIL.Image.open(imdb.image_path_at(i)).size + image_height = s[1] + image_width = s[0] + boxes_grid, cx, cy = get_boxes_grid(image_height, image_width) + + # for each scale + for scale_ind, scale in enumerate(cfg.TRAIN.SCALES): + boxes_rescaled = boxes * scale + + # compute overlap + overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float)) + max_overlaps = overlaps.max(axis = 1) + argmax_overlaps = overlaps.argmax(axis = 1) + max_classes = labels[argmax_overlaps] + + # select positive boxes + fg_inds = [] + for k in xrange(1, imdb.num_classes): + fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) + + if len(fg_inds) > 0: + gt_inds = argmax_overlaps[fg_inds] + # bounding box regression targets + gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:]) + # scale mapping for RoI pooling + scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind] + scale_map = cfg.TRAIN.SCALES[scale_ind_map] + # contruct the list of positive boxes + # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target) + info_box = np.zeros((len(fg_inds), 18), dtype=np.float32) + info_box[:, 0] = cx[fg_inds] + info_box[:, 1] = cy[fg_inds] + info_box[:, 2] = scale_ind + info_box[:, 3:7] = boxes_grid[fg_inds,:] + info_box[:, 7] = scale_ind_map + info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale + info_box[:, 12] = labels[gt_inds] + info_box[:, 14:] = gt_targets + info_boxes = np.vstack((info_boxes, info_box)) + + roidb[i]['info_boxes'] = info_boxes + + with open(cache_file, 'wb') as fid: + cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) + print 'wrote gt roidb prepared to {}'.format(cache_file) + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + assert len(roidb) > 0 + assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?' 
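The statistics loop that follows relies on the single-pass identity var(x) = E[x^2] - E[x]^2, which lets it accumulate per-class sums without holding all targets at once. A quick NumPy check of the identity:

```python
import numpy as np

targets = np.random.randn(1000, 4)    # stand-in for one class's regression targets

count = targets.shape[0]
sums = targets.sum(axis=0)
squared_sums = (targets ** 2).sum(axis=0)

means = sums / count
stds = np.sqrt(squared_sums / count - means ** 2)   # E[x^2] - E[x]^2

assert np.allclose(means, targets.mean(axis=0))
assert np.allclose(stds, targets.std(axis=0))       # np.std is the population std
```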
+ + num_images = len(roidb) + # Infer number of classes from the number of columns in gt_overlaps + num_classes = roidb[0]['gt_overlaps'].shape[1] + + # Compute values needed for means and stds + # var(x) = E(x^2) - E(x)^2 + class_counts = np.zeros((num_classes, 1)) + cfg.EPS + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in xrange(num_images): + targets = roidb[im_i]['info_boxes'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 12] == cls)[0] + if cls_inds.size > 0: + class_counts[cls] += cls_inds.size + sums[cls, :] += targets[cls_inds, 14:].sum(axis=0) + squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0) + + means = sums / class_counts + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + # Normalize targets + for im_i in xrange(num_images): + targets = roidb[im_i]['info_boxes'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 12] == cls)[0] + roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :] + if stds[cls, 0] != 0: + roidb[im_i]['info_boxes'][cls_inds, 14:] /= stds[cls, :] + + # These values will be needed for making predictions + # (the predicts will need to be unnormalized and uncentered) + return means.ravel(), stds.ravel() + +def _compute_targets(ex_rois, gt_rois): + """Compute bounding-box regression targets for an image. The targets are scale invariance""" + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS + ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths + ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights + + gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS + gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS + gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths + gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights + + targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = np.log(gt_widths / ex_widths) + targets_dh = np.log(gt_heights / ex_heights) + + targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32) + targets[:, 0] = targets_dx + targets[:, 1] = targets_dy + targets[:, 2] = targets_dw + targets[:, 3] = targets_dh + return targets diff --git a/lib/make.sh b/lib/make.sh index 15a616bc..515baa42 100755 --- a/lib/make.sh +++ b/lib/make.sh @@ -2,21 +2,28 @@ TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') CUDA_PATH=/usr/local/cuda/ CXXFLAGS='' - +TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') if [[ "$OSTYPE" =~ ^darwin ]]; then CXXFLAGS+='-undefined dynamic_lookup' fi cd roi_pooling_layer + + + if [ -d "$CUDA_PATH" ]; then nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \ -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CXXFLAGS \ -arch=sm_37 - - g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ - roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \ - -lcudart -L $CUDA_PATH/lib64 + + g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc -D_GLIBCXX_USE_CXX11_ABI=0 \ +roi_pooling_op.cu.o -I $TF_INC -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 \ +-fPIC $CXXFLAGS -lcudart -L $CUDA_PATH/lib64 + +# g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ +# roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \ +# -lcudart -L $CUDA_PATH/lib64 else g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ -I $TF_INC -fPIC $CXXFLAGS diff --git a/lib/networks/factory.py b/lib/networks/factory.py index 2b88cf60..172b50a7 100644 --- 
a/lib/networks/factory.py
+++ b/lib/networks/factory.py
@@ -34,4 +34,4 @@ def get_network(name):
 
 def list_networks():
     """List all registered imdbs."""
-    return __sets.keys()
+    return list(__sets.keys())
diff --git a/lib/networks/factory.py.bak b/lib/networks/factory.py.bak
new file mode 100644
index 00000000..2b88cf60
--- /dev/null
+++ b/lib/networks/factory.py.bak
@@ -0,0 +1,37 @@
+# --------------------------------------------------------
+# SubCNN_TF
+# Copyright (c) 2016 CVGL Stanford
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Yu Xiang
+# --------------------------------------------------------
+
+"""Factory method for easily getting imdbs by name."""
+
+__sets = {}
+
+import networks.VGGnet_train
+import networks.VGGnet_test
+import pdb
+import tensorflow as tf
+
+#__sets['VGGnet_train'] = networks.VGGnet_train()
+
+#__sets['VGGnet_test'] = networks.VGGnet_test()
+
+
+def get_network(name):
+    """Get a network by name."""
+    #if not __sets.has_key(name):
+    #    raise KeyError('Unknown dataset: {}'.format(name))
+    #return __sets[name]
+    if name.split('_')[1] == 'test':
+       return networks.VGGnet_test()
+    elif name.split('_')[1] == 'train':
+       return networks.VGGnet_train()
+    else:
+       raise KeyError('Unknown dataset: {}'.format(name))
+
+
+def list_networks():
+    """List all registered imdbs."""
+    return __sets.keys()
diff --git a/lib/networks/network.py b/lib/networks/network.py
index d51f32ff..051a324b 100644
--- a/lib/networks/network.py
+++ b/lib/networks/network.py
@@ -52,9 +52,9 @@ def load(self, data_path, session, saver, ignore_missing=False):
                     try:
                         var = tf.get_variable(subkey)
                         session.run(var.assign(data_dict[key][subkey]))
-                        print "assign pretrain model "+subkey+ " to "+key
+                        print("assign pretrain model "+subkey+ " to "+key)
                     except ValueError:
-                        print "ignore "+key
+                        print("ignore "+key)
                         if not ignore_missing:
                             raise
@@ -63,12 +63,12 @@ def feed(self, *args):
         assert len(args)!=0
         self.inputs = []
         for layer in args:
-            if isinstance(layer, basestring):
+            if isinstance(layer, str):
                 try:
                     layer = self.layers[layer]
-                    print layer
+                    print(layer)
                 except KeyError:
-                    print self.layers.keys()
+                    print(list(self.layers.keys()))
                     raise KeyError('Unknown layer name fed: %s'%layer)
             self.inputs.append(layer)
         return self
@@ -77,12 +77,12 @@ def get_output(self, layer):
         try:
             layer = self.layers[layer]
         except KeyError:
-            print self.layers.keys()
+            print(list(self.layers.keys()))
             raise KeyError('Unknown layer name fed: %s'%layer)
         return layer
 
     def get_unique_name(self, prefix):
-        id = sum(t.startswith(prefix) for t,_ in self.layers.items())+1
+        id = sum(t.startswith(prefix) for t,_ in list(self.layers.items()))+1
         return '%s_%d'%(prefix, id)
 
     def make_var(self, name, shape, initializer=None, trainable=True):
@@ -102,7 +102,7 @@ def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_
             init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
             init_biases = tf.constant_initializer(0.0)
-            kernel = self.make_var('weights', [k_h, k_w, c_i/group, c_o], init_weights, trainable)
+            kernel = self.make_var('weights', [k_h, k_w, int(c_i)//group, c_o], init_weights, trainable)
             biases = self.make_var('biases', [c_o], init_biases, trainable)
 
             if group==1:
@@ -148,7 +148,7 @@ def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name):
         if isinstance(input[1], tuple):
             input[1] = input[1][0]
 
-        print input
+        print(input)
         return roi_pool_op.roi_pool(input[0], input[1],
                                     pooled_height,
                                     pooled_width,
diff --git a/lib/networks/network.py.bak
b/lib/networks/network.py.bak new file mode 100644 index 00000000..d51f32ff --- /dev/null +++ b/lib/networks/network.py.bak @@ -0,0 +1,272 @@ +import numpy as np +import tensorflow as tf +import roi_pooling_layer.roi_pooling_op as roi_pool_op +import roi_pooling_layer.roi_pooling_op_grad +from rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py +from rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py +from rpn_msr.proposal_target_layer_tf import proposal_target_layer as proposal_target_layer_py + + + +DEFAULT_PADDING = 'SAME' + +def layer(op): + def layer_decorated(self, *args, **kwargs): + # Automatically set a name if not provided. + name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) + # Figure out the layer inputs. + if len(self.inputs)==0: + raise RuntimeError('No input variables found for layer %s.'%name) + elif len(self.inputs)==1: + layer_input = self.inputs[0] + else: + layer_input = list(self.inputs) + # Perform the operation and get the output. + layer_output = op(self, layer_input, *args, **kwargs) + # Add to layer LUT. + self.layers[name] = layer_output + # This output is now the input for the next layer. + self.feed(layer_output) + # Return self for chained calls. + return self + return layer_decorated + +class Network(object): + def __init__(self, inputs, trainable=True): + self.inputs = [] + self.layers = dict(inputs) + self.trainable = trainable + self.setup() + + def setup(self): + raise NotImplementedError('Must be subclassed.') + + def load(self, data_path, session, saver, ignore_missing=False): + if data_path.endswith('.ckpt'): + saver.restore(session, data_path) + else: + data_dict = np.load(data_path).item() + for key in data_dict: + with tf.variable_scope(key, reuse=True): + for subkey in data_dict[key]: + try: + var = tf.get_variable(subkey) + session.run(var.assign(data_dict[key][subkey])) + print "assign pretrain model "+subkey+ " to "+key + except ValueError: + print "ignore "+key + if not ignore_missing: + + raise + + def feed(self, *args): + assert len(args)!=0 + self.inputs = [] + for layer in args: + if isinstance(layer, basestring): + try: + layer = self.layers[layer] + print layer + except KeyError: + print self.layers.keys() + raise KeyError('Unknown layer name fed: %s'%layer) + self.inputs.append(layer) + return self + + def get_output(self, layer): + try: + layer = self.layers[layer] + except KeyError: + print self.layers.keys() + raise KeyError('Unknown layer name fed: %s'%layer) + return layer + + def get_unique_name(self, prefix): + id = sum(t.startswith(prefix) for t,_ in self.layers.items())+1 + return '%s_%d'%(prefix, id) + + def make_var(self, name, shape, initializer=None, trainable=True): + return tf.get_variable(name, shape, initializer=initializer, trainable=trainable) + + def validate_padding(self, padding): + assert padding in ('SAME', 'VALID') + + @layer + def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, trainable=True): + self.validate_padding(padding) + c_i = input.get_shape()[-1] + assert c_i%group==0 + assert c_o%group==0 + convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) + with tf.variable_scope(name) as scope: + + init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) + init_biases = tf.constant_initializer(0.0) + kernel = self.make_var('weights', [k_h, k_w, c_i/group, c_o], init_weights, trainable) + biases = self.make_var('biases', [c_o], init_biases, trainable) + + if group==1: + 
conv = convolve(input, kernel) + else: + input_groups = tf.split(3, group, input) + kernel_groups = tf.split(3, group, kernel) + output_groups = [convolve(i, k) for i,k in zip(input_groups, kernel_groups)] + conv = tf.concat(3, output_groups) + if relu: + bias = tf.nn.bias_add(conv, biases) + return tf.nn.relu(bias, name=scope.name) + return tf.nn.bias_add(conv, biases, name=scope.name) + + @layer + def relu(self, input, name): + return tf.nn.relu(input, name=name) + + @layer + def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): + self.validate_padding(padding) + return tf.nn.max_pool(input, + ksize=[1, k_h, k_w, 1], + strides=[1, s_h, s_w, 1], + padding=padding, + name=name) + + @layer + def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): + self.validate_padding(padding) + return tf.nn.avg_pool(input, + ksize=[1, k_h, k_w, 1], + strides=[1, s_h, s_w, 1], + padding=padding, + name=name) + + @layer + def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name): + # only use the first input + if isinstance(input[0], tuple): + input[0] = input[0][0] + + if isinstance(input[1], tuple): + input[1] = input[1][0] + + print input + return roi_pool_op.roi_pool(input[0], input[1], + pooled_height, + pooled_width, + spatial_scale, + name=name)[0] + + @layer + def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name): + if isinstance(input[0], tuple): + input[0] = input[0][0] + return tf.reshape(tf.py_func(proposal_layer_py,[input[0],input[1],input[2], cfg_key, _feat_stride, anchor_scales], [tf.float32]),[-1,5],name =name) + + + @layer + def anchor_target_layer(self, input, _feat_stride, anchor_scales, name): + if isinstance(input[0], tuple): + input[0] = input[0][0] + + with tf.variable_scope(name) as scope: + + rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights = tf.py_func(anchor_target_layer_py,[input[0],input[1],input[2],input[3], _feat_stride, anchor_scales],[tf.float32,tf.float32,tf.float32,tf.float32]) + + rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels,tf.int32), name = 'rpn_labels') + rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name = 'rpn_bbox_targets') + rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights , name = 'rpn_bbox_inside_weights') + rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights , name = 'rpn_bbox_outside_weights') + + + return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights + + + @layer + def proposal_target_layer(self, input, classes, name): + if isinstance(input[0], tuple): + input[0] = input[0][0] + with tf.variable_scope(name) as scope: + + rois,labels,bbox_targets,bbox_inside_weights,bbox_outside_weights = tf.py_func(proposal_target_layer_py,[input[0],input[1],classes],[tf.float32,tf.float32,tf.float32,tf.float32,tf.float32]) + + rois = tf.reshape(rois,[-1,5] , name = 'rois') + labels = tf.convert_to_tensor(tf.cast(labels,tf.int32), name = 'labels') + bbox_targets = tf.convert_to_tensor(bbox_targets, name = 'bbox_targets') + bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name = 'bbox_inside_weights') + bbox_outside_weights = tf.convert_to_tensor(bbox_outside_weights, name = 'bbox_outside_weights') + + + return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights + + + @layer + def reshape_layer(self, input, d,name): + input_shape = tf.shape(input) + if name == 'rpn_cls_prob_reshape': + return 
tf.transpose(tf.reshape(tf.transpose(input,[0,3,1,2]),[input_shape[0], + int(d),tf.cast(tf.cast(input_shape[1],tf.float32)/tf.cast(d,tf.float32)*tf.cast(input_shape[3],tf.float32),tf.int32),input_shape[2]]),[0,2,3,1],name=name) + else: + return tf.transpose(tf.reshape(tf.transpose(input,[0,3,1,2]),[input_shape[0], + int(d),tf.cast(tf.cast(input_shape[1],tf.float32)*(tf.cast(input_shape[3],tf.float32)/tf.cast(d,tf.float32)),tf.int32),input_shape[2]]),[0,2,3,1],name=name) + + @layer + def feature_extrapolating(self, input, scales_base, num_scale_base, num_per_octave, name): + return feature_extrapolating_op.feature_extrapolating(input, + scales_base, + num_scale_base, + num_per_octave, + name=name) + + @layer + def lrn(self, input, radius, alpha, beta, name, bias=1.0): + return tf.nn.local_response_normalization(input, + depth_radius=radius, + alpha=alpha, + beta=beta, + bias=bias, + name=name) + + @layer + def concat(self, inputs, axis, name): + return tf.concat(concat_dim=axis, values=inputs, name=name) + + @layer + def fc(self, input, num_out, name, relu=True, trainable=True): + with tf.variable_scope(name) as scope: + # only use the first input + if isinstance(input, tuple): + input = input[0] + + input_shape = input.get_shape() + if input_shape.ndims == 4: + dim = 1 + for d in input_shape[1:].as_list(): + dim *= d + feed_in = tf.reshape(tf.transpose(input,[0,3,1,2]), [-1, dim]) + else: + feed_in, dim = (input, int(input_shape[-1])) + + if name == 'bbox_pred': + init_weights = tf.truncated_normal_initializer(0.0, stddev=0.001) + init_biases = tf.constant_initializer(0.0) + else: + init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) + init_biases = tf.constant_initializer(0.0) + + weights = self.make_var('weights', [dim, num_out], init_weights, trainable) + biases = self.make_var('biases', [num_out], init_biases, trainable) + + op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b + fc = op(feed_in, weights, biases, name=scope.name) + return fc + + @layer + def softmax(self, input, name): + input_shape = tf.shape(input) + if name == 'rpn_cls_prob': + return tf.reshape(tf.nn.softmax(tf.reshape(input,[-1,input_shape[3]])),[-1,input_shape[1],input_shape[2],input_shape[3]],name=name) + else: + return tf.nn.softmax(input,name=name) + + @layer + def dropout(self, input, keep_prob, name): + return tf.nn.dropout(input, keep_prob, name=name) diff --git a/lib/roi_data_layer/minibatch.py b/lib/roi_data_layer/minibatch.py index 674ef209..2640cddf 100644 --- a/lib/roi_data_layer/minibatch.py +++ b/lib/roi_data_layer/minibatch.py @@ -49,7 +49,7 @@ def get_minibatch(roidb, num_classes): bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) # all_overlaps = [] - for im_i in xrange(num_images): + for im_i in range(num_images): labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes) @@ -133,7 +133,7 @@ def _get_image_blob(roidb, scale_inds): num_images = len(roidb) processed_ims = [] im_scales = [] - for i in xrange(num_images): + for i in range(num_images): im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: im = im[:, ::-1, :] @@ -180,7 +180,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): """Visualize a mini-batch for debugging.""" import matplotlib.pyplot as plt - for i in xrange(rois_blob.shape[0]): + for i in range(rois_blob.shape[0]): 
rois = rois_blob[i, :] im_ind = rois[0] roi = rois[1:] @@ -190,7 +190,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): im = im.astype(np.uint8) cls = labels_blob[i] plt.imshow(im) - print 'class: ', cls, ' overlap: ', overlaps[i] + print('class: ', cls, ' overlap: ', overlaps[i]) plt.gca().add_patch( plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], roi[3] - roi[1], fill=False, diff --git a/lib/roi_data_layer/minibatch.py.bak b/lib/roi_data_layer/minibatch.py.bak new file mode 100644 index 00000000..674ef209 --- /dev/null +++ b/lib/roi_data_layer/minibatch.py.bak @@ -0,0 +1,199 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Compute minibatch blobs for training a Fast R-CNN network.""" + +import numpy as np +import numpy.random as npr +import cv2 +from fast_rcnn.config import cfg +from utils.blob import prep_im_for_blob, im_list_to_blob + +def get_minibatch(roidb, num_classes): + """Given a roidb, construct a minibatch sampled from it.""" + num_images = len(roidb) + # Sample random scales to use for each image in this batch + random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), + size=num_images) + assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ + 'num_images ({}) must divide BATCH_SIZE ({})'. \ + format(num_images, cfg.TRAIN.BATCH_SIZE) + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images + fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) + + # Get the input image blob, formatted for caffe + im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) + + blobs = {'data': im_blob} + + if cfg.TRAIN.HAS_RPN: + assert len(im_scales) == 1, "Single batch only" + assert len(roidb) == 1, "Single batch only" + # gt boxes: (x1, y1, x2, y2, cls) + gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] + gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) + gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] + gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] + blobs['gt_boxes'] = gt_boxes + blobs['im_info'] = np.array( + [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], + dtype=np.float32) + else: # not using RPN + # Now, build the region of interest and label blobs + rois_blob = np.zeros((0, 5), dtype=np.float32) + labels_blob = np.zeros((0), dtype=np.float32) + bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) + bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) + # all_overlaps = [] + for im_i in xrange(num_images): + labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ + = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, + num_classes) + + # Add to RoIs blob + rois = _project_im_rois(im_rois, im_scales[im_i]) + batch_ind = im_i * np.ones((rois.shape[0], 1)) + rois_blob_this_image = np.hstack((batch_ind, rois)) + rois_blob = np.vstack((rois_blob, rois_blob_this_image)) + + # Add to labels, bbox targets, and bbox loss blobs + labels_blob = np.hstack((labels_blob, labels)) + bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) + bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) + # all_overlaps = np.hstack((all_overlaps, overlaps)) + + # For debug visualizations + # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) + + blobs['rois'] = rois_blob + blobs['labels'] = labels_blob + + if cfg.TRAIN.BBOX_REG: + 
blobs['bbox_targets'] = bbox_targets_blob + blobs['bbox_inside_weights'] = bbox_inside_blob + blobs['bbox_outside_weights'] = \ + np.array(bbox_inside_blob > 0).astype(np.float32) + + return blobs + +def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): + """Generate a random sample of RoIs comprising foreground and background + examples. + """ + # label = class RoI has max overlap with + labels = roidb['max_classes'] + overlaps = roidb['max_overlaps'] + rois = roidb['boxes'] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = int(np.minimum(fg_rois_per_image, fg_inds.size)) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice( + fg_inds, size=fg_rois_per_this_image, replace=False) + + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, + bg_inds.size) + # Sample foreground regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice( + bg_inds, size=bg_rois_per_this_image, replace=False) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds) + # Select sampled values from various arrays: + labels = labels[keep_inds] + # Clamp labels for the background RoIs to 0 + labels[fg_rois_per_this_image:] = 0 + overlaps = overlaps[keep_inds] + rois = rois[keep_inds] + + bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( + roidb['bbox_targets'][keep_inds, :], num_classes) + + return labels, overlaps, rois, bbox_targets, bbox_inside_weights + +def _get_image_blob(roidb, scale_inds): + """Builds an input blob from the images in the roidb at the specified + scales. + """ + num_images = len(roidb) + processed_ims = [] + im_scales = [] + for i in xrange(num_images): + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + target_size = cfg.TRAIN.SCALES[scale_inds[i]] + im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, + cfg.TRAIN.MAX_SIZE) + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_scales + +def _project_im_rois(im_rois, im_scale_factor): + """Project image RoIs into the rescaled training image.""" + rois = im_rois * im_scale_factor + return rois + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. 
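The _sample_rois logic above keeps a fixed RoI budget per image, topping up with backgrounds. One Python 3 caveat: `rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images`, left unchanged by this patch, is now true division, so the counts derived from it become floats and eventually reach `npr.choice(..., size=...)`, which newer numpy rejects; an explicit cast (or `//`) is the usual fix. Illustrative arithmetic only, all numbers made up:

rois_per_image = 128 // 2              # plain '/' would give 64.0 under Python 3
fg_rois_per_image = int(round(0.25 * rois_per_image))             # 16 foregrounds wanted
fg_rois_per_this_image = min(fg_rois_per_image, 10)               # image has only 10 fg RoIs
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image  # 54 backgrounds fill the batch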
+ + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_inside_weights (ndarray): N x 4K blob of loss weights + """ + clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS + return bbox_targets, bbox_inside_weights + +def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): + """Visualize a mini-batch for debugging.""" + import matplotlib.pyplot as plt + for i in xrange(rois_blob.shape[0]): + rois = rois_blob[i, :] + im_ind = rois[0] + roi = rois[1:] + im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() + im += cfg.PIXEL_MEANS + im = im[:, :, (2, 1, 0)] + im = im.astype(np.uint8) + cls = labels_blob[i] + plt.imshow(im) + print 'class: ', cls, ' overlap: ', overlaps[i] + plt.gca().add_patch( + plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], + roi[3] - roi[1], fill=False, + edgecolor='r', linewidth=3) + ) + plt.show() diff --git a/lib/roi_data_layer/minibatch2.py b/lib/roi_data_layer/minibatch2.py index 7e9a39c2..521e3d5e 100644 --- a/lib/roi_data_layer/minibatch2.py +++ b/lib/roi_data_layer/minibatch2.py @@ -55,7 +55,7 @@ def get_minibatch(roidb, num_classes): bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) # all_overlaps = [] - for im_i in xrange(num_images): + for im_i in range(num_images): labels, overlaps, im_rois, bbox_targets, bbox_inside_weights, sublabels \ = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes) @@ -106,7 +106,7 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = [] - for i in xrange(1, num_classes): + for i in range(1, num_classes): fg_inds.extend(np.where((labels == i) & (overlaps >= cfg.TRAIN.FG_THRESH))[0]) fg_inds = np.array(fg_inds) @@ -121,12 +121,12 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = [] - for i in xrange(1, num_classes): + for i in range(1, num_classes): bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] ) if len(bg_inds) < bg_rois_per_this_image: - for i in xrange(1, num_classes): + for i in range(1, num_classes): bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI))[0] ) if len(bg_inds) < bg_rois_per_this_image: @@ -173,7 +173,7 @@ def _get_image_blob(roidb, scale_inds): num_images = len(roidb) processed_ims = [] im_scales = [] - for i in xrange(num_images): + for i in range(num_images): im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: im = im[:, ::-1, :] @@ -200,7 +200,7 @@ def _get_image_blob_multiscale(roidb): processed_ims = [] im_scales = [] scales = cfg.TRAIN.SCALES_BASE - for i in xrange(num_images): + for i in range(num_images): im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: im = im[:, ::-1, :] @@ -310,7 +310,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps, sublabels_blob, vi """Visualize a mini-batch for debugging.""" import matplotlib.pyplot as plt import math - for i in 
xrange(min(rois_blob.shape[0], 10)): + for i in range(min(rois_blob.shape[0], 10)): rois = rois_blob[i, :] im_ind = rois[0] roi = rois[1:] @@ -321,7 +321,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps, sublabels_blob, vi cls = labels_blob[i] subcls = sublabels_blob[i] plt.imshow(im) - print 'class: ', cls, ' subclass: ', subcls, ' overlap: ', overlaps[i] + print('class: ', cls, ' subclass: ', subcls, ' overlap: ', overlaps[i]) start = 3 * cls end = start + 3 diff --git a/lib/roi_data_layer/minibatch2.py.bak b/lib/roi_data_layer/minibatch2.py.bak new file mode 100644 index 00000000..7e9a39c2 --- /dev/null +++ b/lib/roi_data_layer/minibatch2.py.bak @@ -0,0 +1,336 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Compute minibatch blobs for training a Fast R-CNN network.""" + +import numpy as np +import numpy.random as npr +import cv2 +from fast_rcnn.config import cfg +from utils.blob import prep_im_for_blob, im_list_to_blob + +def get_minibatch(roidb, num_classes): + """Given a roidb, construct a minibatch sampled from it.""" + num_images = len(roidb) + + assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ + 'num_images ({}) must divide BATCH_SIZE ({})'. \ + format(num_images, cfg.TRAIN.BATCH_SIZE) + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images + fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) + + if cfg.IS_MULTISCALE: + im_blob, im_scales = _get_image_blob_multiscale(roidb) + else: + # Get the input image blob, formatted for caffe + # Sample random scales to use for each image in this batch + random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES_BASE), size=num_images) + im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) + + blobs = {'data': im_blob} + + if cfg.TRAIN.HAS_RPN: + assert len(im_scales) == 1, "Single batch only" + assert len(roidb) == 1, "Single batch only" + # gt boxes: (x1, y1, x2, y2, cls) + gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] + gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) + gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] + gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] + blobs['gt_boxes'] = gt_boxes + blobs['im_info'] = np.array( + [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], + dtype=np.float32) + + + else: + # Now, build the region of interest and label blobs + rois_blob = np.zeros((0, 5), dtype=np.float32) + labels_blob = np.zeros((0), dtype=np.float32) + bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) + bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) + + # all_overlaps = [] + for im_i in xrange(num_images): + labels, overlaps, im_rois, bbox_targets, bbox_inside_weights, sublabels \ + = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes) + + # Add to RoIs blob + if cfg.IS_MULTISCALE: + if cfg.IS_EXTRAPOLATING: + rois, levels = _project_im_rois_multiscale(im_rois, cfg.TRAIN.SCALES) + batch_ind = im_i * len(cfg.TRAIN.SCALES) + levels + else: + rois, levels = _project_im_rois_multiscale(im_rois, cfg.TRAIN.SCALES_BASE) + batch_ind = im_i * len(cfg.TRAIN.SCALES_BASE) + levels + else: + rois = _project_im_rois(im_rois, im_scales[im_i]) + batch_ind = im_i * np.ones((rois.shape[0], 1)) + + rois_blob_this_image = np.hstack((batch_ind, rois)) + rois_blob = np.vstack((rois_blob, 
rois_blob_this_image)) + + # Add to labels, bbox targets, and bbox loss blobs + labels_blob = np.hstack((labels_blob, labels)) + bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) + bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) + + # all_overlaps = np.hstack((all_overlaps, overlaps)) + + # For debug visualizations + # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps, sublabels_blob, view_targets_blob, view_inside_blob) + # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps, sublabels_blob) + + blobs['rois'] = rois_blob + blobs['labels'] = labels_blob + + if cfg.TRAIN.BBOX_REG: + blobs['bbox_targets'] = bbox_targets_blob + blobs['bbox_inside_weights'] = bbox_inside_blob + blobs['bbox_outside_weights'] = np.array(bbox_inside_blob > 0).astype(np.float32) + + return blobs + +def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): + """Generate a random sample of RoIs comprising foreground and background + examples. + """ + # label = class RoI has max overlap with + labels = roidb['max_classes'] + overlaps = roidb['max_overlaps'] + rois = roidb['boxes'] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = [] + for i in xrange(1, num_classes): + fg_inds.extend(np.where((labels == i) & (overlaps >= cfg.TRAIN.FG_THRESH))[0]) + fg_inds = np.array(fg_inds) + + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, + replace=False) + + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = [] + for i in xrange(1, num_classes): + bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] ) + + if len(bg_inds) < bg_rois_per_this_image: + for i in xrange(1, num_classes): + bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI))[0] ) + + if len(bg_inds) < bg_rois_per_this_image: + bg_inds.extend( np.where(overlaps < cfg.TRAIN.BG_THRESH_HI)[0] ) + bg_inds = np.array(bg_inds, dtype=np.int32) + + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, + bg_inds.size) + # Sample foreground regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, + replace=False) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds).astype(int) + # print '{} foregrounds and {} backgrounds'.format(fg_inds.size, bg_inds.size) + # Select sampled values from various arrays: + labels = labels[keep_inds] + # Clamp labels for the background RoIs to 0 + labels[fg_rois_per_this_image:] = 0 + overlaps = overlaps[keep_inds] + rois = rois[keep_inds] + sublabels = sublabels[keep_inds] + sublabels[fg_rois_per_this_image:] = 0 + + bbox_targets, bbox_loss_weights = \ + _get_bbox_regression_labels(roidb['bbox_targets'][keep_inds, :], + num_classes) + + if cfg.TRAIN.VIEWPOINT or cfg.TEST.VIEWPOINT: + viewpoints = viewpoints[keep_inds] + view_targets, view_loss_weights = \ + _get_viewpoint_estimation_labels(viewpoints, labels, num_classes) + return labels, overlaps, rois, bbox_targets, bbox_loss_weights, sublabels, 
view_targets, view_loss_weights + + return labels, overlaps, rois, bbox_targets, bbox_loss_weights, sublabels + +def _get_image_blob(roidb, scale_inds): + """Builds an input blob from the images in the roidb at the specified + scales. + """ + num_images = len(roidb) + processed_ims = [] + im_scales = [] + for i in xrange(num_images): + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + im_scale = cfg.TRAIN.SCALES_BASE[scale_inds[i]] + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_scales + + +def _get_image_blob_multiscale(roidb): + """Builds an input blob from the images in the roidb at multiscales. + """ + num_images = len(roidb) + processed_ims = [] + im_scales = [] + scales = cfg.TRAIN.SCALES_BASE + for i in xrange(num_images): + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + for im_scale in scales: + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_scales + + +def _project_im_rois(im_rois, im_scale_factor): + """Project image RoIs into the rescaled training image.""" + rois = im_rois * im_scale_factor + return rois + + +def _project_im_rois_multiscale(im_rois, scales): + """Project image RoIs into the image pyramid built by _get_image_blob. + + Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + scales (list): scale factors as returned by _get_image_blob + + Returns: + rois (ndarray): R x 4 matrix of projected RoI coordinates + levels (list): image pyramid levels used by each projected RoI + """ + im_rois = im_rois.astype(np.float, copy=False) + scales = np.array(scales) + + if len(scales) > 1: + widths = im_rois[:, 2] - im_rois[:, 0] + 1 + heights = im_rois[:, 3] - im_rois[:, 1] + 1 + + areas = widths * heights + scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) + diff_areas = np.abs(scaled_areas - 224 * 224) + levels = diff_areas.argmin(axis=1)[:, np.newaxis] + else: + levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) + + rois = im_rois * scales[levels] + + return rois, levels + + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. + + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_loss_weights (ndarray): N x 4K blob of loss weights + """ + clss = bbox_target_data[:, 0] + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 
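_get_bbox_regression_labels above scatters each compact (cls, dx, dy, dw, dh) row into the 4*K-wide layout the network consumes. Note that this minibatch2.py copy keeps `clss = bbox_target_data[:, 0]` as floats, unchanged by the port, so `start = 4 * cls` yields float slice bounds, which Python 3 with newer numpy rejects (the minibatch.py version casts to uint16). A toy expansion with the cast made explicit; all values invented:

import numpy as np

num_classes = 3
row = np.array([2, 0.1, -0.2, 0.0, 0.3], dtype=np.float32)   # (cls, dx, dy, dw, dh)
bbox_targets = np.zeros(4 * num_classes, dtype=np.float32)
cls = int(row[0])                     # int cast keeps the slice bounds integral
bbox_targets[4 * cls:4 * cls + 4] = row[1:]
# Only the four columns belonging to class 2 are non-zero:
# [0, 0, 0, 0, 0, 0, 0, 0, 0.1, -0.2, 0, 0.3]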
+ return bbox_targets, bbox_loss_weights + + +def _get_viewpoint_estimation_labels(viewpoint_data, clss, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. + + Returns: + view_target_data (ndarray): N x 3K blob of regression targets + view_loss_weights (ndarray): N x 3K blob of loss weights + """ + view_targets = np.zeros((clss.size, 3 * num_classes), dtype=np.float32) + view_loss_weights = np.zeros(view_targets.shape, dtype=np.float32) + inds = np.where( (clss > 0) & np.isfinite(viewpoint_data[:,0]) & np.isfinite(viewpoint_data[:,1]) & np.isfinite(viewpoint_data[:,2]) )[0] + for ind in inds: + cls = clss[ind] + start = 3 * cls + end = start + 3 + view_targets[ind, start:end] = viewpoint_data[ind, :] + view_loss_weights[ind, start:end] = [1., 1., 1.] + + assert not np.isinf(view_targets).any(), 'viewpoint undefined' + return view_targets, view_loss_weights + + +def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps, sublabels_blob, view_targets_blob=None, view_inside_blob=None): + """Visualize a mini-batch for debugging.""" + import matplotlib.pyplot as plt + import math + for i in xrange(min(rois_blob.shape[0], 10)): + rois = rois_blob[i, :] + im_ind = rois[0] + roi = rois[1:] + im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() + im += cfg.PIXEL_MEANS + im = im[:, :, (2, 1, 0)] + im = im.astype(np.uint8) + cls = labels_blob[i] + subcls = sublabels_blob[i] + plt.imshow(im) + print 'class: ', cls, ' subclass: ', subcls, ' overlap: ', overlaps[i] + + start = 3 * cls + end = start + 3 + # print 'view: ', view_targets_blob[i, start:end] * 180 / math.pi + # print 'view weights: ', view_inside_blob[i, start:end] + + plt.gca().add_patch( + plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], + roi[3] - roi[1], fill=False, + edgecolor='r', linewidth=3) + ) + plt.show() diff --git a/lib/roi_data_layer/roidb.py b/lib/roi_data_layer/roidb.py index 97a6a761..baa64387 100644 --- a/lib/roi_data_layer/roidb.py +++ b/lib/roi_data_layer/roidb.py @@ -21,9 +21,9 @@ def prepare_roidb(imdb): recorded. 
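The roi_data_layer/roidb.py hunks below convert the statistics printing and normalize the regression targets in place. A compact walk-through of the two steps involved, the max-overlap bookkeeping from prepare_roidb and the normalize/unnormalize round trip; all numbers invented:

import numpy as np

# prepare_roidb: rows are RoIs, columns are classes (column 0 = background).
gt_overlaps = np.array([[0.0, 0.8, 0.1],
                        [0.0, 0.0, 0.0]], dtype=np.float32)
max_overlaps = gt_overlaps.max(axis=1)    # [0.8, 0.0]
max_classes = gt_overlaps.argmax(axis=1)  # [1, 0]: zero overlap falls to background,
                                          # which the sanity checks below rely on

# add_bbox_regression_targets: normalization, and the inverse that
# prediction-time code must apply ("unnormalized and uncentered").
means = np.zeros(4)
stds = np.array([0.1, 0.1, 0.2, 0.2])
raw = np.array([0.05, -0.02, 0.1, 0.0])
normalized = (raw - means) / stds        # what the regressor is trained on
restored = normalized * stds + means     # what test-time code must compute
assert np.allclose(restored, raw)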
""" sizes = [PIL.Image.open(imdb.image_path_at(i)).size - for i in xrange(imdb.num_images)] + for i in range(imdb.num_images)] roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) roidb[i]['width'] = sizes[i][0] roidb[i]['height'] = sizes[i][1] @@ -51,7 +51,7 @@ def add_bbox_regression_targets(roidb): num_images = len(roidb) # Infer number of classes from the number of columns in gt_overlaps num_classes = roidb[0]['gt_overlaps'].shape[1] - for im_i in xrange(num_images): + for im_i in range(num_images): rois = roidb[im_i]['boxes'] max_overlaps = roidb[im_i]['max_overlaps'] max_classes = roidb[im_i]['max_classes'] @@ -70,9 +70,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -83,24 +83,24 @@ def add_bbox_regression_targets(roidb): means = sums / class_counts stds = np.sqrt(squared_sums / class_counts - means ** 2) - print 'bbox target means:' - print means - print means[1:, :].mean(axis=0) # ignore bg class - print 'bbox target stdevs:' - print stds - print stds[1:, :].mean(axis=0) # ignore bg class + print('bbox target means:') + print(means) + print(means[1:, :].mean(axis=0)) # ignore bg class + print('bbox target stdevs:') + print(stds) + print(stds[1:, :].mean(axis=0)) # ignore bg class # Normalize targets if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: - print "Normalizing targets" - for im_i in xrange(num_images): + print("Normalizing targets") + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] else: - print "NOT normalizing targets" + print("NOT normalizing targets") # These values will be needed for making predictions # (the predicts will need to be unnormalized and uncentered) diff --git a/lib/roi_data_layer/roidb.py.bak b/lib/roi_data_layer/roidb.py.bak new file mode 100644 index 00000000..97a6a761 --- /dev/null +++ b/lib/roi_data_layer/roidb.py.bak @@ -0,0 +1,133 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" + +import numpy as np +from fast_rcnn.config import cfg +from fast_rcnn.bbox_transform import bbox_transform +from utils.cython_bbox import bbox_overlaps +import PIL + +def prepare_roidb(imdb): + """Enrich the imdb's roidb by adding some derived quantities that + are useful for training. This function precomputes the maximum + overlap, taken over ground-truth boxes, between each ROI and + each ground-truth box. The class with maximum overlap is also + recorded. 
+ """ + sizes = [PIL.Image.open(imdb.image_path_at(i)).size + for i in xrange(imdb.num_images)] + roidb = imdb.roidb + for i in xrange(len(imdb.image_index)): + roidb[i]['image'] = imdb.image_path_at(i) + roidb[i]['width'] = sizes[i][0] + roidb[i]['height'] = sizes[i][1] + # need gt_overlaps as a dense array for argmax + gt_overlaps = roidb[i]['gt_overlaps'].toarray() + # max overlap with gt over classes (columns) + max_overlaps = gt_overlaps.max(axis=1) + # gt class that had the max overlap + max_classes = gt_overlaps.argmax(axis=1) + roidb[i]['max_classes'] = max_classes + roidb[i]['max_overlaps'] = max_overlaps + # sanity checks + # max overlap of 0 => class should be zero (background) + zero_inds = np.where(max_overlaps == 0)[0] + assert all(max_classes[zero_inds] == 0) + # max overlap > 0 => class should not be zero (must be a fg class) + nonzero_inds = np.where(max_overlaps > 0)[0] + assert all(max_classes[nonzero_inds] != 0) + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + assert len(roidb) > 0 + assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' + + num_images = len(roidb) + # Infer number of classes from the number of columns in gt_overlaps + num_classes = roidb[0]['gt_overlaps'].shape[1] + for im_i in xrange(num_images): + rois = roidb[im_i]['boxes'] + max_overlaps = roidb[im_i]['max_overlaps'] + max_classes = roidb[im_i]['max_classes'] + roidb[im_i]['bbox_targets'] = \ + _compute_targets(rois, max_overlaps, max_classes) + + if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: + # Use fixed / precomputed "means" and "stds" instead of empirical values + means = np.tile( + np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) + stds = np.tile( + np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) + else: + # Compute values needed for means and stds + # var(x) = E(x^2) - E(x)^2 + class_counts = np.zeros((num_classes, 1)) + cfg.EPS + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + if cls_inds.size > 0: + class_counts[cls] += cls_inds.size + sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) + squared_sums[cls, :] += \ + (targets[cls_inds, 1:] ** 2).sum(axis=0) + + means = sums / class_counts + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + print 'bbox target means:' + print means + print means[1:, :].mean(axis=0) # ignore bg class + print 'bbox target stdevs:' + print stds + print stds[1:, :].mean(axis=0) # ignore bg class + + # Normalize targets + if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: + print "Normalizing targets" + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] + roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] + else: + print "NOT normalizing targets" + + # These values will be needed for making predictions + # (the predicts will need to be unnormalized and uncentered) + return means.ravel(), stds.ravel() + +def _compute_targets(rois, overlaps, labels): + """Compute bounding-box regression targets for an image.""" + # Indices of ground-truth ROIs + gt_inds = np.where(overlaps == 1)[0] + if len(gt_inds) == 0: + # Bail if the image has no ground-truth ROIs + return np.zeros((rois.shape[0], 5), dtype=np.float32) + # 
Indices of examples for which we try to make predictions + ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = bbox_overlaps( + np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), + np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) + + # Find which gt ROI each ex ROI has max overlap with: + # this will be the ex ROI's gt target + gt_assignment = ex_gt_overlaps.argmax(axis=1) + gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + + targets = np.zeros((rois.shape[0], 5), dtype=np.float32) + targets[ex_inds, 0] = labels[ex_inds] + targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) + return targets diff --git a/lib/roi_data_layer/roidb2.py b/lib/roi_data_layer/roidb2.py index 6735f4f7..45f9573f 100644 --- a/lib/roi_data_layer/roidb2.py +++ b/lib/roi_data_layer/roidb2.py @@ -19,7 +19,7 @@ def prepare_roidb(imdb): recorded. """ roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) # need gt_overlaps as a dense array for argmax gt_overlaps = roidb[i]['gt_overlaps'].toarray() @@ -47,7 +47,7 @@ def add_bbox_regression_targets(roidb): num_images = len(roidb) # Infer number of classes from the number of columns in gt_overlaps num_classes = roidb[0]['gt_overlaps'].shape[1] - for im_i in xrange(num_images): + for im_i in range(num_images): rois = roidb[im_i]['boxes'] max_overlaps = roidb[im_i]['max_overlaps'] max_classes = roidb[im_i]['max_classes'] @@ -59,9 +59,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -72,9 +72,9 @@ def add_bbox_regression_targets(roidb): stds = np.sqrt(squared_sums / class_counts - means ** 2) # Normalize targets - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] if stds[cls, 0] != 0: @@ -93,7 +93,7 @@ def _compute_targets(rois, overlaps, labels, num_classes): gt_inds = np.where(overlaps == 1)[0] # Indices of examples for which we try to make predictions ex_inds = [] - for i in xrange(1, num_classes): + for i in range(1, num_classes): ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) # Get IoU overlap between each ex ROI and gt ROI diff --git a/lib/roi_data_layer/roidb2.py.bak b/lib/roi_data_layer/roidb2.py.bak new file mode 100644 index 00000000..6735f4f7 --- /dev/null +++ b/lib/roi_data_layer/roidb2.py.bak @@ -0,0 +1,133 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" + +import numpy as np +from fast_rcnn.config import cfg +import utils.cython_bbox + +def prepare_roidb(imdb): + """Enrich the imdb's roidb by adding some derived 
quantities that + are useful for training. This function precomputes the maximum + overlap, taken over ground-truth boxes, between each ROI and + each ground-truth box. The class with maximum overlap is also + recorded. + """ + roidb = imdb.roidb + for i in xrange(len(imdb.image_index)): + roidb[i]['image'] = imdb.image_path_at(i) + # need gt_overlaps as a dense array for argmax + gt_overlaps = roidb[i]['gt_overlaps'].toarray() + # max overlap with gt over classes (columns) + max_overlaps = gt_overlaps.max(axis=1) + # gt class that had the max overlap + max_classes = gt_overlaps.argmax(axis=1) + + roidb[i]['max_classes'] = max_classes + roidb[i]['max_overlaps'] = max_overlaps + + # sanity checks + # max overlap of 0 => class should be zero (background) + zero_inds = np.where(max_overlaps == 0)[0] + assert all(max_classes[zero_inds] == 0) + # max overlap > 0 => class should not be zero (must be a fg class) + nonzero_inds = np.where(max_overlaps > 0)[0] + assert all(max_classes[nonzero_inds] != 0) + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + assert len(roidb) > 0 + assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' + + num_images = len(roidb) + # Infer number of classes from the number of columns in gt_overlaps + num_classes = roidb[0]['gt_overlaps'].shape[1] + for im_i in xrange(num_images): + rois = roidb[im_i]['boxes'] + max_overlaps = roidb[im_i]['max_overlaps'] + max_classes = roidb[im_i]['max_classes'] + roidb[im_i]['bbox_targets'] = \ + _compute_targets(rois, max_overlaps, max_classes, num_classes) + + # Compute values needed for means and stds + # var(x) = E(x^2) - E(x)^2 + class_counts = np.zeros((num_classes, 1)) + cfg.EPS + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + if cls_inds.size > 0: + class_counts[cls] += cls_inds.size + sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) + squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0) + + means = sums / class_counts + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + # Normalize targets + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] + if stds[cls, 0] != 0: + roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] + + # These values will be needed for making predictions + # (the predicts will need to be unnormalized and uncentered) + return means.ravel(), stds.ravel() + +def _compute_targets(rois, overlaps, labels, num_classes): + """Compute bounding-box regression targets for an image.""" + # Ensure ROIs are floats + rois = rois.astype(np.float, copy=False) + + # Indices of ground-truth ROIs + gt_inds = np.where(overlaps == 1)[0] + # Indices of examples for which we try to make predictions + ex_inds = [] + for i in xrange(1, num_classes): + ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :], + rois[gt_inds, :]) + + # Find which gt ROI each ex ROI has max overlap with: + # this will be the ex ROI's gt target + if ex_gt_overlaps.shape[0] != 0: + gt_assignment = ex_gt_overlaps.argmax(axis=1) + else: + gt_assignment = [] + 
gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS + ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths + ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights + + gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS + gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS + gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths + gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights + + targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = np.log(gt_widths / ex_widths) + targets_dh = np.log(gt_heights / ex_heights) + + targets = np.zeros((rois.shape[0], 5), dtype=np.float32) + targets[ex_inds, 0] = labels[ex_inds] + targets[ex_inds, 1] = targets_dx + targets[ex_inds, 2] = targets_dy + targets[ex_inds, 3] = targets_dw + targets[ex_inds, 4] = targets_dh + return targets diff --git a/lib/roi_pooling_layer/roi_pooling_op_grad.py b/lib/roi_pooling_layer/roi_pooling_op_grad.py index 5ec3d188..38d76e4f 100644 --- a/lib/roi_pooling_layer/roi_pooling_op_grad.py +++ b/lib/roi_pooling_layer/roi_pooling_op_grad.py @@ -1,6 +1,6 @@ import tensorflow as tf from tensorflow.python.framework import ops -import roi_pooling_op +import roi_pooling_layer.roi_pooling_op import pdb diff --git a/lib/rpn_msr/anchor_target_layer.py b/lib/rpn_msr/anchor_target_layer.py index fb0b487a..31183dcd 100644 --- a/lib/rpn_msr/anchor_target_layer.py +++ b/lib/rpn_msr/anchor_target_layer.py @@ -11,7 +11,7 @@ from fast_rcnn.config import cfg import numpy as np import numpy.random as npr -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from utils.cython_bbox import bbox_overlaps from fast_rcnn.bbox_transform import bbox_transform @@ -28,13 +28,13 @@ def setup(self, bottom, top): self._num_anchors = self._anchors.shape[0] if DEBUG: - print 'anchors:' - print self._anchors - print 'anchor shapes:' - print np.hstack(( + print('anchors:') + print(self._anchors) + print('anchor shapes:') + print(np.hstack(( self._anchors[:, 2::4] - self._anchors[:, 0::4], self._anchors[:, 3::4] - self._anchors[:, 1::4], - )) + ))) self._counts = cfg.EPS self._sums = np.zeros((1, 4)) self._squared_sums = np.zeros((1, 4)) @@ -50,7 +50,7 @@ def setup(self, bottom, top): height, width = bottom[0].data.shape[-2:] if DEBUG: - print 'AnchorTargetLayer: height', height, 'width', width + print('AnchorTargetLayer: height', height, 'width', width) A = self._num_anchors # labels @@ -82,12 +82,12 @@ def forward(self, bottom, top): im_info = bottom[2].data[0, :] if DEBUG: - print '' - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) - print 'height, width: ({}, {})'.format(height, width) - print 'rpn: gt_boxes.shape', gt_boxes.shape - print 'rpn: gt_boxes', gt_boxes + print('') + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) + print('height, width: ({}, {})'.format(height, width)) + print('rpn: gt_boxes.shape', gt_boxes.shape) + print('rpn: gt_boxes', gt_boxes) # 1. 
Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * self._feat_stride @@ -115,13 +115,13 @@ def forward(self, bottom, top): )[0] if DEBUG: - print 'total_anchors', total_anchors - print 'inds_inside', len(inds_inside) + print('total_anchors', total_anchors) + print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: - print 'anchors.shape', anchors.shape + print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) @@ -202,10 +202,10 @@ def forward(self, bottom, top): self._counts += np.sum(labels == 1) means = self._sums / self._counts stds = np.sqrt(self._squared_sums / self._counts - means ** 2) - print 'means:' - print means - print 'stdevs:' - print stds + print('means:') + print(means) + print('stdevs:') + print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) @@ -215,16 +215,16 @@ def forward(self, bottom, top): if DEBUG: if gt_boxes.shape[0] != 0: - print 'rpn: max max_overlap', np.max(max_overlaps) + print('rpn: max max_overlap', np.max(max_overlaps)) else: - print 'rpn: max max_overlap', 0 - print 'rpn: num_positive', np.sum(labels == 1) - print 'rpn: num_negative', np.sum(labels == 0) + print('rpn: max max_overlap', 0) + print('rpn: num_positive', np.sum(labels == 1)) + print('rpn: num_negative', np.sum(labels == 0)) self._fg_sum += np.sum(labels == 1) self._bg_sum += np.sum(labels == 0) self._count += 1 - print 'rpn: num_positive avg', self._fg_sum / self._count - print 'rpn: num_negative avg', self._bg_sum / self._count + print('rpn: num_positive avg', self._fg_sum / self._count) + print('rpn: num_negative avg', self._bg_sum / self._count) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) diff --git a/lib/rpn_msr/anchor_target_layer.py.bak b/lib/rpn_msr/anchor_target_layer.py.bak new file mode 100644 index 00000000..fb0b487a --- /dev/null +++ b/lib/rpn_msr/anchor_target_layer.py.bak @@ -0,0 +1,287 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import os +import caffe +import yaml +from fast_rcnn.config import cfg +import numpy as np +import numpy.random as npr +from generate_anchors import generate_anchors +from utils.cython_bbox import bbox_overlaps +from fast_rcnn.bbox_transform import bbox_transform + +DEBUG = False + +class AnchorTargetLayer(caffe.Layer): + """ + Assign anchors to ground-truth targets. Produces anchor classification + labels and bounding-box regression targets. 
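
The broadcasting in the shifted-anchor hunk above is compact enough to warrant a sketch: A base anchors of shape (1, A, 4) are added to K per-cell shifts of shape (K, 1, 4), producing one copy of the anchor set per feature-map cell. A toy-sized, runnable version:

```python
import numpy as np

feat_stride, height, width = 16, 2, 3                 # toy feature map
anchors = np.array([[-8., -8., 8., 8.]])              # A = 1 for clarity
shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()   # (K, 4)

A, K = anchors.shape[0], shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors.shape)        # (6, 4): every cell gets its own copy
print(all_anchors[4])           # [ 8.  8. 24. 24.] -- anchor at cell (1, 1)
```
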
+ """ + + def setup(self, bottom, top): + self._anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) + self._num_anchors = self._anchors.shape[0] + + if DEBUG: + print 'anchors:' + print self._anchors + print 'anchor shapes:' + print np.hstack(( + self._anchors[:, 2::4] - self._anchors[:, 0::4], + self._anchors[:, 3::4] - self._anchors[:, 1::4], + )) + self._counts = cfg.EPS + self._sums = np.zeros((1, 4)) + self._squared_sums = np.zeros((1, 4)) + self._fg_sum = 0 + self._bg_sum = 0 + self._count = 0 + + layer_params = yaml.load(self.param_str_) + self._feat_stride = layer_params['feat_stride'] + + # allow boxes to sit over the edge by a small amount + self._allowed_border = layer_params.get('allowed_border', 0) + + height, width = bottom[0].data.shape[-2:] + if DEBUG: + print 'AnchorTargetLayer: height', height, 'width', width + + A = self._num_anchors + # labels + top[0].reshape(1, 1, A * height, width) + # bbox_targets + top[1].reshape(1, A * 4, height, width) + # bbox_inside_weights + top[2].reshape(1, A * 4, height, width) + # bbox_outside_weights + top[3].reshape(1, A * 4, height, width) + + def forward(self, bottom, top): + # Algorithm: + # + # for each (H, W) location i + # generate 9 anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the 9 anchors + # filter out-of-image anchors + # measure GT overlap + + assert bottom[0].data.shape[0] == 1, \ + 'Only single item batches are supported' + + # map of shape (..., H, W) + height, width = bottom[0].data.shape[-2:] + # GT boxes (x1, y1, x2, y2, label) + gt_boxes = bottom[1].data + # im_info + im_info = bottom[2].data[0, :] + + if DEBUG: + print '' + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + print 'height, width: ({}, {})'.format(height, width) + print 'rpn: gt_boxes.shape', gt_boxes.shape + print 'rpn: gt_boxes', gt_boxes + + # 1. 
Generate proposals from bbox deltas and shifted anchors + shift_x = np.arange(0, width) * self._feat_stride + shift_y = np.arange(0, height) * self._feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = self._num_anchors + K = shifts.shape[0] + all_anchors = (self._anchors.reshape((1, A, 4)) + + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) + all_anchors = all_anchors.reshape((K * A, 4)) + total_anchors = int(K * A) + + # only keep anchors inside the image + inds_inside = np.where( + (all_anchors[:, 0] >= -self._allowed_border) & + (all_anchors[:, 1] >= -self._allowed_border) & + (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width + (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height + )[0] + + if DEBUG: + print 'total_anchors', total_anchors + print 'inds_inside', len(inds_inside) + + # keep only inside anchors + anchors = all_anchors[inds_inside, :] + if DEBUG: + print 'anchors.shape', anchors.shape + + # label: 1 is positive, 0 is negative, -1 is dont care + labels = np.empty((len(inds_inside), ), dtype=np.float32) + labels.fill(-1) + + # overlaps between the anchors and the gt boxes + # overlaps (ex, gt) + if gt_boxes.shape[0] != 0: + overlaps = bbox_overlaps( + np.ascontiguousarray(anchors, dtype=np.float), + np.ascontiguousarray(gt_boxes, dtype=np.float)) + argmax_overlaps = overlaps.argmax(axis=1) + max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] + gt_argmax_overlaps = overlaps.argmax(axis=0) + gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] + gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] + + if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels first so that positive labels can clobber them + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + + # fg label: for each gt, anchor with highest overlap + labels[gt_argmax_overlaps] = 1 + + # fg label: above threshold IOU + labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 + + if cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels last so that negative labels can clobber positives + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + else: + labels.fill(0) + + # subsample positive labels if we have too many + num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg: + disable_inds = npr.choice( + fg_inds, size=(len(fg_inds) - num_fg), replace=False) + labels[disable_inds] = -1 + + # subsample negative labels if we have too many + num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) + bg_inds = np.where(labels == 0)[0] + if len(bg_inds) > num_bg: + disable_inds = npr.choice( + bg_inds, size=(len(bg_inds) - num_bg), replace=False) + labels[disable_inds] = -1 + #print "was %s inds, disabling %s, now %s inds" % ( + #len(bg_inds), len(disable_inds), np.sum(labels == 0)) + + bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) + if gt_boxes.shape[0] != 0: + bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) + + bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) + + bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + if 
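
The labeling rules scattered through this function reduce to three assignments. A runnable sketch with toy overlaps; it omits the tie handling via `np.where(overlaps == gt_max_overlaps)` that the real code uses, and assumes the default non-clobbering order:

```python
import numpy as np

def assign_rpn_labels(overlaps, pos=0.7, neg=0.3):
    """1 = foreground, 0 = background, -1 = don't care."""
    labels = np.full(overlaps.shape[0], -1, dtype=np.float32)
    max_overlaps = overlaps.max(axis=1)
    labels[max_overlaps < neg] = 0       # background first, so positives
    labels[overlaps.argmax(axis=0)] = 1  # can clobber: best anchor per gt box
    labels[max_overlaps >= pos] = 1      # and any anchor above the threshold
    return labels

overlaps = np.array([[0.1], [0.5], [0.8]])    # 3 anchors, 1 gt box
print(assign_rpn_labels(overlaps))            # [ 0. -1.  1.]
```
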
cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: + # uniform weighting of examples (given non-uniform sampling) + num_examples = np.sum(labels >= 0) + positive_weights = np.ones((1, 4)) * 1.0 / num_examples + negative_weights = np.ones((1, 4)) * 1.0 / num_examples + else: + assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & + (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) + positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / + np.sum(labels == 1)) + negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / + np.sum(labels == 0)) + bbox_outside_weights[labels == 1, :] = positive_weights + bbox_outside_weights[labels == 0, :] = negative_weights + + if DEBUG: + self._sums += bbox_targets[labels == 1, :].sum(axis=0) + self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) + self._counts += np.sum(labels == 1) + means = self._sums / self._counts + stds = np.sqrt(self._squared_sums / self._counts - means ** 2) + print 'means:' + print means + print 'stdevs:' + print stds + + # map up to original set of anchors + labels = _unmap(labels, total_anchors, inds_inside, fill=-1) + bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) + bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) + bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) + + if DEBUG: + if gt_boxes.shape[0] != 0: + print 'rpn: max max_overlap', np.max(max_overlaps) + else: + print 'rpn: max max_overlap', 0 + print 'rpn: num_positive', np.sum(labels == 1) + print 'rpn: num_negative', np.sum(labels == 0) + self._fg_sum += np.sum(labels == 1) + self._bg_sum += np.sum(labels == 0) + self._count += 1 + print 'rpn: num_positive avg', self._fg_sum / self._count + print 'rpn: num_negative avg', self._bg_sum / self._count + + # labels + labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) + labels = labels.reshape((1, 1, A * height, width)) + top[0].reshape(*labels.shape) + top[0].data[...] = labels + + # bbox_targets + bbox_targets = bbox_targets \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + top[1].reshape(*bbox_targets.shape) + top[1].data[...] = bbox_targets + + # bbox_inside_weights + bbox_inside_weights = bbox_inside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + assert bbox_inside_weights.shape[2] == height + assert bbox_inside_weights.shape[3] == width + top[2].reshape(*bbox_inside_weights.shape) + top[2].data[...] = bbox_inside_weights + + # bbox_outside_weights + bbox_outside_weights = bbox_outside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + assert bbox_outside_weights.shape[2] == height + assert bbox_outside_weights.shape[3] == width + top[3].reshape(*bbox_outside_weights.shape) + top[3].data[...] 
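
With the default `RPN_POSITIVE_WEIGHT = -1.0`, every sampled example (foreground or background) gets the same 1/num_examples weight, so the regression loss is normalized by the number of anchors that actually participate rather than by the full grid. Sketch:

```python
import numpy as np

labels = np.array([1., 1., 0., 0., 0., -1.])           # -1 anchors are ignored
outside = np.zeros((labels.size, 4), dtype=np.float32)
num_examples = np.sum(labels >= 0)                     # 5 participating anchors
outside[labels == 1, :] = 1.0 / num_examples
outside[labels == 0, :] = 1.0 / num_examples
print(outside.sum())   # 4.0: 5 examples * 4 coords * (1/5)
```
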
= bbox_outside_weights + + def backward(self, top, propagate_down, bottom): + """This layer does not propagate gradients.""" + pass + + def reshape(self, bottom, top): + """Reshaping happens during the call to forward.""" + pass + + +def _unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if len(data.shape) == 1: + ret = np.empty((count, ), dtype=np.float32) + ret.fill(fill) + ret[inds] = data + else: + ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) + ret.fill(fill) + ret[inds, :] = data + return ret + + +def _compute_targets(ex_rois, gt_rois): + """Compute bounding-box regression targets for an image.""" + + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 5 + + return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) diff --git a/lib/rpn_msr/anchor_target_layer_tf.py b/lib/rpn_msr/anchor_target_layer_tf.py index 9965a7a1..60d8ee0b 100644 --- a/lib/rpn_msr/anchor_target_layer_tf.py +++ b/lib/rpn_msr/anchor_target_layer_tf.py @@ -10,7 +10,7 @@ from fast_rcnn.config import cfg import numpy as np import numpy.random as npr -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from utils.cython_bbox import bbox_overlaps from fast_rcnn.bbox_transform import bbox_transform import pdb @@ -26,13 +26,13 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ _num_anchors = _anchors.shape[0] if DEBUG: - print 'anchors:' - print _anchors - print 'anchor shapes:' - print np.hstack(( + print('anchors:') + print(_anchors) + print('anchor shapes:') + print(np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], - )) + ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) @@ -62,13 +62,13 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ height, width = rpn_cls_score.shape[1:3] if DEBUG: - print 'AnchorTargetLayer: height', height, 'width', width - print '' - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) - print 'height, width: ({}, {})'.format(height, width) - print 'rpn: gt_boxes.shape', gt_boxes.shape - print 'rpn: gt_boxes', gt_boxes + print('AnchorTargetLayer: height', height, 'width', width) + print('') + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) + print('height, width: ({}, {})'.format(height, width)) + print('rpn: gt_boxes.shape', gt_boxes.shape) + print('rpn: gt_boxes', gt_boxes) # 1. 
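
`_unmap` is the inverse of the `inds_inside` filtering: values computed on the inside-image subset are scattered back onto the full anchor grid, with `fill` (-1 for labels, 0 for targets and weights) everywhere else. An equivalent compact sketch:

```python
import numpy as np

def unmap(data, count, inds, fill=0):
    ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data            # works for 1-D labels and 2-D targets alike
    return ret

print(unmap(np.array([1., 2.]), count=4, inds=np.array([1, 3]), fill=-1))
# [-1.  1. -1.  2.]
```
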
Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride @@ -96,13 +96,13 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ )[0] if DEBUG: - print 'total_anchors', total_anchors - print 'inds_inside', len(inds_inside) + print('total_anchors', total_anchors) + print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: - print 'anchors.shape', anchors.shape + print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) @@ -180,10 +180,10 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means ** 2) - print 'means:' - print means - print 'stdevs:' - print stds + print('means:') + print(means) + print('stdevs:') + print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) @@ -192,14 +192,14 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) if DEBUG: - print 'rpn: max max_overlap', np.max(max_overlaps) - print 'rpn: num_positive', np.sum(labels == 1) - print 'rpn: num_negative', np.sum(labels == 0) + print('rpn: max max_overlap', np.max(max_overlaps)) + print('rpn: num_positive', np.sum(labels == 1)) + print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 - print 'rpn: num_positive avg', _fg_sum / _count - print 'rpn: num_negative avg', _bg_sum / _count + print('rpn: num_positive avg', _fg_sum / _count) + print('rpn: num_negative avg', _bg_sum / _count) # labels #pdb.set_trace() diff --git a/lib/rpn_msr/anchor_target_layer_tf.py.bak b/lib/rpn_msr/anchor_target_layer_tf.py.bak new file mode 100644 index 00000000..9965a7a1 --- /dev/null +++ b/lib/rpn_msr/anchor_target_layer_tf.py.bak @@ -0,0 +1,256 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import os +import yaml +from fast_rcnn.config import cfg +import numpy as np +import numpy.random as npr +from generate_anchors import generate_anchors +from utils.cython_bbox import bbox_overlaps +from fast_rcnn.bbox_transform import bbox_transform +import pdb + +DEBUG = False + +def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [16,], anchor_scales = [4 ,8, 16, 32]): + """ + Assign anchors to ground-truth targets. Produces anchor classification + labels and bounding-box regression targets. 
+ """ + _anchors = generate_anchors(scales=np.array(anchor_scales)) + _num_anchors = _anchors.shape[0] + + if DEBUG: + print 'anchors:' + print _anchors + print 'anchor shapes:' + print np.hstack(( + _anchors[:, 2::4] - _anchors[:, 0::4], + _anchors[:, 3::4] - _anchors[:, 1::4], + )) + _counts = cfg.EPS + _sums = np.zeros((1, 4)) + _squared_sums = np.zeros((1, 4)) + _fg_sum = 0 + _bg_sum = 0 + _count = 0 + + # allow boxes to sit over the edge by a small amount + _allowed_border = 0 + # map of shape (..., H, W) + #height, width = rpn_cls_score.shape[1:3] + + im_info = im_info[0] + + # Algorithm: + # + # for each (H, W) location i + # generate 9 anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the 9 anchors + # filter out-of-image anchors + # measure GT overlap + + assert rpn_cls_score.shape[0] == 1, \ + 'Only single item batches are supported' + + # map of shape (..., H, W) + height, width = rpn_cls_score.shape[1:3] + + if DEBUG: + print 'AnchorTargetLayer: height', height, 'width', width + print '' + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + print 'height, width: ({}, {})'.format(height, width) + print 'rpn: gt_boxes.shape', gt_boxes.shape + print 'rpn: gt_boxes', gt_boxes + + # 1. Generate proposals from bbox deltas and shifted anchors + shift_x = np.arange(0, width) * _feat_stride + shift_y = np.arange(0, height) * _feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = _num_anchors + K = shifts.shape[0] + all_anchors = (_anchors.reshape((1, A, 4)) + + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) + all_anchors = all_anchors.reshape((K * A, 4)) + total_anchors = int(K * A) + + # only keep anchors inside the image + inds_inside = np.where( + (all_anchors[:, 0] >= -_allowed_border) & + (all_anchors[:, 1] >= -_allowed_border) & + (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width + (all_anchors[:, 3] < im_info[0] + _allowed_border) # height + )[0] + + if DEBUG: + print 'total_anchors', total_anchors + print 'inds_inside', len(inds_inside) + + # keep only inside anchors + anchors = all_anchors[inds_inside, :] + if DEBUG: + print 'anchors.shape', anchors.shape + + # label: 1 is positive, 0 is negative, -1 is dont care + labels = np.empty((len(inds_inside), ), dtype=np.float32) + labels.fill(-1) + + # overlaps between the anchors and the gt boxes + # overlaps (ex, gt) + overlaps = bbox_overlaps( + np.ascontiguousarray(anchors, dtype=np.float), + np.ascontiguousarray(gt_boxes, dtype=np.float)) + argmax_overlaps = overlaps.argmax(axis=1) + max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] + gt_argmax_overlaps = overlaps.argmax(axis=0) + gt_max_overlaps = overlaps[gt_argmax_overlaps, + np.arange(overlaps.shape[1])] + gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] + + if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels first so that positive labels can clobber them + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + + # fg label: for each gt, anchor with highest overlap + labels[gt_argmax_overlaps] = 1 + + # fg label: above threshold IOU + labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 + + if cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels last so that negative labels can 
clobber positives + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + + # subsample positive labels if we have too many + num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg: + disable_inds = npr.choice( + fg_inds, size=(len(fg_inds) - num_fg), replace=False) + labels[disable_inds] = -1 + + # subsample negative labels if we have too many + num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) + bg_inds = np.where(labels == 0)[0] + if len(bg_inds) > num_bg: + disable_inds = npr.choice( + bg_inds, size=(len(bg_inds) - num_bg), replace=False) + labels[disable_inds] = -1 + #print "was %s inds, disabling %s, now %s inds" % ( + #len(bg_inds), len(disable_inds), np.sum(labels == 0)) + + bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) + bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) + + bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) + + bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: + # uniform weighting of examples (given non-uniform sampling) + num_examples = np.sum(labels >= 0) + positive_weights = np.ones((1, 4)) * 1.0 / num_examples + negative_weights = np.ones((1, 4)) * 1.0 / num_examples + else: + assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & + (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) + positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / + np.sum(labels == 1)) + negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / + np.sum(labels == 0)) + bbox_outside_weights[labels == 1, :] = positive_weights + bbox_outside_weights[labels == 0, :] = negative_weights + + if DEBUG: + _sums += bbox_targets[labels == 1, :].sum(axis=0) + _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) + _counts += np.sum(labels == 1) + means = _sums / _counts + stds = np.sqrt(_squared_sums / _counts - means ** 2) + print 'means:' + print means + print 'stdevs:' + print stds + + # map up to original set of anchors + labels = _unmap(labels, total_anchors, inds_inside, fill=-1) + bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) + bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) + bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) + + if DEBUG: + print 'rpn: max max_overlap', np.max(max_overlaps) + print 'rpn: num_positive', np.sum(labels == 1) + print 'rpn: num_negative', np.sum(labels == 0) + _fg_sum += np.sum(labels == 1) + _bg_sum += np.sum(labels == 0) + _count += 1 + print 'rpn: num_positive avg', _fg_sum / _count + print 'rpn: num_negative avg', _bg_sum / _count + + # labels + #pdb.set_trace() + labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) + labels = labels.reshape((1, 1, A * height, width)) + rpn_labels = labels + + # bbox_targets + bbox_targets = bbox_targets \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + + rpn_bbox_targets = bbox_targets + # bbox_inside_weights + bbox_inside_weights = bbox_inside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + #assert bbox_inside_weights.shape[2] == height + #assert bbox_inside_weights.shape[3] == width + + rpn_bbox_inside_weights = bbox_inside_weights + + # bbox_outside_weights + bbox_outside_weights = bbox_outside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + #assert 
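
Both subsampling branches above follow the same pattern: when there are more candidates than the budget allows, a random subset is disabled by setting its label to -1 rather than removing rows, so array shapes stay aligned with the anchor grid. Runnable sketch of the positive branch (toy budget; the real one is `RPN_FG_FRACTION * RPN_BATCHSIZE`):

```python
import numpy as np
import numpy.random as npr

labels = np.array([1., 1., 1., 1., 0., 0., -1.])
num_fg = 2
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    disable = npr.choice(fg_inds, size=len(fg_inds) - num_fg, replace=False)
    labels[disable] = -1                   # keep the shape; mark as ignored
print(int((labels == 1).sum()))            # 2
```
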
bbox_outside_weights.shape[2] == height + #assert bbox_outside_weights.shape[3] == width + + rpn_bbox_outside_weights = bbox_outside_weights + + return rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights + + + +def _unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if len(data.shape) == 1: + ret = np.empty((count, ), dtype=np.float32) + ret.fill(fill) + ret[inds] = data + else: + ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) + ret.fill(fill) + ret[inds, :] = data + return ret + + +def _compute_targets(ex_rois, gt_rois): + """Compute bounding-box regression targets for an image.""" + + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 5 + + return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) diff --git a/lib/rpn_msr/generate.py b/lib/rpn_msr/generate.py index 9e8cd53c..f1ce390a 100644 --- a/lib/rpn_msr/generate.py +++ b/lib/rpn_msr/generate.py @@ -92,14 +92,14 @@ def imdb_proposals(net, imdb): """Generate RPN proposals on all images in an imdb.""" _t = Timer() - imdb_boxes = [[] for _ in xrange(imdb.num_images)] - for i in xrange(imdb.num_images): + imdb_boxes = [[] for _ in range(imdb.num_images)] + for i in range(imdb.num_images): im = cv2.imread(imdb.image_path_at(i)) _t.tic() imdb_boxes[i], scores = im_proposals(net, im) _t.toc() - print 'im_proposals: {:d}/{:d} {:.3f}s' \ - .format(i + 1, imdb.num_images, _t.average_time) + print('im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time)) if 0: dets = np.hstack((imdb_boxes[i], scores)) # from IPython import embed; embed() @@ -112,14 +112,14 @@ def imdb_proposals_det(net, imdb): """Generate RPN proposals on all images in an imdb.""" _t = Timer() - imdb_boxes = [[] for _ in xrange(imdb.num_images)] - for i in xrange(imdb.num_images): + imdb_boxes = [[] for _ in range(imdb.num_images)] + for i in range(imdb.num_images): im = cv2.imread(imdb.image_path_at(i)) _t.tic() boxes, scores = im_proposals(net, im) _t.toc() - print 'im_proposals: {:d}/{:d} {:.3f}s' \ - .format(i + 1, imdb.num_images, _t.average_time) + print('im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time)) dets = np.hstack((boxes, scores)) imdb_boxes[i] = dets diff --git a/lib/rpn_msr/generate.py.bak b/lib/rpn_msr/generate.py.bak new file mode 100644 index 00000000..9e8cd53c --- /dev/null +++ b/lib/rpn_msr/generate.py.bak @@ -0,0 +1,131 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +from fast_rcnn.config import cfg +from utils.blob import im_list_to_blob +from utils.timer import Timer +import numpy as np +import cv2 + +def _vis_proposals(im, dets, thresh=0.5): + """Draw detected bounding boxes.""" + inds = np.where(dets[:, -1] >= thresh)[0] + if len(inds) == 0: + return + + class_name = 'obj' + im = im[:, :, (2, 1, 0)] + fig, ax = plt.subplots(figsize=(12, 12)) + ax.imshow(im, aspect='equal') + for i in inds: + bbox = dets[i, :4] + score = dets[i, -1] + + ax.add_patch( + plt.Rectangle((bbox[0], bbox[1]), + bbox[2] - bbox[0], + bbox[3] - bbox[1], fill=False, + edgecolor='red', linewidth=3.5) + ) + ax.text(bbox[0], bbox[1] - 2, + '{:s} {:.3f}'.format(class_name, score), + bbox=dict(facecolor='blue', alpha=0.5), + 
fontsize=14, color='white') + + ax.set_title(('{} detections with ' + 'p({} | box) >= {:.1f}').format(class_name, class_name, + thresh), + fontsize=14) + plt.axis('off') + plt.tight_layout() + plt.draw() + +def _get_image_blob(im): + """Converts an image into a network input. + + Arguments: + im (ndarray): a color image in BGR order + + Returns: + blob (ndarray): a data blob holding an image pyramid + im_scale_factors (list): list of image scales (relative to im) used + in the image pyramid + """ + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + processed_ims = [] + + assert len(cfg.TEST.SCALES_BASE) == 1 + im_scale = cfg.TRAIN.SCALES_BASE[0] + + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR) + im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_info + +def im_proposals(net, im): + """Generate RPN proposals on a single image.""" + blobs = {} + blobs['data'], blobs['im_info'] = _get_image_blob(im) + net.blobs['data'].reshape(*(blobs['data'].shape)) + net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) + blobs_out = net.forward( + data=blobs['data'].astype(np.float32, copy=False), + im_info=blobs['im_info'].astype(np.float32, copy=False)) + + scale = blobs['im_info'][0, 2] + boxes = blobs_out['rois'][:, 1:].copy() / scale + scores = blobs_out['scores'].copy() + return boxes, scores + +def imdb_proposals(net, imdb): + """Generate RPN proposals on all images in an imdb.""" + + _t = Timer() + imdb_boxes = [[] for _ in xrange(imdb.num_images)] + for i in xrange(imdb.num_images): + im = cv2.imread(imdb.image_path_at(i)) + _t.tic() + imdb_boxes[i], scores = im_proposals(net, im) + _t.toc() + print 'im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time) + if 0: + dets = np.hstack((imdb_boxes[i], scores)) + # from IPython import embed; embed() + _vis_proposals(im, dets[:3, :], thresh=0.9) + plt.show() + + return imdb_boxes + +def imdb_proposals_det(net, imdb): + """Generate RPN proposals on all images in an imdb.""" + + _t = Timer() + imdb_boxes = [[] for _ in xrange(imdb.num_images)] + for i in xrange(imdb.num_images): + im = cv2.imread(imdb.image_path_at(i)) + _t.tic() + boxes, scores = im_proposals(net, im) + _t.toc() + print 'im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time) + dets = np.hstack((boxes, scores)) + imdb_boxes[i] = dets + + if 0: + # from IPython import embed; embed() + _vis_proposals(im, dets[:3, :], thresh=0.9) + plt.show() + + return imdb_boxes diff --git a/lib/rpn_msr/generate_anchors.py b/lib/rpn_msr/generate_anchors.py index 1125a801..fb686df0 100644 --- a/lib/rpn_msr/generate_anchors.py +++ b/lib/rpn_msr/generate_anchors.py @@ -44,7 +44,7 @@ def generate_anchors(base_size=16, ratios=[0.5, 1, 2], base_anchor = np.array([1, 1, base_size, base_size]) - 1 ratio_anchors = _ratio_enum(base_anchor, ratios) anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) - for i in xrange(ratio_anchors.shape[0])]) + for i in range(ratio_anchors.shape[0])]) return anchors def _whctrs(anchor): @@ -100,6 +100,6 @@ def _scale_enum(anchor, scales): import time t = time.time() a = generate_anchors() - print time.time() - t - print a + print(time.time() - t) + print(a) from IPython import embed; embed() diff --git a/lib/rpn_msr/generate_anchors.py.bak b/lib/rpn_msr/generate_anchors.py.bak new file mode 100644 
index 00000000..1125a801 --- /dev/null +++ b/lib/rpn_msr/generate_anchors.py.bak @@ -0,0 +1,105 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import numpy as np + +# Verify that we compute the same anchors as Shaoqing's matlab implementation: +# +# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat +# >> anchors +# +# anchors = +# +# -83 -39 100 56 +# -175 -87 192 104 +# -359 -183 376 200 +# -55 -55 72 72 +# -119 -119 136 136 +# -247 -247 264 264 +# -35 -79 52 96 +# -79 -167 96 184 +# -167 -343 184 360 + +#array([[ -83., -39., 100., 56.], +# [-175., -87., 192., 104.], +# [-359., -183., 376., 200.], +# [ -55., -55., 72., 72.], +# [-119., -119., 136., 136.], +# [-247., -247., 264., 264.], +# [ -35., -79., 52., 96.], +# [ -79., -167., 96., 184.], +# [-167., -343., 184., 360.]]) + +def generate_anchors(base_size=16, ratios=[0.5, 1, 2], + scales=2**np.arange(3, 6)): + """ + Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, 15, 15) window. + """ + + base_anchor = np.array([1, 1, base_size, base_size]) - 1 + ratio_anchors = _ratio_enum(base_anchor, ratios) + anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) + for i in xrange(ratio_anchors.shape[0])]) + return anchors + +def _whctrs(anchor): + """ + Return width, height, x center, and y center for an anchor (window). + """ + + w = anchor[2] - anchor[0] + 1 + h = anchor[3] - anchor[1] + 1 + x_ctr = anchor[0] + 0.5 * (w - 1) + y_ctr = anchor[1] + 0.5 * (h - 1) + return w, h, x_ctr, y_ctr + +def _mkanchors(ws, hs, x_ctr, y_ctr): + """ + Given a vector of widths (ws) and heights (hs) around a center + (x_ctr, y_ctr), output a set of anchors (windows). + """ + + ws = ws[:, np.newaxis] + hs = hs[:, np.newaxis] + anchors = np.hstack((x_ctr - 0.5 * (ws - 1), + y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), + y_ctr + 0.5 * (hs - 1))) + return anchors + +def _ratio_enum(anchor, ratios): + """ + Enumerate a set of anchors for each aspect ratio wrt an anchor. + """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + size = w * h + size_ratios = size / ratios + ws = np.round(np.sqrt(size_ratios)) + hs = np.round(ws * ratios) + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + +def _scale_enum(anchor, scales): + """ + Enumerate a set of anchors for each scale wrt an anchor. 
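
`_ratio_enum` keeps the anchor's area roughly fixed while changing its aspect ratio: solve w*h = size with h = ratio*w, i.e. w = sqrt(size / ratio), then round. A quick check against the 16x16 reference window:

```python
import numpy as np

base_w = base_h = 16                      # the (0, 0, 15, 15) reference window
size = base_w * base_h                    # 256
ratios = np.array([0.5, 1.0, 2.0])
ws = np.round(np.sqrt(size / ratios))     # [23. 16. 11.]
hs = np.round(ws * ratios)                # [12. 16. 22.]
print(ws * hs)                            # [276. 256. 242.] -- area ~preserved
```
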
+ """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + ws = w * scales + hs = h * scales + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + +if __name__ == '__main__': + import time + t = time.time() + a = generate_anchors() + print time.time() - t + print a + from IPython import embed; embed() diff --git a/lib/rpn_msr/proposal_layer.py b/lib/rpn_msr/proposal_layer.py index 2b879bc5..2ccf4b26 100644 --- a/lib/rpn_msr/proposal_layer.py +++ b/lib/rpn_msr/proposal_layer.py @@ -9,7 +9,7 @@ import numpy as np import yaml from fast_rcnn.config import cfg -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes from fast_rcnn.nms_wrapper import nms @@ -30,9 +30,9 @@ def setup(self, bottom, top): self._num_anchors = self._anchors.shape[0] if DEBUG: - print 'feat_stride: {}'.format(self._feat_stride) - print 'anchors:' - print self._anchors + print('feat_stride: {}'.format(self._feat_stride)) + print('anchors:') + print(self._anchors) # rois blob: holds R regions of interest, each is a 5-tuple # (n, x1, y1, x2, y2) specifying an image batch index n and a @@ -73,14 +73,14 @@ def forward(self, bottom, top): im_info = bottom[2].data[0, :] if DEBUG: - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: - print 'score map size: {}'.format(scores.shape) + print('score map size: {}'.format(scores.shape)) # Enumerate all shifts shift_x = np.arange(0, width) * self._feat_stride @@ -145,7 +145,7 @@ def forward(self, bottom, top): keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] - print scores.shape + print(scores.shape) # Output rois blob # Our RPN implementation only supports a single input image, so all diff --git a/lib/rpn_msr/proposal_layer.py.bak b/lib/rpn_msr/proposal_layer.py.bak new file mode 100644 index 00000000..2b879bc5 --- /dev/null +++ b/lib/rpn_msr/proposal_layer.py.bak @@ -0,0 +1,176 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import caffe +import numpy as np +import yaml +from fast_rcnn.config import cfg +from generate_anchors import generate_anchors +from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes +from fast_rcnn.nms_wrapper import nms + +DEBUG = False + +class ProposalLayer(caffe.Layer): + """ + Outputs object detection proposals by applying estimated bounding-box + transformations to a set of regular boxes (called "anchors"). 
+ """ + + def setup(self, bottom, top): + # parse the layer parameter string, which must be valid YAML + layer_params = yaml.load(self.param_str_) + + self._feat_stride = layer_params['feat_stride'] + self._anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) + self._num_anchors = self._anchors.shape[0] + + if DEBUG: + print 'feat_stride: {}'.format(self._feat_stride) + print 'anchors:' + print self._anchors + + # rois blob: holds R regions of interest, each is a 5-tuple + # (n, x1, y1, x2, y2) specifying an image batch index n and a + # rectangle (x1, y1, x2, y2) + top[0].reshape(1, 5) + + # scores blob: holds scores for R regions of interest + if len(top) > 1: + top[1].reshape(1, 1, 1, 1) + + def forward(self, bottom, top): + # Algorithm: + # + # for each (H, W) location i + # generate A anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the A anchors + # clip predicted boxes to image + # remove predicted boxes with either height or width < threshold + # sort all (proposal, score) pairs by score from highest to lowest + # take top pre_nms_topN proposals before NMS + # apply NMS with threshold 0.7 to remaining proposals + # take after_nms_topN proposals after NMS + # return the top proposals (-> RoIs top, scores top) + + assert bottom[0].data.shape[0] == 1, \ + 'Only single item batches are supported' + # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' + cfg_key = 'TEST' + pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N + post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + nms_thresh = cfg[cfg_key].RPN_NMS_THRESH + min_size = cfg[cfg_key].RPN_MIN_SIZE + + # the first set of _num_anchors channels are bg probs + # the second set are the fg probs, which we want + scores = bottom[0].data[:, self._num_anchors:, :, :] + bbox_deltas = bottom[1].data + im_info = bottom[2].data[0, :] + + if DEBUG: + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + + # 1. Generate proposals from bbox deltas and shifted anchors + height, width = scores.shape[-2:] + + if DEBUG: + print 'score map size: {}'.format(scores.shape) + + # Enumerate all shifts + shift_x = np.arange(0, width) * self._feat_stride + shift_y = np.arange(0, height) * self._feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + + # Enumerate all shifted anchors: + # + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = self._num_anchors + K = shifts.shape[0] + anchors = self._anchors.reshape((1, A, 4)) + \ + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) + anchors = anchors.reshape((K * A, 4)) + + # Transpose and reshape predicted bbox transformations to get them + # into the same order as the anchors: + # + # bbox deltas will be (1, 4 * A, H, W) format + # transpose to (1, H, W, 4 * A) + # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) + # in slowest to fastest order + bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) + + # Same story for the scores: + # + # scores are (1, A, H, W) format + # transpose to (1, H, W, A) + # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) + scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) + + # Convert anchors into proposals via bbox transformations + proposals = bbox_transform_inv(anchors, bbox_deltas) + + # 2. 
clip predicted boxes to image + proposals = clip_boxes(proposals, im_info[:2]) + + # 3. remove predicted boxes with either height or width < threshold + # (NOTE: convert min_size to input image scale stored in im_info[2]) + keep = _filter_boxes(proposals, min_size * im_info[2]) + proposals = proposals[keep, :] + scores = scores[keep] + + # 4. sort all (proposal, score) pairs by score from highest to lowest + # 5. take top pre_nms_topN (e.g. 6000) + order = scores.ravel().argsort()[::-1] + if pre_nms_topN > 0: + order = order[:pre_nms_topN] + proposals = proposals[order, :] + scores = scores[order] + + # 6. apply nms (e.g. threshold = 0.7) + # 7. take after_nms_topN (e.g. 300) + # 8. return the top proposals (-> RoIs top) + keep = nms(np.hstack((proposals, scores)), nms_thresh) + if post_nms_topN > 0: + keep = keep[:post_nms_topN] + proposals = proposals[keep, :] + scores = scores[keep] + print scores.shape + + # Output rois blob + # Our RPN implementation only supports a single input image, so all + # batch inds are 0 + batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) + blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) + top[0].reshape(*(blob.shape)) + top[0].data[...] = blob + + # [Optional] output scores blob + if len(top) > 1: + top[1].reshape(*(scores.shape)) + top[1].data[...] = scores + + def backward(self, top, propagate_down, bottom): + """This layer does not propagate gradients.""" + pass + + def reshape(self, bottom, top): + """Reshaping happens during the call to forward.""" + pass + +def _filter_boxes(boxes, min_size): + """Remove all boxes with any side smaller than min_size.""" + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep diff --git a/lib/rpn_msr/proposal_layer_tf.py b/lib/rpn_msr/proposal_layer_tf.py index 13984090..4012a7a0 100644 --- a/lib/rpn_msr/proposal_layer_tf.py +++ b/lib/rpn_msr/proposal_layer_tf.py @@ -8,7 +8,7 @@ import numpy as np import yaml from fast_rcnn.config import cfg -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes from fast_rcnn.nms_wrapper import nms import pdb @@ -33,6 +33,7 @@ def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stri # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) #layer_params = yaml.load(self.param_str_) + cfg_key = cfg_key.decode("utf-8") _anchors = generate_anchors(scales=np.array(anchor_scales)) _num_anchors = _anchors.shape[0] rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) @@ -57,14 +58,14 @@ def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stri #im_info = bottom[2].data[0, :] if DEBUG: - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) # 1. 
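
The one functional change in `proposal_layer_tf.py` beyond prints and the relative import is `cfg_key = cfg_key.decode("utf-8")`. Under Python 3, string arguments that pass through `tf.py_func` arrive in the wrapped function as `bytes`, and a `bytes` key will not match the `'TRAIN'`/`'TEST'` entries of the config dict. A defensive version of the same idea (a sketch; the exact call path is assumed from the diff):

```python
def normalize_cfg_key(cfg_key):
    # tf.py_func hands tf.string inputs to Python as bytes under Python 3
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    return cfg_key

print(normalize_cfg_key(b'TEST'))   # 'TEST' -- now usable as cfg[cfg_key]
```
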
Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: - print 'score map size: {}'.format(scores.shape) + print('score map size: {}'.format(scores.shape)) # Enumerate all shifts shift_x = np.arange(0, width) * _feat_stride diff --git a/lib/rpn_msr/proposal_layer_tf.py.bak b/lib/rpn_msr/proposal_layer_tf.py.bak new file mode 100644 index 00000000..13984090 --- /dev/null +++ b/lib/rpn_msr/proposal_layer_tf.py.bak @@ -0,0 +1,151 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import numpy as np +import yaml +from fast_rcnn.config import cfg +from generate_anchors import generate_anchors +from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes +from fast_rcnn.nms_wrapper import nms +import pdb + + +DEBUG = False +""" +Outputs object detection proposals by applying estimated bounding-box +transformations to a set of regular boxes (called "anchors"). +""" +def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stride = [16,],anchor_scales = [8, 16, 32]): + # Algorithm: + # + # for each (H, W) location i + # generate A anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the A anchors + # clip predicted boxes to image + # remove predicted boxes with either height or width < threshold + # sort all (proposal, score) pairs by score from highest to lowest + # take top pre_nms_topN proposals before NMS + # apply NMS with threshold 0.7 to remaining proposals + # take after_nms_topN proposals after NMS + # return the top proposals (-> RoIs top, scores top) + #layer_params = yaml.load(self.param_str_) + _anchors = generate_anchors(scales=np.array(anchor_scales)) + _num_anchors = _anchors.shape[0] + rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) + rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,1,2]) + #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1]) + #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1]) + im_info = im_info[0] + + assert rpn_cls_prob_reshape.shape[0] == 1, \ + 'Only single item batches are supported' + # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' + #cfg_key = 'TEST' + pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N + post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + nms_thresh = cfg[cfg_key].RPN_NMS_THRESH + min_size = cfg[cfg_key].RPN_MIN_SIZE + + # the first set of _num_anchors channels are bg probs + # the second set are the fg probs, which we want + scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :] + bbox_deltas = rpn_bbox_pred + #im_info = bottom[2].data[0, :] + + if DEBUG: + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + + # 1. 
Generate proposals from bbox deltas and shifted anchors + height, width = scores.shape[-2:] + + if DEBUG: + print 'score map size: {}'.format(scores.shape) + + # Enumerate all shifts + shift_x = np.arange(0, width) * _feat_stride + shift_y = np.arange(0, height) * _feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + + # Enumerate all shifted anchors: + # + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = _num_anchors + K = shifts.shape[0] + anchors = _anchors.reshape((1, A, 4)) + \ + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) + anchors = anchors.reshape((K * A, 4)) + + # Transpose and reshape predicted bbox transformations to get them + # into the same order as the anchors: + # + # bbox deltas will be (1, 4 * A, H, W) format + # transpose to (1, H, W, 4 * A) + # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) + # in slowest to fastest order + bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) + + # Same story for the scores: + # + # scores are (1, A, H, W) format + # transpose to (1, H, W, A) + # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) + scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) + + # Convert anchors into proposals via bbox transformations + proposals = bbox_transform_inv(anchors, bbox_deltas) + + # 2. clip predicted boxes to image + proposals = clip_boxes(proposals, im_info[:2]) + + # 3. remove predicted boxes with either height or width < threshold + # (NOTE: convert min_size to input image scale stored in im_info[2]) + keep = _filter_boxes(proposals, min_size * im_info[2]) + proposals = proposals[keep, :] + scores = scores[keep] + + # 4. sort all (proposal, score) pairs by score from highest to lowest + # 5. take top pre_nms_topN (e.g. 6000) + order = scores.ravel().argsort()[::-1] + if pre_nms_topN > 0: + order = order[:pre_nms_topN] + proposals = proposals[order, :] + scores = scores[order] + + # 6. apply nms (e.g. threshold = 0.7) + # 7. take after_nms_topN (e.g. 300) + # 8. return the top proposals (-> RoIs top) + keep = nms(np.hstack((proposals, scores)), nms_thresh) + if post_nms_topN > 0: + keep = keep[:post_nms_topN] + proposals = proposals[keep, :] + scores = scores[keep] + # Output rois blob + # Our RPN implementation only supports a single input image, so all + # batch inds are 0 + batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) + blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) + return blob + #top[0].reshape(*(blob.shape)) + #top[0].data[...] = blob + + # [Optional] output scores blob + #if len(top) > 1: + # top[1].reshape(*(scores.shape)) + # top[1].data[...] 
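
Steps 3-5 of the proposal pipeline are plain NumPy. A condensed, runnable sketch of the min-size filter and the score-ordered truncation; `filter_boxes` mirrors the `_filter_boxes` helper defined at the bottom of this file:

```python
import numpy as np

def filter_boxes(boxes, min_size):
    ws = boxes[:, 2] - boxes[:, 0] + 1      # inclusive pixel coordinates
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]

proposals = np.array([[0., 0., 30., 30.], [0., 0., 40., 3.], [5., 5., 90., 60.]])
scores = np.array([0.2, 0.9, 0.7])

keep = filter_boxes(proposals, min_size=16)          # drops the 41x4 sliver
proposals, scores = proposals[keep], scores[keep]

order = scores.ravel().argsort()[::-1][:1]           # pre_nms_topN = 1
print(proposals[order])                              # [[ 5.  5. 90. 60.]]
```
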
= scores + +def _filter_boxes(boxes, min_size): + """Remove all boxes with any side smaller than min_size.""" + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep diff --git a/lib/rpn_msr/proposal_target_layer_tf.py b/lib/rpn_msr/proposal_target_layer_tf.py index 75f19fd9..4d702b5e 100644 --- a/lib/rpn_msr/proposal_target_layer_tf.py +++ b/lib/rpn_msr/proposal_target_layer_tf.py @@ -48,14 +48,14 @@ def proposal_target_layer(rpn_rois, gt_boxes,_num_classes): rois_per_image, _num_classes) if DEBUG: - print 'num fg: {}'.format((labels > 0).sum()) - print 'num bg: {}'.format((labels == 0).sum()) + print('num fg: {}'.format((labels > 0).sum())) + print('num bg: {}'.format((labels == 0).sum())) _count += 1 _fg_num += (labels > 0).sum() _bg_num += (labels == 0).sum() - print 'num fg avg: {}'.format(_fg_num / _count) - print 'num bg avg: {}'.format(_bg_num / _count) - print 'ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num)) + print('num fg avg: {}'.format(_fg_num / _count)) + print('num bg avg: {}'.format(_bg_num / _count)) + print('ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num))) rois = rois.reshape(-1,5) labels = labels.reshape(-1,1) diff --git a/lib/rpn_msr/proposal_target_layer_tf.py.bak b/lib/rpn_msr/proposal_target_layer_tf.py.bak new file mode 100644 index 00000000..75f19fd9 --- /dev/null +++ b/lib/rpn_msr/proposal_target_layer_tf.py.bak @@ -0,0 +1,155 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import yaml +import numpy as np +import numpy.random as npr +from fast_rcnn.config import cfg +from fast_rcnn.bbox_transform import bbox_transform +from utils.cython_bbox import bbox_overlaps +import pdb + +DEBUG = False + +def proposal_target_layer(rpn_rois, gt_boxes,_num_classes): + """ + Assign object detection proposals to ground-truth targets. Produces proposal + classification labels and bounding-box regression targets. 
+ """ + + # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN + # (i.e., rpn.proposal_layer.ProposalLayer), or any other source + all_rois = rpn_rois + # TODO(rbg): it's annoying that sometimes I have extra info before + # and other times after box coordinates -- normalize to one format + + # Include ground-truth boxes in the set of candidate rois + zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) + all_rois = np.vstack( + (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) + ) + + # Sanity check: single batch only + assert np.all(all_rois[:, 0] == 0), \ + 'Only single item batches are supported' + + num_images = 1 + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images + fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) + + # Sample rois with classification labels and bounding box regression + # targets + labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( + all_rois, gt_boxes, fg_rois_per_image, + rois_per_image, _num_classes) + + if DEBUG: + print 'num fg: {}'.format((labels > 0).sum()) + print 'num bg: {}'.format((labels == 0).sum()) + _count += 1 + _fg_num += (labels > 0).sum() + _bg_num += (labels == 0).sum() + print 'num fg avg: {}'.format(_fg_num / _count) + print 'num bg avg: {}'.format(_bg_num / _count) + print 'ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num)) + + rois = rois.reshape(-1,5) + labels = labels.reshape(-1,1) + bbox_targets = bbox_targets.reshape(-1,_num_classes*4) + bbox_inside_weights = bbox_inside_weights.reshape(-1,_num_classes*4) + + bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) + + return rois,labels,bbox_targets,bbox_inside_weights,bbox_outside_weights + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets (bbox_target_data) are stored in a + compact form N x (class, tx, ty, tw, th) + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). + + Returns: + bbox_target (ndarray): N x 4K blob of regression targets + bbox_inside_weights (ndarray): N x 4K blob of loss weights + """ + + clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS + return bbox_targets, bbox_inside_weights + + +def _compute_targets(ex_rois, gt_rois, labels): + """Compute bounding-box regression targets for an image.""" + + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 4 + + targets = bbox_transform(ex_rois, gt_rois) + if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: + # Optionally normalize targets by a precomputed mean and stdev + targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) + / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) + return np.hstack( + (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) + +def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): + """Generate a random sample of RoIs comprising foreground and background + examples. 
+ """ + # overlaps: (rois x gt_boxes) + overlaps = bbox_overlaps( + np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), + np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) + gt_assignment = overlaps.argmax(axis=1) + max_overlaps = overlaps.max(axis=1) + labels = gt_boxes[gt_assignment, 4] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) + + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) + # Sample background regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds) + # Select sampled values from various arrays: + labels = labels[keep_inds] + # Clamp labels for the background RoIs to 0 + labels[fg_rois_per_this_image:] = 0 + rois = all_rois[keep_inds] + + bbox_target_data = _compute_targets( + rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) + + bbox_targets, bbox_inside_weights = \ + _get_bbox_regression_labels(bbox_target_data, num_classes) + + return labels, rois, bbox_targets, bbox_inside_weights diff --git a/lib/setup.py b/lib/setup.py index 2f875015..cda553e1 100644 --- a/lib/setup.py +++ b/lib/setup.py @@ -46,7 +46,7 @@ def locate_cuda(): cudaconfig = {'home':home, 'nvcc':nvcc, 'include': pjoin(home, 'include'), 'lib64': pjoin(home, 'lib64')} - for k, v in cudaconfig.iteritems(): + for k, v in cudaconfig.items(): if not os.path.exists(v): return None; @@ -81,7 +81,7 @@ def customize_compiler_for_nvcc(self): # object but distutils doesn't have the ability to change compilers # based on source extension: we add it. 
def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - print extra_postargs + print(extra_postargs) if os.path.splitext(src)[1] == '.cu': # use the cuda for .cu files self.set_executable('compiler_so', CUDA['nvcc']) diff --git a/lib/setup.py.bak b/lib/setup.py.bak new file mode 100644 index 00000000..cc102151 --- /dev/null +++ b/lib/setup.py.bak @@ -0,0 +1,155 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import os +from os.path import join as pjoin +import numpy as np +from distutils.core import setup +from distutils.extension import Extension +from Cython.Distutils import build_ext + +def find_in_path(name, path): + "Find a file in a search path" + #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = pjoin(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + +def locate_cuda(): + """Locate the CUDA environment on the system + + Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' + and values giving the absolute path to each directory. + + Starts by looking for the CUDAHOME env variable. If not found, everything + is based on finding 'nvcc' in the PATH. + """ + + # first check if the CUDAHOME env variable is in use + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = pjoin(home, 'bin', 'nvcc') + else: + # otherwise, search the PATH for NVCC + default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') + nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) + if nvcc is None: + return None; + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home':home, 'nvcc':nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib64')} + for k, v in cudaconfig.iteritems(): + if not os.path.exists(v): + return None; + + return cudaconfig + +CUDA = locate_cuda() + +# Obtain the numpy include directory. This logic works across numpy versions. +try: + numpy_include = np.get_include() +except AttributeError: + numpy_include = np.get_numpy_include() + +def customize_compiler_for_nvcc(self): + """inject deep into distutils to customize how the dispatch + to gcc/nvcc works. + + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kindof like a wierd functional + subclassing going on.""" + + # tell the compiler it can processes .cu + self.src_extensions.append('.cu') + + # save references to the default compiler_so and _comple methods + default_compiler_so = self.compiler_so + super = self._compile + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. 
+    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
+        print(extra_postargs)
+        if os.path.splitext(src)[1] == '.cu':
+            # use the cuda for .cu files
+            self.set_executable('compiler_so', CUDA['nvcc'])
+            # use only a subset of the extra_postargs, which are 1-1 translated
+            # from the extra_compile_args in the Extension class
+            postargs = extra_postargs['nvcc']
+        else:
+            postargs = extra_postargs['gcc']
+
+        super(obj, src, ext, cc_args, postargs, pp_opts)
+        # reset the default compiler_so, which we might have changed for cuda
+        self.compiler_so = default_compiler_so
+
+    # inject our redefined _compile method into the class
+    self._compile = _compile
+
+
+# run the customize_compiler
+class custom_build_ext(build_ext):
+    def build_extensions(self):
+        customize_compiler_for_nvcc(self.compiler)
+        build_ext.build_extensions(self)
+
+ext_modules = [
+    Extension(
+        "utils.cython_bbox",
+        ["utils/bbox.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    ),
+    Extension(
+        "utils.cython_nms",
+        ["utils/nms.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    ),
+    Extension(
+        "nms.cpu_nms",
+        ["nms/cpu_nms.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    )
+]
+
+if CUDA:
+    ext_modules.append(
+        Extension('nms.gpu_nms',
+            ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
+            library_dirs=[CUDA['lib64']],
+            libraries=['cudart'],
+            language='c++',
+            runtime_library_dirs=[CUDA['lib64']],
+            # this syntax is specific to this build system
+            # we're only going to use certain compiler args with nvcc and not with gcc
+            # the implementation of this trick is in customize_compiler() below
+            extra_compile_args={'gcc': ["-Wno-unused-function"],
+                                'nvcc': ['-arch=sm_35',
+                                         '--ptxas-options=-v',
+                                         '-c',
+                                         '--compiler-options',
+                                         "'-fPIC'"]},
+            include_dirs = [numpy_include, CUDA['include']]
+        )
+    )
+
+setup(
+    name='fast_rcnn',
+    ext_modules=ext_modules,
+    # inject our custom trigger
+    cmdclass={'build_ext': custom_build_ext},
+)
diff --git a/lib/utils/blob.py b/lib/utils/blob.py
index 63c1b52e..0489dc4f 100644
--- a/lib/utils/blob.py
+++ b/lib/utils/blob.py
@@ -19,7 +19,7 @@ def im_list_to_blob(ims):
     num_images = len(ims)
     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                     dtype=np.float32)
-    for i in xrange(num_images):
+    for i in range(num_images):
         im = ims[i]
         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
diff --git a/tools/demo.py b/tools/demo.py
index 0ffd219c..9444f8fa 100644
--- a/tools/demo.py
+++ b/tools/demo.py
@@ -64,8 +64,8 @@ def demo(sess, net, image_name):
     timer.tic()
     scores, boxes = im_detect(sess, net, im)
     timer.toc()
-    print ('Detection took {:.3f}s for '
-           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
+    print(('Detection took {:.3f}s for '
+           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
 
     # Visualize detections for each class
     im = im[:, :, (2, 1, 0)]
@@ -118,11 +118,11 @@ def parse_args():
 
     #sess.run(tf.initialize_all_variables())
 
-    print '\n\nLoaded network {:s}'.format(args.model)
+    print('\n\nLoaded network {:s}'.format(args.model))
 
     # Warmup on a dummy image
     im = 128 * np.ones((300, 300, 3), dtype=np.uint8)
-    for i in xrange(2):
+    for i in range(2):
         _, _= im_detect(sess, net, im)
 
     im_names = ['000456.jpg', '000542.jpg', '001150.jpg',
@@ -130,8 +130,8 @@ def parse_args():
 
     for im_name in im_names:
-        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
-        print 'Demo for data/demo/{}'.format(im_name)
+        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
+        print('Demo for data/demo/{}'.format(im_name))
         demo(sess, net, im_name)
 
     plt.show()
diff --git a/tools/demo.py.bak b/tools/demo.py.bak
new file mode 100644
index 00000000..0ffd219c
--- /dev/null
+++ b/tools/demo.py.bak
@@ -0,0 +1,138 @@
+import _init_paths
+import tensorflow as tf
+from fast_rcnn.config import cfg
+from fast_rcnn.test import im_detect
+from fast_rcnn.nms_wrapper import nms
+from utils.timer import Timer
+import matplotlib.pyplot as plt
+import numpy as np
+import os, sys, cv2
+import argparse
+from networks.factory import get_network
+
+
+CLASSES = ('__background__',
+           'aeroplane', 'bicycle', 'bird', 'boat',
+           'bottle', 'bus', 'car', 'cat', 'chair',
+           'cow', 'diningtable', 'dog', 'horse',
+           'motorbike', 'person', 'pottedplant',
+           'sheep', 'sofa', 'train', 'tvmonitor')
+
+
+#CLASSES = ('__background__','person','bike','motorbike','car','bus')
+
+def vis_detections(im, class_name, dets,ax, thresh=0.5):
+    """Draw detected bounding boxes."""
+    inds = np.where(dets[:, -1] >= thresh)[0]
+    if len(inds) == 0:
+        return
+
+    for i in inds:
+        bbox = dets[i, :4]
+        score = dets[i, -1]
+
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=3.5)
+            )
+        ax.text(bbox[0], bbox[1] - 2,
+                '{:s} {:.3f}'.format(class_name, score),
+                bbox=dict(facecolor='blue', alpha=0.5),
+                fontsize=14, color='white')
+
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=14)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.draw()
+
+
+def demo(sess, net, image_name):
+    """Detect object classes in an image using pre-computed object proposals."""
+
+    # Load the demo image
+    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
+    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name)
+    im = cv2.imread(im_file)
+
+    # Detect all object classes and regress object bounds
+    timer = Timer()
+    timer.tic()
+    scores, boxes = im_detect(sess, net, im)
+    timer.toc()
+    print ('Detection took {:.3f}s for '
+           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
+
+    # Visualize detections for each class
+    im = im[:, :, (2, 1, 0)]
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+
+    CONF_THRESH = 0.8
+    NMS_THRESH = 0.3
+    for cls_ind, cls in enumerate(CLASSES[1:]):
+        cls_ind += 1 # because we skipped background
+        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
+        cls_scores = scores[:, cls_ind]
+        dets = np.hstack((cls_boxes,
+                          cls_scores[:, np.newaxis])).astype(np.float32)
+        keep = nms(dets, NMS_THRESH)
+        dets = dets[keep, :]
+        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
+
+def parse_args():
+    """Parse input arguments."""
+    parser = argparse.ArgumentParser(description='Faster R-CNN demo')
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
+                        default=0, type=int)
+    parser.add_argument('--cpu', dest='cpu_mode',
+                        help='Use CPU mode (overrides --gpu)',
+                        action='store_true')
+    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
+                        default='VGGnet_test')
+    parser.add_argument('--model', dest='model', help='Model path',
+                        default=' ')
+
+    args = parser.parse_args()
+
+    return args
+if __name__ == '__main__':
+    cfg.TEST.HAS_RPN = True  # Use RPN for proposals
+
+    args = parse_args()
+
+    if args.model == ' ':
+        raise IOError(('Error: Model not found.\n'))
+
+    # init session
+    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
+    # load network
+    net = get_network(args.demo_net)
+    # load model
+    saver = tf.train.Saver(write_version=tf.train.SaverDef.V1)
+    saver.restore(sess, args.model)
+
+    #sess.run(tf.initialize_all_variables())
+
+    print '\n\nLoaded network {:s}'.format(args.model)
+
+    # Warmup on a dummy image
+    im = 128 * np.ones((300, 300, 3), dtype=np.uint8)
+    for i in xrange(2):
+        _, _= im_detect(sess, net, im)
+
+    im_names = ['000456.jpg', '000542.jpg', '001150.jpg',
+                '001763.jpg', '004545.jpg']
+
+
+    for im_name in im_names:
+        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+        print 'Demo for data/demo/{}'.format(im_name)
+        demo(sess, net, im_name)
+
+    plt.show()
+
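Two closing notes on this port. First, the `.bak` files added throughout the diff are the in-place backups that `2to3 -w` leaves next to each rewritten module (suppressed with `-n`); they preserve the Python 2 sources verbatim, which is why they still contain `print` statements, `xrange`, and `iteritems`. Second, the demo targets the TF1 graph-and-session API; under TensorFlow 2.x those calls are only reachable through the compat layer. A sketch of the session setup under that assumption, untested against this repository's graphs:

    import tensorflow.compat.v1 as tf
    tf.disable_eager_execution()  # demo.py builds a static graph and runs it in a Session
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))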