diff --git a/lib/fast_rcnn/config.py b/lib/fast_rcnn/config.py index 12210d1e..37584fcd 100644 --- a/lib/fast_rcnn/config.py +++ b/lib/fast_rcnn/config.py @@ -262,9 +262,9 @@ def _merge_a_into_b(a, b): if type(a) is not edict: return - for k, v in a.iteritems(): + for k, v in a.items(): # a must specify keys that are in b - if not b.has_key(k): + if k not in b: raise KeyError('{} is not a valid config key'.format(k)) # the types must match, too @@ -282,7 +282,7 @@ def _merge_a_into_b(a, b): try: _merge_a_into_b(a[k], b[k]) except: - print('Error under config key: {}'.format(k)) + print(('Error under config key: {}'.format(k))) raise else: b[k] = v @@ -303,10 +303,10 @@ def cfg_from_list(cfg_list): key_list = k.split('.') d = __C for subkey in key_list[:-1]: - assert d.has_key(subkey) + assert subkey in d d = d[subkey] subkey = key_list[-1] - assert d.has_key(subkey) + assert subkey in d try: value = literal_eval(v) except: diff --git a/lib/fast_rcnn/config.py.bak b/lib/fast_rcnn/config.py.bak new file mode 100644 index 00000000..12210d1e --- /dev/null +++ b/lib/fast_rcnn/config.py.bak @@ -0,0 +1,318 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Fast R-CNN config system. + +This file specifies default config options for Fast R-CNN. You should not +change values in this file. Instead, you should write a config file (in yaml) +and use cfg_from_file(yaml_file) to load it and override the default options. + +Most tools in $ROOT/tools take a --cfg option to specify an override file. + - See tools/{train,test}_net.py for example code that uses cfg_from_file() + - See experiments/cfgs/*.yml for example YAML config override files +""" + +import os +import os.path as osp +import numpy as np +from distutils import spawn +# `pip install easydict` if you don't have it +from easydict import EasyDict as edict + +__C = edict() +# Consumers can get config by: +# from fast_rcnn_config import cfg +cfg = __C + +# +# Training options +# + +__C.TRAIN = edict() +#__C.NET_NAME = 'VGGnet' +# learning rate +__C.TRAIN.LEARNING_RATE = 0.001 +__C.TRAIN.MOMENTUM = 0.9 +__C.TRAIN.GAMMA = 0.1 +__C.TRAIN.STEPSIZE = 50000 +__C.TRAIN.DISPLAY = 10 +__C.IS_MULTISCALE = False + +# Scales to compute real features +#__C.TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0) +#__C.TRAIN.SCALES_BASE = (1.0,) + +# parameters for ROI generating +#__C.TRAIN.SPATIAL_SCALE = 0.0625 +#__C.TRAIN.KERNEL_SIZE = 5 + +# Aspect ratio to use during training +#__C.TRAIN.ASPECTS = (1, 0.75, 0.5, 0.25) +#__C.TRAIN.ASPECTS= (1,) + + +# Scales to use during training (can list multiple scales) +# Each scale is the pixel size of an image's shortest side +__C.TRAIN.SCALES = (600,) + +# Max pixel size of the longest side of a scaled input image +__C.TRAIN.MAX_SIZE = 1000 + +# Images to use per minibatch +__C.TRAIN.IMS_PER_BATCH = 2 + +# Minibatch size (number of regions of interest [ROIs]) +__C.TRAIN.BATCH_SIZE = 128 + +# Fraction of minibatch that is labeled foreground (i.e. class > 0) +__C.TRAIN.FG_FRACTION = 0.25 + +# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) +__C.TRAIN.FG_THRESH = 0.5 + +# Overlap threshold for a ROI to be considered background (class = 0 if +# overlap in [LO, HI)) +__C.TRAIN.BG_THRESH_HI = 0.5 +__C.TRAIN.BG_THRESH_LO = 0.1 + +# Use horizontally-flipped images during training? 
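The `_merge_a_into_b` hunks above are the heart of this file's migration: `iteritems()` and `has_key()` no longer exist in Python 3, so iteration becomes `a.items()` and membership becomes `k in b`. A minimal sketch of the same recursive merge pattern, using a plain `dict` in place of `easydict.EasyDict` (an assumption made for brevity):

```python
import numpy as np

def merge_a_into_b(a, b):
    """Recursively merge dict a into dict b; a may only override existing keys."""
    for k, v in a.items():              # Py3: items() replaces iteritems()
        if k not in b:                  # Py3: "in" replaces has_key()
            raise KeyError('{} is not a valid config key'.format(k))
        if isinstance(v, dict):
            merge_a_into_b(v, b[k])     # recurse into nested sections
        else:
            # coerce overrides into the default's dtype for ndarray defaults
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            b[k] = v

defaults = {'TRAIN': {'LEARNING_RATE': 0.001, 'SCALES': (600,)}}
merge_a_into_b({'TRAIN': {'LEARNING_RATE': 0.01}}, defaults)
assert defaults['TRAIN']['LEARNING_RATE'] == 0.01
```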
+__C.TRAIN.USE_FLIPPED = True + +# Train bounding-box regressors +__C.TRAIN.BBOX_REG = True + +# Overlap required between a ROI and ground-truth box in order for that ROI to +# be used as a bounding-box regression training example +__C.TRAIN.BBOX_THRESH = 0.5 + +# Iterations between snapshots +__C.TRAIN.SNAPSHOT_ITERS = 5000 + +# solver.prototxt specifies the snapshot path prefix, this adds an optional +# infix to yield the path: [_]_iters_XYZ.caffemodel +__C.TRAIN.SNAPSHOT_PREFIX = 'VGGnet_fast_rcnn' +__C.TRAIN.SNAPSHOT_INFIX = '' + +# Use a prefetch thread in roi_data_layer.layer +# So far I haven't found this useful; likely more engineering work is required +__C.TRAIN.USE_PREFETCH = False + +# Normalize the targets (subtract empirical mean, divide by empirical stddev) +__C.TRAIN.BBOX_NORMALIZE_TARGETS = True +# Deprecated (inside weights) +__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# Normalize the targets using "precomputed" (or made up) means and stdevs +# (BBOX_NORMALIZE_TARGETS must also be True) +__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False +__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) +__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) + +# Train using these proposals +__C.TRAIN.PROPOSAL_METHOD = 'selective_search' + +# Make minibatches from images that have similar aspect ratios (i.e. both +# tall and thin or both short and wide) in order to avoid wasting computation +# on zero-padding. +__C.TRAIN.ASPECT_GROUPING = True + +# Use RPN to detect objects +__C.TRAIN.HAS_RPN = False +# IOU >= thresh: positive example +__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 +# IOU < thresh: negative example +__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 +# If an anchor statisfied by positive and negative conditions set to negative +__C.TRAIN.RPN_CLOBBER_POSITIVES = False +# Max number of foreground examples +__C.TRAIN.RPN_FG_FRACTION = 0.5 +# Total number of examples +__C.TRAIN.RPN_BATCHSIZE = 256 +# NMS threshold used on RPN proposals +__C.TRAIN.RPN_NMS_THRESH = 0.7 +# Number of top scoring boxes to keep before apply NMS to RPN proposals +__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 +# Number of top scoring boxes to keep after applying NMS to RPN proposals +__C.TRAIN.RPN_POST_NMS_TOP_N = 2000 +# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) +__C.TRAIN.RPN_MIN_SIZE = 16 +# Deprecated (outside weights) +__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# Give the positive RPN examples weight of p * 1 / {num positives} +# and give negatives a weight of (1 - p) +# Set to -1.0 to use uniform example weighting +__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 + +# Enable timeline generation +__C.TRAIN.DEBUG_TIMELINE = False + +# +# Testing options +# + +__C.TEST = edict() + +# Scales to use during testing (can list multiple scales) +# Each scale is the pixel size of an image's shortest side +__C.TEST.SCALES = (600,) + +# Max pixel size of the longest side of a scaled input image +__C.TEST.MAX_SIZE = 1000 + +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +__C.TEST.NMS = 0.3 + +# Experimental: treat the (K+1) units in the cls_score layer as linear +# predictors (trained, eg, with one-vs-rest SVMs). 
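`TRAIN.BBOX_NORMALIZE_MEANS` and `TRAIN.BBOX_NORMALIZE_STDS` above drive the target whitening that `snapshot()` in `train.py` later folds back into the weights. A hedged NumPy sketch of the roundtrip, with illustrative target values:

```python
import numpy as np

means = np.array([0.0, 0.0, 0.0, 0.0])
stds  = np.array([0.1, 0.1, 0.2, 0.2])   # the defaults set above

targets = np.array([[0.05, -0.02, 0.30, 0.10]])  # illustrative (dx, dy, dw, dh)

normalized = (targets - means) / stds      # what training consumes
restored   = normalized * stds + means     # what snapshot() folds into the weights
assert np.allclose(restored, targets)
```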
+__C.TEST.SVM = False + +# Test using bounding-box regressors +__C.TEST.BBOX_REG = True + +# Propose boxes +__C.TEST.HAS_RPN = True + +# Test using these proposals +__C.TEST.PROPOSAL_METHOD = 'selective_search' + +## NMS threshold used on RPN proposals +__C.TEST.RPN_NMS_THRESH = 0.7 +## Number of top scoring boxes to keep before apply NMS to RPN proposals +__C.TEST.RPN_PRE_NMS_TOP_N = 6000 +#__C.TEST.RPN_PRE_NMS_TOP_N = 12000 +## Number of top scoring boxes to keep after applying NMS to RPN proposals +__C.TEST.RPN_POST_NMS_TOP_N = 300 +#__C.TEST.RPN_POST_NMS_TOP_N = 2000 +# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) +__C.TEST.RPN_MIN_SIZE = 16 + +# Enable timeline generation +__C.TEST.DEBUG_TIMELINE = False + +# +# MISC +# + +# The mapping from image coordinates to feature map coordinates might cause +# some boxes that are distinct in image space to become identical in feature +# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor +# for identifying duplicate boxes. +# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 +__C.DEDUP_BOXES = 1./16. + +# Pixel mean values (BGR order) as a (1, 1, 3) array +# We use the same pixel mean for all networks even though it's not exactly what +# they were trained with +__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) + +# For reproducibility +__C.RNG_SEED = 3 + +# A small number that's used many times +__C.EPS = 1e-14 + +# Root directory of project +__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) + +# Data directory +__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) + +# Model directory +__C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) + +# Name (or path to) the matlab executable +__C.MATLAB = 'matlab' + +# Place outputs under an experiments directory +__C.EXP_DIR = 'default' + + +if spawn.find_executable("nvcc"): + # Use GPU implementation of non-maximum suppression + __C.USE_GPU_NMS = True + + # Default GPU device id + __C.GPU_ID = 0 +else: + __C.USE_GPU_NMS = False + + +def get_output_dir(imdb, weights_filename): + """Return the directory where experimental artifacts are placed. + If the directory does not exist, it is created. + + A canonical path is built using the name from an imdb and a network + (if not None). + """ + outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) + if weights_filename is not None: + outdir = osp.join(outdir, weights_filename) + if not os.path.exists(outdir): + os.makedirs(outdir) + return outdir + +def _merge_a_into_b(a, b): + """Merge config dictionary a into config dictionary b, clobbering the + options in b whenever they are also specified in a. + """ + if type(a) is not edict: + return + + for k, v in a.iteritems(): + # a must specify keys that are in b + if not b.has_key(k): + raise KeyError('{} is not a valid config key'.format(k)) + + # the types must match, too + old_type = type(b[k]) + if old_type is not type(v): + if isinstance(b[k], np.ndarray): + v = np.array(v, dtype=b[k].dtype) + else: + raise ValueError(('Type mismatch ({} vs. 
{}) ' + 'for config key: {}').format(type(b[k]), + type(v), k)) + + # recursively merge dicts + if type(v) is edict: + try: + _merge_a_into_b(a[k], b[k]) + except: + print('Error under config key: {}'.format(k)) + raise + else: + b[k] = v + +def cfg_from_file(filename): + """Load a config file and merge it into the default options.""" + import yaml + with open(filename, 'r') as f: + yaml_cfg = edict(yaml.load(f)) + + _merge_a_into_b(yaml_cfg, __C) + +def cfg_from_list(cfg_list): + """Set config keys via list (e.g., from command line).""" + from ast import literal_eval + assert len(cfg_list) % 2 == 0 + for k, v in zip(cfg_list[0::2], cfg_list[1::2]): + key_list = k.split('.') + d = __C + for subkey in key_list[:-1]: + assert d.has_key(subkey) + d = d[subkey] + subkey = key_list[-1] + assert d.has_key(subkey) + try: + value = literal_eval(v) + except: + # handle the case when v is a string literal + value = v + assert type(value) == type(d[subkey]), \ + 'type {} does not match original type {}'.format( + type(value), type(d[subkey])) + d[subkey] = value diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py index 41cd12c5..55ec35db 100644 --- a/lib/fast_rcnn/test.py +++ b/lib/fast_rcnn/test.py @@ -5,7 +5,7 @@ import cv2 from utils.cython_nms import nms, nms_new from utils.boxes_grid import get_boxes_grid -import cPickle +import pickle import heapq from utils.blob import im_list_to_blob import os @@ -124,7 +124,7 @@ def _clip_boxes(boxes, im_shape): def _rescale_boxes(boxes, inds, scales): """Rescale boxes according to image rescaling.""" - for i in xrange(boxes.shape[0]): + for i in range(boxes.shape[0]): boxes[i,:] = boxes[i,:] / scales[int(inds[i])] return boxes @@ -207,7 +207,7 @@ def im_detect(sess, net, im, boxes=None): if cfg.TEST.DEBUG_TIMELINE: trace = timeline.Timeline(step_stats=run_metadata.step_stats) - trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') + trace_file = open(str(int(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() @@ -218,7 +218,7 @@ def vis_detections(im, class_name, dets, thresh=0.8): """Visual debugging of detections.""" import matplotlib.pyplot as plt #im = im[:, :, (2, 1, 0)] - for i in xrange(np.minimum(10, dets.shape[0])): + for i in range(np.minimum(10, dets.shape[0])): bbox = dets[i, :4] score = dets[i, -1] if score > thresh: @@ -244,10 +244,10 @@ def apply_nms(all_boxes, thresh): """ num_classes = len(all_boxes) num_images = len(all_boxes[0]) - nms_boxes = [[[] for _ in xrange(num_images)] - for _ in xrange(num_classes)] - for cls_ind in xrange(num_classes): - for im_ind in xrange(num_images): + nms_boxes = [[[] for _ in range(num_images)] + for _ in range(num_classes)] + for cls_ind in range(num_classes): + for im_ind in range(num_images): dets = all_boxes[cls_ind][im_ind] if dets == []: continue @@ -275,8 +275,8 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) - all_boxes = [[[] for _ in xrange(num_images)] - for _ in xrange(imdb.num_classes)] + all_boxes = [[[] for _ in range(num_images)] + for _ in range(imdb.num_classes)] output_dir = get_output_dir(imdb, weights_filename) # timers @@ -285,7 +285,7 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, if not cfg.TEST.HAS_RPN: roidb = imdb.roidb - for i in xrange(num_images): + for i in 
range(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None @@ -309,7 +309,7 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, plt.imshow(image) # skip j = 0, because it's the background class - for j in xrange(1, imdb.num_classes): + for j in range(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] @@ -325,22 +325,22 @@ def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] - for j in xrange(1, imdb.num_classes)]) + for j in range(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] - for j in xrange(1, imdb.num_classes): + for j in range(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() - print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ + print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, - _t['misc'].average_time) + _t['misc'].average_time)) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: - cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) + pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) - print 'Evaluating detections' + print('Evaluating detections') imdb.evaluate_detections(all_boxes, output_dir) diff --git a/lib/fast_rcnn/test.py.bak b/lib/fast_rcnn/test.py.bak new file mode 100644 index 00000000..41cd12c5 --- /dev/null +++ b/lib/fast_rcnn/test.py.bak @@ -0,0 +1,346 @@ +from fast_rcnn.config import cfg, get_output_dir +import argparse +from utils.timer import Timer +import numpy as np +import cv2 +from utils.cython_nms import nms, nms_new +from utils.boxes_grid import get_boxes_grid +import cPickle +import heapq +from utils.blob import im_list_to_blob +import os +import math +from rpn_msr.generate import imdb_proposals_det +import tensorflow as tf +from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv +import matplotlib.pyplot as plt +from tensorflow.python.client import timeline +import time + +def _get_image_blob(im): + """Converts an image into a network input. + Arguments: + im (ndarray): a color image in BGR order + Returns: + blob (ndarray): a data blob holding an image pyramid + im_scale_factors (list): list of image scales (relative to im) used + in the image pyramid + """ + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + im_shape = im_orig.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + + processed_ims = [] + im_scale_factors = [] + + for target_size in cfg.TEST.SCALES: + im_scale = float(target_size) / float(im_size_min) + # Prevent the biggest axis from being more than MAX_SIZE + if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: + im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR) + im_scale_factors.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, np.array(im_scale_factors) + +def _get_rois_blob(im_rois, im_scale_factors): + """Converts RoIs into network inputs. 
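`_get_image_blob` above implements the test-time scaling rule: the shortest side is resized toward `TEST.SCALES` while the longest side is capped at `TEST.MAX_SIZE`. The rule in isolation, as a small sketch using the default constants:

```python
import numpy as np

def compute_im_scale(im_shape, target_size=600, max_size=1000):
    """Scale so the short side reaches target_size, unless that pushes the
    long side past max_size, in which case fit the long side instead."""
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

print(compute_im_scale((375, 500)))    # 1.6   -> short side becomes 600
print(compute_im_scale((400, 1200)))   # 0.833 -> long side capped at 1000
```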
+ Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + im_scale_factors (list): scale factors as returned by _get_image_blob + Returns: + blob (ndarray): R x 5 matrix of RoIs in the image pyramid + """ + rois, levels = _project_im_rois(im_rois, im_scale_factors) + rois_blob = np.hstack((levels, rois)) + return rois_blob.astype(np.float32, copy=False) + +def _project_im_rois(im_rois, scales): + """Project image RoIs into the image pyramid built by _get_image_blob. + Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + scales (list): scale factors as returned by _get_image_blob + Returns: + rois (ndarray): R x 4 matrix of projected RoI coordinates + levels (list): image pyramid levels used by each projected RoI + """ + im_rois = im_rois.astype(np.float, copy=False) + scales = np.array(scales) + + if len(scales) > 1: + widths = im_rois[:, 2] - im_rois[:, 0] + 1 + heights = im_rois[:, 3] - im_rois[:, 1] + 1 + + areas = widths * heights + scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) + diff_areas = np.abs(scaled_areas - 224 * 224) + levels = diff_areas.argmin(axis=1)[:, np.newaxis] + else: + levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) + + rois = im_rois * scales[levels] + + return rois, levels + +def _get_blobs(im, rois): + """Convert an image and RoIs within that image into network inputs.""" + if cfg.TEST.HAS_RPN: + blobs = {'data' : None, 'rois' : None} + blobs['data'], im_scale_factors = _get_image_blob(im) + else: + blobs = {'data' : None, 'rois' : None} + blobs['data'], im_scale_factors = _get_image_blob(im) + if cfg.IS_MULTISCALE: + if cfg.IS_EXTRAPOLATING: + blobs['rois'] = _get_rois_blob(rois, cfg.TEST.SCALES) + else: + blobs['rois'] = _get_rois_blob(rois, cfg.TEST.SCALES_BASE) + else: + blobs['rois'] = _get_rois_blob(rois, cfg.TEST.SCALES_BASE) + + return blobs, im_scale_factors + +def _clip_boxes(boxes, im_shape): + """Clip boxes to image boundaries.""" + # x1 >= 0 + boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) + # y2 < im_shape[0] + boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) + return boxes + + +def _rescale_boxes(boxes, inds, scales): + """Rescale boxes according to image rescaling.""" + + for i in xrange(boxes.shape[0]): + boxes[i,:] = boxes[i,:] / scales[int(inds[i])] + + return boxes + + +def im_detect(sess, net, im, boxes=None): + """Detect object classes in an image given object proposals. + Arguments: + net (caffe.Net): Fast R-CNN network to use + im (ndarray): color image to test (in BGR order) + boxes (ndarray): R x 4 array of object proposals + Returns: + scores (ndarray): R x K array of object class scores (K includes + background as object category 0) + boxes (ndarray): R x (4*K) array of predicted bounding boxes + """ + + blobs, im_scales = _get_blobs(im, boxes) + + # When mapping from image ROIs to feature map ROIs, there's some aliasing + # (some distinct image ROIs get mapped to the same feature ROI). + # Here, we identify duplicate feature ROIs, so we only compute features + # on the unique subset. 
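The dedup step that follows hashes each scaled ROI into one scalar so `np.unique` can spot duplicates cheaply. A standalone demo of the trick, with made-up boxes chosen to collide after the 1/16 quantization:

```python
import numpy as np

DEDUP_BOXES = 1. / 16.
v = np.array([1, 1e3, 1e6, 1e9, 1e12])

rois = np.array([[0, 10., 10., 39., 39.],     # (level, x1, y1, x2, y2)
                 [0, 11., 11., 38., 38.],     # collides with row 0 after /16 rounding
                 [0, 100., 80., 200., 160.]])

hashes = np.round(rois * DEDUP_BOXES).dot(v)  # one scalar key per ROI
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
unique_rois = rois[index, :]                  # the forward pass runs on these only
restored = unique_rois[inv_index, :]          # maps results back onto all ROIs
print(len(rois), '->', len(unique_rois))      # 3 -> 2
```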
+ if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: + v = np.array([1, 1e3, 1e6, 1e9, 1e12]) + hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) + _, index, inv_index = np.unique(hashes, return_index=True, + return_inverse=True) + blobs['rois'] = blobs['rois'][index, :] + boxes = boxes[index, :] + + if cfg.TEST.HAS_RPN: + im_blob = blobs['data'] + blobs['im_info'] = np.array( + [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], + dtype=np.float32) + # forward pass + if cfg.TEST.HAS_RPN: + feed_dict={net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0} + else: + feed_dict={net.data: blobs['data'], net.rois: blobs['rois'], net.keep_prob: 1.0} + + run_options = None + run_metadata = None + if cfg.TEST.DEBUG_TIMELINE: + run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + run_metadata = tf.RunMetadata() + + cls_score, cls_prob, bbox_pred, rois = sess.run([net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'),net.get_output('rois')], + feed_dict=feed_dict, + options=run_options, + run_metadata=run_metadata) + + if cfg.TEST.HAS_RPN: + assert len(im_scales) == 1, "Only single-image batch implemented" + boxes = rois[:, 1:5] / im_scales[0] + + + if cfg.TEST.SVM: + # use the raw scores before softmax under the assumption they + # were trained as linear SVMs + scores = cls_score + else: + # use softmax estimated probabilities + scores = cls_prob + + if cfg.TEST.BBOX_REG: + # Apply bounding-box regression deltas + box_deltas = bbox_pred + pred_boxes = bbox_transform_inv(boxes, box_deltas) + pred_boxes = _clip_boxes(pred_boxes, im.shape) + else: + # Simply repeat the boxes, once for each class + pred_boxes = np.tile(boxes, (1, scores.shape[1])) + + if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: + # Map scores and predictions back to the original set of boxes + scores = scores[inv_index, :] + pred_boxes = pred_boxes[inv_index, :] + + if cfg.TEST.DEBUG_TIMELINE: + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w') + trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) + trace_file.close() + + return scores, pred_boxes + + +def vis_detections(im, class_name, dets, thresh=0.8): + """Visual debugging of detections.""" + import matplotlib.pyplot as plt + #im = im[:, :, (2, 1, 0)] + for i in xrange(np.minimum(10, dets.shape[0])): + bbox = dets[i, :4] + score = dets[i, -1] + if score > thresh: + #plt.cla() + #plt.imshow(im) + plt.gca().add_patch( + plt.Rectangle((bbox[0], bbox[1]), + bbox[2] - bbox[0], + bbox[3] - bbox[1], fill=False, + edgecolor='g', linewidth=3) + ) + plt.gca().text(bbox[0], bbox[1] - 2, + '{:s} {:.3f}'.format(class_name, score), + bbox=dict(facecolor='blue', alpha=0.5), + fontsize=14, color='white') + + plt.title('{} {:.3f}'.format(class_name, score)) + #plt.show() + +def apply_nms(all_boxes, thresh): + """Apply non-maximum suppression to all predicted boxes output by the + test_net method. 
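`apply_nms` here and `test_net` below both call the Cython `nms`, whose source is not part of this diff. A reference NumPy version under the usual greedy-IoU semantics (an assumption about the kernel's exact behavior): higher-scoring boxes suppress later ones whose IoU exceeds `thresh`.

```python
import numpy as np

def py_nms(dets, thresh):
    """dets: (N, 5) array of (x1, y1, x2, y2, score); returns kept indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with the remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        order = order[1:][iou <= thresh]    # drop heavily overlapping candidates
    return keep

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],     # heavy overlap with the first box
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)
print(py_nms(dets, thresh=0.3))             # [0, 2]
```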
+ """ + num_classes = len(all_boxes) + num_images = len(all_boxes[0]) + nms_boxes = [[[] for _ in xrange(num_images)] + for _ in xrange(num_classes)] + for cls_ind in xrange(num_classes): + for im_ind in xrange(num_images): + dets = all_boxes[cls_ind][im_ind] + if dets == []: + continue + + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + inds = np.where((x2 > x1) & (y2 > y1) & (scores > cfg.TEST.DET_THRESHOLD))[0] + dets = dets[inds,:] + if dets == []: + continue + + keep = nms(dets, thresh) + if len(keep) == 0: + continue + nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() + return nms_boxes + + +def test_net(sess, net, imdb, weights_filename , max_per_image=300, thresh=0.05, vis=False): + """Test a Fast R-CNN network on an image database.""" + num_images = len(imdb.image_index) + # all detections are collected into: + # all_boxes[cls][image] = N x 5 array of detections in + # (x1, y1, x2, y2, score) + all_boxes = [[[] for _ in xrange(num_images)] + for _ in xrange(imdb.num_classes)] + + output_dir = get_output_dir(imdb, weights_filename) + # timers + _t = {'im_detect' : Timer(), 'misc' : Timer()} + + if not cfg.TEST.HAS_RPN: + roidb = imdb.roidb + + for i in xrange(num_images): + # filter out any ground truth boxes + if cfg.TEST.HAS_RPN: + box_proposals = None + else: + # The roidb may contain ground-truth rois (for example, if the roidb + # comes from the training or val split). We only want to evaluate + # detection on the *non*-ground-truth rois. We select those the rois + # that have the gt_classes field set to 0, which means there's no + # ground truth. + box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] + + im = cv2.imread(imdb.image_path_at(i)) + _t['im_detect'].tic() + scores, boxes = im_detect(sess, net, im, box_proposals) + _t['im_detect'].toc() + + _t['misc'].tic() + if vis: + image = im[:, :, (2, 1, 0)] + plt.cla() + plt.imshow(image) + + # skip j = 0, because it's the background class + for j in xrange(1, imdb.num_classes): + inds = np.where(scores[:, j] > thresh)[0] + cls_scores = scores[inds, j] + cls_boxes = boxes[inds, j*4:(j+1)*4] + cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ + .astype(np.float32, copy=False) + keep = nms(cls_dets, cfg.TEST.NMS) + cls_dets = cls_dets[keep, :] + if vis: + vis_detections(image, imdb.classes[j], cls_dets) + all_boxes[j][i] = cls_dets + if vis: + plt.show() + # Limit to max_per_image detections *over all classes* + if max_per_image > 0: + image_scores = np.hstack([all_boxes[j][i][:, -1] + for j in xrange(1, imdb.num_classes)]) + if len(image_scores) > max_per_image: + image_thresh = np.sort(image_scores)[-max_per_image] + for j in xrange(1, imdb.num_classes): + keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] + all_boxes[j][i] = all_boxes[j][i][keep, :] + _t['misc'].toc() + + print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ + .format(i + 1, num_images, _t['im_detect'].average_time, + _t['misc'].average_time) + + det_file = os.path.join(output_dir, 'detections.pkl') + with open(det_file, 'wb') as f: + cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) + + print 'Evaluating detections' + imdb.evaluate_detections(all_boxes, output_dir) + diff --git a/lib/fast_rcnn/train.py b/lib/fast_rcnn/train.py index d7633ee5..680e43bf 100644 --- a/lib/fast_rcnn/train.py +++ b/lib/fast_rcnn/train.py @@ -33,10 +33,10 @@ def __init__(self, sess, saver, network, imdb, roidb, output_dir, pretrained_mod self.output_dir = output_dir self.pretrained_model = pretrained_model - 
print 'Computing bounding-box regression targets...' + print('Computing bounding-box regression targets...') if cfg.TRAIN.BBOX_REG: self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) - print 'done' + print('done') # For checkpoint self.saver = saver @@ -47,7 +47,7 @@ def snapshot(self, sess, iter): """ net = self.net - if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: # save original values with tf.variable_scope('bbox_pred', reuse=True): weights = tf.get_variable("weights") @@ -71,9 +71,9 @@ def snapshot(self, sess, iter): filename = os.path.join(self.output_dir, filename) self.saver.save(sess, filename) - print 'Wrote snapshot to: {:s}'.format(filename) + print('Wrote snapshot to: {:s}'.format(filename)) - if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: with tf.variable_scope('bbox_pred', reuse=True): # restore net to original state sess.run(net.bbox_weights_assign, feed_dict={net.bbox_weights: orig_0}) @@ -150,8 +150,8 @@ def train_model(self, sess, max_iters): # iintialize variables sess.run(tf.global_variables_initializer()) if self.pretrained_model is not None: - print ('Loading pretrained model ' - 'weights from {:s}').format(self.pretrained_model) + print(('Loading pretrained model ' + 'weights from {:s}').format(self.pretrained_model)) self.net.load(self.pretrained_model, sess, self.saver, True) last_snapshot_iter = -1 @@ -181,14 +181,14 @@ def train_model(self, sess, max_iters): if cfg.TRAIN.DEBUG_TIMELINE: trace = timeline.Timeline(step_stats=run_metadata.step_stats) - trace_file = open(str(long(time.time() * 1000)) + '-train-timeline.ctf.json', 'w') + trace_file = open(str(int(time.time() * 1000)) + '-train-timeline.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if (iter+1) % (cfg.TRAIN.DISPLAY) == 0: - print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\ - (iter+1, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval()) - print 'speed: {:.3f}s / iter'.format(timer.average_time) + print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\ + (iter+1, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval())) + print('speed: {:.3f}s / iter'.format(timer.average_time)) if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0: last_snapshot_iter = iter @@ -200,11 +200,11 @@ def train_model(self, sess, max_iters): def get_training_roidb(imdb): """Returns a roidb (Region of Interest database) for use in training.""" if cfg.TRAIN.USE_FLIPPED: - print 'Appending horizontally-flipped training examples...' + print('Appending horizontally-flipped training examples...') imdb.append_flipped_images() - print 'done' + print('done') - print 'Preparing training data...' 
+ print('Preparing training data...') if cfg.TRAIN.HAS_RPN: if cfg.IS_MULTISCALE: gdl_roidb.prepare_roidb(imdb) @@ -212,7 +212,7 @@ def get_training_roidb(imdb): rdl_roidb.prepare_roidb(imdb) else: rdl_roidb.prepare_roidb(imdb) - print 'done' + print('done') return imdb.roidb @@ -249,8 +249,8 @@ def is_valid(entry): num = len(roidb) filtered_roidb = [entry for entry in roidb if is_valid(entry)] num_after = len(filtered_roidb) - print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, - num, num_after) + print('Filtered {} roidb entries: {} -> {}'.format(num - num_after, + num, num_after)) return filtered_roidb @@ -260,6 +260,6 @@ def train_net(network, imdb, roidb, output_dir, pretrained_model=None, max_iters saver = tf.train.Saver(max_to_keep=100) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: sw = SolverWrapper(sess, saver, network, imdb, roidb, output_dir, pretrained_model=pretrained_model) - print 'Solving...' + print('Solving...') sw.train_model(sess, max_iters) - print 'done solving' + print('done solving') diff --git a/lib/fast_rcnn/train.py.bak b/lib/fast_rcnn/train.py.bak new file mode 100644 index 00000000..d7633ee5 --- /dev/null +++ b/lib/fast_rcnn/train.py.bak @@ -0,0 +1,265 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Train a Fast R-CNN network.""" + +from fast_rcnn.config import cfg +import gt_data_layer.roidb as gdl_roidb +import roi_data_layer.roidb as rdl_roidb +from roi_data_layer.layer import RoIDataLayer +from utils.timer import Timer +import numpy as np +import os +import tensorflow as tf +import sys +from tensorflow.python.client import timeline +import time + +class SolverWrapper(object): + """A simple wrapper around Caffe's solver. + This wrapper gives us control over he snapshotting process, which we + use to unnormalize the learned bounding-box regression weights. + """ + + def __init__(self, sess, saver, network, imdb, roidb, output_dir, pretrained_model=None): + """Initialize the SolverWrapper.""" + self.net = network + self.imdb = imdb + self.roidb = roidb + self.output_dir = output_dir + self.pretrained_model = pretrained_model + + print 'Computing bounding-box regression targets...' + if cfg.TRAIN.BBOX_REG: + self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) + print 'done' + + # For checkpoint + self.saver = saver + + def snapshot(self, sess, iter): + """Take a snapshot of the network after unnormalizing the learned + bounding-box regression weights. This enables easy use at test-time. 
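The unnormalization that `snapshot` performs (its body follows) folds the target whitening into the final `bbox_pred` layer so the saved checkpoint emits raw deltas. The algebra in NumPy, with illustrative shapes:

```python
import numpy as np

num_classes = 21
stds  = np.tile([0.1, 0.1, 0.2, 0.2], num_classes)   # one std per output, 4 per class
means = np.tile([0.0, 0.0, 0.0, 0.0], num_classes)

weights = np.random.randn(4096, 4 * num_classes)     # fc weights feeding bbox_pred
biases  = np.random.randn(4 * num_classes)

# fold normalization into the layer: (xW + b) * std + mean == xW' + b'
weights_out = weights * np.tile(stds, (weights.shape[0], 1))
biases_out  = biases * stds + means

x = np.random.randn(4096)
assert np.allclose((x @ weights + biases) * stds + means,
                   x @ weights_out + biases_out)
```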
+ """ + net = self.net + + if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + # save original values + with tf.variable_scope('bbox_pred', reuse=True): + weights = tf.get_variable("weights") + biases = tf.get_variable("biases") + + orig_0 = weights.eval() + orig_1 = biases.eval() + + # scale and shift with bbox reg unnormalization; then save snapshot + weights_shape = weights.get_shape().as_list() + sess.run(net.bbox_weights_assign, feed_dict={net.bbox_weights: orig_0 * np.tile(self.bbox_stds, (weights_shape[0], 1))}) + sess.run(net.bbox_bias_assign, feed_dict={net.bbox_biases: orig_1 * self.bbox_stds + self.bbox_means}) + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + + infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX + if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') + filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix + + '_iter_{:d}'.format(iter+1) + '.ckpt') + filename = os.path.join(self.output_dir, filename) + + self.saver.save(sess, filename) + print 'Wrote snapshot to: {:s}'.format(filename) + + if cfg.TRAIN.BBOX_REG and net.layers.has_key('bbox_pred'): + with tf.variable_scope('bbox_pred', reuse=True): + # restore net to original state + sess.run(net.bbox_weights_assign, feed_dict={net.bbox_weights: orig_0}) + sess.run(net.bbox_bias_assign, feed_dict={net.bbox_biases: orig_1}) + + def _modified_smooth_l1(self, sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights): + """ + ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets)) + SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2 + |x| - 0.5 / sigma^2, otherwise + """ + sigma2 = sigma * sigma + + inside_mul = tf.multiply(bbox_inside_weights, tf.subtract(bbox_pred, bbox_targets)) + + smooth_l1_sign = tf.cast(tf.less(tf.abs(inside_mul), 1.0 / sigma2), tf.float32) + smooth_l1_option1 = tf.multiply(tf.multiply(inside_mul, inside_mul), 0.5 * sigma2) + smooth_l1_option2 = tf.subtract(tf.abs(inside_mul), 0.5 / sigma2) + smooth_l1_result = tf.add(tf.multiply(smooth_l1_option1, smooth_l1_sign), + tf.multiply(smooth_l1_option2, tf.abs(tf.subtract(smooth_l1_sign, 1.0)))) + + outside_mul = tf.multiply(bbox_outside_weights, smooth_l1_result) + + return outside_mul + + + def train_model(self, sess, max_iters): + """Network training loop.""" + + data_layer = get_data_layer(self.roidb, self.imdb.num_classes) + + # RPN + # classification loss + rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score_reshape'),[-1,2]) + rpn_label = tf.reshape(self.net.get_output('rpn-data')[0],[-1]) + rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score,tf.where(tf.not_equal(rpn_label,-1))),[-1,2]) + rpn_label = tf.reshape(tf.gather(rpn_label,tf.where(tf.not_equal(rpn_label,-1))),[-1]) + rpn_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label)) + + # bounding box regression L1 loss + rpn_bbox_pred = self.net.get_output('rpn_bbox_pred') + rpn_bbox_targets = tf.transpose(self.net.get_output('rpn-data')[1],[0,2,3,1]) + rpn_bbox_inside_weights = tf.transpose(self.net.get_output('rpn-data')[2],[0,2,3,1]) + rpn_bbox_outside_weights = tf.transpose(self.net.get_output('rpn-data')[3],[0,2,3,1]) + + rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights) + rpn_loss_box = tf.reduce_mean(tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3])) + + # R-CNN + # classification loss + cls_score = self.net.get_output('cls_score') + label = 
tf.reshape(self.net.get_output('roi-data')[1],[-1]) + cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label)) + + # bounding box regression L1 loss + bbox_pred = self.net.get_output('bbox_pred') + bbox_targets = self.net.get_output('roi-data')[2] + bbox_inside_weights = self.net.get_output('roi-data')[3] + bbox_outside_weights = self.net.get_output('roi-data')[4] + + smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) + loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1])) + + # final loss + loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box + + # optimizer and learning rate + global_step = tf.Variable(0, trainable=False) + lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, + cfg.TRAIN.STEPSIZE, 0.1, staircase=True) + momentum = cfg.TRAIN.MOMENTUM + train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss, global_step=global_step) + + # iintialize variables + sess.run(tf.global_variables_initializer()) + if self.pretrained_model is not None: + print ('Loading pretrained model ' + 'weights from {:s}').format(self.pretrained_model) + self.net.load(self.pretrained_model, sess, self.saver, True) + + last_snapshot_iter = -1 + timer = Timer() + for iter in range(max_iters): + # get one batch + blobs = data_layer.forward() + + # Make one SGD update + feed_dict={self.net.data: blobs['data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, \ + self.net.gt_boxes: blobs['gt_boxes']} + + run_options = None + run_metadata = None + if cfg.TRAIN.DEBUG_TIMELINE: + run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + run_metadata = tf.RunMetadata() + + timer.tic() + + rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, _ = sess.run([rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box, train_op], + feed_dict=feed_dict, + options=run_options, + run_metadata=run_metadata) + + timer.toc() + + if cfg.TRAIN.DEBUG_TIMELINE: + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + trace_file = open(str(long(time.time() * 1000)) + '-train-timeline.ctf.json', 'w') + trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) + trace_file.close() + + if (iter+1) % (cfg.TRAIN.DISPLAY) == 0: + print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f'%\ + (iter+1, max_iters, rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value ,rpn_loss_cls_value, rpn_loss_box_value,loss_cls_value, loss_box_value, lr.eval()) + print 'speed: {:.3f}s / iter'.format(timer.average_time) + + if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0: + last_snapshot_iter = iter + self.snapshot(sess, iter) + + if last_snapshot_iter != iter: + self.snapshot(sess, iter) + +def get_training_roidb(imdb): + """Returns a roidb (Region of Interest database) for use in training.""" + if cfg.TRAIN.USE_FLIPPED: + print 'Appending horizontally-flipped training examples...' + imdb.append_flipped_images() + print 'done' + + print 'Preparing training data...' 
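`_modified_smooth_l1` above builds the piecewise loss from its docstring out of `tf.less`/`tf.cast` masks. The same function in plain NumPy, handy for spot-checking values:

```python
import numpy as np

def smooth_l1(x, sigma):
    """0.5 * (sigma * x)^2 where |x| < 1 / sigma^2, else |x| - 0.5 / sigma^2."""
    sigma2 = sigma * sigma
    small = np.abs(x) < 1.0 / sigma2
    return np.where(small, 0.5 * sigma2 * x * x, np.abs(x) - 0.5 / sigma2)

x = np.array([-2.0, -0.05, 0.0, 0.05, 2.0])
print(smooth_l1(x, sigma=3.0))   # the RPN branch uses sigma=3.0, the R-CNN head 1.0
```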
+ if cfg.TRAIN.HAS_RPN: + if cfg.IS_MULTISCALE: + gdl_roidb.prepare_roidb(imdb) + else: + rdl_roidb.prepare_roidb(imdb) + else: + rdl_roidb.prepare_roidb(imdb) + print 'done' + + return imdb.roidb + + +def get_data_layer(roidb, num_classes): + """return a data layer.""" + if cfg.TRAIN.HAS_RPN: + if cfg.IS_MULTISCALE: + layer = GtDataLayer(roidb) + else: + layer = RoIDataLayer(roidb, num_classes) + else: + layer = RoIDataLayer(roidb, num_classes) + + return layer + +def filter_roidb(roidb): + """Remove roidb entries that have no usable RoIs.""" + + def is_valid(entry): + # Valid images have: + # (1) At least one foreground RoI OR + # (2) At least one background RoI + overlaps = entry['max_overlaps'] + # find boxes with sufficient overlap + fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # image is only valid if such boxes exist + valid = len(fg_inds) > 0 or len(bg_inds) > 0 + return valid + + num = len(roidb) + filtered_roidb = [entry for entry in roidb if is_valid(entry)] + num_after = len(filtered_roidb) + print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, + num, num_after) + return filtered_roidb + + +def train_net(network, imdb, roidb, output_dir, pretrained_model=None, max_iters=40000): + """Train a Fast R-CNN network.""" + roidb = filter_roidb(roidb) + saver = tf.train.Saver(max_to_keep=100) + with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: + sw = SolverWrapper(sess, saver, network, imdb, roidb, output_dir, pretrained_model=pretrained_model) + print 'Solving...' + sw.train_model(sess, max_iters) + print 'done solving' diff --git a/lib/gt_data_layer/layer.py b/lib/gt_data_layer/layer.py index 9c93c9e6..4e041546 100644 --- a/lib/gt_data_layer/layer.py +++ b/lib/gt_data_layer/layer.py @@ -93,7 +93,7 @@ def forward(self, bottom, top): """Get blobs and copy them into this layer's top blob vector.""" blobs = self._get_next_minibatch() - for blob_name, blob in blobs.iteritems(): + for blob_name, blob in blobs.items(): top_ind = self._name_to_top_map[blob_name] # Reshape net's input blobs top[top_ind].reshape(*(blob.shape)) diff --git a/lib/gt_data_layer/layer.py.bak b/lib/gt_data_layer/layer.py.bak new file mode 100644 index 00000000..9c93c9e6 --- /dev/null +++ b/lib/gt_data_layer/layer.py.bak @@ -0,0 +1,109 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""The data layer used during training to train a Fast R-CNN network. + +GtDataLayer implements a Caffe Python layer. 
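The shuffle-and-cursor minibatch sampling that `GtDataLayer` implements below is self-contained enough to sketch on its own; `num_items` and `ims_per_batch` stand in for the roidb length and `cfg.TRAIN.IMS_PER_BATCH`:

```python
import numpy as np

class MinibatchSampler(object):
    """Epoch-style sampler mirroring _shuffle_roidb_inds / _get_next_minibatch_inds."""
    def __init__(self, num_items, ims_per_batch=2):
        self.num_items = num_items
        self.ims_per_batch = ims_per_batch
        self._shuffle()

    def _shuffle(self):
        self._perm = np.random.permutation(np.arange(self.num_items))
        self._cur = 0

    def next_inds(self):
        # reshuffle once the current permutation is (nearly) exhausted
        if self._cur + self.ims_per_batch >= self.num_items:
            self._shuffle()
        inds = self._perm[self._cur:self._cur + self.ims_per_batch]
        self._cur += self.ims_per_batch
        return inds

sampler = MinibatchSampler(10)
print(sampler.next_inds())   # e.g. [7 2]
```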
+""" + +import caffe +from fast_rcnn.config import cfg +from gt_data_layer.minibatch import get_minibatch +import numpy as np +import yaml +from multiprocessing import Process, Queue + +class GtDataLayer(caffe.Layer): + """Fast R-CNN data layer used for training.""" + + def _shuffle_roidb_inds(self): + """Randomly permute the training roidb.""" + self._perm = np.random.permutation(np.arange(len(self._roidb))) + self._cur = 0 + + def _get_next_minibatch_inds(self): + """Return the roidb indices for the next minibatch.""" + if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): + self._shuffle_roidb_inds() + + db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] + self._cur += cfg.TRAIN.IMS_PER_BATCH + + """ + # sample images with gt objects + db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) + i = 0 + while (i < cfg.TRAIN.IMS_PER_BATCH): + ind = self._perm[self._cur] + num_objs = self._roidb[ind]['boxes'].shape[0] + if num_objs != 0: + db_inds[i] = ind + i += 1 + + self._cur += 1 + if self._cur >= len(self._roidb): + self._shuffle_roidb_inds() + """ + + return db_inds + + def _get_next_minibatch(self): + """Return the blobs to be used for the next minibatch.""" + db_inds = self._get_next_minibatch_inds() + minibatch_db = [self._roidb[i] for i in db_inds] + return get_minibatch(minibatch_db, self._num_classes) + + # this function is called in training the net + def set_roidb(self, roidb): + """Set the roidb to be used by this layer during training.""" + self._roidb = roidb + self._shuffle_roidb_inds() + + def setup(self, bottom, top): + """Setup the GtDataLayer.""" + + # parse the layer parameter string, which must be valid YAML + layer_params = yaml.load(self.param_str_) + + self._num_classes = layer_params['num_classes'] + + self._name_to_top_map = { + 'data': 0, + 'info_boxes': 1, + 'parameters': 2} + + # data blob: holds a batch of N images, each with 3 channels + # The height and width (100 x 100) are dummy values + num_scale_base = len(cfg.TRAIN.SCALES_BASE) + top[0].reshape(num_scale_base, 3, 100, 100) + + # info boxes blob + top[1].reshape(1, 18) + + # parameters blob + num_scale = len(cfg.TRAIN.SCALES) + num_aspect = len(cfg.TRAIN.ASPECTS) + top[2].reshape(2 + 2*num_scale + 2*num_aspect) + + def forward(self, bottom, top): + """Get blobs and copy them into this layer's top blob vector.""" + blobs = self._get_next_minibatch() + + for blob_name, blob in blobs.iteritems(): + top_ind = self._name_to_top_map[blob_name] + # Reshape net's input blobs + top[top_ind].reshape(*(blob.shape)) + # Copy data into net's input blobs + top[top_ind].data[...] 
= blob.astype(np.float32, copy=False) + + def backward(self, top, propagate_down, bottom): + """This layer does not propagate gradients.""" + pass + + def reshape(self, bottom, top): + """Reshaping happens during the call to forward.""" + pass diff --git a/lib/gt_data_layer/minibatch.py b/lib/gt_data_layer/minibatch.py index 1ee74ce7..b6e15721 100644 --- a/lib/gt_data_layer/minibatch.py +++ b/lib/gt_data_layer/minibatch.py @@ -26,7 +26,7 @@ def get_minibatch(roidb, num_classes): # build the box information blob info_boxes_blob = np.zeros((0, 18), dtype=np.float32) num_scale = len(cfg.TRAIN.SCALES) - for i in xrange(num_images): + for i in range(num_images): info_boxes = roidb[i]['info_boxes'] # change the batch index @@ -61,7 +61,7 @@ def _get_image_blob(roidb): num_images = len(roidb) processed_ims = [] - for i in xrange(num_images): + for i in range(num_images): # read image im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: @@ -115,7 +115,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): """Visualize a mini-batch for debugging.""" import matplotlib.pyplot as plt - for i in xrange(rois_blob.shape[0]): + for i in range(rois_blob.shape[0]): rois = rois_blob[i, :] im_ind = rois[0] roi = rois[2:] @@ -126,7 +126,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): cls = labels_blob[i] subcls = sublabels_blob[i] plt.imshow(im) - print 'class: ', cls, ' subclass: ', subcls + print('class: ', cls, ' subclass: ', subcls) plt.gca().add_patch( plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], roi[3] - roi[1], fill=False, diff --git a/lib/gt_data_layer/minibatch.py.bak b/lib/gt_data_layer/minibatch.py.bak new file mode 100644 index 00000000..1ee74ce7 --- /dev/null +++ b/lib/gt_data_layer/minibatch.py.bak @@ -0,0 +1,135 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Compute minibatch blobs for training a Fast R-CNN network.""" + +import numpy as np +import numpy.random as npr +import cv2 +from fast_rcnn.config import cfg +from utils.blob import prep_im_for_blob, im_list_to_blob + +def get_minibatch(roidb, num_classes): + """Given a roidb, construct a minibatch sampled from it.""" + num_images = len(roidb) + assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ + 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ + format(num_images, cfg.TRAIN.BATCH_SIZE) + + # Get the input image blob, formatted for caffe + im_blob = _get_image_blob(roidb) + + # build the box information blob + info_boxes_blob = np.zeros((0, 18), dtype=np.float32) + num_scale = len(cfg.TRAIN.SCALES) + for i in xrange(num_images): + info_boxes = roidb[i]['info_boxes'] + + # change the batch index + info_boxes[:,2] += i * num_scale + info_boxes[:,7] += i * num_scale + + info_boxes_blob = np.vstack((info_boxes_blob, info_boxes)) + + # build the parameter blob + num_aspect = len(cfg.TRAIN.ASPECTS) + num = 2 + 2 * num_scale + 2 * num_aspect + parameters_blob = np.zeros((num), dtype=np.float32) + parameters_blob[0] = num_scale + parameters_blob[1] = num_aspect + parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES + parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING + parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS + parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS + + # For debug visualizations + # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob) + + blobs = {'data': im_blob, + 'info_boxes': info_boxes_blob, + 'parameters': parameters_blob} + + return blobs + +def _get_image_blob(roidb): + """Builds an input blob from the images in the roidb at the different scales. + """ + num_images = len(roidb) + processed_ims = [] + + for i in xrange(num_images): + # read image + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + # build image pyramid + for im_scale in cfg.TRAIN.SCALES_BASE: + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR) + + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob + +def _project_im_rois(im_rois, im_scale_factor): + """Project image RoIs into the rescaled training image.""" + rois = im_rois * im_scale_factor + return rois + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. + + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_loss_weights (ndarray): N x 4K blob of loss weights + """ + clss = bbox_target_data[:, 0] + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 
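`_get_bbox_regression_labels` here scatters each compact `(cls, tx, ty, tw, th)` row into a 4*K-wide row that is zero outside the ground-truth class's slot. A small numeric demo (casting `cls` to `int` for indexing, a NumPy-required adjustment):

```python
import numpy as np

num_classes = 3                                          # background + 2 classes
bbox_target_data = np.array([[1, 0.1, 0.2, 0.3, 0.4],    # class 1
                             [0, 0.0, 0.0, 0.0, 0.0]])   # background: no target

bbox_targets = np.zeros((2, 4 * num_classes), dtype=np.float32)
bbox_loss_weights = np.zeros_like(bbox_targets)
for ind in np.where(bbox_target_data[:, 0] > 0)[0]:
    cls = int(bbox_target_data[ind, 0])
    bbox_targets[ind, 4 * cls:4 * cls + 4] = bbox_target_data[ind, 1:]
    bbox_loss_weights[ind, 4 * cls:4 * cls + 4] = 1.0

print(bbox_targets[0])   # zeros in cols 0-3 and 8-11, targets in cols 4-7
```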
+ return bbox_targets, bbox_loss_weights + + +def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): + """Visualize a mini-batch for debugging.""" + import matplotlib.pyplot as plt + for i in xrange(rois_blob.shape[0]): + rois = rois_blob[i, :] + im_ind = rois[0] + roi = rois[2:] + im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() + im += cfg.PIXEL_MEANS + im = im[:, :, (2, 1, 0)] + im = im.astype(np.uint8) + cls = labels_blob[i] + subcls = sublabels_blob[i] + plt.imshow(im) + print 'class: ', cls, ' subclass: ', subcls + plt.gca().add_patch( + plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], + roi[3] - roi[1], fill=False, + edgecolor='r', linewidth=3) + ) + plt.show() diff --git a/lib/gt_data_layer/roidb.py b/lib/gt_data_layer/roidb.py index 2f3a87e9..90ee9197 100644 --- a/lib/gt_data_layer/roidb.py +++ b/lib/gt_data_layer/roidb.py @@ -15,7 +15,7 @@ import PIL import math import os -import cPickle +import pickle import pdb @@ -29,12 +29,12 @@ def prepare_roidb(imdb): cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: - imdb._roidb = cPickle.load(fid) - print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file) + imdb._roidb = pickle.load(fid) + print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)) return roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) boxes = roidb[i]['boxes'] labels = roidb[i]['gt_classes'] @@ -62,7 +62,7 @@ def prepare_roidb(imdb): # select positive boxes fg_inds = [] - for k in xrange(1, imdb.num_classes): + for k in range(1, imdb.num_classes): fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) if len(fg_inds) > 0: @@ -88,8 +88,8 @@ def prepare_roidb(imdb): roidb[i]['info_boxes'] = info_boxes with open(cache_file, 'wb') as fid: - cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) - print 'wrote gt roidb prepared to {}'.format(cache_file) + pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) + print('wrote gt roidb prepared to {}'.format(cache_file)) def add_bbox_regression_targets(roidb): """Add information needed to train bounding-box regressors.""" @@ -105,9 +105,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['info_boxes'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 12] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -118,9 +118,9 @@ def add_bbox_regression_targets(roidb): stds = np.sqrt(squared_sums / class_counts - means ** 2) # Normalize targets - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['info_boxes'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 12] == cls)[0] roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :] if stds[cls, 0] != 0: diff --git a/lib/gt_data_layer/roidb.py.bak b/lib/gt_data_layer/roidb.py.bak new file mode 100644 index 00000000..2f3a87e9 --- /dev/null +++ b/lib/gt_data_layer/roidb.py.bak @@ -0,0 +1,156 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] 
+# Written by Ross Girshick +# -------------------------------------------------------- + +"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" + +import numpy as np +from fast_rcnn.config import cfg +from utils.cython_bbox import bbox_overlaps +from utils.boxes_grid import get_boxes_grid +import scipy.sparse +import PIL +import math +import os +import cPickle +import pdb + + +def prepare_roidb(imdb): + """Enrich the imdb's roidb by adding some derived quantities that + are useful for training. This function precomputes the maximum + overlap, taken over ground-truth boxes, between each ROI and + each ground-truth box. The class with maximum overlap is also + recorded. + """ + cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fid: + imdb._roidb = cPickle.load(fid) + print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file) + return + + roidb = imdb.roidb + for i in xrange(len(imdb.image_index)): + roidb[i]['image'] = imdb.image_path_at(i) + boxes = roidb[i]['boxes'] + labels = roidb[i]['gt_classes'] + info_boxes = np.zeros((0, 18), dtype=np.float32) + + if boxes.shape[0] == 0: + roidb[i]['info_boxes'] = info_boxes + continue + + # compute grid boxes + s = PIL.Image.open(imdb.image_path_at(i)).size + image_height = s[1] + image_width = s[0] + boxes_grid, cx, cy = get_boxes_grid(image_height, image_width) + + # for each scale + for scale_ind, scale in enumerate(cfg.TRAIN.SCALES): + boxes_rescaled = boxes * scale + + # compute overlap + overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float)) + max_overlaps = overlaps.max(axis = 1) + argmax_overlaps = overlaps.argmax(axis = 1) + max_classes = labels[argmax_overlaps] + + # select positive boxes + fg_inds = [] + for k in xrange(1, imdb.num_classes): + fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) + + if len(fg_inds) > 0: + gt_inds = argmax_overlaps[fg_inds] + # bounding box regression targets + gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:]) + # scale mapping for RoI pooling + scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind] + scale_map = cfg.TRAIN.SCALES[scale_ind_map] + # contruct the list of positive boxes + # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target) + info_box = np.zeros((len(fg_inds), 18), dtype=np.float32) + info_box[:, 0] = cx[fg_inds] + info_box[:, 1] = cy[fg_inds] + info_box[:, 2] = scale_ind + info_box[:, 3:7] = boxes_grid[fg_inds,:] + info_box[:, 7] = scale_ind_map + info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale + info_box[:, 12] = labels[gt_inds] + info_box[:, 14:] = gt_targets + info_boxes = np.vstack((info_boxes, info_box)) + + roidb[i]['info_boxes'] = info_boxes + + with open(cache_file, 'wb') as fid: + cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) + print 'wrote gt roidb prepared to {}'.format(cache_file) + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + assert len(roidb) > 0 + assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?' 
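The statistics loop that follows relies on the single-pass identity var(x) = E[x^2] - E[x]^2, which lets it accumulate per-class sums without holding all targets at once. A quick NumPy check of the identity:

```python
import numpy as np

targets = np.random.randn(1000, 4)    # stand-in for one class's regression targets

count = targets.shape[0]
sums = targets.sum(axis=0)
squared_sums = (targets ** 2).sum(axis=0)

means = sums / count
stds = np.sqrt(squared_sums / count - means ** 2)   # E[x^2] - E[x]^2

assert np.allclose(means, targets.mean(axis=0))
assert np.allclose(stds, targets.std(axis=0))       # np.std is the population std
```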
+ + num_images = len(roidb) + # Infer number of classes from the number of columns in gt_overlaps + num_classes = roidb[0]['gt_overlaps'].shape[1] + + # Compute values needed for means and stds + # var(x) = E(x^2) - E(x)^2 + class_counts = np.zeros((num_classes, 1)) + cfg.EPS + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in xrange(num_images): + targets = roidb[im_i]['info_boxes'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 12] == cls)[0] + if cls_inds.size > 0: + class_counts[cls] += cls_inds.size + sums[cls, :] += targets[cls_inds, 14:].sum(axis=0) + squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0) + + means = sums / class_counts + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + # Normalize targets + for im_i in xrange(num_images): + targets = roidb[im_i]['info_boxes'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 12] == cls)[0] + roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :] + if stds[cls, 0] != 0: + roidb[im_i]['info_boxes'][cls_inds, 14:] /= stds[cls, :] + + # These values will be needed for making predictions + # (the predicts will need to be unnormalized and uncentered) + return means.ravel(), stds.ravel() + +def _compute_targets(ex_rois, gt_rois): + """Compute bounding-box regression targets for an image. The targets are scale invariance""" + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS + ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths + ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights + + gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS + gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS + gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths + gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights + + targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = np.log(gt_widths / ex_widths) + targets_dh = np.log(gt_heights / ex_heights) + + targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32) + targets[:, 0] = targets_dx + targets[:, 1] = targets_dy + targets[:, 2] = targets_dw + targets[:, 3] = targets_dh + return targets diff --git a/lib/make.sh b/lib/make.sh index 15a616bc..515baa42 100755 --- a/lib/make.sh +++ b/lib/make.sh @@ -2,21 +2,28 @@ TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') CUDA_PATH=/usr/local/cuda/ CXXFLAGS='' - +TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') if [[ "$OSTYPE" =~ ^darwin ]]; then CXXFLAGS+='-undefined dynamic_lookup' fi cd roi_pooling_layer + + + if [ -d "$CUDA_PATH" ]; then nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \ -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CXXFLAGS \ -arch=sm_37 - - g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ - roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \ - -lcudart -L $CUDA_PATH/lib64 + + g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc -D_GLIBCXX_USE_CXX11_ABI=0 \ +roi_pooling_op.cu.o -I $TF_INC -L $TF_LIB -ltensorflow_framework -D GOOGLE_CUDA=1 \ +-fPIC $CXXFLAGS -lcudart -L $CUDA_PATH/lib64 + +# g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ +# roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \ +# -lcudart -L $CUDA_PATH/lib64 else g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ -I $TF_INC -fPIC $CXXFLAGS diff --git a/lib/networks/factory.py b/lib/networks/factory.py index 2b88cf60..172b50a7 100644 --- 
a/lib/networks/factory.py
+++ b/lib/networks/factory.py
@@ -34,4 +34,4 @@ def get_network(name):
 
 def list_networks():
     """List all registered imdbs."""
-    return __sets.keys()
+    return list(__sets.keys())
diff --git a/lib/networks/factory.py.bak b/lib/networks/factory.py.bak
new file mode 100644
index 00000000..2b88cf60
--- /dev/null
+++ b/lib/networks/factory.py.bak
@@ -0,0 +1,37 @@
+# --------------------------------------------------------
+# SubCNN_TF
+# Copyright (c) 2016 CVGL Stanford
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Yu Xiang
+# --------------------------------------------------------
+
+"""Factory method for easily getting imdbs by name."""
+
+__sets = {}
+
+import networks.VGGnet_train
+import networks.VGGnet_test
+import pdb
+import tensorflow as tf
+
+#__sets['VGGnet_train'] = networks.VGGnet_train()
+
+#__sets['VGGnet_test'] = networks.VGGnet_test()
+
+
+def get_network(name):
+    """Get a network by name."""
+    #if not __sets.has_key(name):
+    #    raise KeyError('Unknown dataset: {}'.format(name))
+    #return __sets[name]
+    if name.split('_')[1] == 'test':
+       return networks.VGGnet_test()
+    elif name.split('_')[1] == 'train':
+       return networks.VGGnet_train()
+    else:
+       raise KeyError('Unknown dataset: {}'.format(name))
+
+
+def list_networks():
+    """List all registered imdbs."""
+    return __sets.keys()
diff --git a/lib/networks/network.py b/lib/networks/network.py
index d51f32ff..051a324b 100644
--- a/lib/networks/network.py
+++ b/lib/networks/network.py
@@ -52,9 +52,9 @@ def load(self, data_path, session, saver, ignore_missing=False):
                     try:
                         var = tf.get_variable(subkey)
                         session.run(var.assign(data_dict[key][subkey]))
-                        print "assign pretrain model "+subkey+ " to "+key
+                        print("assign pretrain model "+subkey+ " to "+key)
                     except ValueError:
-                        print "ignore "+key
+                        print("ignore "+key)
                         if not ignore_missing:
                             raise
@@ -63,12 +63,12 @@ def feed(self, *args):
         assert len(args)!=0
         self.inputs = []
         for layer in args:
-            if isinstance(layer, basestring):
+            if isinstance(layer, str):
                 try:
                     layer = self.layers[layer]
-                    print layer
+                    print(layer)
                 except KeyError:
-                    print self.layers.keys()
+                    print(list(self.layers.keys()))
                     raise KeyError('Unknown layer name fed: %s'%layer)
             self.inputs.append(layer)
         return self
@@ -77,12 +77,12 @@ def get_output(self, layer):
         try:
             layer = self.layers[layer]
         except KeyError:
-            print self.layers.keys()
+            print(list(self.layers.keys()))
             raise KeyError('Unknown layer name fed: %s'%layer)
         return layer
 
     def get_unique_name(self, prefix):
-        id = sum(t.startswith(prefix) for t,_ in self.layers.items())+1
+        id = sum(t.startswith(prefix) for t,_ in list(self.layers.items()))+1
         return '%s_%d'%(prefix, id)
 
     def make_var(self, name, shape, initializer=None, trainable=True):
@@ -102,7 +102,7 @@ def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_
             init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
             init_biases = tf.constant_initializer(0.0)
-            kernel = self.make_var('weights', [k_h, k_w, c_i/group, c_o], init_weights, trainable)
+            kernel = self.make_var('weights', [k_h, k_w, int(c_i)//group, c_o], init_weights, trainable)
             biases = self.make_var('biases', [c_o], init_biases, trainable)
 
             if group==1:
@@ -148,7 +148,7 @@ def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name):
         if isinstance(input[1], tuple):
             input[1] = input[1][0]
 
-        print input
+        print(input)
         return roi_pool_op.roi_pool(input[0], input[1],
                                     pooled_height,
                                     pooled_width,
diff --git a/lib/networks/network.py.bak
b/lib/networks/network.py.bak new file mode 100644 index 00000000..d51f32ff --- /dev/null +++ b/lib/networks/network.py.bak @@ -0,0 +1,272 @@ +import numpy as np +import tensorflow as tf +import roi_pooling_layer.roi_pooling_op as roi_pool_op +import roi_pooling_layer.roi_pooling_op_grad +from rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py +from rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py +from rpn_msr.proposal_target_layer_tf import proposal_target_layer as proposal_target_layer_py + + + +DEFAULT_PADDING = 'SAME' + +def layer(op): + def layer_decorated(self, *args, **kwargs): + # Automatically set a name if not provided. + name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) + # Figure out the layer inputs. + if len(self.inputs)==0: + raise RuntimeError('No input variables found for layer %s.'%name) + elif len(self.inputs)==1: + layer_input = self.inputs[0] + else: + layer_input = list(self.inputs) + # Perform the operation and get the output. + layer_output = op(self, layer_input, *args, **kwargs) + # Add to layer LUT. + self.layers[name] = layer_output + # This output is now the input for the next layer. + self.feed(layer_output) + # Return self for chained calls. + return self + return layer_decorated + +class Network(object): + def __init__(self, inputs, trainable=True): + self.inputs = [] + self.layers = dict(inputs) + self.trainable = trainable + self.setup() + + def setup(self): + raise NotImplementedError('Must be subclassed.') + + def load(self, data_path, session, saver, ignore_missing=False): + if data_path.endswith('.ckpt'): + saver.restore(session, data_path) + else: + data_dict = np.load(data_path).item() + for key in data_dict: + with tf.variable_scope(key, reuse=True): + for subkey in data_dict[key]: + try: + var = tf.get_variable(subkey) + session.run(var.assign(data_dict[key][subkey])) + print "assign pretrain model "+subkey+ " to "+key + except ValueError: + print "ignore "+key + if not ignore_missing: + + raise + + def feed(self, *args): + assert len(args)!=0 + self.inputs = [] + for layer in args: + if isinstance(layer, basestring): + try: + layer = self.layers[layer] + print layer + except KeyError: + print self.layers.keys() + raise KeyError('Unknown layer name fed: %s'%layer) + self.inputs.append(layer) + return self + + def get_output(self, layer): + try: + layer = self.layers[layer] + except KeyError: + print self.layers.keys() + raise KeyError('Unknown layer name fed: %s'%layer) + return layer + + def get_unique_name(self, prefix): + id = sum(t.startswith(prefix) for t,_ in self.layers.items())+1 + return '%s_%d'%(prefix, id) + + def make_var(self, name, shape, initializer=None, trainable=True): + return tf.get_variable(name, shape, initializer=initializer, trainable=trainable) + + def validate_padding(self, padding): + assert padding in ('SAME', 'VALID') + + @layer + def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, trainable=True): + self.validate_padding(padding) + c_i = input.get_shape()[-1] + assert c_i%group==0 + assert c_o%group==0 + convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) + with tf.variable_scope(name) as scope: + + init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) + init_biases = tf.constant_initializer(0.0) + kernel = self.make_var('weights', [k_h, k_w, c_i/group, c_o], init_weights, trainable) + biases = self.make_var('biases', [c_o], init_biases, trainable) + + if group==1: + 
conv = convolve(input, kernel) + else: + input_groups = tf.split(3, group, input) + kernel_groups = tf.split(3, group, kernel) + output_groups = [convolve(i, k) for i,k in zip(input_groups, kernel_groups)] + conv = tf.concat(3, output_groups) + if relu: + bias = tf.nn.bias_add(conv, biases) + return tf.nn.relu(bias, name=scope.name) + return tf.nn.bias_add(conv, biases, name=scope.name) + + @layer + def relu(self, input, name): + return tf.nn.relu(input, name=name) + + @layer + def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): + self.validate_padding(padding) + return tf.nn.max_pool(input, + ksize=[1, k_h, k_w, 1], + strides=[1, s_h, s_w, 1], + padding=padding, + name=name) + + @layer + def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): + self.validate_padding(padding) + return tf.nn.avg_pool(input, + ksize=[1, k_h, k_w, 1], + strides=[1, s_h, s_w, 1], + padding=padding, + name=name) + + @layer + def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name): + # only use the first input + if isinstance(input[0], tuple): + input[0] = input[0][0] + + if isinstance(input[1], tuple): + input[1] = input[1][0] + + print input + return roi_pool_op.roi_pool(input[0], input[1], + pooled_height, + pooled_width, + spatial_scale, + name=name)[0] + + @layer + def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name): + if isinstance(input[0], tuple): + input[0] = input[0][0] + return tf.reshape(tf.py_func(proposal_layer_py,[input[0],input[1],input[2], cfg_key, _feat_stride, anchor_scales], [tf.float32]),[-1,5],name =name) + + + @layer + def anchor_target_layer(self, input, _feat_stride, anchor_scales, name): + if isinstance(input[0], tuple): + input[0] = input[0][0] + + with tf.variable_scope(name) as scope: + + rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights = tf.py_func(anchor_target_layer_py,[input[0],input[1],input[2],input[3], _feat_stride, anchor_scales],[tf.float32,tf.float32,tf.float32,tf.float32]) + + rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels,tf.int32), name = 'rpn_labels') + rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name = 'rpn_bbox_targets') + rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights , name = 'rpn_bbox_inside_weights') + rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights , name = 'rpn_bbox_outside_weights') + + + return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights + + + @layer + def proposal_target_layer(self, input, classes, name): + if isinstance(input[0], tuple): + input[0] = input[0][0] + with tf.variable_scope(name) as scope: + + rois,labels,bbox_targets,bbox_inside_weights,bbox_outside_weights = tf.py_func(proposal_target_layer_py,[input[0],input[1],classes],[tf.float32,tf.float32,tf.float32,tf.float32,tf.float32]) + + rois = tf.reshape(rois,[-1,5] , name = 'rois') + labels = tf.convert_to_tensor(tf.cast(labels,tf.int32), name = 'labels') + bbox_targets = tf.convert_to_tensor(bbox_targets, name = 'bbox_targets') + bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name = 'bbox_inside_weights') + bbox_outside_weights = tf.convert_to_tensor(bbox_outside_weights, name = 'bbox_outside_weights') + + + return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights + + + @layer + def reshape_layer(self, input, d,name): + input_shape = tf.shape(input) + if name == 'rpn_cls_prob_reshape': + return 
tf.transpose(tf.reshape(tf.transpose(input,[0,3,1,2]),[input_shape[0], + int(d),tf.cast(tf.cast(input_shape[1],tf.float32)/tf.cast(d,tf.float32)*tf.cast(input_shape[3],tf.float32),tf.int32),input_shape[2]]),[0,2,3,1],name=name) + else: + return tf.transpose(tf.reshape(tf.transpose(input,[0,3,1,2]),[input_shape[0], + int(d),tf.cast(tf.cast(input_shape[1],tf.float32)*(tf.cast(input_shape[3],tf.float32)/tf.cast(d,tf.float32)),tf.int32),input_shape[2]]),[0,2,3,1],name=name) + + @layer + def feature_extrapolating(self, input, scales_base, num_scale_base, num_per_octave, name): + return feature_extrapolating_op.feature_extrapolating(input, + scales_base, + num_scale_base, + num_per_octave, + name=name) + + @layer + def lrn(self, input, radius, alpha, beta, name, bias=1.0): + return tf.nn.local_response_normalization(input, + depth_radius=radius, + alpha=alpha, + beta=beta, + bias=bias, + name=name) + + @layer + def concat(self, inputs, axis, name): + return tf.concat(concat_dim=axis, values=inputs, name=name) + + @layer + def fc(self, input, num_out, name, relu=True, trainable=True): + with tf.variable_scope(name) as scope: + # only use the first input + if isinstance(input, tuple): + input = input[0] + + input_shape = input.get_shape() + if input_shape.ndims == 4: + dim = 1 + for d in input_shape[1:].as_list(): + dim *= d + feed_in = tf.reshape(tf.transpose(input,[0,3,1,2]), [-1, dim]) + else: + feed_in, dim = (input, int(input_shape[-1])) + + if name == 'bbox_pred': + init_weights = tf.truncated_normal_initializer(0.0, stddev=0.001) + init_biases = tf.constant_initializer(0.0) + else: + init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) + init_biases = tf.constant_initializer(0.0) + + weights = self.make_var('weights', [dim, num_out], init_weights, trainable) + biases = self.make_var('biases', [num_out], init_biases, trainable) + + op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b + fc = op(feed_in, weights, biases, name=scope.name) + return fc + + @layer + def softmax(self, input, name): + input_shape = tf.shape(input) + if name == 'rpn_cls_prob': + return tf.reshape(tf.nn.softmax(tf.reshape(input,[-1,input_shape[3]])),[-1,input_shape[1],input_shape[2],input_shape[3]],name=name) + else: + return tf.nn.softmax(input,name=name) + + @layer + def dropout(self, input, keep_prob, name): + return tf.nn.dropout(input, keep_prob, name=name) diff --git a/lib/roi_data_layer/minibatch.py b/lib/roi_data_layer/minibatch.py index 674ef209..2640cddf 100644 --- a/lib/roi_data_layer/minibatch.py +++ b/lib/roi_data_layer/minibatch.py @@ -49,7 +49,7 @@ def get_minibatch(roidb, num_classes): bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) # all_overlaps = [] - for im_i in xrange(num_images): + for im_i in range(num_images): labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes) @@ -133,7 +133,7 @@ def _get_image_blob(roidb, scale_inds): num_images = len(roidb) processed_ims = [] im_scales = [] - for i in xrange(num_images): + for i in range(num_images): im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: im = im[:, ::-1, :] @@ -180,7 +180,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): """Visualize a mini-batch for debugging.""" import matplotlib.pyplot as plt - for i in xrange(rois_blob.shape[0]): + for i in range(rois_blob.shape[0]): 
rois = rois_blob[i, :] im_ind = rois[0] roi = rois[1:] @@ -190,7 +190,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): im = im.astype(np.uint8) cls = labels_blob[i] plt.imshow(im) - print 'class: ', cls, ' overlap: ', overlaps[i] + print('class: ', cls, ' overlap: ', overlaps[i]) plt.gca().add_patch( plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], roi[3] - roi[1], fill=False, diff --git a/lib/roi_data_layer/minibatch.py.bak b/lib/roi_data_layer/minibatch.py.bak new file mode 100644 index 00000000..674ef209 --- /dev/null +++ b/lib/roi_data_layer/minibatch.py.bak @@ -0,0 +1,199 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Compute minibatch blobs for training a Fast R-CNN network.""" + +import numpy as np +import numpy.random as npr +import cv2 +from fast_rcnn.config import cfg +from utils.blob import prep_im_for_blob, im_list_to_blob + +def get_minibatch(roidb, num_classes): + """Given a roidb, construct a minibatch sampled from it.""" + num_images = len(roidb) + # Sample random scales to use for each image in this batch + random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), + size=num_images) + assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ + 'num_images ({}) must divide BATCH_SIZE ({})'. \ + format(num_images, cfg.TRAIN.BATCH_SIZE) + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images + fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) + + # Get the input image blob, formatted for caffe + im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) + + blobs = {'data': im_blob} + + if cfg.TRAIN.HAS_RPN: + assert len(im_scales) == 1, "Single batch only" + assert len(roidb) == 1, "Single batch only" + # gt boxes: (x1, y1, x2, y2, cls) + gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] + gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) + gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] + gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] + blobs['gt_boxes'] = gt_boxes + blobs['im_info'] = np.array( + [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], + dtype=np.float32) + else: # not using RPN + # Now, build the region of interest and label blobs + rois_blob = np.zeros((0, 5), dtype=np.float32) + labels_blob = np.zeros((0), dtype=np.float32) + bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) + bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) + # all_overlaps = [] + for im_i in xrange(num_images): + labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ + = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, + num_classes) + + # Add to RoIs blob + rois = _project_im_rois(im_rois, im_scales[im_i]) + batch_ind = im_i * np.ones((rois.shape[0], 1)) + rois_blob_this_image = np.hstack((batch_ind, rois)) + rois_blob = np.vstack((rois_blob, rois_blob_this_image)) + + # Add to labels, bbox targets, and bbox loss blobs + labels_blob = np.hstack((labels_blob, labels)) + bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) + bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) + # all_overlaps = np.hstack((all_overlaps, overlaps)) + + # For debug visualizations + # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) + + blobs['rois'] = rois_blob + blobs['labels'] = labels_blob + + if cfg.TRAIN.BBOX_REG: + 
blobs['bbox_targets'] = bbox_targets_blob + blobs['bbox_inside_weights'] = bbox_inside_blob + blobs['bbox_outside_weights'] = \ + np.array(bbox_inside_blob > 0).astype(np.float32) + + return blobs + +def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): + """Generate a random sample of RoIs comprising foreground and background + examples. + """ + # label = class RoI has max overlap with + labels = roidb['max_classes'] + overlaps = roidb['max_overlaps'] + rois = roidb['boxes'] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = int(np.minimum(fg_rois_per_image, fg_inds.size)) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice( + fg_inds, size=fg_rois_per_this_image, replace=False) + + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, + bg_inds.size) + # Sample foreground regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice( + bg_inds, size=bg_rois_per_this_image, replace=False) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds) + # Select sampled values from various arrays: + labels = labels[keep_inds] + # Clamp labels for the background RoIs to 0 + labels[fg_rois_per_this_image:] = 0 + overlaps = overlaps[keep_inds] + rois = rois[keep_inds] + + bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( + roidb['bbox_targets'][keep_inds, :], num_classes) + + return labels, overlaps, rois, bbox_targets, bbox_inside_weights + +def _get_image_blob(roidb, scale_inds): + """Builds an input blob from the images in the roidb at the specified + scales. + """ + num_images = len(roidb) + processed_ims = [] + im_scales = [] + for i in xrange(num_images): + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + target_size = cfg.TRAIN.SCALES[scale_inds[i]] + im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, + cfg.TRAIN.MAX_SIZE) + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_scales + +def _project_im_rois(im_rois, im_scale_factor): + """Project image RoIs into the rescaled training image.""" + rois = im_rois * im_scale_factor + return rois + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. 
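The _sample_rois logic above keeps a fixed RoI budget per image, topping up with backgrounds. One Python 3 caveat: `rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images`, left unchanged by this patch, is now true division, so the counts derived from it become floats and eventually reach `npr.choice(..., size=...)`, which newer numpy rejects; an explicit cast (or `//`) is the usual fix. Illustrative arithmetic only, all numbers made up:

rois_per_image = 128 // 2              # plain '/' would give 64.0 under Python 3
fg_rois_per_image = int(round(0.25 * rois_per_image))             # 16 foregrounds wanted
fg_rois_per_this_image = min(fg_rois_per_image, 10)               # image has only 10 fg RoIs
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image  # 54 backgrounds fill the batch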
+ + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_inside_weights (ndarray): N x 4K blob of loss weights + """ + clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS + return bbox_targets, bbox_inside_weights + +def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): + """Visualize a mini-batch for debugging.""" + import matplotlib.pyplot as plt + for i in xrange(rois_blob.shape[0]): + rois = rois_blob[i, :] + im_ind = rois[0] + roi = rois[1:] + im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() + im += cfg.PIXEL_MEANS + im = im[:, :, (2, 1, 0)] + im = im.astype(np.uint8) + cls = labels_blob[i] + plt.imshow(im) + print 'class: ', cls, ' overlap: ', overlaps[i] + plt.gca().add_patch( + plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], + roi[3] - roi[1], fill=False, + edgecolor='r', linewidth=3) + ) + plt.show() diff --git a/lib/roi_data_layer/minibatch2.py b/lib/roi_data_layer/minibatch2.py index 7e9a39c2..521e3d5e 100644 --- a/lib/roi_data_layer/minibatch2.py +++ b/lib/roi_data_layer/minibatch2.py @@ -55,7 +55,7 @@ def get_minibatch(roidb, num_classes): bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) # all_overlaps = [] - for im_i in xrange(num_images): + for im_i in range(num_images): labels, overlaps, im_rois, bbox_targets, bbox_inside_weights, sublabels \ = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes) @@ -106,7 +106,7 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = [] - for i in xrange(1, num_classes): + for i in range(1, num_classes): fg_inds.extend(np.where((labels == i) & (overlaps >= cfg.TRAIN.FG_THRESH))[0]) fg_inds = np.array(fg_inds) @@ -121,12 +121,12 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = [] - for i in xrange(1, num_classes): + for i in range(1, num_classes): bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] ) if len(bg_inds) < bg_rois_per_this_image: - for i in xrange(1, num_classes): + for i in range(1, num_classes): bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI))[0] ) if len(bg_inds) < bg_rois_per_this_image: @@ -173,7 +173,7 @@ def _get_image_blob(roidb, scale_inds): num_images = len(roidb) processed_ims = [] im_scales = [] - for i in xrange(num_images): + for i in range(num_images): im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: im = im[:, ::-1, :] @@ -200,7 +200,7 @@ def _get_image_blob_multiscale(roidb): processed_ims = [] im_scales = [] scales = cfg.TRAIN.SCALES_BASE - for i in xrange(num_images): + for i in range(num_images): im = cv2.imread(roidb[i]['image']) if roidb[i]['flipped']: im = im[:, ::-1, :] @@ -310,7 +310,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps, sublabels_blob, vi """Visualize a mini-batch for debugging.""" import matplotlib.pyplot as plt import math - for i in 
xrange(min(rois_blob.shape[0], 10)): + for i in range(min(rois_blob.shape[0], 10)): rois = rois_blob[i, :] im_ind = rois[0] roi = rois[1:] @@ -321,7 +321,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps, sublabels_blob, vi cls = labels_blob[i] subcls = sublabels_blob[i] plt.imshow(im) - print 'class: ', cls, ' subclass: ', subcls, ' overlap: ', overlaps[i] + print('class: ', cls, ' subclass: ', subcls, ' overlap: ', overlaps[i]) start = 3 * cls end = start + 3 diff --git a/lib/roi_data_layer/minibatch2.py.bak b/lib/roi_data_layer/minibatch2.py.bak new file mode 100644 index 00000000..7e9a39c2 --- /dev/null +++ b/lib/roi_data_layer/minibatch2.py.bak @@ -0,0 +1,336 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Compute minibatch blobs for training a Fast R-CNN network.""" + +import numpy as np +import numpy.random as npr +import cv2 +from fast_rcnn.config import cfg +from utils.blob import prep_im_for_blob, im_list_to_blob + +def get_minibatch(roidb, num_classes): + """Given a roidb, construct a minibatch sampled from it.""" + num_images = len(roidb) + + assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ + 'num_images ({}) must divide BATCH_SIZE ({})'. \ + format(num_images, cfg.TRAIN.BATCH_SIZE) + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images + fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) + + if cfg.IS_MULTISCALE: + im_blob, im_scales = _get_image_blob_multiscale(roidb) + else: + # Get the input image blob, formatted for caffe + # Sample random scales to use for each image in this batch + random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES_BASE), size=num_images) + im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) + + blobs = {'data': im_blob} + + if cfg.TRAIN.HAS_RPN: + assert len(im_scales) == 1, "Single batch only" + assert len(roidb) == 1, "Single batch only" + # gt boxes: (x1, y1, x2, y2, cls) + gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] + gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) + gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] + gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] + blobs['gt_boxes'] = gt_boxes + blobs['im_info'] = np.array( + [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], + dtype=np.float32) + + + else: + # Now, build the region of interest and label blobs + rois_blob = np.zeros((0, 5), dtype=np.float32) + labels_blob = np.zeros((0), dtype=np.float32) + bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) + bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) + + # all_overlaps = [] + for im_i in xrange(num_images): + labels, overlaps, im_rois, bbox_targets, bbox_inside_weights, sublabels \ + = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes) + + # Add to RoIs blob + if cfg.IS_MULTISCALE: + if cfg.IS_EXTRAPOLATING: + rois, levels = _project_im_rois_multiscale(im_rois, cfg.TRAIN.SCALES) + batch_ind = im_i * len(cfg.TRAIN.SCALES) + levels + else: + rois, levels = _project_im_rois_multiscale(im_rois, cfg.TRAIN.SCALES_BASE) + batch_ind = im_i * len(cfg.TRAIN.SCALES_BASE) + levels + else: + rois = _project_im_rois(im_rois, im_scales[im_i]) + batch_ind = im_i * np.ones((rois.shape[0], 1)) + + rois_blob_this_image = np.hstack((batch_ind, rois)) + rois_blob = np.vstack((rois_blob, 
rois_blob_this_image)) + + # Add to labels, bbox targets, and bbox loss blobs + labels_blob = np.hstack((labels_blob, labels)) + bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) + bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) + + # all_overlaps = np.hstack((all_overlaps, overlaps)) + + # For debug visualizations + # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps, sublabels_blob, view_targets_blob, view_inside_blob) + # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps, sublabels_blob) + + blobs['rois'] = rois_blob + blobs['labels'] = labels_blob + + if cfg.TRAIN.BBOX_REG: + blobs['bbox_targets'] = bbox_targets_blob + blobs['bbox_inside_weights'] = bbox_inside_blob + blobs['bbox_outside_weights'] = np.array(bbox_inside_blob > 0).astype(np.float32) + + return blobs + +def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): + """Generate a random sample of RoIs comprising foreground and background + examples. + """ + # label = class RoI has max overlap with + labels = roidb['max_classes'] + overlaps = roidb['max_overlaps'] + rois = roidb['boxes'] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = [] + for i in xrange(1, num_classes): + fg_inds.extend(np.where((labels == i) & (overlaps >= cfg.TRAIN.FG_THRESH))[0]) + fg_inds = np.array(fg_inds) + + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, + replace=False) + + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = [] + for i in xrange(1, num_classes): + bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] ) + + if len(bg_inds) < bg_rois_per_this_image: + for i in xrange(1, num_classes): + bg_inds.extend( np.where((labels == i) & (overlaps < cfg.TRAIN.BG_THRESH_HI))[0] ) + + if len(bg_inds) < bg_rois_per_this_image: + bg_inds.extend( np.where(overlaps < cfg.TRAIN.BG_THRESH_HI)[0] ) + bg_inds = np.array(bg_inds, dtype=np.int32) + + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, + bg_inds.size) + # Sample foreground regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, + replace=False) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds).astype(int) + # print '{} foregrounds and {} backgrounds'.format(fg_inds.size, bg_inds.size) + # Select sampled values from various arrays: + labels = labels[keep_inds] + # Clamp labels for the background RoIs to 0 + labels[fg_rois_per_this_image:] = 0 + overlaps = overlaps[keep_inds] + rois = rois[keep_inds] + sublabels = sublabels[keep_inds] + sublabels[fg_rois_per_this_image:] = 0 + + bbox_targets, bbox_loss_weights = \ + _get_bbox_regression_labels(roidb['bbox_targets'][keep_inds, :], + num_classes) + + if cfg.TRAIN.VIEWPOINT or cfg.TEST.VIEWPOINT: + viewpoints = viewpoints[keep_inds] + view_targets, view_loss_weights = \ + _get_viewpoint_estimation_labels(viewpoints, labels, num_classes) + return labels, overlaps, rois, bbox_targets, bbox_loss_weights, sublabels, 
view_targets, view_loss_weights + + return labels, overlaps, rois, bbox_targets, bbox_loss_weights, sublabels + +def _get_image_blob(roidb, scale_inds): + """Builds an input blob from the images in the roidb at the specified + scales. + """ + num_images = len(roidb) + processed_ims = [] + im_scales = [] + for i in xrange(num_images): + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + im_scale = cfg.TRAIN.SCALES_BASE[scale_inds[i]] + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_scales + + +def _get_image_blob_multiscale(roidb): + """Builds an input blob from the images in the roidb at multiscales. + """ + num_images = len(roidb) + processed_ims = [] + im_scales = [] + scales = cfg.TRAIN.SCALES_BASE + for i in xrange(num_images): + im = cv2.imread(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + for im_scale in scales: + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_scales + + +def _project_im_rois(im_rois, im_scale_factor): + """Project image RoIs into the rescaled training image.""" + rois = im_rois * im_scale_factor + return rois + + +def _project_im_rois_multiscale(im_rois, scales): + """Project image RoIs into the image pyramid built by _get_image_blob. + + Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + scales (list): scale factors as returned by _get_image_blob + + Returns: + rois (ndarray): R x 4 matrix of projected RoI coordinates + levels (list): image pyramid levels used by each projected RoI + """ + im_rois = im_rois.astype(np.float, copy=False) + scales = np.array(scales) + + if len(scales) > 1: + widths = im_rois[:, 2] - im_rois[:, 0] + 1 + heights = im_rois[:, 3] - im_rois[:, 1] + 1 + + areas = widths * heights + scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) + diff_areas = np.abs(scaled_areas - 224 * 224) + levels = diff_areas.argmin(axis=1)[:, np.newaxis] + else: + levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) + + rois = im_rois * scales[levels] + + return rois, levels + + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. + + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_loss_weights (ndarray): N x 4K blob of loss weights + """ + clss = bbox_target_data[:, 0] + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 
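_get_bbox_regression_labels above scatters each compact (cls, dx, dy, dw, dh) row into the 4*K-wide layout the network consumes. Note that this minibatch2.py copy keeps `clss = bbox_target_data[:, 0]` as floats, unchanged by the port, so `start = 4 * cls` yields float slice bounds, which Python 3 with newer numpy rejects (the minibatch.py version casts to uint16). A toy expansion with the cast made explicit; all values invented:

import numpy as np

num_classes = 3
row = np.array([2, 0.1, -0.2, 0.0, 0.3], dtype=np.float32)   # (cls, dx, dy, dw, dh)
bbox_targets = np.zeros(4 * num_classes, dtype=np.float32)
cls = int(row[0])                     # int cast keeps the slice bounds integral
bbox_targets[4 * cls:4 * cls + 4] = row[1:]
# Only the four columns belonging to class 2 are non-zero:
# [0, 0, 0, 0, 0, 0, 0, 0, 0.1, -0.2, 0, 0.3]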
+ return bbox_targets, bbox_loss_weights + + +def _get_viewpoint_estimation_labels(viewpoint_data, clss, num_classes): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. + + Returns: + view_target_data (ndarray): N x 3K blob of regression targets + view_loss_weights (ndarray): N x 3K blob of loss weights + """ + view_targets = np.zeros((clss.size, 3 * num_classes), dtype=np.float32) + view_loss_weights = np.zeros(view_targets.shape, dtype=np.float32) + inds = np.where( (clss > 0) & np.isfinite(viewpoint_data[:,0]) & np.isfinite(viewpoint_data[:,1]) & np.isfinite(viewpoint_data[:,2]) )[0] + for ind in inds: + cls = clss[ind] + start = 3 * cls + end = start + 3 + view_targets[ind, start:end] = viewpoint_data[ind, :] + view_loss_weights[ind, start:end] = [1., 1., 1.] + + assert not np.isinf(view_targets).any(), 'viewpoint undefined' + return view_targets, view_loss_weights + + +def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps, sublabels_blob, view_targets_blob=None, view_inside_blob=None): + """Visualize a mini-batch for debugging.""" + import matplotlib.pyplot as plt + import math + for i in xrange(min(rois_blob.shape[0], 10)): + rois = rois_blob[i, :] + im_ind = rois[0] + roi = rois[1:] + im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() + im += cfg.PIXEL_MEANS + im = im[:, :, (2, 1, 0)] + im = im.astype(np.uint8) + cls = labels_blob[i] + subcls = sublabels_blob[i] + plt.imshow(im) + print 'class: ', cls, ' subclass: ', subcls, ' overlap: ', overlaps[i] + + start = 3 * cls + end = start + 3 + # print 'view: ', view_targets_blob[i, start:end] * 180 / math.pi + # print 'view weights: ', view_inside_blob[i, start:end] + + plt.gca().add_patch( + plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], + roi[3] - roi[1], fill=False, + edgecolor='r', linewidth=3) + ) + plt.show() diff --git a/lib/roi_data_layer/roidb.py b/lib/roi_data_layer/roidb.py index 97a6a761..baa64387 100644 --- a/lib/roi_data_layer/roidb.py +++ b/lib/roi_data_layer/roidb.py @@ -21,9 +21,9 @@ def prepare_roidb(imdb): recorded. 
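The roi_data_layer/roidb.py hunks below convert the statistics printing and normalize the regression targets in place. A compact walk-through of the two steps involved, the max-overlap bookkeeping from prepare_roidb and the normalize/unnormalize round trip; all numbers invented:

import numpy as np

# prepare_roidb: rows are RoIs, columns are classes (column 0 = background).
gt_overlaps = np.array([[0.0, 0.8, 0.1],
                        [0.0, 0.0, 0.0]], dtype=np.float32)
max_overlaps = gt_overlaps.max(axis=1)    # [0.8, 0.0]
max_classes = gt_overlaps.argmax(axis=1)  # [1, 0]: zero overlap falls to background,
                                          # which the sanity checks below rely on

# add_bbox_regression_targets: normalization, and the inverse that
# prediction-time code must apply ("unnormalized and uncentered").
means = np.zeros(4)
stds = np.array([0.1, 0.1, 0.2, 0.2])
raw = np.array([0.05, -0.02, 0.1, 0.0])
normalized = (raw - means) / stds        # what the regressor is trained on
restored = normalized * stds + means     # what test-time code must compute
assert np.allclose(restored, raw)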
""" sizes = [PIL.Image.open(imdb.image_path_at(i)).size - for i in xrange(imdb.num_images)] + for i in range(imdb.num_images)] roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) roidb[i]['width'] = sizes[i][0] roidb[i]['height'] = sizes[i][1] @@ -51,7 +51,7 @@ def add_bbox_regression_targets(roidb): num_images = len(roidb) # Infer number of classes from the number of columns in gt_overlaps num_classes = roidb[0]['gt_overlaps'].shape[1] - for im_i in xrange(num_images): + for im_i in range(num_images): rois = roidb[im_i]['boxes'] max_overlaps = roidb[im_i]['max_overlaps'] max_classes = roidb[im_i]['max_classes'] @@ -70,9 +70,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -83,24 +83,24 @@ def add_bbox_regression_targets(roidb): means = sums / class_counts stds = np.sqrt(squared_sums / class_counts - means ** 2) - print 'bbox target means:' - print means - print means[1:, :].mean(axis=0) # ignore bg class - print 'bbox target stdevs:' - print stds - print stds[1:, :].mean(axis=0) # ignore bg class + print('bbox target means:') + print(means) + print(means[1:, :].mean(axis=0)) # ignore bg class + print('bbox target stdevs:') + print(stds) + print(stds[1:, :].mean(axis=0)) # ignore bg class # Normalize targets if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: - print "Normalizing targets" - for im_i in xrange(num_images): + print("Normalizing targets") + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] else: - print "NOT normalizing targets" + print("NOT normalizing targets") # These values will be needed for making predictions # (the predicts will need to be unnormalized and uncentered) diff --git a/lib/roi_data_layer/roidb.py.bak b/lib/roi_data_layer/roidb.py.bak new file mode 100644 index 00000000..97a6a761 --- /dev/null +++ b/lib/roi_data_layer/roidb.py.bak @@ -0,0 +1,133 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" + +import numpy as np +from fast_rcnn.config import cfg +from fast_rcnn.bbox_transform import bbox_transform +from utils.cython_bbox import bbox_overlaps +import PIL + +def prepare_roidb(imdb): + """Enrich the imdb's roidb by adding some derived quantities that + are useful for training. This function precomputes the maximum + overlap, taken over ground-truth boxes, between each ROI and + each ground-truth box. The class with maximum overlap is also + recorded. 
+ """ + sizes = [PIL.Image.open(imdb.image_path_at(i)).size + for i in xrange(imdb.num_images)] + roidb = imdb.roidb + for i in xrange(len(imdb.image_index)): + roidb[i]['image'] = imdb.image_path_at(i) + roidb[i]['width'] = sizes[i][0] + roidb[i]['height'] = sizes[i][1] + # need gt_overlaps as a dense array for argmax + gt_overlaps = roidb[i]['gt_overlaps'].toarray() + # max overlap with gt over classes (columns) + max_overlaps = gt_overlaps.max(axis=1) + # gt class that had the max overlap + max_classes = gt_overlaps.argmax(axis=1) + roidb[i]['max_classes'] = max_classes + roidb[i]['max_overlaps'] = max_overlaps + # sanity checks + # max overlap of 0 => class should be zero (background) + zero_inds = np.where(max_overlaps == 0)[0] + assert all(max_classes[zero_inds] == 0) + # max overlap > 0 => class should not be zero (must be a fg class) + nonzero_inds = np.where(max_overlaps > 0)[0] + assert all(max_classes[nonzero_inds] != 0) + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + assert len(roidb) > 0 + assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' + + num_images = len(roidb) + # Infer number of classes from the number of columns in gt_overlaps + num_classes = roidb[0]['gt_overlaps'].shape[1] + for im_i in xrange(num_images): + rois = roidb[im_i]['boxes'] + max_overlaps = roidb[im_i]['max_overlaps'] + max_classes = roidb[im_i]['max_classes'] + roidb[im_i]['bbox_targets'] = \ + _compute_targets(rois, max_overlaps, max_classes) + + if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: + # Use fixed / precomputed "means" and "stds" instead of empirical values + means = np.tile( + np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) + stds = np.tile( + np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) + else: + # Compute values needed for means and stds + # var(x) = E(x^2) - E(x)^2 + class_counts = np.zeros((num_classes, 1)) + cfg.EPS + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + if cls_inds.size > 0: + class_counts[cls] += cls_inds.size + sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) + squared_sums[cls, :] += \ + (targets[cls_inds, 1:] ** 2).sum(axis=0) + + means = sums / class_counts + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + print 'bbox target means:' + print means + print means[1:, :].mean(axis=0) # ignore bg class + print 'bbox target stdevs:' + print stds + print stds[1:, :].mean(axis=0) # ignore bg class + + # Normalize targets + if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: + print "Normalizing targets" + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] + roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] + else: + print "NOT normalizing targets" + + # These values will be needed for making predictions + # (the predicts will need to be unnormalized and uncentered) + return means.ravel(), stds.ravel() + +def _compute_targets(rois, overlaps, labels): + """Compute bounding-box regression targets for an image.""" + # Indices of ground-truth ROIs + gt_inds = np.where(overlaps == 1)[0] + if len(gt_inds) == 0: + # Bail if the image has no ground-truth ROIs + return np.zeros((rois.shape[0], 5), dtype=np.float32) + # 
Indices of examples for which we try to make predictions + ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = bbox_overlaps( + np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), + np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) + + # Find which gt ROI each ex ROI has max overlap with: + # this will be the ex ROI's gt target + gt_assignment = ex_gt_overlaps.argmax(axis=1) + gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + + targets = np.zeros((rois.shape[0], 5), dtype=np.float32) + targets[ex_inds, 0] = labels[ex_inds] + targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) + return targets diff --git a/lib/roi_data_layer/roidb2.py b/lib/roi_data_layer/roidb2.py index 6735f4f7..45f9573f 100644 --- a/lib/roi_data_layer/roidb2.py +++ b/lib/roi_data_layer/roidb2.py @@ -19,7 +19,7 @@ def prepare_roidb(imdb): recorded. """ roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) # need gt_overlaps as a dense array for argmax gt_overlaps = roidb[i]['gt_overlaps'].toarray() @@ -47,7 +47,7 @@ def add_bbox_regression_targets(roidb): num_images = len(roidb) # Infer number of classes from the number of columns in gt_overlaps num_classes = roidb[0]['gt_overlaps'].shape[1] - for im_i in xrange(num_images): + for im_i in range(num_images): rois = roidb[im_i]['boxes'] max_overlaps = roidb[im_i]['max_overlaps'] max_classes = roidb[im_i]['max_classes'] @@ -59,9 +59,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -72,9 +72,9 @@ def add_bbox_regression_targets(roidb): stds = np.sqrt(squared_sums / class_counts - means ** 2) # Normalize targets - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] if stds[cls, 0] != 0: @@ -93,7 +93,7 @@ def _compute_targets(rois, overlaps, labels, num_classes): gt_inds = np.where(overlaps == 1)[0] # Indices of examples for which we try to make predictions ex_inds = [] - for i in xrange(1, num_classes): + for i in range(1, num_classes): ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) # Get IoU overlap between each ex ROI and gt ROI diff --git a/lib/roi_data_layer/roidb2.py.bak b/lib/roi_data_layer/roidb2.py.bak new file mode 100644 index 00000000..6735f4f7 --- /dev/null +++ b/lib/roi_data_layer/roidb2.py.bak @@ -0,0 +1,133 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" + +import numpy as np +from fast_rcnn.config import cfg +import utils.cython_bbox + +def prepare_roidb(imdb): + """Enrich the imdb's roidb by adding some derived 
quantities that + are useful for training. This function precomputes the maximum + overlap, taken over ground-truth boxes, between each ROI and + each ground-truth box. The class with maximum overlap is also + recorded. + """ + roidb = imdb.roidb + for i in xrange(len(imdb.image_index)): + roidb[i]['image'] = imdb.image_path_at(i) + # need gt_overlaps as a dense array for argmax + gt_overlaps = roidb[i]['gt_overlaps'].toarray() + # max overlap with gt over classes (columns) + max_overlaps = gt_overlaps.max(axis=1) + # gt class that had the max overlap + max_classes = gt_overlaps.argmax(axis=1) + + roidb[i]['max_classes'] = max_classes + roidb[i]['max_overlaps'] = max_overlaps + + # sanity checks + # max overlap of 0 => class should be zero (background) + zero_inds = np.where(max_overlaps == 0)[0] + assert all(max_classes[zero_inds] == 0) + # max overlap > 0 => class should not be zero (must be a fg class) + nonzero_inds = np.where(max_overlaps > 0)[0] + assert all(max_classes[nonzero_inds] != 0) + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + assert len(roidb) > 0 + assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' + + num_images = len(roidb) + # Infer number of classes from the number of columns in gt_overlaps + num_classes = roidb[0]['gt_overlaps'].shape[1] + for im_i in xrange(num_images): + rois = roidb[im_i]['boxes'] + max_overlaps = roidb[im_i]['max_overlaps'] + max_classes = roidb[im_i]['max_classes'] + roidb[im_i]['bbox_targets'] = \ + _compute_targets(rois, max_overlaps, max_classes, num_classes) + + # Compute values needed for means and stds + # var(x) = E(x^2) - E(x)^2 + class_counts = np.zeros((num_classes, 1)) + cfg.EPS + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + if cls_inds.size > 0: + class_counts[cls] += cls_inds.size + sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) + squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0) + + means = sums / class_counts + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + # Normalize targets + for im_i in xrange(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in xrange(1, num_classes): + cls_inds = np.where(targets[:, 0] == cls)[0] + roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] + if stds[cls, 0] != 0: + roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] + + # These values will be needed for making predictions + # (the predicts will need to be unnormalized and uncentered) + return means.ravel(), stds.ravel() + +def _compute_targets(rois, overlaps, labels, num_classes): + """Compute bounding-box regression targets for an image.""" + # Ensure ROIs are floats + rois = rois.astype(np.float, copy=False) + + # Indices of ground-truth ROIs + gt_inds = np.where(overlaps == 1)[0] + # Indices of examples for which we try to make predictions + ex_inds = [] + for i in xrange(1, num_classes): + ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] ) + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :], + rois[gt_inds, :]) + + # Find which gt ROI each ex ROI has max overlap with: + # this will be the ex ROI's gt target + if ex_gt_overlaps.shape[0] != 0: + gt_assignment = ex_gt_overlaps.argmax(axis=1) + else: + gt_assignment = [] + 
gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS + ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths + ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights + + gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS + gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS + gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths + gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights + + targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = np.log(gt_widths / ex_widths) + targets_dh = np.log(gt_heights / ex_heights) + + targets = np.zeros((rois.shape[0], 5), dtype=np.float32) + targets[ex_inds, 0] = labels[ex_inds] + targets[ex_inds, 1] = targets_dx + targets[ex_inds, 2] = targets_dy + targets[ex_inds, 3] = targets_dw + targets[ex_inds, 4] = targets_dh + return targets diff --git a/lib/roi_pooling_layer/roi_pooling_op_grad.py b/lib/roi_pooling_layer/roi_pooling_op_grad.py index 5ec3d188..38d76e4f 100644 --- a/lib/roi_pooling_layer/roi_pooling_op_grad.py +++ b/lib/roi_pooling_layer/roi_pooling_op_grad.py @@ -1,6 +1,6 @@ import tensorflow as tf from tensorflow.python.framework import ops -import roi_pooling_op +import roi_pooling_layer.roi_pooling_op import pdb diff --git a/lib/rpn_msr/anchor_target_layer.py b/lib/rpn_msr/anchor_target_layer.py index fb0b487a..31183dcd 100644 --- a/lib/rpn_msr/anchor_target_layer.py +++ b/lib/rpn_msr/anchor_target_layer.py @@ -11,7 +11,7 @@ from fast_rcnn.config import cfg import numpy as np import numpy.random as npr -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from utils.cython_bbox import bbox_overlaps from fast_rcnn.bbox_transform import bbox_transform @@ -28,13 +28,13 @@ def setup(self, bottom, top): self._num_anchors = self._anchors.shape[0] if DEBUG: - print 'anchors:' - print self._anchors - print 'anchor shapes:' - print np.hstack(( + print('anchors:') + print(self._anchors) + print('anchor shapes:') + print(np.hstack(( self._anchors[:, 2::4] - self._anchors[:, 0::4], self._anchors[:, 3::4] - self._anchors[:, 1::4], - )) + ))) self._counts = cfg.EPS self._sums = np.zeros((1, 4)) self._squared_sums = np.zeros((1, 4)) @@ -50,7 +50,7 @@ def setup(self, bottom, top): height, width = bottom[0].data.shape[-2:] if DEBUG: - print 'AnchorTargetLayer: height', height, 'width', width + print('AnchorTargetLayer: height', height, 'width', width) A = self._num_anchors # labels @@ -82,12 +82,12 @@ def forward(self, bottom, top): im_info = bottom[2].data[0, :] if DEBUG: - print '' - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) - print 'height, width: ({}, {})'.format(height, width) - print 'rpn: gt_boxes.shape', gt_boxes.shape - print 'rpn: gt_boxes', gt_boxes + print('') + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) + print('height, width: ({}, {})'.format(height, width)) + print('rpn: gt_boxes.shape', gt_boxes.shape) + print('rpn: gt_boxes', gt_boxes) # 1. 
Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * self._feat_stride @@ -115,13 +115,13 @@ def forward(self, bottom, top): )[0] if DEBUG: - print 'total_anchors', total_anchors - print 'inds_inside', len(inds_inside) + print('total_anchors', total_anchors) + print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: - print 'anchors.shape', anchors.shape + print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) @@ -202,10 +202,10 @@ def forward(self, bottom, top): self._counts += np.sum(labels == 1) means = self._sums / self._counts stds = np.sqrt(self._squared_sums / self._counts - means ** 2) - print 'means:' - print means - print 'stdevs:' - print stds + print('means:') + print(means) + print('stdevs:') + print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) @@ -215,16 +215,16 @@ def forward(self, bottom, top): if DEBUG: if gt_boxes.shape[0] != 0: - print 'rpn: max max_overlap', np.max(max_overlaps) + print('rpn: max max_overlap', np.max(max_overlaps)) else: - print 'rpn: max max_overlap', 0 - print 'rpn: num_positive', np.sum(labels == 1) - print 'rpn: num_negative', np.sum(labels == 0) + print('rpn: max max_overlap', 0) + print('rpn: num_positive', np.sum(labels == 1)) + print('rpn: num_negative', np.sum(labels == 0)) self._fg_sum += np.sum(labels == 1) self._bg_sum += np.sum(labels == 0) self._count += 1 - print 'rpn: num_positive avg', self._fg_sum / self._count - print 'rpn: num_negative avg', self._bg_sum / self._count + print('rpn: num_positive avg', self._fg_sum / self._count) + print('rpn: num_negative avg', self._bg_sum / self._count) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) diff --git a/lib/rpn_msr/anchor_target_layer.py.bak b/lib/rpn_msr/anchor_target_layer.py.bak new file mode 100644 index 00000000..fb0b487a --- /dev/null +++ b/lib/rpn_msr/anchor_target_layer.py.bak @@ -0,0 +1,287 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import os +import caffe +import yaml +from fast_rcnn.config import cfg +import numpy as np +import numpy.random as npr +from generate_anchors import generate_anchors +from utils.cython_bbox import bbox_overlaps +from fast_rcnn.bbox_transform import bbox_transform + +DEBUG = False + +class AnchorTargetLayer(caffe.Layer): + """ + Assign anchors to ground-truth targets. Produces anchor classification + labels and bounding-box regression targets. 
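
The broadcasting in the shifted-anchor hunk above is compact enough to warrant a sketch: A base anchors of shape (1, A, 4) are added to K per-cell shifts of shape (K, 1, 4), producing one copy of the anchor set per feature-map cell. A toy-sized, runnable version:

```python
import numpy as np

feat_stride, height, width = 16, 2, 3                 # toy feature map
anchors = np.array([[-8., -8., 8., 8.]])              # A = 1 for clarity
shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()   # (K, 4)

A, K = anchors.shape[0], shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors.shape)        # (6, 4): every cell gets its own copy
print(all_anchors[4])           # [ 8.  8. 24. 24.] -- anchor at cell (1, 1)
```
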
+ """ + + def setup(self, bottom, top): + self._anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) + self._num_anchors = self._anchors.shape[0] + + if DEBUG: + print 'anchors:' + print self._anchors + print 'anchor shapes:' + print np.hstack(( + self._anchors[:, 2::4] - self._anchors[:, 0::4], + self._anchors[:, 3::4] - self._anchors[:, 1::4], + )) + self._counts = cfg.EPS + self._sums = np.zeros((1, 4)) + self._squared_sums = np.zeros((1, 4)) + self._fg_sum = 0 + self._bg_sum = 0 + self._count = 0 + + layer_params = yaml.load(self.param_str_) + self._feat_stride = layer_params['feat_stride'] + + # allow boxes to sit over the edge by a small amount + self._allowed_border = layer_params.get('allowed_border', 0) + + height, width = bottom[0].data.shape[-2:] + if DEBUG: + print 'AnchorTargetLayer: height', height, 'width', width + + A = self._num_anchors + # labels + top[0].reshape(1, 1, A * height, width) + # bbox_targets + top[1].reshape(1, A * 4, height, width) + # bbox_inside_weights + top[2].reshape(1, A * 4, height, width) + # bbox_outside_weights + top[3].reshape(1, A * 4, height, width) + + def forward(self, bottom, top): + # Algorithm: + # + # for each (H, W) location i + # generate 9 anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the 9 anchors + # filter out-of-image anchors + # measure GT overlap + + assert bottom[0].data.shape[0] == 1, \ + 'Only single item batches are supported' + + # map of shape (..., H, W) + height, width = bottom[0].data.shape[-2:] + # GT boxes (x1, y1, x2, y2, label) + gt_boxes = bottom[1].data + # im_info + im_info = bottom[2].data[0, :] + + if DEBUG: + print '' + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + print 'height, width: ({}, {})'.format(height, width) + print 'rpn: gt_boxes.shape', gt_boxes.shape + print 'rpn: gt_boxes', gt_boxes + + # 1. 
Generate proposals from bbox deltas and shifted anchors + shift_x = np.arange(0, width) * self._feat_stride + shift_y = np.arange(0, height) * self._feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = self._num_anchors + K = shifts.shape[0] + all_anchors = (self._anchors.reshape((1, A, 4)) + + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) + all_anchors = all_anchors.reshape((K * A, 4)) + total_anchors = int(K * A) + + # only keep anchors inside the image + inds_inside = np.where( + (all_anchors[:, 0] >= -self._allowed_border) & + (all_anchors[:, 1] >= -self._allowed_border) & + (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width + (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height + )[0] + + if DEBUG: + print 'total_anchors', total_anchors + print 'inds_inside', len(inds_inside) + + # keep only inside anchors + anchors = all_anchors[inds_inside, :] + if DEBUG: + print 'anchors.shape', anchors.shape + + # label: 1 is positive, 0 is negative, -1 is dont care + labels = np.empty((len(inds_inside), ), dtype=np.float32) + labels.fill(-1) + + # overlaps between the anchors and the gt boxes + # overlaps (ex, gt) + if gt_boxes.shape[0] != 0: + overlaps = bbox_overlaps( + np.ascontiguousarray(anchors, dtype=np.float), + np.ascontiguousarray(gt_boxes, dtype=np.float)) + argmax_overlaps = overlaps.argmax(axis=1) + max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] + gt_argmax_overlaps = overlaps.argmax(axis=0) + gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] + gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] + + if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels first so that positive labels can clobber them + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + + # fg label: for each gt, anchor with highest overlap + labels[gt_argmax_overlaps] = 1 + + # fg label: above threshold IOU + labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 + + if cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels last so that negative labels can clobber positives + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + else: + labels.fill(0) + + # subsample positive labels if we have too many + num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg: + disable_inds = npr.choice( + fg_inds, size=(len(fg_inds) - num_fg), replace=False) + labels[disable_inds] = -1 + + # subsample negative labels if we have too many + num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) + bg_inds = np.where(labels == 0)[0] + if len(bg_inds) > num_bg: + disable_inds = npr.choice( + bg_inds, size=(len(bg_inds) - num_bg), replace=False) + labels[disable_inds] = -1 + #print "was %s inds, disabling %s, now %s inds" % ( + #len(bg_inds), len(disable_inds), np.sum(labels == 0)) + + bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) + if gt_boxes.shape[0] != 0: + bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) + + bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) + + bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + if 
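
The labeling rules scattered through this function reduce to three assignments. A runnable sketch with toy overlaps; it omits the tie handling via `np.where(overlaps == gt_max_overlaps)` that the real code uses, and assumes the default non-clobbering order:

```python
import numpy as np

def assign_rpn_labels(overlaps, pos=0.7, neg=0.3):
    """1 = foreground, 0 = background, -1 = don't care."""
    labels = np.full(overlaps.shape[0], -1, dtype=np.float32)
    max_overlaps = overlaps.max(axis=1)
    labels[max_overlaps < neg] = 0       # background first, so positives
    labels[overlaps.argmax(axis=0)] = 1  # can clobber: best anchor per gt box
    labels[max_overlaps >= pos] = 1      # and any anchor above the threshold
    return labels

overlaps = np.array([[0.1], [0.5], [0.8]])    # 3 anchors, 1 gt box
print(assign_rpn_labels(overlaps))            # [ 0. -1.  1.]
```
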
cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: + # uniform weighting of examples (given non-uniform sampling) + num_examples = np.sum(labels >= 0) + positive_weights = np.ones((1, 4)) * 1.0 / num_examples + negative_weights = np.ones((1, 4)) * 1.0 / num_examples + else: + assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & + (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) + positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / + np.sum(labels == 1)) + negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / + np.sum(labels == 0)) + bbox_outside_weights[labels == 1, :] = positive_weights + bbox_outside_weights[labels == 0, :] = negative_weights + + if DEBUG: + self._sums += bbox_targets[labels == 1, :].sum(axis=0) + self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) + self._counts += np.sum(labels == 1) + means = self._sums / self._counts + stds = np.sqrt(self._squared_sums / self._counts - means ** 2) + print 'means:' + print means + print 'stdevs:' + print stds + + # map up to original set of anchors + labels = _unmap(labels, total_anchors, inds_inside, fill=-1) + bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) + bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) + bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) + + if DEBUG: + if gt_boxes.shape[0] != 0: + print 'rpn: max max_overlap', np.max(max_overlaps) + else: + print 'rpn: max max_overlap', 0 + print 'rpn: num_positive', np.sum(labels == 1) + print 'rpn: num_negative', np.sum(labels == 0) + self._fg_sum += np.sum(labels == 1) + self._bg_sum += np.sum(labels == 0) + self._count += 1 + print 'rpn: num_positive avg', self._fg_sum / self._count + print 'rpn: num_negative avg', self._bg_sum / self._count + + # labels + labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) + labels = labels.reshape((1, 1, A * height, width)) + top[0].reshape(*labels.shape) + top[0].data[...] = labels + + # bbox_targets + bbox_targets = bbox_targets \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + top[1].reshape(*bbox_targets.shape) + top[1].data[...] = bbox_targets + + # bbox_inside_weights + bbox_inside_weights = bbox_inside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + assert bbox_inside_weights.shape[2] == height + assert bbox_inside_weights.shape[3] == width + top[2].reshape(*bbox_inside_weights.shape) + top[2].data[...] = bbox_inside_weights + + # bbox_outside_weights + bbox_outside_weights = bbox_outside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + assert bbox_outside_weights.shape[2] == height + assert bbox_outside_weights.shape[3] == width + top[3].reshape(*bbox_outside_weights.shape) + top[3].data[...] 
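
With the default `RPN_POSITIVE_WEIGHT = -1.0`, every sampled example (foreground or background) gets the same 1/num_examples weight, so the regression loss is normalized by the number of anchors that actually participate rather than by the full grid. Sketch:

```python
import numpy as np

labels = np.array([1., 1., 0., 0., 0., -1.])           # -1 anchors are ignored
outside = np.zeros((labels.size, 4), dtype=np.float32)
num_examples = np.sum(labels >= 0)                     # 5 participating anchors
outside[labels == 1, :] = 1.0 / num_examples
outside[labels == 0, :] = 1.0 / num_examples
print(outside.sum())   # 4.0: 5 examples * 4 coords * (1/5)
```
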
= bbox_outside_weights + + def backward(self, top, propagate_down, bottom): + """This layer does not propagate gradients.""" + pass + + def reshape(self, bottom, top): + """Reshaping happens during the call to forward.""" + pass + + +def _unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if len(data.shape) == 1: + ret = np.empty((count, ), dtype=np.float32) + ret.fill(fill) + ret[inds] = data + else: + ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) + ret.fill(fill) + ret[inds, :] = data + return ret + + +def _compute_targets(ex_rois, gt_rois): + """Compute bounding-box regression targets for an image.""" + + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 5 + + return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) diff --git a/lib/rpn_msr/anchor_target_layer_tf.py b/lib/rpn_msr/anchor_target_layer_tf.py index 9965a7a1..60d8ee0b 100644 --- a/lib/rpn_msr/anchor_target_layer_tf.py +++ b/lib/rpn_msr/anchor_target_layer_tf.py @@ -10,7 +10,7 @@ from fast_rcnn.config import cfg import numpy as np import numpy.random as npr -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from utils.cython_bbox import bbox_overlaps from fast_rcnn.bbox_transform import bbox_transform import pdb @@ -26,13 +26,13 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ _num_anchors = _anchors.shape[0] if DEBUG: - print 'anchors:' - print _anchors - print 'anchor shapes:' - print np.hstack(( + print('anchors:') + print(_anchors) + print('anchor shapes:') + print(np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], - )) + ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) @@ -62,13 +62,13 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ height, width = rpn_cls_score.shape[1:3] if DEBUG: - print 'AnchorTargetLayer: height', height, 'width', width - print '' - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) - print 'height, width: ({}, {})'.format(height, width) - print 'rpn: gt_boxes.shape', gt_boxes.shape - print 'rpn: gt_boxes', gt_boxes + print('AnchorTargetLayer: height', height, 'width', width) + print('') + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) + print('height, width: ({}, {})'.format(height, width)) + print('rpn: gt_boxes.shape', gt_boxes.shape) + print('rpn: gt_boxes', gt_boxes) # 1. 
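
`_unmap` is the inverse of the `inds_inside` filtering: values computed on the inside-image subset are scattered back onto the full anchor grid, with `fill` (-1 for labels, 0 for targets and weights) everywhere else. An equivalent compact sketch:

```python
import numpy as np

def unmap(data, count, inds, fill=0):
    ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data            # works for 1-D labels and 2-D targets alike
    return ret

print(unmap(np.array([1., 2.]), count=4, inds=np.array([1, 3]), fill=-1))
# [-1.  1. -1.  2.]
```
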
Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride @@ -96,13 +96,13 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ )[0] if DEBUG: - print 'total_anchors', total_anchors - print 'inds_inside', len(inds_inside) + print('total_anchors', total_anchors) + print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: - print 'anchors.shape', anchors.shape + print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) @@ -180,10 +180,10 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means ** 2) - print 'means:' - print means - print 'stdevs:' - print stds + print('means:') + print(means) + print('stdevs:') + print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) @@ -192,14 +192,14 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [ bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) if DEBUG: - print 'rpn: max max_overlap', np.max(max_overlaps) - print 'rpn: num_positive', np.sum(labels == 1) - print 'rpn: num_negative', np.sum(labels == 0) + print('rpn: max max_overlap', np.max(max_overlaps)) + print('rpn: num_positive', np.sum(labels == 1)) + print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 - print 'rpn: num_positive avg', _fg_sum / _count - print 'rpn: num_negative avg', _bg_sum / _count + print('rpn: num_positive avg', _fg_sum / _count) + print('rpn: num_negative avg', _bg_sum / _count) # labels #pdb.set_trace() diff --git a/lib/rpn_msr/anchor_target_layer_tf.py.bak b/lib/rpn_msr/anchor_target_layer_tf.py.bak new file mode 100644 index 00000000..9965a7a1 --- /dev/null +++ b/lib/rpn_msr/anchor_target_layer_tf.py.bak @@ -0,0 +1,256 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import os +import yaml +from fast_rcnn.config import cfg +import numpy as np +import numpy.random as npr +from generate_anchors import generate_anchors +from utils.cython_bbox import bbox_overlaps +from fast_rcnn.bbox_transform import bbox_transform +import pdb + +DEBUG = False + +def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [16,], anchor_scales = [4 ,8, 16, 32]): + """ + Assign anchors to ground-truth targets. Produces anchor classification + labels and bounding-box regression targets. 
+ """ + _anchors = generate_anchors(scales=np.array(anchor_scales)) + _num_anchors = _anchors.shape[0] + + if DEBUG: + print 'anchors:' + print _anchors + print 'anchor shapes:' + print np.hstack(( + _anchors[:, 2::4] - _anchors[:, 0::4], + _anchors[:, 3::4] - _anchors[:, 1::4], + )) + _counts = cfg.EPS + _sums = np.zeros((1, 4)) + _squared_sums = np.zeros((1, 4)) + _fg_sum = 0 + _bg_sum = 0 + _count = 0 + + # allow boxes to sit over the edge by a small amount + _allowed_border = 0 + # map of shape (..., H, W) + #height, width = rpn_cls_score.shape[1:3] + + im_info = im_info[0] + + # Algorithm: + # + # for each (H, W) location i + # generate 9 anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the 9 anchors + # filter out-of-image anchors + # measure GT overlap + + assert rpn_cls_score.shape[0] == 1, \ + 'Only single item batches are supported' + + # map of shape (..., H, W) + height, width = rpn_cls_score.shape[1:3] + + if DEBUG: + print 'AnchorTargetLayer: height', height, 'width', width + print '' + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + print 'height, width: ({}, {})'.format(height, width) + print 'rpn: gt_boxes.shape', gt_boxes.shape + print 'rpn: gt_boxes', gt_boxes + + # 1. Generate proposals from bbox deltas and shifted anchors + shift_x = np.arange(0, width) * _feat_stride + shift_y = np.arange(0, height) * _feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = _num_anchors + K = shifts.shape[0] + all_anchors = (_anchors.reshape((1, A, 4)) + + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) + all_anchors = all_anchors.reshape((K * A, 4)) + total_anchors = int(K * A) + + # only keep anchors inside the image + inds_inside = np.where( + (all_anchors[:, 0] >= -_allowed_border) & + (all_anchors[:, 1] >= -_allowed_border) & + (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width + (all_anchors[:, 3] < im_info[0] + _allowed_border) # height + )[0] + + if DEBUG: + print 'total_anchors', total_anchors + print 'inds_inside', len(inds_inside) + + # keep only inside anchors + anchors = all_anchors[inds_inside, :] + if DEBUG: + print 'anchors.shape', anchors.shape + + # label: 1 is positive, 0 is negative, -1 is dont care + labels = np.empty((len(inds_inside), ), dtype=np.float32) + labels.fill(-1) + + # overlaps between the anchors and the gt boxes + # overlaps (ex, gt) + overlaps = bbox_overlaps( + np.ascontiguousarray(anchors, dtype=np.float), + np.ascontiguousarray(gt_boxes, dtype=np.float)) + argmax_overlaps = overlaps.argmax(axis=1) + max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] + gt_argmax_overlaps = overlaps.argmax(axis=0) + gt_max_overlaps = overlaps[gt_argmax_overlaps, + np.arange(overlaps.shape[1])] + gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] + + if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels first so that positive labels can clobber them + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + + # fg label: for each gt, anchor with highest overlap + labels[gt_argmax_overlaps] = 1 + + # fg label: above threshold IOU + labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 + + if cfg.TRAIN.RPN_CLOBBER_POSITIVES: + # assign bg labels last so that negative labels can 
clobber positives + labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 + + # subsample positive labels if we have too many + num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg: + disable_inds = npr.choice( + fg_inds, size=(len(fg_inds) - num_fg), replace=False) + labels[disable_inds] = -1 + + # subsample negative labels if we have too many + num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) + bg_inds = np.where(labels == 0)[0] + if len(bg_inds) > num_bg: + disable_inds = npr.choice( + bg_inds, size=(len(bg_inds) - num_bg), replace=False) + labels[disable_inds] = -1 + #print "was %s inds, disabling %s, now %s inds" % ( + #len(bg_inds), len(disable_inds), np.sum(labels == 0)) + + bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) + bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) + + bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) + + bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) + if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: + # uniform weighting of examples (given non-uniform sampling) + num_examples = np.sum(labels >= 0) + positive_weights = np.ones((1, 4)) * 1.0 / num_examples + negative_weights = np.ones((1, 4)) * 1.0 / num_examples + else: + assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & + (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) + positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / + np.sum(labels == 1)) + negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / + np.sum(labels == 0)) + bbox_outside_weights[labels == 1, :] = positive_weights + bbox_outside_weights[labels == 0, :] = negative_weights + + if DEBUG: + _sums += bbox_targets[labels == 1, :].sum(axis=0) + _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) + _counts += np.sum(labels == 1) + means = _sums / _counts + stds = np.sqrt(_squared_sums / _counts - means ** 2) + print 'means:' + print means + print 'stdevs:' + print stds + + # map up to original set of anchors + labels = _unmap(labels, total_anchors, inds_inside, fill=-1) + bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) + bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) + bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) + + if DEBUG: + print 'rpn: max max_overlap', np.max(max_overlaps) + print 'rpn: num_positive', np.sum(labels == 1) + print 'rpn: num_negative', np.sum(labels == 0) + _fg_sum += np.sum(labels == 1) + _bg_sum += np.sum(labels == 0) + _count += 1 + print 'rpn: num_positive avg', _fg_sum / _count + print 'rpn: num_negative avg', _bg_sum / _count + + # labels + #pdb.set_trace() + labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) + labels = labels.reshape((1, 1, A * height, width)) + rpn_labels = labels + + # bbox_targets + bbox_targets = bbox_targets \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + + rpn_bbox_targets = bbox_targets + # bbox_inside_weights + bbox_inside_weights = bbox_inside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + #assert bbox_inside_weights.shape[2] == height + #assert bbox_inside_weights.shape[3] == width + + rpn_bbox_inside_weights = bbox_inside_weights + + # bbox_outside_weights + bbox_outside_weights = bbox_outside_weights \ + .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + #assert 
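
Both subsampling branches above follow the same pattern: when there are more candidates than the budget allows, a random subset is disabled by setting its label to -1 rather than removing rows, so array shapes stay aligned with the anchor grid. Runnable sketch of the positive branch (toy budget; the real one is `RPN_FG_FRACTION * RPN_BATCHSIZE`):

```python
import numpy as np
import numpy.random as npr

labels = np.array([1., 1., 1., 1., 0., 0., -1.])
num_fg = 2
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    disable = npr.choice(fg_inds, size=len(fg_inds) - num_fg, replace=False)
    labels[disable] = -1                   # keep the shape; mark as ignored
print(int((labels == 1).sum()))            # 2
```
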
bbox_outside_weights.shape[2] == height + #assert bbox_outside_weights.shape[3] == width + + rpn_bbox_outside_weights = bbox_outside_weights + + return rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights + + + +def _unmap(data, count, inds, fill=0): + """ Unmap a subset of item (data) back to the original set of items (of + size count) """ + if len(data.shape) == 1: + ret = np.empty((count, ), dtype=np.float32) + ret.fill(fill) + ret[inds] = data + else: + ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) + ret.fill(fill) + ret[inds, :] = data + return ret + + +def _compute_targets(ex_rois, gt_rois): + """Compute bounding-box regression targets for an image.""" + + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 5 + + return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) diff --git a/lib/rpn_msr/generate.py b/lib/rpn_msr/generate.py index 9e8cd53c..f1ce390a 100644 --- a/lib/rpn_msr/generate.py +++ b/lib/rpn_msr/generate.py @@ -92,14 +92,14 @@ def imdb_proposals(net, imdb): """Generate RPN proposals on all images in an imdb.""" _t = Timer() - imdb_boxes = [[] for _ in xrange(imdb.num_images)] - for i in xrange(imdb.num_images): + imdb_boxes = [[] for _ in range(imdb.num_images)] + for i in range(imdb.num_images): im = cv2.imread(imdb.image_path_at(i)) _t.tic() imdb_boxes[i], scores = im_proposals(net, im) _t.toc() - print 'im_proposals: {:d}/{:d} {:.3f}s' \ - .format(i + 1, imdb.num_images, _t.average_time) + print('im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time)) if 0: dets = np.hstack((imdb_boxes[i], scores)) # from IPython import embed; embed() @@ -112,14 +112,14 @@ def imdb_proposals_det(net, imdb): """Generate RPN proposals on all images in an imdb.""" _t = Timer() - imdb_boxes = [[] for _ in xrange(imdb.num_images)] - for i in xrange(imdb.num_images): + imdb_boxes = [[] for _ in range(imdb.num_images)] + for i in range(imdb.num_images): im = cv2.imread(imdb.image_path_at(i)) _t.tic() boxes, scores = im_proposals(net, im) _t.toc() - print 'im_proposals: {:d}/{:d} {:.3f}s' \ - .format(i + 1, imdb.num_images, _t.average_time) + print('im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time)) dets = np.hstack((boxes, scores)) imdb_boxes[i] = dets diff --git a/lib/rpn_msr/generate.py.bak b/lib/rpn_msr/generate.py.bak new file mode 100644 index 00000000..9e8cd53c --- /dev/null +++ b/lib/rpn_msr/generate.py.bak @@ -0,0 +1,131 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +from fast_rcnn.config import cfg +from utils.blob import im_list_to_blob +from utils.timer import Timer +import numpy as np +import cv2 + +def _vis_proposals(im, dets, thresh=0.5): + """Draw detected bounding boxes.""" + inds = np.where(dets[:, -1] >= thresh)[0] + if len(inds) == 0: + return + + class_name = 'obj' + im = im[:, :, (2, 1, 0)] + fig, ax = plt.subplots(figsize=(12, 12)) + ax.imshow(im, aspect='equal') + for i in inds: + bbox = dets[i, :4] + score = dets[i, -1] + + ax.add_patch( + plt.Rectangle((bbox[0], bbox[1]), + bbox[2] - bbox[0], + bbox[3] - bbox[1], fill=False, + edgecolor='red', linewidth=3.5) + ) + ax.text(bbox[0], bbox[1] - 2, + '{:s} {:.3f}'.format(class_name, score), + bbox=dict(facecolor='blue', alpha=0.5), + 
fontsize=14, color='white') + + ax.set_title(('{} detections with ' + 'p({} | box) >= {:.1f}').format(class_name, class_name, + thresh), + fontsize=14) + plt.axis('off') + plt.tight_layout() + plt.draw() + +def _get_image_blob(im): + """Converts an image into a network input. + + Arguments: + im (ndarray): a color image in BGR order + + Returns: + blob (ndarray): a data blob holding an image pyramid + im_scale_factors (list): list of image scales (relative to im) used + in the image pyramid + """ + im_orig = im.astype(np.float32, copy=True) + im_orig -= cfg.PIXEL_MEANS + + processed_ims = [] + + assert len(cfg.TEST.SCALES_BASE) == 1 + im_scale = cfg.TRAIN.SCALES_BASE[0] + + im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR) + im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] + processed_ims.append(im) + + # Create a blob to hold the input images + blob = im_list_to_blob(processed_ims) + + return blob, im_info + +def im_proposals(net, im): + """Generate RPN proposals on a single image.""" + blobs = {} + blobs['data'], blobs['im_info'] = _get_image_blob(im) + net.blobs['data'].reshape(*(blobs['data'].shape)) + net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) + blobs_out = net.forward( + data=blobs['data'].astype(np.float32, copy=False), + im_info=blobs['im_info'].astype(np.float32, copy=False)) + + scale = blobs['im_info'][0, 2] + boxes = blobs_out['rois'][:, 1:].copy() / scale + scores = blobs_out['scores'].copy() + return boxes, scores + +def imdb_proposals(net, imdb): + """Generate RPN proposals on all images in an imdb.""" + + _t = Timer() + imdb_boxes = [[] for _ in xrange(imdb.num_images)] + for i in xrange(imdb.num_images): + im = cv2.imread(imdb.image_path_at(i)) + _t.tic() + imdb_boxes[i], scores = im_proposals(net, im) + _t.toc() + print 'im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time) + if 0: + dets = np.hstack((imdb_boxes[i], scores)) + # from IPython import embed; embed() + _vis_proposals(im, dets[:3, :], thresh=0.9) + plt.show() + + return imdb_boxes + +def imdb_proposals_det(net, imdb): + """Generate RPN proposals on all images in an imdb.""" + + _t = Timer() + imdb_boxes = [[] for _ in xrange(imdb.num_images)] + for i in xrange(imdb.num_images): + im = cv2.imread(imdb.image_path_at(i)) + _t.tic() + boxes, scores = im_proposals(net, im) + _t.toc() + print 'im_proposals: {:d}/{:d} {:.3f}s' \ + .format(i + 1, imdb.num_images, _t.average_time) + dets = np.hstack((boxes, scores)) + imdb_boxes[i] = dets + + if 0: + # from IPython import embed; embed() + _vis_proposals(im, dets[:3, :], thresh=0.9) + plt.show() + + return imdb_boxes diff --git a/lib/rpn_msr/generate_anchors.py b/lib/rpn_msr/generate_anchors.py index 1125a801..fb686df0 100644 --- a/lib/rpn_msr/generate_anchors.py +++ b/lib/rpn_msr/generate_anchors.py @@ -44,7 +44,7 @@ def generate_anchors(base_size=16, ratios=[0.5, 1, 2], base_anchor = np.array([1, 1, base_size, base_size]) - 1 ratio_anchors = _ratio_enum(base_anchor, ratios) anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) - for i in xrange(ratio_anchors.shape[0])]) + for i in range(ratio_anchors.shape[0])]) return anchors def _whctrs(anchor): @@ -100,6 +100,6 @@ def _scale_enum(anchor, scales): import time t = time.time() a = generate_anchors() - print time.time() - t - print a + print(time.time() - t) + print(a) from IPython import embed; embed() diff --git a/lib/rpn_msr/generate_anchors.py.bak b/lib/rpn_msr/generate_anchors.py.bak new file mode 100644 
index 00000000..1125a801 --- /dev/null +++ b/lib/rpn_msr/generate_anchors.py.bak @@ -0,0 +1,105 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import numpy as np + +# Verify that we compute the same anchors as Shaoqing's matlab implementation: +# +# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat +# >> anchors +# +# anchors = +# +# -83 -39 100 56 +# -175 -87 192 104 +# -359 -183 376 200 +# -55 -55 72 72 +# -119 -119 136 136 +# -247 -247 264 264 +# -35 -79 52 96 +# -79 -167 96 184 +# -167 -343 184 360 + +#array([[ -83., -39., 100., 56.], +# [-175., -87., 192., 104.], +# [-359., -183., 376., 200.], +# [ -55., -55., 72., 72.], +# [-119., -119., 136., 136.], +# [-247., -247., 264., 264.], +# [ -35., -79., 52., 96.], +# [ -79., -167., 96., 184.], +# [-167., -343., 184., 360.]]) + +def generate_anchors(base_size=16, ratios=[0.5, 1, 2], + scales=2**np.arange(3, 6)): + """ + Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, 15, 15) window. + """ + + base_anchor = np.array([1, 1, base_size, base_size]) - 1 + ratio_anchors = _ratio_enum(base_anchor, ratios) + anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) + for i in xrange(ratio_anchors.shape[0])]) + return anchors + +def _whctrs(anchor): + """ + Return width, height, x center, and y center for an anchor (window). + """ + + w = anchor[2] - anchor[0] + 1 + h = anchor[3] - anchor[1] + 1 + x_ctr = anchor[0] + 0.5 * (w - 1) + y_ctr = anchor[1] + 0.5 * (h - 1) + return w, h, x_ctr, y_ctr + +def _mkanchors(ws, hs, x_ctr, y_ctr): + """ + Given a vector of widths (ws) and heights (hs) around a center + (x_ctr, y_ctr), output a set of anchors (windows). + """ + + ws = ws[:, np.newaxis] + hs = hs[:, np.newaxis] + anchors = np.hstack((x_ctr - 0.5 * (ws - 1), + y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), + y_ctr + 0.5 * (hs - 1))) + return anchors + +def _ratio_enum(anchor, ratios): + """ + Enumerate a set of anchors for each aspect ratio wrt an anchor. + """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + size = w * h + size_ratios = size / ratios + ws = np.round(np.sqrt(size_ratios)) + hs = np.round(ws * ratios) + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + +def _scale_enum(anchor, scales): + """ + Enumerate a set of anchors for each scale wrt an anchor. 
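
`_ratio_enum` keeps the anchor's area roughly fixed while changing its aspect ratio: solve w*h = size with h = ratio*w, i.e. w = sqrt(size / ratio), then round. A quick check against the 16x16 reference window:

```python
import numpy as np

base_w = base_h = 16                      # the (0, 0, 15, 15) reference window
size = base_w * base_h                    # 256
ratios = np.array([0.5, 1.0, 2.0])
ws = np.round(np.sqrt(size / ratios))     # [23. 16. 11.]
hs = np.round(ws * ratios)                # [12. 16. 22.]
print(ws * hs)                            # [276. 256. 242.] -- area ~preserved
```
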
+ """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + ws = w * scales + hs = h * scales + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + +if __name__ == '__main__': + import time + t = time.time() + a = generate_anchors() + print time.time() - t + print a + from IPython import embed; embed() diff --git a/lib/rpn_msr/proposal_layer.py b/lib/rpn_msr/proposal_layer.py index 2b879bc5..2ccf4b26 100644 --- a/lib/rpn_msr/proposal_layer.py +++ b/lib/rpn_msr/proposal_layer.py @@ -9,7 +9,7 @@ import numpy as np import yaml from fast_rcnn.config import cfg -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes from fast_rcnn.nms_wrapper import nms @@ -30,9 +30,9 @@ def setup(self, bottom, top): self._num_anchors = self._anchors.shape[0] if DEBUG: - print 'feat_stride: {}'.format(self._feat_stride) - print 'anchors:' - print self._anchors + print('feat_stride: {}'.format(self._feat_stride)) + print('anchors:') + print(self._anchors) # rois blob: holds R regions of interest, each is a 5-tuple # (n, x1, y1, x2, y2) specifying an image batch index n and a @@ -73,14 +73,14 @@ def forward(self, bottom, top): im_info = bottom[2].data[0, :] if DEBUG: - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: - print 'score map size: {}'.format(scores.shape) + print('score map size: {}'.format(scores.shape)) # Enumerate all shifts shift_x = np.arange(0, width) * self._feat_stride @@ -145,7 +145,7 @@ def forward(self, bottom, top): keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] - print scores.shape + print(scores.shape) # Output rois blob # Our RPN implementation only supports a single input image, so all diff --git a/lib/rpn_msr/proposal_layer.py.bak b/lib/rpn_msr/proposal_layer.py.bak new file mode 100644 index 00000000..2b879bc5 --- /dev/null +++ b/lib/rpn_msr/proposal_layer.py.bak @@ -0,0 +1,176 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import caffe +import numpy as np +import yaml +from fast_rcnn.config import cfg +from generate_anchors import generate_anchors +from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes +from fast_rcnn.nms_wrapper import nms + +DEBUG = False + +class ProposalLayer(caffe.Layer): + """ + Outputs object detection proposals by applying estimated bounding-box + transformations to a set of regular boxes (called "anchors"). 
+ """ + + def setup(self, bottom, top): + # parse the layer parameter string, which must be valid YAML + layer_params = yaml.load(self.param_str_) + + self._feat_stride = layer_params['feat_stride'] + self._anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) + self._num_anchors = self._anchors.shape[0] + + if DEBUG: + print 'feat_stride: {}'.format(self._feat_stride) + print 'anchors:' + print self._anchors + + # rois blob: holds R regions of interest, each is a 5-tuple + # (n, x1, y1, x2, y2) specifying an image batch index n and a + # rectangle (x1, y1, x2, y2) + top[0].reshape(1, 5) + + # scores blob: holds scores for R regions of interest + if len(top) > 1: + top[1].reshape(1, 1, 1, 1) + + def forward(self, bottom, top): + # Algorithm: + # + # for each (H, W) location i + # generate A anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the A anchors + # clip predicted boxes to image + # remove predicted boxes with either height or width < threshold + # sort all (proposal, score) pairs by score from highest to lowest + # take top pre_nms_topN proposals before NMS + # apply NMS with threshold 0.7 to remaining proposals + # take after_nms_topN proposals after NMS + # return the top proposals (-> RoIs top, scores top) + + assert bottom[0].data.shape[0] == 1, \ + 'Only single item batches are supported' + # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' + cfg_key = 'TEST' + pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N + post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + nms_thresh = cfg[cfg_key].RPN_NMS_THRESH + min_size = cfg[cfg_key].RPN_MIN_SIZE + + # the first set of _num_anchors channels are bg probs + # the second set are the fg probs, which we want + scores = bottom[0].data[:, self._num_anchors:, :, :] + bbox_deltas = bottom[1].data + im_info = bottom[2].data[0, :] + + if DEBUG: + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + + # 1. Generate proposals from bbox deltas and shifted anchors + height, width = scores.shape[-2:] + + if DEBUG: + print 'score map size: {}'.format(scores.shape) + + # Enumerate all shifts + shift_x = np.arange(0, width) * self._feat_stride + shift_y = np.arange(0, height) * self._feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + + # Enumerate all shifted anchors: + # + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = self._num_anchors + K = shifts.shape[0] + anchors = self._anchors.reshape((1, A, 4)) + \ + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) + anchors = anchors.reshape((K * A, 4)) + + # Transpose and reshape predicted bbox transformations to get them + # into the same order as the anchors: + # + # bbox deltas will be (1, 4 * A, H, W) format + # transpose to (1, H, W, 4 * A) + # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) + # in slowest to fastest order + bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) + + # Same story for the scores: + # + # scores are (1, A, H, W) format + # transpose to (1, H, W, A) + # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) + scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) + + # Convert anchors into proposals via bbox transformations + proposals = bbox_transform_inv(anchors, bbox_deltas) + + # 2. 
clip predicted boxes to image + proposals = clip_boxes(proposals, im_info[:2]) + + # 3. remove predicted boxes with either height or width < threshold + # (NOTE: convert min_size to input image scale stored in im_info[2]) + keep = _filter_boxes(proposals, min_size * im_info[2]) + proposals = proposals[keep, :] + scores = scores[keep] + + # 4. sort all (proposal, score) pairs by score from highest to lowest + # 5. take top pre_nms_topN (e.g. 6000) + order = scores.ravel().argsort()[::-1] + if pre_nms_topN > 0: + order = order[:pre_nms_topN] + proposals = proposals[order, :] + scores = scores[order] + + # 6. apply nms (e.g. threshold = 0.7) + # 7. take after_nms_topN (e.g. 300) + # 8. return the top proposals (-> RoIs top) + keep = nms(np.hstack((proposals, scores)), nms_thresh) + if post_nms_topN > 0: + keep = keep[:post_nms_topN] + proposals = proposals[keep, :] + scores = scores[keep] + print scores.shape + + # Output rois blob + # Our RPN implementation only supports a single input image, so all + # batch inds are 0 + batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) + blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) + top[0].reshape(*(blob.shape)) + top[0].data[...] = blob + + # [Optional] output scores blob + if len(top) > 1: + top[1].reshape(*(scores.shape)) + top[1].data[...] = scores + + def backward(self, top, propagate_down, bottom): + """This layer does not propagate gradients.""" + pass + + def reshape(self, bottom, top): + """Reshaping happens during the call to forward.""" + pass + +def _filter_boxes(boxes, min_size): + """Remove all boxes with any side smaller than min_size.""" + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep diff --git a/lib/rpn_msr/proposal_layer_tf.py b/lib/rpn_msr/proposal_layer_tf.py index 13984090..4012a7a0 100644 --- a/lib/rpn_msr/proposal_layer_tf.py +++ b/lib/rpn_msr/proposal_layer_tf.py @@ -8,7 +8,7 @@ import numpy as np import yaml from fast_rcnn.config import cfg -from generate_anchors import generate_anchors +from .generate_anchors import generate_anchors from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes from fast_rcnn.nms_wrapper import nms import pdb @@ -33,6 +33,7 @@ def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stri # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) #layer_params = yaml.load(self.param_str_) + cfg_key = cfg_key.decode("utf-8") _anchors = generate_anchors(scales=np.array(anchor_scales)) _num_anchors = _anchors.shape[0] rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) @@ -57,14 +58,14 @@ def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stri #im_info = bottom[2].data[0, :] if DEBUG: - print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) - print 'scale: {}'.format(im_info[2]) + print('im_size: ({}, {})'.format(im_info[0], im_info[1])) + print('scale: {}'.format(im_info[2])) # 1. 
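
The one functional change in `proposal_layer_tf.py` beyond prints and the relative import is `cfg_key = cfg_key.decode("utf-8")`. Under Python 3, string arguments that pass through `tf.py_func` arrive in the wrapped function as `bytes`, and a `bytes` key will not match the `'TRAIN'`/`'TEST'` entries of the config dict. A defensive version of the same idea (a sketch; the exact call path is assumed from the diff):

```python
def normalize_cfg_key(cfg_key):
    # tf.py_func hands tf.string inputs to Python as bytes under Python 3
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    return cfg_key

print(normalize_cfg_key(b'TEST'))   # 'TEST' -- now usable as cfg[cfg_key]
```
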
Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: - print 'score map size: {}'.format(scores.shape) + print('score map size: {}'.format(scores.shape)) # Enumerate all shifts shift_x = np.arange(0, width) * _feat_stride diff --git a/lib/rpn_msr/proposal_layer_tf.py.bak b/lib/rpn_msr/proposal_layer_tf.py.bak new file mode 100644 index 00000000..13984090 --- /dev/null +++ b/lib/rpn_msr/proposal_layer_tf.py.bak @@ -0,0 +1,151 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import numpy as np +import yaml +from fast_rcnn.config import cfg +from generate_anchors import generate_anchors +from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes +from fast_rcnn.nms_wrapper import nms +import pdb + + +DEBUG = False +""" +Outputs object detection proposals by applying estimated bounding-box +transformations to a set of regular boxes (called "anchors"). +""" +def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stride = [16,],anchor_scales = [8, 16, 32]): + # Algorithm: + # + # for each (H, W) location i + # generate A anchor boxes centered on cell i + # apply predicted bbox deltas at cell i to each of the A anchors + # clip predicted boxes to image + # remove predicted boxes with either height or width < threshold + # sort all (proposal, score) pairs by score from highest to lowest + # take top pre_nms_topN proposals before NMS + # apply NMS with threshold 0.7 to remaining proposals + # take after_nms_topN proposals after NMS + # return the top proposals (-> RoIs top, scores top) + #layer_params = yaml.load(self.param_str_) + _anchors = generate_anchors(scales=np.array(anchor_scales)) + _num_anchors = _anchors.shape[0] + rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) + rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,1,2]) + #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1]) + #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1]) + im_info = im_info[0] + + assert rpn_cls_prob_reshape.shape[0] == 1, \ + 'Only single item batches are supported' + # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' + #cfg_key = 'TEST' + pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N + post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + nms_thresh = cfg[cfg_key].RPN_NMS_THRESH + min_size = cfg[cfg_key].RPN_MIN_SIZE + + # the first set of _num_anchors channels are bg probs + # the second set are the fg probs, which we want + scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :] + bbox_deltas = rpn_bbox_pred + #im_info = bottom[2].data[0, :] + + if DEBUG: + print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + print 'scale: {}'.format(im_info[2]) + + # 1. 
Generate proposals from bbox deltas and shifted anchors + height, width = scores.shape[-2:] + + if DEBUG: + print 'score map size: {}'.format(scores.shape) + + # Enumerate all shifts + shift_x = np.arange(0, width) * _feat_stride + shift_y = np.arange(0, height) * _feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + + # Enumerate all shifted anchors: + # + # add A anchors (1, A, 4) to + # cell K shifts (K, 1, 4) to get + # shift anchors (K, A, 4) + # reshape to (K*A, 4) shifted anchors + A = _num_anchors + K = shifts.shape[0] + anchors = _anchors.reshape((1, A, 4)) + \ + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) + anchors = anchors.reshape((K * A, 4)) + + # Transpose and reshape predicted bbox transformations to get them + # into the same order as the anchors: + # + # bbox deltas will be (1, 4 * A, H, W) format + # transpose to (1, H, W, 4 * A) + # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) + # in slowest to fastest order + bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) + + # Same story for the scores: + # + # scores are (1, A, H, W) format + # transpose to (1, H, W, A) + # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) + scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) + + # Convert anchors into proposals via bbox transformations + proposals = bbox_transform_inv(anchors, bbox_deltas) + + # 2. clip predicted boxes to image + proposals = clip_boxes(proposals, im_info[:2]) + + # 3. remove predicted boxes with either height or width < threshold + # (NOTE: convert min_size to input image scale stored in im_info[2]) + keep = _filter_boxes(proposals, min_size * im_info[2]) + proposals = proposals[keep, :] + scores = scores[keep] + + # 4. sort all (proposal, score) pairs by score from highest to lowest + # 5. take top pre_nms_topN (e.g. 6000) + order = scores.ravel().argsort()[::-1] + if pre_nms_topN > 0: + order = order[:pre_nms_topN] + proposals = proposals[order, :] + scores = scores[order] + + # 6. apply nms (e.g. threshold = 0.7) + # 7. take after_nms_topN (e.g. 300) + # 8. return the top proposals (-> RoIs top) + keep = nms(np.hstack((proposals, scores)), nms_thresh) + if post_nms_topN > 0: + keep = keep[:post_nms_topN] + proposals = proposals[keep, :] + scores = scores[keep] + # Output rois blob + # Our RPN implementation only supports a single input image, so all + # batch inds are 0 + batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) + blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) + return blob + #top[0].reshape(*(blob.shape)) + #top[0].data[...] = blob + + # [Optional] output scores blob + #if len(top) > 1: + # top[1].reshape(*(scores.shape)) + # top[1].data[...] 
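
Steps 3-5 of the proposal pipeline are plain NumPy. A condensed, runnable sketch of the min-size filter and the score-ordered truncation; `filter_boxes` mirrors the `_filter_boxes` helper defined at the bottom of this file:

```python
import numpy as np

def filter_boxes(boxes, min_size):
    ws = boxes[:, 2] - boxes[:, 0] + 1      # inclusive pixel coordinates
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]

proposals = np.array([[0., 0., 30., 30.], [0., 0., 40., 3.], [5., 5., 90., 60.]])
scores = np.array([0.2, 0.9, 0.7])

keep = filter_boxes(proposals, min_size=16)          # drops the 41x4 sliver
proposals, scores = proposals[keep], scores[keep]

order = scores.ravel().argsort()[::-1][:1]           # pre_nms_topN = 1
print(proposals[order])                              # [[ 5.  5. 90. 60.]]
```
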
= scores + +def _filter_boxes(boxes, min_size): + """Remove all boxes with any side smaller than min_size.""" + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep diff --git a/lib/rpn_msr/proposal_target_layer_tf.py b/lib/rpn_msr/proposal_target_layer_tf.py index 75f19fd9..4d702b5e 100644 --- a/lib/rpn_msr/proposal_target_layer_tf.py +++ b/lib/rpn_msr/proposal_target_layer_tf.py @@ -48,14 +48,14 @@ def proposal_target_layer(rpn_rois, gt_boxes,_num_classes): rois_per_image, _num_classes) if DEBUG: - print 'num fg: {}'.format((labels > 0).sum()) - print 'num bg: {}'.format((labels == 0).sum()) + print('num fg: {}'.format((labels > 0).sum())) + print('num bg: {}'.format((labels == 0).sum())) _count += 1 _fg_num += (labels > 0).sum() _bg_num += (labels == 0).sum() - print 'num fg avg: {}'.format(_fg_num / _count) - print 'num bg avg: {}'.format(_bg_num / _count) - print 'ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num)) + print('num fg avg: {}'.format(_fg_num / _count)) + print('num bg avg: {}'.format(_bg_num / _count)) + print('ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num))) rois = rois.reshape(-1,5) labels = labels.reshape(-1,1) diff --git a/lib/rpn_msr/proposal_target_layer_tf.py.bak b/lib/rpn_msr/proposal_target_layer_tf.py.bak new file mode 100644 index 00000000..75f19fd9 --- /dev/null +++ b/lib/rpn_msr/proposal_target_layer_tf.py.bak @@ -0,0 +1,155 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import yaml +import numpy as np +import numpy.random as npr +from fast_rcnn.config import cfg +from fast_rcnn.bbox_transform import bbox_transform +from utils.cython_bbox import bbox_overlaps +import pdb + +DEBUG = False + +def proposal_target_layer(rpn_rois, gt_boxes,_num_classes): + """ + Assign object detection proposals to ground-truth targets. Produces proposal + classification labels and bounding-box regression targets. 
+ """ + + # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN + # (i.e., rpn.proposal_layer.ProposalLayer), or any other source + all_rois = rpn_rois + # TODO(rbg): it's annoying that sometimes I have extra info before + # and other times after box coordinates -- normalize to one format + + # Include ground-truth boxes in the set of candidate rois + zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) + all_rois = np.vstack( + (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) + ) + + # Sanity check: single batch only + assert np.all(all_rois[:, 0] == 0), \ + 'Only single item batches are supported' + + num_images = 1 + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images + fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) + + # Sample rois with classification labels and bounding box regression + # targets + labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( + all_rois, gt_boxes, fg_rois_per_image, + rois_per_image, _num_classes) + + if DEBUG: + print 'num fg: {}'.format((labels > 0).sum()) + print 'num bg: {}'.format((labels == 0).sum()) + _count += 1 + _fg_num += (labels > 0).sum() + _bg_num += (labels == 0).sum() + print 'num fg avg: {}'.format(_fg_num / _count) + print 'num bg avg: {}'.format(_bg_num / _count) + print 'ratio: {:.3f}'.format(float(_fg_num) / float(_bg_num)) + + rois = rois.reshape(-1,5) + labels = labels.reshape(-1,1) + bbox_targets = bbox_targets.reshape(-1,_num_classes*4) + bbox_inside_weights = bbox_inside_weights.reshape(-1,_num_classes*4) + + bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) + + return rois,labels,bbox_targets,bbox_inside_weights,bbox_outside_weights + +def _get_bbox_regression_labels(bbox_target_data, num_classes): + """Bounding-box regression targets (bbox_target_data) are stored in a + compact form N x (class, tx, ty, tw, th) + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). + + Returns: + bbox_target (ndarray): N x 4K blob of regression targets + bbox_inside_weights (ndarray): N x 4K blob of loss weights + """ + + clss = np.array(bbox_target_data[:, 0], dtype=np.uint16, copy=True) + bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) + bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = clss[ind] + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS + return bbox_targets, bbox_inside_weights + + +def _compute_targets(ex_rois, gt_rois, labels): + """Compute bounding-box regression targets for an image.""" + + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 4 + + targets = bbox_transform(ex_rois, gt_rois) + if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: + # Optionally normalize targets by a precomputed mean and stdev + targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) + / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) + return np.hstack( + (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) + +def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): + """Generate a random sample of RoIs comprising foreground and background + examples. 
+ """ + # overlaps: (rois x gt_boxes) + overlaps = bbox_overlaps( + np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), + np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) + gt_assignment = overlaps.argmax(axis=1) + max_overlaps = overlaps.max(axis=1) + labels = gt_boxes[gt_assignment, 4] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) + + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) + # Sample background regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds) + # Select sampled values from various arrays: + labels = labels[keep_inds] + # Clamp labels for the background RoIs to 0 + labels[fg_rois_per_this_image:] = 0 + rois = all_rois[keep_inds] + + bbox_target_data = _compute_targets( + rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) + + bbox_targets, bbox_inside_weights = \ + _get_bbox_regression_labels(bbox_target_data, num_classes) + + return labels, rois, bbox_targets, bbox_inside_weights diff --git a/lib/setup.py b/lib/setup.py index 2f875015..cda553e1 100644 --- a/lib/setup.py +++ b/lib/setup.py @@ -46,7 +46,7 @@ def locate_cuda(): cudaconfig = {'home':home, 'nvcc':nvcc, 'include': pjoin(home, 'include'), 'lib64': pjoin(home, 'lib64')} - for k, v in cudaconfig.iteritems(): + for k, v in cudaconfig.items(): if not os.path.exists(v): return None; @@ -81,7 +81,7 @@ def customize_compiler_for_nvcc(self): # object but distutils doesn't have the ability to change compilers # based on source extension: we add it. 
def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - print extra_postargs + print(extra_postargs) if os.path.splitext(src)[1] == '.cu': # use the cuda for .cu files self.set_executable('compiler_so', CUDA['nvcc']) diff --git a/lib/setup.py.bak b/lib/setup.py.bak new file mode 100644 index 00000000..cc102151 --- /dev/null +++ b/lib/setup.py.bak @@ -0,0 +1,155 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import os +from os.path import join as pjoin +import numpy as np +from distutils.core import setup +from distutils.extension import Extension +from Cython.Distutils import build_ext + +def find_in_path(name, path): + "Find a file in a search path" + #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = pjoin(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + +def locate_cuda(): + """Locate the CUDA environment on the system + + Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' + and values giving the absolute path to each directory. + + Starts by looking for the CUDAHOME env variable. If not found, everything + is based on finding 'nvcc' in the PATH. + """ + + # first check if the CUDAHOME env variable is in use + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = pjoin(home, 'bin', 'nvcc') + else: + # otherwise, search the PATH for NVCC + default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') + nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) + if nvcc is None: + return None; + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home':home, 'nvcc':nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib64')} + for k, v in cudaconfig.iteritems(): + if not os.path.exists(v): + return None; + + return cudaconfig + +CUDA = locate_cuda() + +# Obtain the numpy include directory. This logic works across numpy versions. +try: + numpy_include = np.get_include() +except AttributeError: + numpy_include = np.get_numpy_include() + +def customize_compiler_for_nvcc(self): + """inject deep into distutils to customize how the dispatch + to gcc/nvcc works. + + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kindof like a wierd functional + subclassing going on.""" + + # tell the compiler it can processes .cu + self.src_extensions.append('.cu') + + # save references to the default compiler_so and _comple methods + default_compiler_so = self.compiler_so + super = self._compile + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. 
+    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
+        print(extra_postargs)
+        if os.path.splitext(src)[1] == '.cu':
+            # use the cuda for .cu files
+            self.set_executable('compiler_so', CUDA['nvcc'])
+            # use only a subset of the extra_postargs, which are 1-1 translated
+            # from the extra_compile_args in the Extension class
+            postargs = extra_postargs['nvcc']
+        else:
+            postargs = extra_postargs['gcc']
+
+        super(obj, src, ext, cc_args, postargs, pp_opts)
+        # reset the default compiler_so, which we might have changed for cuda
+        self.compiler_so = default_compiler_so
+
+    # inject our redefined _compile method into the class
+    self._compile = _compile
+
+
+# run the customize_compiler
+class custom_build_ext(build_ext):
+    def build_extensions(self):
+        customize_compiler_for_nvcc(self.compiler)
+        build_ext.build_extensions(self)
+
+ext_modules = [
+    Extension(
+        "utils.cython_bbox",
+        ["utils/bbox.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    ),
+    Extension(
+        "utils.cython_nms",
+        ["utils/nms.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    ),
+    Extension(
+        "nms.cpu_nms",
+        ["nms/cpu_nms.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    )
+]
+
+if CUDA:
+    ext_modules.append(
+        Extension('nms.gpu_nms',
+            ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
+            library_dirs=[CUDA['lib64']],
+            libraries=['cudart'],
+            language='c++',
+            runtime_library_dirs=[CUDA['lib64']],
+            # this syntax is specific to this build system
+            # we're only going to use certain compiler args with nvcc and not with gcc
+            # the implementation of this trick is in customize_compiler() below
+            extra_compile_args={'gcc': ["-Wno-unused-function"],
+                                'nvcc': ['-arch=sm_35',
+                                         '--ptxas-options=-v',
+                                         '-c',
+                                         '--compiler-options',
+                                         "'-fPIC'"]},
+            include_dirs = [numpy_include, CUDA['include']]
+        )
+    )
+
+setup(
+    name='fast_rcnn',
+    ext_modules=ext_modules,
+    # inject our custom trigger
+    cmdclass={'build_ext': custom_build_ext},
+)
diff --git a/lib/utils/blob.py b/lib/utils/blob.py
index 63c1b52e..0489dc4f 100644
--- a/lib/utils/blob.py
+++ b/lib/utils/blob.py
@@ -19,7 +19,7 @@ def im_list_to_blob(ims):
     num_images = len(ims)
     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                     dtype=np.float32)
-    for i in xrange(num_images):
+    for i in range(num_images):
         im = ims[i]
         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
diff --git a/tools/demo.py b/tools/demo.py
index 0ffd219c..9444f8fa 100644
--- a/tools/demo.py
+++ b/tools/demo.py
@@ -64,8 +64,8 @@ def demo(sess, net, image_name):
     timer.tic()
     scores, boxes = im_detect(sess, net, im)
     timer.toc()
-    print ('Detection took {:.3f}s for '
-           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
+    print(('Detection took {:.3f}s for '
+           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
 
     # Visualize detections for each class
     im = im[:, :, (2, 1, 0)]
@@ -118,11 +118,11 @@ def parse_args():
 
     #sess.run(tf.initialize_all_variables())
 
-    print '\n\nLoaded network {:s}'.format(args.model)
+    print('\n\nLoaded network {:s}'.format(args.model))
 
     # Warmup on a dummy image
     im = 128 * np.ones((300, 300, 3), dtype=np.uint8)
-    for i in xrange(2):
+    for i in range(2):
         _, _= im_detect(sess, net, im)
 
     im_names = ['000456.jpg', '000542.jpg', '001150.jpg',
@@ -130,8 +130,8 @@ def parse_args():
 
     for im_name in im_names:
-        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
-        print 'Demo for data/demo/{}'.format(im_name)
+        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
+        print('Demo for data/demo/{}'.format(im_name))
         demo(sess, net, im_name)
 
     plt.show()
diff --git a/tools/demo.py.bak b/tools/demo.py.bak
new file mode 100644
index 00000000..0ffd219c
--- /dev/null
+++ b/tools/demo.py.bak
@@ -0,0 +1,138 @@
+import _init_paths
+import tensorflow as tf
+from fast_rcnn.config import cfg
+from fast_rcnn.test import im_detect
+from fast_rcnn.nms_wrapper import nms
+from utils.timer import Timer
+import matplotlib.pyplot as plt
+import numpy as np
+import os, sys, cv2
+import argparse
+from networks.factory import get_network
+
+
+CLASSES = ('__background__',
+           'aeroplane', 'bicycle', 'bird', 'boat',
+           'bottle', 'bus', 'car', 'cat', 'chair',
+           'cow', 'diningtable', 'dog', 'horse',
+           'motorbike', 'person', 'pottedplant',
+           'sheep', 'sofa', 'train', 'tvmonitor')
+
+
+#CLASSES = ('__background__','person','bike','motorbike','car','bus')
+
+def vis_detections(im, class_name, dets,ax, thresh=0.5):
+    """Draw detected bounding boxes."""
+    inds = np.where(dets[:, -1] >= thresh)[0]
+    if len(inds) == 0:
+        return
+
+    for i in inds:
+        bbox = dets[i, :4]
+        score = dets[i, -1]
+
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=3.5)
+            )
+        ax.text(bbox[0], bbox[1] - 2,
+                '{:s} {:.3f}'.format(class_name, score),
+                bbox=dict(facecolor='blue', alpha=0.5),
+                fontsize=14, color='white')
+
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=14)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.draw()
+
+
+def demo(sess, net, image_name):
+    """Detect object classes in an image using pre-computed object proposals."""
+
+    # Load the demo image
+    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
+    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name)
+    im = cv2.imread(im_file)
+
+    # Detect all object classes and regress object bounds
+    timer = Timer()
+    timer.tic()
+    scores, boxes = im_detect(sess, net, im)
+    timer.toc()
+    print ('Detection took {:.3f}s for '
+           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
+
+    # Visualize detections for each class
+    im = im[:, :, (2, 1, 0)]
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+
+    CONF_THRESH = 0.8
+    NMS_THRESH = 0.3
+    for cls_ind, cls in enumerate(CLASSES[1:]):
+        cls_ind += 1 # because we skipped background
+        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
+        cls_scores = scores[:, cls_ind]
+        dets = np.hstack((cls_boxes,
+                          cls_scores[:, np.newaxis])).astype(np.float32)
+        keep = nms(dets, NMS_THRESH)
+        dets = dets[keep, :]
+        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
+
+def parse_args():
+    """Parse input arguments."""
+    parser = argparse.ArgumentParser(description='Faster R-CNN demo')
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
+                        default=0, type=int)
+    parser.add_argument('--cpu', dest='cpu_mode',
+                        help='Use CPU mode (overrides --gpu)',
+                        action='store_true')
+    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
+                        default='VGGnet_test')
+    parser.add_argument('--model', dest='model', help='Model path',
+                        default=' ')
+
+    args = parser.parse_args()
+
+    return args
+if __name__ == '__main__':
+    cfg.TEST.HAS_RPN = True  # Use RPN for proposals
+
+    args = parse_args()
+
+    if args.model == ' ':
+        raise IOError(('Error: Model not found.\n'))
+
+    # init session
+    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
+    # load network
+    net = get_network(args.demo_net)
+    # load model
+    saver = tf.train.Saver(write_version=tf.train.SaverDef.V1)
+    saver.restore(sess, args.model)
+
+    #sess.run(tf.initialize_all_variables())
+
+    print '\n\nLoaded network {:s}'.format(args.model)
+
+    # Warmup on a dummy image
+    im = 128 * np.ones((300, 300, 3), dtype=np.uint8)
+    for i in xrange(2):
+        _, _= im_detect(sess, net, im)
+
+    im_names = ['000456.jpg', '000542.jpg', '001150.jpg',
+                '001763.jpg', '004545.jpg']
+
+
+    for im_name in im_names:
+        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+        print 'Demo for data/demo/{}'.format(im_name)
+        demo(sess, net, im_name)
+
+    plt.show()
+
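Two closing notes on this port. First, the `.bak` files added throughout the diff are the in-place backups that `2to3 -w` leaves next to each rewritten module (suppressed with `-n`); they preserve the Python 2 sources verbatim, which is why they still contain `print` statements, `xrange`, and `iteritems`. Second, the demo targets the TF1 graph-and-session API; under TensorFlow 2.x those calls are only reachable through the compat layer. A sketch of the session setup under that assumption, untested against this repository's graphs:

    import tensorflow.compat.v1 as tf
    tf.disable_eager_execution()  # demo.py builds a static graph and runs it in a Session
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))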