From c12a04b9562021b3b4a97a5c73045be62f5bbcf9 Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Fri, 5 May 2023 14:30:26 +0800 Subject: [PATCH] Align ops boxes (#274) * add vision.ops.boxes module * add vision.ops.boxes module * code format --- flowvision/__init__.py | 1 + flowvision/ops/__init__.py | 13 ++ flowvision/ops/_box_convert.py | 81 +++++++ flowvision/ops/_utils.py | 111 ++++++++++ flowvision/ops/boxes.py | 390 +++++++++++++++++++++++++++++++++ 5 files changed, 596 insertions(+) create mode 100644 flowvision/ops/__init__.py create mode 100644 flowvision/ops/_box_convert.py create mode 100644 flowvision/ops/_utils.py create mode 100644 flowvision/ops/boxes.py diff --git a/flowvision/__init__.py b/flowvision/__init__.py index 40954bc0..691cfa0e 100644 --- a/flowvision/__init__.py +++ b/flowvision/__init__.py @@ -5,6 +5,7 @@ from flowvision import loss from flowvision import scheduler from flowvision import data +from flowvision import ops try: from .version import __version__ # noqa: F401 diff --git a/flowvision/ops/__init__.py b/flowvision/ops/__init__.py new file mode 100644 index 00000000..b931af96 --- /dev/null +++ b/flowvision/ops/__init__.py @@ -0,0 +1,13 @@ +from .boxes import ( + batched_nms, + box_area, + box_convert, + box_iou, + clip_boxes_to_image, + complete_box_iou, + distance_box_iou, + generalized_box_iou, + masks_to_boxes, + nms, + remove_small_boxes, +) diff --git a/flowvision/ops/_box_convert.py b/flowvision/ops/_box_convert.py new file mode 100644 index 00000000..e62a2163 --- /dev/null +++ b/flowvision/ops/_box_convert.py @@ -0,0 +1,81 @@ +import oneflow as torch +from oneflow import Tensor + + +def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (cx, cy, w, h) format to (x1, y1, x2, y2) format. + (cx, cy) refers to center of bounding box + (w, h) are width and height of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted. + + Returns: + boxes (Tensor(N, 4)): boxes in (x1, y1, x2, y2) format. + """ + # We need to change all 4 of them so some temporary variable is needed. + cx, cy, w, h = boxes.unbind(-1) + x1 = cx - 0.5 * w + y1 = cy - 0.5 * h + x2 = cx + 0.5 * w + y2 = cy + 0.5 * h + + boxes = torch.stack((x1, y1, x2, y2), dim=-1) + + return boxes + + +def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h) format. + (x1, y1) refer to top left of bounding box + (x2, y2) refer to bottom right of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted. + + Returns: + boxes (Tensor(N, 4)): boxes in (cx, cy, w, h) format. + """ + x1, y1, x2, y2 = boxes.unbind(-1) + cx = (x1 + x2) / 2 + cy = (y1 + y2) / 2 + w = x2 - x1 + h = y2 - y1 + + boxes = torch.stack((cx, cy, w, h), dim=-1) + + return boxes + + +def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x, y, w, h) format to (x1, y1, x2, y2) format. + (x, y) refers to top left of bounding box. + (w, h) refers to width and height of box. + Args: + boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted. + + Returns: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format. + """ + x, y, w, h = boxes.unbind(-1) + boxes = torch.stack([x, y, x + w, y + h], dim=-1) + return boxes + + +def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (x, y, w, h) format. 
+ (x1, y1) refer to top left of bounding box + (x2, y2) refer to bottom right of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted. + + Returns: + boxes (Tensor[N, 4]): boxes in (x, y, w, h) format. + """ + x1, y1, x2, y2 = boxes.unbind(-1) + w = x2 - x1 # x2 - x1 + h = y2 - y1 # y2 - y1 + boxes = torch.stack((x1, y1, w, h), dim=-1) + return boxes diff --git a/flowvision/ops/_utils.py b/flowvision/ops/_utils.py new file mode 100644 index 00000000..fad3afa1 --- /dev/null +++ b/flowvision/ops/_utils.py @@ -0,0 +1,111 @@ +from typing import List, Optional, Tuple, Union + +import oneflow as torch +from oneflow import nn, Tensor + + +def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + # TODO add back the assert + # assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: + concat_boxes = _cat([b for b in boxes], dim=0) + temp = [] + for i, b in enumerate(boxes): + temp.append(torch.full_like(b[:, :1], i)) + ids = _cat(temp, dim=0) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + +def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): + if isinstance(boxes, (list, tuple)): + for _tensor in boxes: + torch._assert( + _tensor.size(1) == 4, + "The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]", + ) + elif isinstance(boxes, torch.Tensor): + torch._assert( + boxes.size(1) == 5, "The boxes tensor shape is not correct as Tensor[K, 5]" + ) + else: + torch._assert( + False, "boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]" + ) + return + + +def split_normalization_params( + model: nn.Module, norm_classes: Optional[List[type]] = None +) -> Tuple[List[Tensor], List[Tensor]]: + # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 + if not norm_classes: + norm_classes = [ + nn.modules.batchnorm._BatchNorm, + nn.LayerNorm, + nn.GroupNorm, + nn.modules.instancenorm._InstanceNorm, + ] + + for t in norm_classes: + if not issubclass(t, nn.Module): + raise ValueError(f"Class {t} is not a subclass of nn.Module.") + + classes = tuple(norm_classes) + + norm_params = [] + other_params = [] + for module in model.modules(): + if next(module.children(), None): + other_params.extend( + p for p in module.parameters(recurse=False) if p.requires_grad + ) + elif isinstance(module, classes): + norm_params.extend(p for p in module.parameters() if p.requires_grad) + else: + other_params.extend(p for p in module.parameters() if p.requires_grad) + return norm_params, other_params + + +def _upcast(t: Tensor) -> Tensor: + # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type + if t.is_floating_point(): + return t if t.dtype in (torch.float32, torch.float64) else t.float() + else: + return t if t.dtype in (torch.int32, torch.int64) else t.int() + + +def _upcast_non_float(t: Tensor) -> Tensor: + # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type + if t.dtype not in (torch.float32, torch.float64): + return t.float() + return t + + +def _loss_inter_union( + boxes1: torch.Tensor, boxes2: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # 
Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsctk = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & (xkis2 > xkis1) + intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk + + return intsctk, unionk diff --git a/flowvision/ops/boxes.py b/flowvision/ops/boxes.py new file mode 100644 index 00000000..9850db22 --- /dev/null +++ b/flowvision/ops/boxes.py @@ -0,0 +1,390 @@ +from typing import Tuple + +import oneflow as torch +from oneflow import Tensor + +# from ..utils import _log_api_usage_once +from ._box_convert import ( + _box_cxcywh_to_xyxy, + _box_xywh_to_xyxy, + _box_xyxy_to_cxcywh, + _box_xyxy_to_xywh, +) +from ._utils import _upcast + + +def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor: + """ + Performs non-maximum suppression (NMS) on the boxes according + to their intersection-over-union (IoU). + + NMS iteratively removes lower scoring boxes which have an + IoU greater than iou_threshold with another (higher scoring) + box. + + If multiple boxes have the exact same score and satisfy the IoU + criterion with respect to a reference box, the selected box is + not guaranteed to be the same between CPU and GPU. This is similar + to the behavior of argsort in PyTorch when repeated values are present. + + Args: + boxes (Tensor[N, 4])): boxes to perform NMS on. They + are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and + ``0 <= y1 < y2``. + scores (Tensor[N]): scores for each one of the boxes + iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold + + Returns: + Tensor: int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + return torch.nms(boxes, scores, iou_threshold) + + +def batched_nms( + boxes: Tensor, scores: Tensor, idxs: Tensor, iou_threshold: float, +) -> Tensor: + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 4]): boxes where NMS will be performed. They + are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and + ``0 <= y1 < y2``. + scores (Tensor[N]): scores for each one of the boxes + idxs (Tensor[N]): indices of the categories for each one of the boxes. + iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold + + Returns: + Tensor: int64 tensor with the indices of the elements that have been kept by NMS, sorted + in decreasing order of scores + """ + # Benchmarks that drove the following thresholds are at + # https://github.com/pytorch/vision/issues/1311#issuecomment-781329339 + if boxes.numel() > (4000 if boxes.device.type == "cpu" else 20000): + return _batched_nms_vanilla(boxes, scores, idxs, iou_threshold) + else: + return _batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold) + + +@torch.jit._script_if_tracing +def _batched_nms_coordinate_trick( + boxes: Tensor, scores: Tensor, idxs: Tensor, iou_threshold: float, +) -> Tensor: + # strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. 
The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) + boxes_for_nms = boxes + offsets[:, None] + keep = nms(boxes_for_nms, scores, iou_threshold) + return keep + + +@torch.jit._script_if_tracing +def _batched_nms_vanilla( + boxes: Tensor, scores: Tensor, idxs: Tensor, iou_threshold: float, +) -> Tensor: + # Based on Detectron2 implementation, just manually call nms() on each class independently + keep_mask = torch.zeros_like(scores, dtype=torch.bool) + for class_id in torch.unique(idxs): + curr_indices = torch.where(idxs == class_id)[0] + curr_keep_indices = nms( + boxes[curr_indices], scores[curr_indices], iou_threshold + ) + keep_mask[curr_indices[curr_keep_indices]] = True + keep_indices = torch.where(keep_mask)[0] + return keep_indices[scores[keep_indices].sort(descending=True)[1]] + + +def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor: + """ + Remove boxes which contains at least one side smaller than min_size. + + Args: + boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format + with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + min_size (float): minimum size + + Returns: + Tensor[K]: indices of the boxes that have both sides + larger than min_size + """ + ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] + keep = (ws >= min_size) & (hs >= min_size) + keep = torch.where(keep)[0] + return keep + + +def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor: + """ + Clip boxes so that they lie inside an image of size `size`. + + Args: + boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format + with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + size (Tuple[height, width]): size of the image + + Returns: + Tensor[N, 4]: clipped boxes + """ + dim = boxes.dim() + boxes_x = boxes[..., 0::2] + boxes_y = boxes[..., 1::2] + height, width = size + + boxes_x = boxes_x.clamp(min=0, max=width) + boxes_y = boxes_y.clamp(min=0, max=height) + + clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim) + return clipped_boxes.reshape(boxes.shape) + + +def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor: + """ + Converts boxes from given in_fmt to out_fmt. + Supported in_fmt and out_fmt are: + + 'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right. + This is the format that torchvision utilities expect. + + 'xywh' : boxes are represented via corner, width and height, x1, y2 being top left, w, h being width and height. + + 'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h + being width and height. + + Args: + boxes (Tensor[N, 4]): boxes which will be converted. + in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']. + out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'] + + Returns: + Tensor[N, 4]: Boxes into converted format. 
+ """ + allowed_fmts = ("xyxy", "xywh", "cxcywh") + if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts: + raise ValueError( + "Unsupported Bounding Box Conversions for given in_fmt and out_fmt" + ) + + if in_fmt == out_fmt: + return boxes.clone() + + if in_fmt != "xyxy" and out_fmt != "xyxy": + # convert to xyxy and change in_fmt xyxy + if in_fmt == "xywh": + boxes = _box_xywh_to_xyxy(boxes) + elif in_fmt == "cxcywh": + boxes = _box_cxcywh_to_xyxy(boxes) + in_fmt = "xyxy" + + if in_fmt == "xyxy": + if out_fmt == "xywh": + boxes = _box_xyxy_to_xywh(boxes) + elif out_fmt == "cxcywh": + boxes = _box_xyxy_to_cxcywh(boxes) + elif out_fmt == "xyxy": + if in_fmt == "xywh": + boxes = _box_xywh_to_xyxy(boxes) + elif in_fmt == "cxcywh": + boxes = _box_cxcywh_to_xyxy(boxes) + return boxes + + +def box_area(boxes: Tensor) -> Tensor: + """ + Computes the area of a set of bounding boxes, which are specified by their + (x1, y1, x2, y2) coordinates. + + Args: + boxes (Tensor[N, 4]): boxes for which the area will be computed. They + are expected to be in (x1, y1, x2, y2) format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Returns: + Tensor[N]: the area for each box + """ + boxes = _upcast(boxes) + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]: + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] + rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] + + wh = _upcast(rb - lt).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + union = area1[:, None] + area2 - inter + + return inter, union + + +def box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: + """ + Return intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2 + """ + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + return iou + + +# Implementation adapted from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py +def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: + """ + Return generalized intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values + for every element in boxes1 and boxes2 + """ + + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + + whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] + areai = whi[:, :, 0] * whi[:, :, 1] + + return iou - (areai - union) / areai + + +def complete_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: + """ + Return complete intersection-over-union (Jaccard index) between two sets of boxes. 
+ Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values + for every element in boxes1 and boxes2 + """ + + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + + diou, iou = _box_diou_iou(boxes1, boxes2, eps) + + w_pred = boxes1[:, None, 2] - boxes1[:, None, 0] + h_pred = boxes1[:, None, 3] - boxes1[:, None, 1] + + w_gt = boxes2[:, 2] - boxes2[:, 0] + h_gt = boxes2[:, 3] - boxes2[:, 1] + + v = (4 / (torch.pi ** 2)) * torch.pow( + torch.atan(w_pred / h_pred) - torch.atan(w_gt / h_gt), 2 + ) + with torch.no_grad(): + alpha = v / (1 - iou + v + eps) + return diou - alpha * v + + +def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: + """ + Return distance intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values + for every element in boxes1 and boxes2 + """ + + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + diou, _ = _box_diou_iou(boxes1, boxes2, eps=eps) + return diou + + +def _box_diou_iou( + boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7 +) -> Tuple[Tensor, Tensor]: + + iou = box_iou(boxes1, boxes2) + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] + diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + # centers of boxes + x_p = (boxes1[:, 0] + boxes1[:, 2]) / 2 + y_p = (boxes1[:, 1] + boxes1[:, 3]) / 2 + x_g = (boxes2[:, 0] + boxes2[:, 2]) / 2 + y_g = (boxes2[:, 1] + boxes2[:, 3]) / 2 + # The distance between boxes' centers squared. + centers_distance_squared = (_upcast((x_p[:, None] - x_g[None, :])) ** 2) + ( + _upcast((y_p[:, None] - y_g[None, :])) ** 2 + ) + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. + return iou - (centers_distance_squared / diagonal_distance_squared), iou + + +def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor: + """ + Compute the bounding boxes around the provided masks. + + Returns a [N, 4] tensor containing bounding boxes. The boxes are in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + masks (Tensor[N, H, W]): masks to transform where N is the number of masks + and (H, W) are the spatial dimensions. + + Returns: + Tensor[N, 4]: bounding boxes + """ + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device, dtype=torch.float) + + n = masks.shape[0] + + bounding_boxes = torch.zeros((n, 4), device=masks.device, dtype=torch.float) + + for index, mask in enumerate(masks): + y, x = torch.where(mask != 0) + + bounding_boxes[index, 0] = torch.min(x) + bounding_boxes[index, 1] = torch.min(y) + bounding_boxes[index, 2] = torch.max(x) + bounding_boxes[index, 3] = torch.max(y) + + return bounding_boxes
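A minimal usage sketch of the new flowvision.ops module added by this patch (not part of the diff itself). It assumes an oneflow build that provides flow.nms, which flowvision.ops.nms delegates to above; the boxes, scores, and masks below are made-up illustrative data, not values from this change.

import oneflow as flow
import flowvision  # flowvision.ops is re-exported by the patched flowvision/__init__.py

# Made-up example boxes in (x1, y1, x2, y2) format with matching scores.
boxes = flow.tensor(
    [
        [10.0, 10.0, 50.0, 50.0],
        [12.0, 12.0, 48.0, 48.0],      # heavily overlaps the first box
        [100.0, 100.0, 160.0, 140.0],
    ]
)
scores = flow.tensor([0.9, 0.8, 0.75])

# Non-maximum suppression: returns the indices of the boxes that are kept.
keep = flowvision.ops.nms(boxes, scores, iou_threshold=0.5)

# Pairwise IoU between the kept boxes and all boxes.
iou = flowvision.ops.box_iou(boxes[keep], boxes)

# Convert the kept boxes from (x1, y1, x2, y2) to (cx, cy, w, h).
cxcywh = flowvision.ops.box_convert(boxes[keep], in_fmt="xyxy", out_fmt="cxcywh")

# Derive tight (x1, y1, x2, y2) boxes from binary masks (illustrative masks).
masks = flow.zeros((2, 32, 32))
masks[0, 4:10, 6:20] = 1
masks[1, 15:30, 2:8] = 1
mask_boxes = flowvision.ops.masks_to_boxes(masks)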