From c12a04b9562021b3b4a97a5c73045be62f5bbcf9 Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Fri, 5 May 2023 14:30:26 +0800 Subject: [PATCH] Align ops boxes (#274) * add vision.ops.boxes module * add vision.ops.boxes module * code format --- flowvision/__init__.py | 1 + flowvision/ops/__init__.py | 13 ++ flowvision/ops/_box_convert.py | 81 +++++++ flowvision/ops/_utils.py | 111 ++++++++++ flowvision/ops/boxes.py | 390 +++++++++++++++++++++++++++++++++ 5 files changed, 596 insertions(+) create mode 100644 flowvision/ops/__init__.py create mode 100644 flowvision/ops/_box_convert.py create mode 100644 flowvision/ops/_utils.py create mode 100644 flowvision/ops/boxes.py diff --git a/flowvision/__init__.py b/flowvision/__init__.py index 40954bc0..691cfa0e 100644 --- a/flowvision/__init__.py +++ b/flowvision/__init__.py @@ -5,6 +5,7 @@ from flowvision import loss from flowvision import scheduler from flowvision import data +from flowvision import ops try: from .version import __version__ # noqa: F401 diff --git a/flowvision/ops/__init__.py b/flowvision/ops/__init__.py new file mode 100644 index 00000000..b931af96 --- /dev/null +++ b/flowvision/ops/__init__.py @@ -0,0 +1,13 @@ +from .boxes import ( + batched_nms, + box_area, + box_convert, + box_iou, + clip_boxes_to_image, + complete_box_iou, + distance_box_iou, + generalized_box_iou, + masks_to_boxes, + nms, + remove_small_boxes, +) diff --git a/flowvision/ops/_box_convert.py b/flowvision/ops/_box_convert.py new file mode 100644 index 00000000..e62a2163 --- /dev/null +++ b/flowvision/ops/_box_convert.py @@ -0,0 +1,81 @@ +import oneflow as torch +from oneflow import Tensor + + +def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (cx, cy, w, h) format to (x1, y1, x2, y2) format. + (cx, cy) refers to center of bounding box + (w, h) are width and height of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted. + + Returns: + boxes (Tensor(N, 4)): boxes in (x1, y1, x2, y2) format. + """ + # We need to change all 4 of them so some temporary variable is needed. + cx, cy, w, h = boxes.unbind(-1) + x1 = cx - 0.5 * w + y1 = cy - 0.5 * h + x2 = cx + 0.5 * w + y2 = cy + 0.5 * h + + boxes = torch.stack((x1, y1, x2, y2), dim=-1) + + return boxes + + +def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h) format. + (x1, y1) refer to top left of bounding box + (x2, y2) refer to bottom right of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted. + + Returns: + boxes (Tensor(N, 4)): boxes in (cx, cy, w, h) format. + """ + x1, y1, x2, y2 = boxes.unbind(-1) + cx = (x1 + x2) / 2 + cy = (y1 + y2) / 2 + w = x2 - x1 + h = y2 - y1 + + boxes = torch.stack((cx, cy, w, h), dim=-1) + + return boxes + + +def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x, y, w, h) format to (x1, y1, x2, y2) format. + (x, y) refers to top left of bounding box. + (w, h) refers to width and height of box. + Args: + boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted. + + Returns: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format. + """ + x, y, w, h = boxes.unbind(-1) + boxes = torch.stack([x, y, x + w, y + h], dim=-1) + return boxes + + +def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (x, y, w, h) format. 
+ (x1, y1) refer to top left of bounding box + (x2, y2) refer to bottom right of bounding box + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted. + + Returns: + boxes (Tensor[N, 4]): boxes in (x, y, w, h) format. + """ + x1, y1, x2, y2 = boxes.unbind(-1) + w = x2 - x1 # x2 - x1 + h = y2 - y1 # y2 - y1 + boxes = torch.stack((x1, y1, w, h), dim=-1) + return boxes diff --git a/flowvision/ops/_utils.py b/flowvision/ops/_utils.py new file mode 100644 index 00000000..fad3afa1 --- /dev/null +++ b/flowvision/ops/_utils.py @@ -0,0 +1,111 @@ +from typing import List, Optional, Tuple, Union + +import oneflow as torch +from oneflow import nn, Tensor + + +def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + # TODO add back the assert + # assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: + concat_boxes = _cat([b for b in boxes], dim=0) + temp = [] + for i, b in enumerate(boxes): + temp.append(torch.full_like(b[:, :1], i)) + ids = _cat(temp, dim=0) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + +def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): + if isinstance(boxes, (list, tuple)): + for _tensor in boxes: + torch._assert( + _tensor.size(1) == 4, + "The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]", + ) + elif isinstance(boxes, torch.Tensor): + torch._assert( + boxes.size(1) == 5, "The boxes tensor shape is not correct as Tensor[K, 5]" + ) + else: + torch._assert( + False, "boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]" + ) + return + + +def split_normalization_params( + model: nn.Module, norm_classes: Optional[List[type]] = None +) -> Tuple[List[Tensor], List[Tensor]]: + # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 + if not norm_classes: + norm_classes = [ + nn.modules.batchnorm._BatchNorm, + nn.LayerNorm, + nn.GroupNorm, + nn.modules.instancenorm._InstanceNorm, + ] + + for t in norm_classes: + if not issubclass(t, nn.Module): + raise ValueError(f"Class {t} is not a subclass of nn.Module.") + + classes = tuple(norm_classes) + + norm_params = [] + other_params = [] + for module in model.modules(): + if next(module.children(), None): + other_params.extend( + p for p in module.parameters(recurse=False) if p.requires_grad + ) + elif isinstance(module, classes): + norm_params.extend(p for p in module.parameters() if p.requires_grad) + else: + other_params.extend(p for p in module.parameters() if p.requires_grad) + return norm_params, other_params + + +def _upcast(t: Tensor) -> Tensor: + # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type + if t.is_floating_point(): + return t if t.dtype in (torch.float32, torch.float64) else t.float() + else: + return t if t.dtype in (torch.int32, torch.int64) else t.int() + + +def _upcast_non_float(t: Tensor) -> Tensor: + # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type + if t.dtype not in (torch.float32, torch.float64): + return t.float() + return t + + +def _loss_inter_union( + boxes1: torch.Tensor, boxes2: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # 
Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsctk = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & (xkis2 > xkis1) + intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk + + return intsctk, unionk diff --git a/flowvision/ops/boxes.py b/flowvision/ops/boxes.py new file mode 100644 index 00000000..9850db22 --- /dev/null +++ b/flowvision/ops/boxes.py @@ -0,0 +1,390 @@ +from typing import Tuple + +import oneflow as torch +from oneflow import Tensor + +# from ..utils import _log_api_usage_once +from ._box_convert import ( + _box_cxcywh_to_xyxy, + _box_xywh_to_xyxy, + _box_xyxy_to_cxcywh, + _box_xyxy_to_xywh, +) +from ._utils import _upcast + + +def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor: + """ + Performs non-maximum suppression (NMS) on the boxes according + to their intersection-over-union (IoU). + + NMS iteratively removes lower scoring boxes which have an + IoU greater than iou_threshold with another (higher scoring) + box. + + If multiple boxes have the exact same score and satisfy the IoU + criterion with respect to a reference box, the selected box is + not guaranteed to be the same between CPU and GPU. This is similar + to the behavior of argsort in PyTorch when repeated values are present. + + Args: + boxes (Tensor[N, 4])): boxes to perform NMS on. They + are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and + ``0 <= y1 < y2``. + scores (Tensor[N]): scores for each one of the boxes + iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold + + Returns: + Tensor: int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + return torch.nms(boxes, scores, iou_threshold) + + +def batched_nms( + boxes: Tensor, scores: Tensor, idxs: Tensor, iou_threshold: float, +) -> Tensor: + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 4]): boxes where NMS will be performed. They + are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and + ``0 <= y1 < y2``. + scores (Tensor[N]): scores for each one of the boxes + idxs (Tensor[N]): indices of the categories for each one of the boxes. + iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold + + Returns: + Tensor: int64 tensor with the indices of the elements that have been kept by NMS, sorted + in decreasing order of scores + """ + # Benchmarks that drove the following thresholds are at + # https://github.com/pytorch/vision/issues/1311#issuecomment-781329339 + if boxes.numel() > (4000 if boxes.device.type == "cpu" else 20000): + return _batched_nms_vanilla(boxes, scores, idxs, iou_threshold) + else: + return _batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold) + + +@torch.jit._script_if_tracing +def _batched_nms_coordinate_trick( + boxes: Tensor, scores: Tensor, idxs: Tensor, iou_threshold: float, +) -> Tensor: + # strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. 
The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) + boxes_for_nms = boxes + offsets[:, None] + keep = nms(boxes_for_nms, scores, iou_threshold) + return keep + + +@torch.jit._script_if_tracing +def _batched_nms_vanilla( + boxes: Tensor, scores: Tensor, idxs: Tensor, iou_threshold: float, +) -> Tensor: + # Based on Detectron2 implementation, just manually call nms() on each class independently + keep_mask = torch.zeros_like(scores, dtype=torch.bool) + for class_id in torch.unique(idxs): + curr_indices = torch.where(idxs == class_id)[0] + curr_keep_indices = nms( + boxes[curr_indices], scores[curr_indices], iou_threshold + ) + keep_mask[curr_indices[curr_keep_indices]] = True + keep_indices = torch.where(keep_mask)[0] + return keep_indices[scores[keep_indices].sort(descending=True)[1]] + + +def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor: + """ + Remove boxes which contains at least one side smaller than min_size. + + Args: + boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format + with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + min_size (float): minimum size + + Returns: + Tensor[K]: indices of the boxes that have both sides + larger than min_size + """ + ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] + keep = (ws >= min_size) & (hs >= min_size) + keep = torch.where(keep)[0] + return keep + + +def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor: + """ + Clip boxes so that they lie inside an image of size `size`. + + Args: + boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format + with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + size (Tuple[height, width]): size of the image + + Returns: + Tensor[N, 4]: clipped boxes + """ + dim = boxes.dim() + boxes_x = boxes[..., 0::2] + boxes_y = boxes[..., 1::2] + height, width = size + + boxes_x = boxes_x.clamp(min=0, max=width) + boxes_y = boxes_y.clamp(min=0, max=height) + + clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim) + return clipped_boxes.reshape(boxes.shape) + + +def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor: + """ + Converts boxes from given in_fmt to out_fmt. + Supported in_fmt and out_fmt are: + + 'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right. + This is the format that torchvision utilities expect. + + 'xywh' : boxes are represented via corner, width and height, x1, y2 being top left, w, h being width and height. + + 'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h + being width and height. + + Args: + boxes (Tensor[N, 4]): boxes which will be converted. + in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']. + out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'] + + Returns: + Tensor[N, 4]: Boxes into converted format. 
+ """ + allowed_fmts = ("xyxy", "xywh", "cxcywh") + if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts: + raise ValueError( + "Unsupported Bounding Box Conversions for given in_fmt and out_fmt" + ) + + if in_fmt == out_fmt: + return boxes.clone() + + if in_fmt != "xyxy" and out_fmt != "xyxy": + # convert to xyxy and change in_fmt xyxy + if in_fmt == "xywh": + boxes = _box_xywh_to_xyxy(boxes) + elif in_fmt == "cxcywh": + boxes = _box_cxcywh_to_xyxy(boxes) + in_fmt = "xyxy" + + if in_fmt == "xyxy": + if out_fmt == "xywh": + boxes = _box_xyxy_to_xywh(boxes) + elif out_fmt == "cxcywh": + boxes = _box_xyxy_to_cxcywh(boxes) + elif out_fmt == "xyxy": + if in_fmt == "xywh": + boxes = _box_xywh_to_xyxy(boxes) + elif in_fmt == "cxcywh": + boxes = _box_cxcywh_to_xyxy(boxes) + return boxes + + +def box_area(boxes: Tensor) -> Tensor: + """ + Computes the area of a set of bounding boxes, which are specified by their + (x1, y1, x2, y2) coordinates. + + Args: + boxes (Tensor[N, 4]): boxes for which the area will be computed. They + are expected to be in (x1, y1, x2, y2) format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Returns: + Tensor[N]: the area for each box + """ + boxes = _upcast(boxes) + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]: + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] + rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] + + wh = _upcast(rb - lt).clamp(min=0) # [N,M,2] + inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] + + union = area1[:, None] + area2 - inter + + return inter, union + + +def box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: + """ + Return intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2 + """ + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + return iou + + +# Implementation adapted from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py +def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: + """ + Return generalized intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values + for every element in boxes1 and boxes2 + """ + + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + + whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] + areai = whi[:, :, 0] * whi[:, :, 1] + + return iou - (areai - union) / areai + + +def complete_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: + """ + Return complete intersection-over-union (Jaccard index) between two sets of boxes. 
+ Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values + for every element in boxes1 and boxes2 + """ + + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + + diou, iou = _box_diou_iou(boxes1, boxes2, eps) + + w_pred = boxes1[:, None, 2] - boxes1[:, None, 0] + h_pred = boxes1[:, None, 3] - boxes1[:, None, 1] + + w_gt = boxes2[:, 2] - boxes2[:, 0] + h_gt = boxes2[:, 3] - boxes2[:, 1] + + v = (4 / (torch.pi ** 2)) * torch.pow( + torch.atan(w_pred / h_pred) - torch.atan(w_gt / h_gt), 2 + ) + with torch.no_grad(): + alpha = v / (1 - iou + v + eps) + return diou - alpha * v + + +def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: + """ + Return distance intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values + for every element in boxes1 and boxes2 + """ + + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + diou, _ = _box_diou_iou(boxes1, boxes2, eps=eps) + return diou + + +def _box_diou_iou( + boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7 +) -> Tuple[Tensor, Tensor]: + + iou = box_iou(boxes1, boxes2) + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] + diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + # centers of boxes + x_p = (boxes1[:, 0] + boxes1[:, 2]) / 2 + y_p = (boxes1[:, 1] + boxes1[:, 3]) / 2 + x_g = (boxes2[:, 0] + boxes2[:, 2]) / 2 + y_g = (boxes2[:, 1] + boxes2[:, 3]) / 2 + # The distance between boxes' centers squared. + centers_distance_squared = (_upcast((x_p[:, None] - x_g[None, :])) ** 2) + ( + _upcast((y_p[:, None] - y_g[None, :])) ** 2 + ) + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. + return iou - (centers_distance_squared / diagonal_distance_squared), iou + + +def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor: + """ + Compute the bounding boxes around the provided masks. + + Returns a [N, 4] tensor containing bounding boxes. The boxes are in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + masks (Tensor[N, H, W]): masks to transform where N is the number of masks + and (H, W) are the spatial dimensions. + + Returns: + Tensor[N, 4]: bounding boxes + """ + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device, dtype=torch.float) + + n = masks.shape[0] + + bounding_boxes = torch.zeros((n, 4), device=masks.device, dtype=torch.float) + + for index, mask in enumerate(masks): + y, x = torch.where(mask != 0) + + bounding_boxes[index, 0] = torch.min(x) + bounding_boxes[index, 1] = torch.min(y) + bounding_boxes[index, 2] = torch.max(x) + bounding_boxes[index, 3] = torch.max(y) + + return bounding_boxes
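A minimal usage sketch of the new flowvision.ops module added by this patch (not part of the diff itself). It assumes an oneflow build that provides flow.nms, which flowvision.ops.nms delegates to above; the boxes, scores, and masks below are made-up illustrative data, not values from this change.

import oneflow as flow
import flowvision  # flowvision.ops is re-exported by the patched flowvision/__init__.py

# Made-up example boxes in (x1, y1, x2, y2) format with matching scores.
boxes = flow.tensor(
    [
        [10.0, 10.0, 50.0, 50.0],
        [12.0, 12.0, 48.0, 48.0],      # heavily overlaps the first box
        [100.0, 100.0, 160.0, 140.0],
    ]
)
scores = flow.tensor([0.9, 0.8, 0.75])

# Non-maximum suppression: returns the indices of the boxes that are kept.
keep = flowvision.ops.nms(boxes, scores, iou_threshold=0.5)

# Pairwise IoU between the kept boxes and all boxes.
iou = flowvision.ops.box_iou(boxes[keep], boxes)

# Convert the kept boxes from (x1, y1, x2, y2) to (cx, cy, w, h).
cxcywh = flowvision.ops.box_convert(boxes[keep], in_fmt="xyxy", out_fmt="cxcywh")

# Derive tight (x1, y1, x2, y2) boxes from binary masks (illustrative masks).
masks = flow.zeros((2, 32, 32))
masks[0, 4:10, 6:20] = 1
masks[1, 15:30, 2:8] = 1
mask_boxes = flowvision.ops.masks_to_boxes(masks)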