From cf00ca4604c77f0d197f10c85593a2dce9ebde51 Mon Sep 17 00:00:00 2001 From: Bryce Date: Sat, 30 Mar 2024 14:00:25 -0700 Subject: [PATCH] feature: densepose controlnet --- imaginairy/cli/imagine.py | 1 + imaginairy/config.py | 22 +- imaginairy/img_processors/control_modes.py | 19 + imaginairy/img_processors/densepose.py | 653 ++++++++++++++++++ imaginairy/utils/downloads.py | 4 +- tests/data/cuda-tests.csv | 1 + ...mages[densepose-create_densepose_map]_.png | Bin 0 -> 5721 bytes tests/test_http_app/test_routes.py | 2 +- 8 files changed, 696 insertions(+), 6 deletions(-) create mode 100644 imaginairy/img_processors/densepose.py create mode 100644 tests/expected_output/test_control_images[densepose-create_densepose_map]_.png diff --git a/imaginairy/cli/imagine.py b/imaginairy/cli/imagine.py index 573006be..6132c570 100644 --- a/imaginairy/cli/imagine.py +++ b/imaginairy/cli/imagine.py @@ -62,6 +62,7 @@ "inpaint", "colorize", "qrcode", + "densepose", ] ), help="how the control image is used as signal", diff --git a/imaginairy/config.py b/imaginairy/config.py index 76bc7ecb..75af3027 100644 --- a/imaginairy/config.py +++ b/imaginairy/config.py @@ -168,10 +168,17 @@ def __post_init__(self): defaults={"negative_prompt": DEFAULT_NEGATIVE_PROMPT}, ), ModelWeightsConfig( - name="Redshift Diffusion", - aliases=["redshift-diffusion", "red", "redshift-diffusion-15", "red15"], + name="Miniaturus Potentia V1.2", + aliases=[ + "miniaturuspotentia", + "potentia", + "miniaturuspotentia12", + "mp12", + "mp", + "potentia12", + ], architecture=MODEL_ARCHITECTURE_LOOKUP["sd15"], - weights_location="https://huggingface.co/nitrosocke/redshift-diffusion/tree/80837fe18df05807861ab91c3bad3693c9342e4c/", + weights_location="https://huggingface.co/dataautogpt3/Miniaturus_PotentiaV1.2/tree/7ef539518ad5ad591c45f0b920050883f7e51e83/", defaults={"negative_prompt": DEFAULT_NEGATIVE_PROMPT}, ), # SDXL Weights @@ -338,6 +345,13 @@ class ControlConfig: weights_location="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster/resolve/4a946e610f670c4cd6cf46b8641fca190e4f56c4/diffusion_pytorch_model.safetensors", aliases=["qrcode"], ), + ControlConfig( + name="DensePose", + control_type="densepose", + config_path="configs/control-net-v15.yaml", + weights_location="https://huggingface.co/zcxu-eric/MagicAnimate/resolve/3d80ae8c50b289e55ee68deecc83afaab9c6a382/densepose_controlnet/diffusion_pytorch_model.safetensors?download=true", + aliases=["densepose"], + ), ] CONTROL_CONFIG_SHORTCUTS: dict[str, ControlConfig] = {} @@ -398,7 +412,7 @@ class SolverConfig: }, } SD21_UNCLIP_WEIGHTS_URL = "https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/resolve/e99f66a92bdcd1b0fb0d4b6a9b81b3b37d8bea44/image_encoder/model.fp16.safetensors" - +DENSEPOSE_REPO_URL = "https://huggingface.co/LayerNorm/DensePose-TorchScript-with-hint-image/resolve/65446422ea6225b9d72f93f3d2e2ad55e78b0b78" SOLVER_TYPE_NAMES = [s.aliases[0] for s in SOLVER_CONFIGS] diff --git a/imaginairy/img_processors/control_modes.py b/imaginairy/img_processors/control_modes.py index 3acfb9f0..699ccbd2 100644 --- a/imaginairy/img_processors/control_modes.py +++ b/imaginairy/img_processors/control_modes.py @@ -139,6 +139,24 @@ def create_pose_map(img_t: "Tensor"): return pose_t +def create_densepose_map(img_t: "Tensor") -> "Tensor": + import torch + + from imaginairy.img_processors.densepose import generate_densepose_image + + img_np = generate_densepose_image(img_t) + + img_t = ( + torch.tensor(img_np, dtype=torch.float) + if not isinstance(img_np, 
torch.Tensor) + else img_np.float() + ) + img_t /= 255.0 + img_t = img_t.permute(2, 0, 1).unsqueeze(0) + + return img_t + + def make_noise_disk(H: int, W: int, C: int, F: int) -> "np.ndarray": import cv2 import numpy as np @@ -312,4 +330,5 @@ def adaptive_threshold_binarize(img: "Tensor") -> "Tensor": "details": noop, "colorize": to_grayscale, "qrcode": adaptive_threshold_binarize, + "densepose": create_densepose_map, } diff --git a/imaginairy/img_processors/densepose.py b/imaginairy/img_processors/densepose.py new file mode 100644 index 00000000..da0a41b9 --- /dev/null +++ b/imaginairy/img_processors/densepose.py @@ -0,0 +1,653 @@ +# adapted from https://github.com/Mikubill/sd-webui-controlnet/blob/0b90426254debf78bfc09d88c064d2caf0935282/annotator/densepose/densepose.py +import logging +import math +from enum import IntEnum +from functools import lru_cache +from typing import List, Tuple, Union + +import cv2 +import numpy as np +import torch +from torch.nn import functional as F + +from imaginairy import config +from imaginairy.utils.downloads import get_cached_url_path + +logger = logging.getLogger(__name__) + +N_PART_LABELS = 24 + + +_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] +IntTupleBox = Tuple[int, int, int, int] + + +def safer_memory(x): + # Fix many MAC/AMD problems + return np.ascontiguousarray(x.copy()).copy() + + +def pad64(x): + return int(np.ceil(float(x) / 64.0) * 64 - x) + + +def resize_image_with_pad_torch( + img, resolution, upscale_method="bicubic", mode="constant" +): + B, C, H_raw, W_raw = img.shape + k = float(resolution) / float(min(H_raw, W_raw)) + H_target = int(math.ceil(float(H_raw) * k)) + W_target = int(math.ceil(float(W_raw) * k)) + + if k > 1: + img = F.interpolate( + img, + size=(H_target, W_target), + mode=upscale_method, + align_corners=False, + ) + else: + img = F.interpolate(img, size=(H_target, W_target), mode="area") + + H_pad, W_pad = pad64(H_target), pad64(W_target) + # print(f"image after resize but before padding: {img.shape}") + img_padded = F.pad(img, (0, W_pad, 0, H_pad), mode=mode) + + def remove_pad(x): + # print( + # f"remove_pad: x.shape: {x.shape}. 
H_target: {H_target}, W_target: {W_target}" + # ) + return safer_memory(x[:H_target, :W_target, ...]) + + return img_padded, remove_pad + + +def HWC3(x: np.ndarray) -> np.ndarray: + assert x.dtype == np.uint8 + if x.ndim == 2: + x = x[:, :, None] + assert x.ndim == 3 + H, W, C = x.shape + assert C == 1 or C == 3 or C == 4 + if C == 3: + return x + if C == 1: + return np.concatenate([x, x, x], axis=2) + if C == 4: + color = x[:, :, 0:3].astype(np.float32) + alpha = x[:, :, 3:4].astype(np.float32) / 255.0 + y = color * alpha + 255.0 * (1.0 - alpha) + y = y.clip(0, 255).astype(np.uint8) + return y + raise RuntimeError("unreachable") + + +@lru_cache(maxsize=1) +def get_densepose_model( + filename="densepose_r101_fpn_dl.torchscript", base_url=config.DENSEPOSE_REPO_URL +): + import torchvision # noqa + + url = f"{base_url}/{filename}" + torchscript_model_path = get_cached_url_path(url) + logger.info(f"Loading densepose model {url} from {torchscript_model_path}") + densepose = torch.jit.load(torchscript_model_path, map_location="cpu") + return densepose + + +@lru_cache(maxsize=1) +def get_segment_result_visualizer(): + return DensePoseMaskedColormapResultsVisualizer( + alpha=1, + data_extractor=_extract_i_from_iuvarr, + segm_extractor=_extract_i_from_iuvarr, + val_scale=255.0 / N_PART_LABELS, + ) + + +def mask_to_bbox(mask_img_t): + m = mask_img_t.nonzero() + if m.numel() == 0: + return None + y0 = torch.min(m[:, 0]) + y1 = torch.max(m[:, 0]) + x0 = torch.min(m[:, 1]) + x1 = torch.max(m[:, 1]) + return x0, y0, x1, y1 + + +def pad_bbox(bbox, max_height, max_width, pad=1): + x0, y0, x1, y1 = bbox + x0 = max(0, x0 - pad) + y0 = max(0, y0 - pad) + x1 = min(max_width, x1 + pad) + y1 = min(max_height, y1 + pad) + return x0, y0, x1, y1 + + +def square_bbox(bbox, max_height, max_width): + """ + Adjusts the bounding box to make it as close to a square as possible while + ensuring it does not exceed the max_size of the image and still includes + the original bounding box contents. + + Args: + - bbox: A tuple of (x0, y0, x1, y1) for the original bounding box. + - max_size: A tuple of (max_width, max_height) representing the image size. + + Returns: + - A tuple of (x0, y0, x1, y1) for the adjusted bounding box. 
+ """ + x0, y0, x1, y1 = bbox + width = x1 - x0 + height = y1 - y0 + + # Determine how much to adjust to make the bounding box square + if width > height: + diff = width - height + half_diff = diff // 2 + y0 = max(0, y0 - half_diff) + y1 = min(max_height, y1 + half_diff + (diff % 2)) # Add 1 if diff is odd + elif height > width: + diff = height - width + half_diff = diff // 2 + x0 = max(0, x0 - half_diff) + x1 = min(max_width, x1 + half_diff + (diff % 2)) # Add 1 if diff is odd + + # Ensure the bounding box is within the image boundaries + x0 = max(0, min(x0, max_width - 1)) + y0 = max(0, min(y0, max_height - 1)) + x1 = max(0, min(x1, max_width)) + y1 = max(0, min(y1, max_height)) + + return x0, y0, x1, y1 + + +def _np_to_t(img_np): + img_t = torch.from_numpy(img_np) / 255.0 + img_t = img_t.permute(2, 0, 1) + img_t = img_t.unsqueeze(0) + return img_t + + +def generate_densepose_image( + img: torch.Tensor, + detect_resolution=512, + upscale_method="bicubic", + cmap="viridis", + double_pass=False, +): + assert_tensor_float_11_bchw(img) + input_h, input_w = img.shape[-2:] + if double_pass: + first_densepose_img_np = _generate_densepose_image( + img, detect_resolution, upscale_method, cmap, adapt_viridis_bg=False + ) + first_densepose_img_t = _np_to_t(first_densepose_img_np) + # convert the densepose image into a mask (every color other than black is part of the mask) + densepose_img_mask = first_densepose_img_t[0].sum(dim=0) > 0 + # print(f"Mask shape: {densepose_img_mask.shape}") + # bbox = masks_to_boxes(densepose_img_mask.unsqueeze(0)).to(torch.uint8) + # crop image by bbox + bbox = mask_to_bbox(densepose_img_mask) + # print(f"bbox: {bbox}") + + if bbox is None: + densepose_np = first_densepose_img_np + else: + bbox = pad_bbox(bbox, max_height=input_h, max_width=input_w, pad=10) + # print(f"padded bbox: {bbox}") + bbox = square_bbox(bbox, max_height=input_h, max_width=input_w) + # print(f"boxed bbox: {bbox}") + x0, y0, x1, y1 = bbox + + cropped_img = img[:, :, y0:y1, x0:x1] + # print(f"cropped_img shape: {cropped_img.shape}") + + densepose_np = _generate_densepose_image( + cropped_img, + detect_resolution, + upscale_method, + cmap, + adapt_viridis_bg=False, + ) + # print(f"cropped densepose_np shape: {densepose_np.shape}") + # print( + # f"pasting into first_densepose_img_np shape: {first_densepose_img_np.shape} at {y0}:{y1}, {x0}:{x1}" + # ) + # paste denspose_np back into first_densepose_img_np using bbox + first_densepose_img_np[y0:y1, x0:x1] = densepose_np + densepose_np = first_densepose_img_np + else: + densepose_np = _generate_densepose_image( + img, detect_resolution, upscale_method, cmap, adapt_viridis_bg=False + ) + + if cmap == "viridis": + densepose_np[:, :, 0][densepose_np[:, :, 0] == 0] = 68 + densepose_np[:, :, 1][densepose_np[:, :, 1] == 0] = 1 + densepose_np[:, :, 2][densepose_np[:, :, 2] == 0] = 84 + + return densepose_np + + +def _generate_densepose_image( + img: torch.Tensor, + detect_resolution=512, + upscale_method="bicubic", + cmap="viridis", + adapt_viridis_bg=True, +) -> np.ndarray: + assert_tensor_float_11_bchw(img) + input_h, input_w = img.shape[-2:] + # print(f"input_h: {input_h}, input_w: {input_w}") + img, remove_pad = resize_image_with_pad_torch( + img, detect_resolution, upscale_method + ) + img = ((img + 1.0) * 127.5).to(torch.uint8) + assert_tensor_uint8_255_bchw(img) + H, W = img.shape[-2:] + # print(f"reduced input img size (with padding): h{H}xw{W}") + hint_image_canvas = np.zeros([H, W], dtype=np.uint8) + hint_image_canvas = 
np.tile(hint_image_canvas[:, :, np.newaxis], [1, 1, 3]) + densepose_model = get_densepose_model() + pred_boxes, coarse_seg, fine_segm, u, v = densepose_model(img.squeeze(0)) + densepose_results = list( + map( + densepose_chart_predictor_output_to_result, + pred_boxes, + coarse_seg, + fine_segm, + u, + v, + ) + ) + cmaps = { + "viridis": cv2.COLORMAP_VIRIDIS, + "parula": cv2.COLORMAP_PARULA, + "jet": cv2.COLORMAP_JET, + } + cv2_cmap = cmaps.get(cmap, cv2.COLORMAP_PARULA) + result_visualizer = get_segment_result_visualizer() + result_visualizer.mask_visualizer.cmap = cv2_cmap + hint_image = result_visualizer.visualize(hint_image_canvas, densepose_results) + hint_image = cv2.cvtColor(hint_image, cv2.COLOR_BGR2RGB) + + if cv2_cmap == cv2.COLORMAP_VIRIDIS and adapt_viridis_bg: + hint_image[:, :, 0][hint_image[:, :, 0] == 0] = 68 + hint_image[:, :, 1][hint_image[:, :, 1] == 0] = 1 + hint_image[:, :, 2][hint_image[:, :, 2] == 0] = 84 + # print(f"hint_image shape: {hint_image.shape}") + detected_map = remove_pad(HWC3(hint_image)) + # print(f"detected_map shape (padding removed): {detected_map.shape}") + # print(f"Resizing detected_map to original size: {input_w}x{input_h}") + # if map is smaller than input size, scale it up + if detected_map.shape[0] < input_h or detected_map.shape[1] < input_w: + detected_map = cv2.resize( + detected_map, (input_w, input_h), interpolation=cv2.INTER_NEAREST + ) + else: + # scale it down + detected_map = cv2.resize( + detected_map, (input_w, input_h), interpolation=cv2.INTER_AREA + ) + # print(f"detected_map shape (resized to original): {detected_map.shape}") + return detected_map + + +def assert_ndarray_uint8_255_hwc(img): + # assert input_image is ndarray with colors 0-255 + assert img.dtype == np.uint8 + assert img.ndim == 3 + assert img.shape[2] == 3 + assert img.max() <= 255 + assert img.min() >= 0 + + +def assert_tensor_uint8_255_bchw(img): + # assert input_image is a PyTorch tensor with colors 0-255 and dimensions (C, H, W) + assert isinstance(img, torch.Tensor) + assert img.dtype == torch.uint8 + assert img.ndim == 4 + assert img.shape[1] == 3 + assert img.max() <= 255 + assert img.min() >= 0 + + +def assert_tensor_float_11_bchw(img): + # assert input_image is a PyTorch tensor with colors -1 to 1 and dimensions (C, H, W) + if not isinstance(img, torch.Tensor): + msg = f"Input image must be a PyTorch tensor, but got {type(img)}" + raise TypeError(msg) + + if img.dtype not in (torch.float32, torch.float64, torch.float16): + msg = f"Input image must be a float tensor, but got {img.dtype}" + raise ValueError(msg) + + if img.ndim != 4: + msg = f"Input image must be 4D (B, C, H, W), but got {img.ndim}D" + raise ValueError(msg) + + if img.shape[1] != 3: + msg = f"Input image must have 3 channels, but got {img.shape[1]}" + raise ValueError(msg) + if img.max() > 1 or img.min() < -1: + msg = f"Input image must have values in [-1, 1], but got {img.min()} .. {img.max()}" + raise ValueError(msg) + + +class BoxMode(IntEnum): + """ + Enum of different ways to represent a box. + """ + + XYXY_ABS = 0 + """ + (x0, y0, x1, y1) in absolute floating points coordinates. + The coordinates in range [0, width or height]. + """ + XYWH_ABS = 1 + """ + (x0, y0, w, h) in absolute floating points coordinates. + """ + XYXY_REL = 2 + """ + Not yet supported! + (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. + """ + XYWH_REL = 3 + """ + Not yet supported! + (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. 
+ """ + XYWHA_ABS = 4 + """ + (xc, yc, w, h, a) in absolute floating points coordinates. + (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. + """ + + @staticmethod + def convert( + box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode" + ) -> _RawBoxType: + """ + Args: + box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 + from_mode, to_mode (BoxMode) + + Returns: + The converted box of the same type. + """ + if from_mode == to_mode: + return box + + original_type = type(box) + is_numpy = isinstance(box, np.ndarray) + single_box = isinstance(box, (list, tuple)) + if single_box: + assert len(box) == 4 or len(box) == 5, ( + "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," + " where k == 4 or 5" + ) + arr = torch.tensor(box)[None, :] + else: + # avoid modifying the input box + arr = torch.from_numpy(np.asarray(box)).clone() if is_numpy else box.clone() # type: ignore + + assert to_mode not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" + assert from_mode not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" + + if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: + assert ( + arr.shape[-1] == 5 + ), "The last dimension of input shape must be 5 for XYWHA format" + original_dtype = arr.dtype + arr = arr.double() + + w = arr[:, 2] + h = arr[:, 3] + a = arr[:, 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + new_w = c * w + s * h + new_h = c * h + s * w + + # convert center to top-left corner + arr[:, 0] -= new_w / 2.0 + arr[:, 1] -= new_h / 2.0 + # bottom-right corner + arr[:, 2] = arr[:, 0] + new_w + arr[:, 3] = arr[:, 1] + new_h + + arr = arr[:, :4].to(dtype=original_dtype) + elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: + original_dtype = arr.dtype + arr = arr.double() + arr[:, 0] += arr[:, 2] / 2.0 + arr[:, 1] += arr[:, 3] / 2.0 + angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) + arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) # type: ignore + else: + if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: + arr[:, 2] += arr[:, 0] + arr[:, 3] += arr[:, 1] + elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: + arr[:, 2] -= arr[:, 0] + arr[:, 3] -= arr[:, 1] + else: + msg = f"Conversion from BoxMode {from_mode} to {to_mode} is not supported yet" + raise NotImplementedError(msg) + + if single_box: + return original_type(arr.flatten().tolist()) + if is_numpy: + return arr.numpy() + else: + return arr + + +class MatrixVisualizer: + def __init__( + self, + inplace=True, + cmap=cv2.COLORMAP_PARULA, + val_scale=1.0, + alpha=0.7, + interp_method_matrix=cv2.INTER_LINEAR, + interp_method_mask=cv2.INTER_NEAREST, + ): + self.inplace = inplace + self.cmap = cmap + self.val_scale = val_scale + self.alpha = alpha + self.interp_method_matrix = interp_method_matrix + self.interp_method_mask = interp_method_mask + + def visualize(self, image_bgr: np.ndarray, mask: np.ndarray, matrix, bbox_xywh): + self._check_image(image_bgr) + self._check_mask_matrix(mask, matrix) + image_target_bgr = image_bgr if self.inplace else image_bgr * 0 + + x, y, w, h = (int(v) for v in bbox_xywh) + if w <= 0 or h <= 0: + return image_bgr + mask, matrix = self._resize(mask, matrix, w, h) + mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3]) + matrix_scaled = 
matrix.astype(np.float32) * self.val_scale + _EPSILON = 1e-6 + if np.any(matrix_scaled > 255 + _EPSILON): + logger = logging.getLogger(__name__) + logger.warning( + f"Matrix has values > {255 + _EPSILON} after " + f"scaling, clipping to [0..255]" + ) + matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8) + matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap) + matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg] + image_target_bgr[y : y + h, x : x + w, :] = ( + image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + + matrix_vis * self.alpha + ) + return image_target_bgr.astype(np.uint8) + + def _resize(self, mask, matrix, w, h): + if (w != mask.shape[1]) or (h != mask.shape[0]): + mask = cv2.resize(mask, (w, h), self.interp_method_mask) + if (w != matrix.shape[1]) or (h != matrix.shape[0]): + matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix) + return mask, matrix + + def _check_image(self, image_rgb): + assert len(image_rgb.shape) == 3 + assert image_rgb.shape[2] == 3 + assert image_rgb.dtype == np.uint8 + + def _check_mask_matrix(self, mask, matrix): + assert len(matrix.shape) == 2 + assert len(mask.shape) == 2 + assert mask.dtype == np.uint8 + + +class DensePoseMaskedColormapResultsVisualizer: + def __init__( + self, + data_extractor, + segm_extractor, + inplace=True, + cmap=cv2.COLORMAP_PARULA, + alpha=0.7, + val_scale=1.0, + ): + self.mask_visualizer = MatrixVisualizer( + inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha + ) + self.data_extractor = data_extractor + self.segm_extractor = segm_extractor + + def visualize( + self, + image_bgr: np.ndarray, + results, + ) -> np.ndarray: + for result in results: + boxes_xywh, labels, uv = result + iuv_array = torch.cat((labels[None].type(torch.float32), uv * 255.0)).type( + torch.uint8 + ) + self.visualize_iuv_arr(image_bgr, iuv_array.cpu().numpy(), boxes_xywh) + return image_bgr + + def visualize_iuv_arr(self, image_bgr, iuv_arr: np.ndarray, bbox_xywh) -> None: + matrix = self.data_extractor(iuv_arr) + segm = self.segm_extractor(iuv_arr) + mask = (segm > 0).astype(np.uint8) + self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) + + +def _extract_i_from_iuvarr(iuv_arr): + return iuv_arr[0, :, :] + + +def _extract_u_from_iuvarr(iuv_arr): + return iuv_arr[1, :, :] + + +def _extract_v_from_iuvarr(iuv_arr): + return iuv_arr[2, :, :] + + +def make_int_box(box: torch.Tensor) -> IntTupleBox: + int_box = [0, 0, 0, 0] + int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist()) + return int_box[0], int_box[1], int_box[2], int_box[3] + + +def densepose_chart_predictor_output_to_result( + boxes: torch.Tensor, coarse_segm: torch.Tensor, fine_segm, u, v +): + boxes = boxes.unsqueeze(0) + coarse_segm = coarse_segm.unsqueeze(0) + fine_segm = fine_segm.unsqueeze(0) + u = u.unsqueeze(0) + v = v.unsqueeze(0) + boxes_xyxy_abs = boxes.clone() + boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + box_xywh = make_int_box(boxes_xywh_abs[0]) # type: ignore + + labels = resample_fine_and_coarse_segm_tensors_to_bbox( + fine_segm, coarse_segm, box_xywh + ).squeeze(0) + uv = resample_uv_tensors_to_bbox(u, v, labels, box_xywh) + return box_xywh, labels, uv + + +def resample_fine_and_coarse_segm_tensors_to_bbox( + fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox +): + """ + Resample fine and coarse segmentation tensors to the given + bounding box and derive labels for each pixel of the bounding box + + Args: 
+ fine_segm: float tensor of shape [1, C, Hout, Wout] + coarse_segm: float tensor of shape [1, K, Hout, Wout] + box_xywh_abs (tuple of 4 int): bounding box given by its upper-left + corner coordinates, width (W) and height (H) + Return: + Labels for each pixel of the bounding box, a long tensor of size [1, H, W] + """ + x, y, w, h = box_xywh_abs + w = max(int(w), 1) + h = max(int(h), 1) + # coarse segmentation + coarse_segm_bbox = F.interpolate( + coarse_segm, + (h, w), + mode="bilinear", + align_corners=False, + ).argmax(dim=1) + # combined coarse and fine segmentation + labels = ( + F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax( + dim=1 + ) + * (coarse_segm_bbox > 0).long() + ) + return labels + + +def resample_uv_tensors_to_bbox( + u: torch.Tensor, + v: torch.Tensor, + labels: torch.Tensor, + box_xywh_abs: IntTupleBox, +) -> torch.Tensor: + """ + Resamples U and V coordinate estimates for the given bounding box + + Args: + u (tensor [1, C, H, W] of float): U coordinates + v (tensor [1, C, H, W] of float): V coordinates + labels (tensor [H, W] of long): labels obtained by resampling segmentation + outputs for the given bounding box + box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs + Return: + Resampled U and V coordinates - a tensor [2, H, W] of float + """ + x, y, w, h = box_xywh_abs + w = max(int(w), 1) + h = max(int(h), 1) + u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False) + v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False) + uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device) + for part_id in range(1, u_bbox.size(1)): + uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id] + uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id] + return uv diff --git a/imaginairy/utils/downloads.py b/imaginairy/utils/downloads.py index d048139d..927a007a 100644 --- a/imaginairy/utils/downloads.py +++ b/imaginairy/utils/downloads.py @@ -91,7 +91,9 @@ def huggingface_cached_path(url: str) -> str: dest_path = try_to_load_from_cache( repo_id=repo, revision=commit_hash, filename=filepath ) - if not dest_path: + from huggingface_hub.file_download import _CACHED_NO_EXIST + + if not dest_path or dest_path == _CACHED_NO_EXIST: check_huggingface_url_authorized(url) token = HfFolder.get_token() logger.info(f"Downloading {url} from huggingface") diff --git a/tests/data/cuda-tests.csv b/tests/data/cuda-tests.csv index 7a91904d..aa8e0afd 100644 --- a/tests/data/cuda-tests.csv +++ b/tests/data/cuda-tests.csv @@ -1,3 +1,4 @@ +tests/img_processors/test_control_modes.py::test_control_images[densepose-create_densepose_map] tests/img_processors/test_control_modes.py::test_control_images[depth-create_depth_map] tests/img_processors/test_control_modes.py::test_control_images[hed-create_hed_edges] tests/img_processors/test_control_modes.py::test_control_images[normal-create_normal_map] diff --git a/tests/expected_output/test_control_images[densepose-create_densepose_map]_.png b/tests/expected_output/test_control_images[densepose-create_densepose_map]_.png new file mode 100644 index 0000000000000000000000000000000000000000..1cf4c30f3ea3d153b06ec036acfb948f64b115ab GIT binary patch literal 5721 zcmeHLdpJ~G+h21qM$w24W+u&4M5TwKp_3V>kaH!6hfFT5}B9)4A8l{I0 zNU0oRB%~sP5IGc;smXbm3^V)P>HXgCeZTAa4%heh`^UcaUTfX!UTfX!x9+`uzq9tX z<_Zf)3jhELmKLUa06-(3Xn_CuiX@p70I-Z|X==1DJY%@mF44W6RXy&vL$mne=UjXA z!8Fnql|9;tck=CX(Fyh!P2OIu!QH;k`FcE+=C|7MXv&G}p=~_u?b1E_*SY8!doEVL 
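Usage sketch (not part of the diff above): the snippet below shows one way to exercise the new "densepose" control preprocessor from Python once this patch is applied. It assumes imaginairy and its dependencies (torch, torchvision, opencv-python, Pillow) are installed and that network access is available, since the first call downloads the DensePose TorchScript model from DENSEPOSE_REPO_URL. File names here are placeholders; the tensor conventions follow the asserts in densepose.py (float BCHW in [-1, 1] in, float BCHW in [0, 1] out).

# Minimal sketch: run the densepose preprocessor on an image file.
# "person.jpg" and the output path are placeholder names.
import numpy as np
import torch
from PIL import Image

from imaginairy.img_processors.control_modes import create_densepose_map

# Load an RGB image and convert it to the float BCHW [-1, 1] layout that
# generate_densepose_image() asserts on.
img_np = np.array(Image.open("person.jpg").convert("RGB"))
img_t = torch.from_numpy(img_np).float().permute(2, 0, 1).unsqueeze(0) / 127.5 - 1.0

# Returns a 1x3xHxW float tensor in [0, 1]: the DensePose hint image that the
# "densepose" ControlConfig passes to the controlnet as its control signal.
hint_t = create_densepose_map(img_t)

hint_np = (hint_t[0].permute(1, 2, 0) * 255).round().to(torch.uint8).numpy()
Image.fromarray(hint_np).save("densepose_hint.png")

From the CLI, the same preprocessor is reached through the new "densepose" choice added to the control-mode option in imaginairy/cli/imagine.py.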