From 9e8efe027795f09bec24e3c06cd807e61b4b838d Mon Sep 17 00:00:00 2001 From: Florian Bruggisser Date: Wed, 5 Jun 2024 10:26:57 +0200 Subject: [PATCH] Fix DetectionRandomAffine target-size to be in row-column format (#2012) * Fix the target size to be in row-column format and work with all aspect ratios Fix the target size to be in row-column format and work with all aspect ratios signed * added more precise comment and unpacked values for more clarity * clarified docstring in random_affine as well --- src/super_gradients/training/transforms/transforms.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/super_gradients/training/transforms/transforms.py b/src/super_gradients/training/transforms/transforms.py index f18b2b9028..18bf9a437d 100644 --- a/src/super_gradients/training/transforms/transforms.py +++ b/src/super_gradients/training/transforms/transforms.py @@ -609,7 +609,7 @@ class DetectionRandomAffine(AbstractDetectionTransform, LegacyDetectionTransform (center-translate, center+translate) :param scales: Values for random rescale, when float the random values are drawn uniformly from (1-scales, 1+scales) :param shear: Degrees for random shear, when float the random values are drawn uniformly from (-shear, shear) - :param target_size: Desired output shape. + :param target_size: Desired output shape tuple formatted as (rows, cols). :param filter_box_candidates: Whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio (default=False). :param wh_thr: Edge size threshold when filter_box_candidates = True. Bounding oxes with edges smaller than this values will be filtered out. @@ -660,7 +660,7 @@ def apply_to_sample(self, sample: DetectionSample) -> DetectionSample: targets=targets, targets_seg=None, crowd_targets=crowd_targets, - target_size=self.target_size or tuple(reversed(sample.image.shape[:2])), + target_size=self.target_size or tuple(sample.image.shape[:2]), degrees=self.degrees, translate=self.translate, scales=self.scale, @@ -1483,7 +1483,7 @@ def random_affine( :param img: Input image of shape [h, w, c] :param targets: Input target :param targets_seg: Targets derived from segmentation masks - :param target_size: Desired output shape + :param target_size: Desired output shape tuple formatted as (rows, cols). :param degrees: Degrees for random rotation, when float the random values are drawn uniformly from (-degrees, degrees). :param translate: Translate size (in pixels) for random translation, when float the random values @@ -1510,7 +1510,8 @@ def random_affine( M = get_affine_matrix(img.shape[:2], target_size, degrees, translate, scales, shear) - img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(border_value, border_value, border_value)) + rows, cols = target_size[:2] + img = cv2.warpAffine(img, M, dsize=(cols, rows), borderValue=(border_value, border_value, border_value)) # Transform label coordinates if len(targets) > 0: