From bd65b8926097b9222416e2dc3a8421626419620a Mon Sep 17 00:00:00 2001
From: Philipp Donn <30521025+phinik@users.noreply.github.com>
Date: Fri, 29 Apr 2022 20:10:35 +0200
Subject: [PATCH 1/4] utils.py:

- fixed rescale_segmentations(..) --> segmentations are now upscaled before
  padding is removed instead of the other way around
---
 yoeo/detect.py      |  2 +-
 yoeo/utils/utils.py | 70 +++++++++++++++++++--------------------------
 2 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/yoeo/detect.py b/yoeo/detect.py
index ec777c4..0b58732 100755
--- a/yoeo/detect.py
+++ b/yoeo/detect.py
@@ -100,7 +100,7 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5):
         detections, segmentations = model(input_img)
         detections = non_max_suppression(detections, conf_thres, nms_thres)
         detections = rescale_boxes(detections[0], img_size, image.shape[0:2])
-        segmentations = rescale_segmentations(segmentations, img_size, image.shape[0:2])
+        segmentations = rescale_segmentations(segmentations, image.shape[0:2])
 
     return detections.numpy(), segmentations.cpu().detach().numpy()
 
diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py
index e1dbc13..4c05ba0 100644
--- a/yoeo/utils/utils.py
+++ b/yoeo/utils/utils.py
@@ -101,61 +101,51 @@ def calculate_applied_padding_per_dimension(current_dim: int, original_shape: Tu
     return int(pad_h), int(pad_w)
 
 
-def rescale_segmentations(segmentations, current_dim: int, original_shape: Tuple[int, int]):
+def rescale_segmentations(segmentation, original_shape: Tuple[int, int]):
     """
-    Removes padding and interpolates segmentations to orginal image shape.
+    Interpolate segmentation back to orginal image size and remove paddings.
 
-    :param segmentations: YOEO segmentation output
-    :type segmentations: torch.Tensor with shape (1, current_dim, current_dim)
-    :param current_dim: segmentation output dimension (1D)
-    :type current_dim: int
+    :param segmentation: YOEO segmentation output
+    :type segmentation: torch.Tensor with shape (1, height, width) and height == width
     :param orginal_shape: orginal image shape (2D)
     :type orgiginal_shape: Tuple[int, int] (height, width)
     """
-
-    padding = calculate_applied_padding_per_side(current_dim, original_shape)
-    unpadded_segmentations = remove_applied_padding(segmentations, current_dim, padding)
-    return interpolate_to_original_shape(unpadded_segmentations, original_shape)
+    rescaled_img = rescale_to_original_size(segmentation, max(original_shape))
+    return remove_applied_padding(rescaled_img, original_shape)
 
 
-def calculate_applied_padding_per_side(current_dim: int, original_shape: Tuple[int, int]) -> Tuple[int, int]:
+def rescale_to_original_size(segmentation, original_max_dim: int):
     """
-    Calculate the amount of padding that was added to each side of each image dimension, i. e.
-    current_dim = padding_in_1st_dim + original_shape[0] + padding_in_1st_dim
-    current_dim = padding_in_2nd_dim + original_shape[1] + padding_in_2nd_dim
-
-    :param current_dim: segmentation output dimension (1D)
-    :type current_dim: int
-    :param orginal_shape: orginal image shape (2D)
-    :type orgiginal_shape: Tuple[int, int] (height, width)
-    :return: Tuple containing paddings (height, width)
-    :rtype: Tuple[int, int]
+    :param segmentation: YOEO segmentation output
+    :type segmentation: torch.Tensor with shape (1, height, width) and height == width
+    :return: YOEO segmentation output with original image size
+    :rtype: torch.Tensor with shape (1, original_max_dim, original_max_dim)
     """
-    pad_h, pad_w = calculate_applied_padding_per_dimension(current_dim, original_shape)
-    return int(pad_h // 2), int(pad_w // 2)
-
-def remove_applied_padding(segmentations, current_dim: int, padding: Tuple[int, int]):
+
+    return nn.functional.interpolate(
+        segmentation.unsqueeze(0).to(torch.uint8),  # to(torch.uint8) will be unneccessary as soon as segmentations are output as uint8
+        size=(original_max_dim, original_max_dim),
+        mode="nearest-exact"
+    ).squeeze(0)
+
+
+def remove_applied_padding(segmentation, original_shape: Tuple[int, int]):
     """
-    :param segmentations: YOEO segmentation output
-    :type segmentations: torch.Tensor with shape (1, current_dim, current_dim)
+    Remove any applied padding.
+
+    :param segmentation: YOEO segmentation output
+    :type segmentation: torch.Tensor with shape (1, height, width) and height == width
     :return: unpadded YOEO segmentation output
-    :rtype: torch.Tensor
+    :rtype: torch.Tensor (1, *original_shape)
     """
-    pad_h, pad_w = padding
-    return segmentations[..., pad_h:current_dim-pad_h, pad_w:current_dim-pad_w]
-
+    current_shape = segmentation.size(dim=1)
+    original_height, original_width = original_shape
 
-def interpolate_to_original_shape(segmentations, original_shape: Tuple[int, int]):
-    """
-    :param segmentations: YOEO segmentation output
-    :type segmentations: torch.Tensor with shape (1, current_dim, current_dim)
-    :return: interpolated YOEO yegmentation output with original image shape
-    :rtype: torch.Tensor with shape (1, *original_shape)
-    """
-
-    return nn.functional.interpolate(segmentations.unsqueeze(0).type(torch.ByteTensor), size=original_shape, mode="nearest").squeeze(0)
+    padding_h = int(max(0, original_width - original_height) // 2)
+    padding_w = int(max(0, original_height - original_width) // 2)
+
+    return segmentation[..., padding_h:current_shape-padding_h, padding_w:current_shape-padding_w]
 
 
 def xywh2xyxy(x):

From eac522d5de4aab08f8915de6823a65cc65ef0687 Mon Sep 17 00:00:00 2001
From: Philipp Donn <30521025+phinik@users.noreply.github.com>
Date: Fri, 13 May 2022 12:26:57 +0200
Subject: [PATCH 2/4] detect.py: refactoring

utils.py: fix rescale_boxes(...)
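---

Note, kept below the `---` marker so that `git am` drops it: a worked example
of the two-step box rescaling with made-up numbers. For a 720x1280 image
padded to 1280x1280 and processed at 416x416, an x coordinate of 208 scales
to 208 * (1280 / 416) = 640 on the padded image; the top padding of
(1280 - 720) // 2 = 280 is then subtracted from the y coordinates only,
mirroring unpad_box_coordinates(..) below. A minimal standalone sketch (the
helper name and values are invented for illustration, assuming the
(x1, y1, x2, y2, conf, class) column layout from the docstrings):

    import torch

    def rescale_boxes_sketch(boxes, output_img_size, original_img_size):
        # illustrative sketch, not the library function
        # 1. scale all four coordinates up to the padded, square original image
        boxes[:, 0:4] *= max(original_img_size) / output_img_size
        # 2. subtract the one-sided padding (left for portrait, top for landscape)
        pad_left = max(original_img_size[0] - original_img_size[1], 0) // 2
        pad_top = max(original_img_size[1] - original_img_size[0], 0) // 2
        boxes[:, [0, 2]] -= pad_left
        boxes[:, [1, 3]] -= pad_top
        return boxes

    boxes = torch.tensor([[208., 208., 250., 250., 0.9, 0.]])
    print(rescale_boxes_sketch(boxes, 416, (720, 1280)))
    # roughly: tensor([[640.0000, 360.0000, 769.2308, 489.2308, 0.9000, 0.0000]])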
 yoeo/detect.py      |   4 +-
 yoeo/utils/utils.py | 170 ++++++++++++++++++++++++++++----------------
 2 files changed, 112 insertions(+), 62 deletions(-)

diff --git a/yoeo/detect.py b/yoeo/detect.py
index 0b58732..dbb3418 100755
--- a/yoeo/detect.py
+++ b/yoeo/detect.py
@@ -17,7 +17,7 @@ from imgaug.augmentables.segmaps import SegmentationMapsOnImage
 
 from yoeo.models import load_model
-from yoeo.utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentations
+from yoeo.utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentation
 from yoeo.utils.datasets import ImageFolder
 from yoeo.utils.transforms import Resize, DEFAULT_TRANSFORMS
 
@@ -100,7 +100,7 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5):
         detections, segmentations = model(input_img)
         detections = non_max_suppression(detections, conf_thres, nms_thres)
         detections = rescale_boxes(detections[0], img_size, image.shape[0:2])
-        segmentations = rescale_segmentations(segmentations, image.shape[0:2])
+        segmentations = rescale_segmentation(segmentations, image.shape[0:2])
 
     return detections.numpy(), segmentations.cpu().detach().numpy()
 
diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py
index 4c05ba0..807ad52 100644
--- a/yoeo/utils/utils.py
+++ b/yoeo/utils/utils.py
@@ -64,89 +64,138 @@ def weights_init_normal(m):
         nn.init.constant_(m.bias.data, 0.0)
 
 
-def rescale_boxes(boxes, current_dim, original_shape):
+def rescale_boxes(boxes, output_img_size, original_img_size):
     """
-    Rescales bounding boxes to the original shape
+    Rescale bounding boxes as if they were calculated on the original, non-padded image.
+    1. bounding boxes are scaled as if they were calculated on the (square) padded original image.
+    2. padding is subtracted, thereby shifting the boxes as if they were calculated on the original,
+       non-padded image.
+
+    :param boxes: detection output
+    :type boxes: torch.Tensor with shape(#boxes, 6)
+    :param output_img_size: size of the image for which the network calculates the bounding boxes (1D)
+    :type output_img_size: int
+    :param original_img_size: size of the original image (height, width)
+    :type original_img_size: Tuple[int, int] (height, width)
+    :return: rescaled detection output
+    :rtype: torch.Tensor with shape(#boxes, 6)
     """
-    orig_h, orig_w = original_shape
-    pad_y, pad_x = calculate_applied_padding_per_dimension(current_dim, original_shape)
-
-    # Image height and width after padding is removed
-    unpad_h = current_dim - pad_y
-    unpad_w = current_dim - pad_x
-
-    # Rescale bounding boxes to dimension of original image
-    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
-    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
-    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
-    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
+
+    rescaled_boxes = rescale_boxes_to_original_padded_img_size(boxes, output_img_size, max(original_img_size))
+    rescaled_boxes = unpad_box_coordinates(rescaled_boxes, original_img_size)
+
+    return rescaled_boxes
+
+
+def rescale_boxes_to_original_padded_img_size(boxes, output_img_size: int, original_max_size: int):
+    """
+    Rescale bounding boxes as if they were calculated on the (square) padded original image.
+
+    :param boxes: detection output
+    :type boxes: torch.Tensor with shape(#boxes, 6)
+    :param output_img_size: size of the image for which the network calculates the bounding boxes (1D)
+    :type output_img_size: int
+    :param original_max_size: maximum size of the original image (1)
+    :type original_max_size: int
+    :return: rescaled detection output
+    :rtype: torch.Tensor with shape(#boxes, 6)
+    """
+
+    scale_factor = original_max_size / output_img_size
+    boxes[:, 0:4] = boxes[:, 0:4] * scale_factor
+
+    return boxes
 
 
-def calculate_applied_padding_per_dimension(current_dim: int, original_shape: Tuple[int, int]) -> Tuple[int, int]:
+def unpad_box_coordinates(boxes, original_img_size: Tuple[int, int]):
     """
-    Calculate the total amount of padding that was added to each image dimension, i. e.
-    current_dim = original_shape[0] + padding_in_1st_dim = original_shape[1] + padding_in_2nd_dim
-
-    :param current_dim: segmentation output dimension (1D)
-    :type current_dim: int
-    :param orginal_shape: orginal image shape (2D)
-    :type orgiginal_shape: Tuple[int, int] (height, width)
-    :return: Tuple containing paddings (height, width)
-    :rtype: Tuple[int, int]
+    Subtract padding, thereby shifting the boxes as if they were calculated on the original,
+    non-padded image.
+
+    :param boxes: detection output
+    :type boxes: torch.Tensor with shape(#boxes, 6)
+    :param original_img_size: size of the original image (height, width)
+    :type original_img_size: Tuple[int, int] (height, width)
+    :return: rescaled detection output
+    :rtype: torch.Tensor with shape(#boxes, 6)
     """
-    orig_h, orig_w = original_shape
-    pad_w = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
-    pad_h = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
-    return int(pad_h), int(pad_w)
+
+    padding_left = max(original_img_size[0] - original_img_size[1], 0) // 2
+    padding_top = max(original_img_size[1] - original_img_size[0], 0) // 2
+
+    boxes[:, 0] = boxes[:, 0] - padding_left
+    boxes[:, 1] = boxes[:, 1] - padding_top
+    boxes[:, 2] = boxes[:, 2] - padding_left
+    boxes[:, 3] = boxes[:, 3] - padding_top
+
+    return boxes
 
 
-def rescale_segmentations(segmentation, original_shape: Tuple[int, int]):
+def rescale_segmentation(segmentation, original_img_size: Tuple[int, int]):
     """
-    Interpolate segmentation back to orginal image size and remove paddings.
-
-    :param segmentation: YOEO segmentation output
+    Interpolate segmentation back to original image size and remove paddings.
+    1. segmentation is rescaled as if it was calculated on the original, padded image size
+    2. paddings are removed, thereby restoring the original image size
+
+    :param segmentation: segmentation output
     :type segmentation: torch.Tensor with shape (1, height, width) and height == width
-    :param orginal_shape: orginal image shape (2D)
-    :type orgiginal_shape: Tuple[int, int] (height, width)
+    :param original_img_size: size of the original image (height, width)
+    :type original_img_size: Tuple[int, int] (height, width)
+    :return: rescaled segmentation
+    :rtype: torch.Tensor with shape (1, original_img_size[0], original_img_size[1])
     """
-    rescaled_img = rescale_to_original_size(segmentation, max(original_shape))
-    return remove_applied_padding(rescaled_img, original_shape)
+
+    rescaled_seg = rescale_segmentation_to_original_padded_img_size(segmentation, max(original_img_size))
+    rescaled_seg = remove_applied_padding(rescaled_seg, original_img_size)
+
+    return rescaled_seg
 
 
-def rescale_to_original_size(segmentation, original_max_dim: int):
+def rescale_segmentation_to_original_padded_img_size(segmentation, original_max_size: int):
     """
-    :param segmentation: YOEO segmentation output
+    Rescale the segmentation as if it was calculated on the original, padded image size using
+    "nearest-exact" interpolation.
+
+    :param segmentation: segmentation output
     :type segmentation: torch.Tensor with shape (1, height, width) and height == width
-    :return: YOEO segmentation output with original image size
-    :rtype: torch.Tensor with shape (1, original_max_dim, original_max_dim)
+    :param original_max_size: maximum size of the original image (1)
+    :type original_max_size: int
+    :return: segmentation output with original, padded image size
+    :rtype: torch.Tensor with shape (1, original_max_size, original_max_size)
     """
-
+
     return nn.functional.interpolate(
-        segmentation.unsqueeze(0).to(torch.uint8),  # to(torch.uint8) will be unneccessary as soon as segmentations are output as uint8
-        size=(original_max_dim, original_max_dim),
+        segmentation.unsqueeze(0),
+        size=(original_max_size, original_max_size),
         mode="nearest-exact"
     ).squeeze(0)
-
-
-def remove_applied_padding(segmentation, original_shape: Tuple[int, int]):
+
+
+def remove_applied_padding(segmentation, original_img_size: Tuple[int, int]):
     """
-    Remove any applied padding.
+    Remove paddings, thereby restoring the original image size
 
-    :param segmentation: YOEO segmentation output
+    :param segmentation: segmentation output
     :type segmentation: torch.Tensor with shape (1, height, width) and height == width
-    :return: unpadded YOEO segmentation output
-    :rtype: torch.Tensor (1, *original_shape)
+    :param original_img_size: original image size (height, width)
+    :type original_img_size: Tuple[int, int]
+    :return: unpadded segmentation output
+    :rtype: torch.Tensor with shape (1, original_img_size[0], original_img_size[1])
     """
-
-    current_shape = segmentation.size(dim=1)
-    original_height, original_width = original_shape
-
-    padding_h = int(max(0, original_width - original_height) // 2)
-    padding_w = int(max(0, original_height - original_width) // 2)
-
-    return segmentation[..., padding_h:current_shape-padding_h, padding_w:current_shape-padding_w]
-
+
+    current_size = segmentation.size(dim=1)
+    original_height, original_width = original_img_size
+
+    total_vertical_padding = max(0, original_width - original_height)
+    total_horizontal_padding = max(0, original_height - original_width)
+
+    padding_top = total_vertical_padding // 2
+    padding_bottom = total_vertical_padding - padding_top
+    padding_left = total_horizontal_padding // 2
+    padding_right = total_horizontal_padding - padding_left
+
+    return segmentation[..., padding_top:current_size - padding_bottom, padding_left:current_size - padding_right]
+
 
 def xywh2xyxy(x):
     y = x.new(x.shape)
@@ -281,7 +330,8 @@ def get_batch_statistics(outputs, targets, iou_threshold):
             continue
 
         # Filter target_boxes by pred_label so that we only match against boxes of our own label
-        filtered_target_position, filtered_targets = zip(*filter(lambda x: target_labels[x[0]] == pred_label, enumerate(target_boxes)))
+        filtered_target_position, filtered_targets = zip(
+            *filter(lambda x: target_labels[x[0]] == pred_label, enumerate(target_boxes)))
 
         # Find the best matching target for our predicted box
         iou, box_filtered_index = bbox_iou(pred_box.unsqueeze(0), torch.stack(filtered_targets)).max(0)
@@ -444,7 +494,7 @@ def seg_iou(pred, target, classes):
     pred = pred.view(-1)
     target = target.view(-1)
 
-    for cls in range(classes): 
+    for cls in range(classes):
         pred_inds = pred == cls
         target_inds = target == cls
         intersection = (pred_inds[target_inds]).long().sum().data.cpu().item()  # Cast to long to prevent overflows

From f56297121e7cdb7d7a2cfef6398ac2a2182b9c16 Mon Sep 17 00:00:00 2001
From: Philipp Donn <30521025+phinik@users.noreply.github.com>
Date: Fri, 13 May 2022 12:34:54 +0200
Subject: [PATCH 3/4] utils.py: renamed removed_applied_padding(..) into
 unpad_segmentation(..)
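---

Note, kept below the `---` marker so that `git am` drops it: for the same
made-up 720x1280 example as in the previous patch, the interpolated
segmentation is (1, 1280, 1280) and the slice computed by the renamed
unpad_segmentation(..) keeps rows 280:1000 and all 1280 columns. A tiny
illustrative check (values invented):

    import torch

    seg = torch.zeros(1, 1280, 1280)
    pad_top, pad_bottom = 280, 280  # (1280 - 720) split over top and bottom
    pad_left, pad_right = 0, 0      # no horizontal padding for a landscape input
    unpadded = seg[..., pad_top:1280 - pad_bottom, pad_left:1280 - pad_right]
    print(unpadded.shape)           # torch.Size([1, 720, 1280])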
 yoeo/utils/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py
index 807ad52..c384d6b 100644
--- a/yoeo/utils/utils.py
+++ b/yoeo/utils/utils.py
@@ -146,7 +146,7 @@ def rescale_segmentation(segmentation, original_img_size: Tuple[int, int]):
     """
 
     rescaled_seg = rescale_segmentation_to_original_padded_img_size(segmentation, max(original_img_size))
-    rescaled_seg = remove_applied_padding(rescaled_seg, original_img_size)
+    rescaled_seg = unpad_segmentation(rescaled_seg, original_img_size)
 
     return rescaled_seg
 
@@ -171,7 +171,7 @@ def rescale_segmentation_to_original_padded_img_size(segmentation, original_max_
     ).squeeze(0)
 
 
-def remove_applied_padding(segmentation, original_img_size: Tuple[int, int]):
+def unpad_segmentation(segmentation, original_img_size: Tuple[int, int]):
     """
     Remove paddings, thereby restoring the original image size
 

From c04500879d18d28a0a8ed4f6c490e756baef8e7b Mon Sep 17 00:00:00 2001
From: Jan Gutsche <34797331+jaagut@users.noreply.github.com>
Date: Wed, 18 May 2022 12:51:18 +0200
Subject: [PATCH 4/4] Bump YOEO version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5bba304..7af144e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "YOEO"
-version = "1.1.1"
+version = "1.2.0"
 description = "A hybrid CNN for object detection and semantic segmentation"
 authors = ["Florian Vahl ", "Jan Gutsche "]
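
Series note (not part of any commit): with all four patches applied, the two
helpers are used together in yoeo/detect.py exactly as in the hunks above:

    detections, segmentations = model(input_img)
    detections = non_max_suppression(detections, conf_thres, nms_thres)
    detections = rescale_boxes(detections[0], img_size, image.shape[0:2])
    segmentations = rescale_segmentation(segmentations, image.shape[0:2])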