Merge pull request #40 from bit-bots/fix/rescale_segmentation
fix/rescale_segmentations(...) & rescale_boxes(...)
jaagut authored May 18, 2022
2 parents 483a7e8 + c045008 commit 8f35de7
Showing 3 changed files with 120 additions and 80 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "YOEO"
version = "1.1.1"
version = "1.2.0"
description = "A hybrid CNN for object detection and semantic segmentation"
authors = ["Florian Vahl <[email protected]>", "Jan Gutsche <[email protected]>"]

4 changes: 2 additions & 2 deletions yoeo/detect.py
@@ -17,7 +17,7 @@
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

from yoeo.models import load_model
from yoeo.utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentations
from yoeo.utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentation
from yoeo.utils.datasets import ImageFolder
from yoeo.utils.transforms import Resize, DEFAULT_TRANSFORMS

@@ -100,7 +100,7 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5):
detections, segmentations = model(input_img)
detections = non_max_suppression(detections, conf_thres, nms_thres)
detections = rescale_boxes(detections[0], img_size, image.shape[0:2])
segmentations = rescale_segmentations(segmentations, img_size, image.shape[0:2])
segmentations = rescale_segmentation(segmentations, image.shape[0:2])
return detections.numpy(), segmentations.cpu().detach().numpy()
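
For orientation, a minimal usage sketch of the updated detect_image API follows. The config/weights paths, the cv2-based image loading, and the load_model(config_path, weights_path) signature are assumptions for illustration, not code from this commit.

import cv2

from yoeo.models import load_model
from yoeo.detect import detect_image

# Placeholder paths; load_model(config_path, weights_path) is assumed here.
model = load_model("config/yoeo.cfg", "weights/yoeo.pth")

# detect_image is assumed to take an RGB numpy image.
image = cv2.cvtColor(cv2.imread("example.png"), cv2.COLOR_BGR2RGB)

# Boxes come back in pixel coordinates of the original image;
# the segmentation map has the original (height, width) as well.
detections, segmentation = detect_image(model, image, img_size=416)
print(detections.shape, segmentation.shape)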


194 changes: 117 additions & 77 deletions yoeo/utils/utils.py
@@ -64,99 +64,138 @@ def weights_init_normal(m):
nn.init.constant_(m.bias.data, 0.0)


def rescale_boxes(boxes, current_dim, original_shape):
def rescale_boxes(boxes, output_img_size, original_img_size):
"""
Rescales bounding boxes to the original shape
Rescale bounding boxes as if they were calculated on the original, non-padded image.
1. bounding boxes are scaled as if they were calculated on the (square) padded original image.
2. padding is subtracted, thereby shifting the boxes as if they were calculated on the original,
non-padded image.
:param boxes: detection output
:type boxes: torch.Tensor with shape(#boxes, 6)
:param output_img_size: size of the image for which the network calculates the bounding boxes (1D)
:type output_img_size: int
:param original_img_size: size of the original image (height, width)
:type original_img_size: Tuple[int, int] (height, width)
:return: rescaled detection output
:rtype: torch.Tensor with shape(#boxes, 6)
"""
orig_h, orig_w = original_shape
pad_y, pad_x = calculate_applied_padding_per_dimension(current_dim, original_shape)

# Image height and width after padding is removed
unpad_h = current_dim - pad_y
unpad_w = current_dim - pad_x

# Rescale bounding boxes to dimension of original image
boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
return boxes

rescaled_boxes = rescale_boxes_to_original_padded_img_size(boxes, output_img_size, max(original_img_size))
rescaled_boxes = unpad_box_coordinates(rescaled_boxes, original_img_size)

return rescaled_boxes


def calculate_applied_padding_per_dimension(current_dim: int, original_shape: Tuple[int, int]) -> Tuple[int, int]:
def rescale_boxes_to_original_padded_img_size(boxes, output_img_size: int, original_max_size: int):
"""
Calculate the total amount of padding that was added to each image dimension, i.e.
current_dim = original_shape[0] + padding_in_1st_dim = original_shape[1] + padding_in_2nd_dim
:param current_dim: segmentation output dimension (1D)
:type current_dim: int
:param original_shape: original image shape (2D)
:type original_shape: Tuple[int, int] (height, width)
:return: Tuple containing paddings (height, width)
:rtype: Tuple[int, int]
Rescale bounding boxes as if they were calculated on the (square) padded original image.
:param boxes: detection output
:type boxes: torch.Tensor with shape(#boxes, 6)
:param output_img_size: size of the image for which the network calculates the bounding boxes (1D)
:type output_img_size: int
:param original_max_size: maximum size of the original image (1)
:type original_max_size: int
:return: rescaled detection output
:rtype: torch.Tensor with shape(#boxes, 6)
"""
orig_h, orig_w = original_shape
pad_w = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
pad_h = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
return int(pad_h), int(pad_w)

scale_factor = original_max_size / output_img_size
boxes[:, 0:4] = boxes[:, 0:4] * scale_factor

return boxes


def rescale_segmentations(segmentations, current_dim: int, original_shape: Tuple[int, int]):
def unpad_box_coordinates(boxes, original_img_size: Tuple[int, int]):
"""
Removes padding and interpolates segmentations to the original image shape.
:param segmentations: YOEO segmentation output
:type segmentations: torch.Tensor with shape (1, current_dim, current_dim)
:param current_dim: segmentation output dimension (1D)
:type current_dim: int
:param original_shape: original image shape (2D)
:type original_shape: Tuple[int, int] (height, width)
Subtract padding, thereby shifting the boxes as if they were calculated on the original,
non-padded image.
:param boxes: detection output
:type boxes: torch.Tensor with shape(#boxes, 6)
:param original_img_size: size of the original image (height, width)
:type original_img_size: Tuple[int, int] (height, width)
:return: rescaled detection output
:rtype: torch.Tensor with shape(#boxes, 6)
"""

padding = calculate_applied_padding_per_side(current_dim, original_shape)
unpadded_segmentations = remove_applied_padding(segmentations, current_dim, padding)
return interpolate_to_original_shape(unpadded_segmentations, original_shape)

padding_left = max(original_img_size[0] - original_img_size[1], 0) // 2
padding_top = max(original_img_size[1] - original_img_size[0], 0) // 2

def calculate_applied_padding_per_side(current_dim: int, original_shape: Tuple[int, int]) -> Tuple[int, int]:
"""
Calculate the amount of padding that was added to each side of each image dimension, i.e.
current_dim = padding_in_1st_dim + original_shape[0] + padding_in_1st_dim
current_dim = padding_in_2nd_dim + original_shape[1] + padding_in_2nd_dim
:param current_dim: segmentation output dimension (1D)
:type current_dim: int
:param original_shape: original image shape (2D)
:type original_shape: Tuple[int, int] (height, width)
:return: Tuple containing paddings (height, width)
:rtype: Tuple[int, int]
boxes[:, 0] = boxes[:, 0] - padding_left
boxes[:, 1] = boxes[:, 1] - padding_top
boxes[:, 2] = boxes[:, 2] - padding_left
boxes[:, 3] = boxes[:, 3] - padding_top

return boxes
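
A worked example of the two steps described in rescale_boxes, with hypothetical numbers: for a 416x416 network output and a 720x1280 (height x width) original image, step 1 scales by 1280 / 416 ≈ 3.08, and step 2 shifts the y coordinates up by the vertical padding (1280 - 720) // 2 = 280. The values below are made up for illustration.

import torch

from yoeo.utils.utils import rescale_boxes

output_img_size = 416               # square network input/output size
original_img_size = (720, 1280)     # (height, width) of the original image

# One fake detection (x1, y1, x2, y2, conf, class) in 416x416 padded-image coordinates
boxes = torch.tensor([[100.0, 150.0, 200.0, 250.0, 0.9, 0.0]])

rescaled = rescale_boxes(boxes.clone(), output_img_size, original_img_size)

# Step 1: scale by 1280 / 416 ≈ 3.077 -> x1 ≈ 307.7, y1 ≈ 461.5, x2 ≈ 615.4, y2 ≈ 769.2
# Step 2: subtract the top padding (1280 - 720) // 2 = 280 from y1 and y2;
#         x1 and x2 are unchanged because no horizontal padding was added.
print(rescaled)  # approximately [[307.7, 181.5, 615.4, 489.2, 0.9, 0.0]]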


def rescale_segmentation(segmentation, original_img_size: Tuple[int, int]):
"""
pad_h, pad_w = calculate_applied_padding_per_dimension(current_dim, original_shape)
return int(pad_h // 2), int(pad_w // 2)


def remove_applied_padding(segmentations, current_dim: int, padding: Tuple[int, int]):
Interpolate segmentation back to original image size and remove paddings.
1. segmentation is rescaled as if it was calculated on the original, padded image size
2. paddings are removed, thereby restoring the original image size
:param segmentation: segmentation output
:type segmentation: torch.Tensor with shape (1, height, width) and height == width
:param original_img_size: size of the original image (height, width)
:type original_img_size: Tuple[int, int] (height, width)
:return: rescaled segmentation
:rtype: torch.Tensor with shape (1, original_img_size[0], original_img_size[1])
"""
:param segmentations: YOEO segmentation output
:type segmentations: torch.Tensor with shape (1, current_dim, current_dim)
:return: unpadded YOEO segmentation output
:rtype: torch.Tensor

rescaled_seg = rescale_segmentation_to_original_padded_img_size(segmentation, max(original_img_size))
rescaled_seg = unpad_segmentation(rescaled_seg, original_img_size)

return rescaled_seg


def rescale_segmentation_to_original_padded_img_size(segmentation, original_max_size: int):
"""

pad_h, pad_w = padding
return segmentations[..., pad_h:current_dim-pad_h, pad_w:current_dim-pad_w]


def interpolate_to_original_shape(segmentations, original_shape: Tuple[int, int]):
Rescale the segmentation as if it was calculated on the original, padded image size using
"nearest-exact" interpolation.
:param segmentation: segmentation output
:type segmentation: torch.Tensor with shape (1, height, width) and height == width
:param original_max_size: maximum size of the original image (1)
:type original_max_size: int
:return: segmentation output with original, padded image size
:rtype: torch.Tensor with shape (1, original_max_size, original_max_size)
"""
:param segmentations: YOEO segmentation output
:type segmentations: torch.Tensor with shape (1, current_dim, current_dim)
:return: interpolated YOEO segmentation output with original image shape
:rtype: torch.Tensor with shape (1, *original_shape)

return nn.functional.interpolate(
segmentation.unsqueeze(0),
size=(original_max_size, original_max_size),
mode="nearest-exact"
).squeeze(0)


def unpad_segmentation(segmentation, original_img_size: Tuple[int, int]):
"""
Remove paddings, thereby restoring the original image size
return nn.functional.interpolate(segmentations.unsqueeze(0).type(torch.ByteTensor), size=original_shape, mode="nearest").squeeze(0)

:param segmentation: segmentation output
:type segmentation: torch.Tensor with shape (1, height, width) and height == width
:param original_img_size: original image size (height, width)
:type original_img_size: Tuple[int, int]
:return: unpadded segmentation output
:rtype: torch.Tensor with shape (1, original_img_size[0], original_img_size[1])
"""

current_size = segmentation.size(dim=1)
original_height, original_width = original_img_size

total_vertical_padding = max(0, original_width - original_height)
total_horizontal_padding = max(0, original_height - original_width)

padding_top = total_vertical_padding // 2
padding_bottom = total_vertical_padding - padding_top
padding_left = total_horizontal_padding // 2
padding_right = total_horizontal_padding - padding_left

return segmentation[..., padding_top:current_size - padding_bottom, padding_left:current_size - padding_right]
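
As a quick sanity check for the segmentation path, the sketch below pushes a fake square class map through rescale_segmentation. Shapes and dtype are illustrative assumptions, and "nearest-exact" interpolation needs a reasonably recent PyTorch (1.11 or later, if I recall correctly).

import torch

from yoeo.utils.utils import rescale_segmentation

original_img_size = (720, 1280)  # (height, width)

# Fake class-index map from the network, kept as float so interpolate() accepts it
segmentation = torch.randint(0, 3, (1, 416, 416)).float()

rescaled = rescale_segmentation(segmentation, original_img_size)

# Interpolated up to the padded 1280x1280 size, then (1280 - 720) // 2 = 280 rows
# of padding are cropped from the top and bottom.
assert rescaled.shape == (1, 720, 1280)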


def xywh2xyxy(x):
y = x.new(x.shape)
@@ -291,7 +330,8 @@ def get_batch_statistics(outputs, targets, iou_threshold):
continue

# Filter target_boxes by pred_label so that we only match against boxes of our own label
filtered_target_position, filtered_targets = zip(*filter(lambda x: target_labels[x[0]] == pred_label, enumerate(target_boxes)))
filtered_target_position, filtered_targets = zip(
*filter(lambda x: target_labels[x[0]] == pred_label, enumerate(target_boxes)))

# Find the best matching target for our predicted box
iou, box_filtered_index = bbox_iou(pred_box.unsqueeze(0), torch.stack(filtered_targets)).max(0)
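
The zip(*filter(...)) line above is fairly dense; here is a standalone toy illustration of what it produces (values are made up, not taken from any test data).

import torch

target_labels = torch.tensor([0, 1, 0])
target_boxes = torch.tensor([[0., 0., 10., 10.],
                             [5., 5., 15., 15.],
                             [20., 20., 30., 30.]])
pred_label = 0

filtered_target_position, filtered_targets = zip(
    *filter(lambda x: target_labels[x[0]] == pred_label, enumerate(target_boxes)))

# filtered_target_position == (0, 2): indices of the targets that share the predicted label.
# filtered_targets is a tuple of the matching box rows, ready for torch.stack(...).
print(filtered_target_position, torch.stack(filtered_targets).shape)  # (0, 2) torch.Size([2, 4])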
@@ -454,7 +494,7 @@ def seg_iou(pred, target, classes):
pred = pred.view(-1)
target = target.view(-1)

for cls in range(classes):
for cls in range(classes):
pred_inds = pred == cls
target_inds = target == cls
intersection = (pred_inds[target_inds]).long().sum().data.cpu().item() # Cast to long to prevent overflows
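
A tiny example of the per-class intersection trick in seg_iou (toy tensors, chosen only to make the indexing visible).

import torch

pred = torch.tensor([0, 1, 1, 2, 2, 2])
target = torch.tensor([0, 1, 2, 2, 2, 1])

cls = 2
pred_inds = pred == cls      # where the prediction says class 2
target_inds = target == cls  # where the ground truth is class 2

# pred_inds[target_inds] keeps the prediction mask only at ground-truth positions of this class,
# so its sum counts the pixels on which prediction and ground truth agree: here 2.
intersection = (pred_inds[target_inds]).long().sum().item()
print(intersection)  # 2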
