Align AvgPool ceil_mode on last value to torch (#16752)
Fix #16203

Prior to this PR, when `ceil_mode` was on, the calculation of an output value
would divide by the full kernel size even when the remaining pixels in the
last window were fewer than the kernel size, which made this operator's
results differ between ORT and torch.

Note that this fix only applies to the code path introduced in #15597, which
only supports AvgPool since opset 19. Older opset versions remain unchanged,
as they use the MLAS implementation.

The PR also fixes the shape mismatch caused by a sliding window that starts
inside the padding. More detail: onnx/onnx#6650 (this PR is also validated
with the tests added in onnx/onnx#6650).
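
To make the fixed semantics concrete, a minimal standalone 1-D sketch follows
(illustrative only: dilation is fixed at 1, and the function name `AvgPool1D`
and its structure are chosen for readability rather than taken from the ORT
sources):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// 1-D average pooling with the corrected ceil_mode behavior.
std::vector<float> AvgPool1D(const std::vector<float>& x, int64_t kernel, int64_t stride,
                             int64_t pad_head, int64_t pad_tail,
                             bool ceil_mode, bool count_include_pad) {
  const int64_t in_size = static_cast<int64_t>(x.size());
  const int64_t numerator = in_size + pad_head + pad_tail - (kernel - 1) - 1;  // dilation = 1
  int64_t out_size = numerator / stride + 1;
  if (ceil_mode) {
    out_size = static_cast<int64_t>(std::ceil(static_cast<float>(numerator) / stride)) + 1;
    // Shape fix: drop the last output if its window would start entirely past the image.
    if ((out_size - 1) * stride >= in_size + pad_head) --out_size;
  }
  std::vector<float> y(static_cast<size_t>(out_size), 0.0f);
  for (int64_t p = 0; p < out_size; ++p) {
    const int64_t start = p * stride - pad_head;
    // Value fix: clamp the window so it never extends past the tail padding.
    const int64_t end = std::min(start + kernel, in_size + pad_tail);
    float sum = 0.0f;
    int64_t count = 0;
    for (int64_t h = start; h < end; ++h) {
      if (h >= 0 && h < in_size) {
        sum += x[static_cast<size_t>(h)];
        ++count;
      } else if (count_include_pad) {
        ++count;  // padding contributes 0 to the sum but is counted in the divisor
      }
    }
    y[static_cast<size_t>(p)] = count > 0 ? sum / static_cast<float>(count) : 0.0f;
  }
  return y;
}

Run on the first channel of the test input added below (kernel = 7, stride = 3,
pads = {3, 3}, ceil_mode and count_include_pad on), this yields 4 outputs; the
last window is clamped from [6, 13) to [6, 12), so its divisor shrinks from 7
to 6 and the result matches torch.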
titaiwangms authored Jan 24, 2025
1 parent 06fc73b commit 8b1d3b3
Showing 4 changed files with 49 additions and 9 deletions.
26 changes: 17 additions & 9 deletions onnxruntime/core/providers/cpu/nn/pool_attributes.h
@@ -150,30 +150,30 @@ struct PoolAttributes {
       case AutoPadType::VALID:
         *pad_head = 0;
         *pad_tail = 0;
-        *out_size = ComputeOutputSize(in_size, stride, kernel, 0, dilation);
+        *out_size = ComputeOutputSize(in_size, stride, kernel, 0, 0, dilation);
         break;
       case AutoPadType::SAME_LOWER: {
         int64_t legacy_target_size = (in_size + stride - 1) / stride;
         int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
         *pad_head = (pad_needed + 1) / 2;
         *pad_tail = pad_needed - *pad_head;
-        *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation);
+        *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
         break;
       }
       case AutoPadType::SAME_UPPER: {
         int64_t legacy_target_size = (in_size + stride - 1) / stride;
         int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
         *pad_head = pad_needed / 2;
         *pad_tail = pad_needed - *pad_head;
-        *out_size = ComputeOutputSize(in_size, stride, kernel, pad_needed, dilation);
+        *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
         break;
       }
       default: {
         ORT_THROW("Unsupported AutoPad Type.");
       }
     }
   } else {
-    *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation);
+    *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head, *pad_tail, dilation);
   }
 }
 #if defined(_MSC_VER) && !defined(__clang__)
@@ -184,13 +184,21 @@ struct PoolAttributes {
   int64_t ComputeOutputSize(int64_t in_size,
                             int64_t stride,
                             int64_t kernel,
-                            int64_t pad_needed,
+                            int64_t pad_head,
+                            int64_t pad_tail,
                             int64_t dilation) const {
-    if (ceil_mode == 0) {
-      return static_cast<int64_t>(static_cast<float>(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1);
+    int64_t numerator = in_size + pad_head + pad_tail - dilation * (kernel - 1) - 1;
+    int64_t out_size = numerator / stride + 1;
+
+    if (ceil_mode == 1) {
+      out_size = static_cast<int64_t>(std::ceil(static_cast<float>(numerator) / stride)) + 1;
+      // Ensure that the last pooling starts inside the image (at least 1 pixel)
+      // Reference: https://github.com/onnx/onnx/pull/5741
+      if ((out_size - 1) * stride >= in_size + pad_head) {
+        --out_size;
+      }
     }
-    return static_cast<int64_t>(
-        std::ceil(static_cast<float>(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1));
+    return out_size;
   }
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(pop)
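
A quick sanity check of the new `ComputeOutputSize` with the parameters of the
1-D test added below (in_size = 9, kernel = 7, stride = 3, pad_head = pad_tail
= 3, dilation = 1, ceil_mode = 1):

  numerator = 9 + 3 + 3 - 1 * (7 - 1) - 1 = 8
  out_size  = ceil(8 / 3) + 1 = 3 + 1 = 4
  last-window check: (4 - 1) * 3 = 9 < 9 + 3 = 12, so out_size stays 4

This matches the expected output shape {1, 2, 4} in the new test.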
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/cpu/nn/pool_functors.h
@@ -406,6 +406,7 @@ struct AveragePool1DTask final {
     for (int64_t ph = 0; ph < pooled_height; ++ph) {
       int64_t hstart = ph * stride_h - pads[0];
       int64_t hend = hstart + kernel_shape[0] * dilation_h;
+      hend = std::min(hend, height + pads[1]);
       y_d[ph] = 0;
       int total_elements = 0;
       for (int64_t h = hstart; h < hend; h += dilation_h) {
@@ -461,9 +462,11 @@ struct AveragePool2DTask final {
     for (int64_t ph = 0; ph < pooled_height; ++ph) {
       int64_t hstart = ph * stride_h - pads[0];
       int64_t hend = hstart + kernel_shape[0] * dilation_h;
+      hend = std::min(hend, height + pads[2]);
       for (int64_t pw = 0; pw < pooled_width; ++pw) {
         int64_t wstart = pw * stride_w - pads[1];
         int64_t wend = wstart + kernel_shape[1] * dilation_w;
+        wend = std::min(wend, width + pads[3]);
         const int64_t pool_index = ph * pooled_width + pw;
         y_d[pool_index] = 0;
         int total_elements = 0;
@@ -532,12 +535,15 @@ struct AveragePool3DTask {
     for (int64_t ph = 0; ph < pooled_height; ++ph) {
       int64_t hstart = ph * stride_h - pads[0];
       int64_t hend = hstart + kernel_shape[0] * dilation_h;
+      hend = std::min(hend, height + pads[3]);
       for (int64_t pw = 0; pw < pooled_width; ++pw) {
         int64_t wstart = pw * stride_w - pads[1];
         int64_t wend = wstart + kernel_shape[1] * dilation_w;
+        wend = std::min(wend, width + pads[4]);
         for (int64_t pd = 0; pd < pooled_depth; ++pd) {
           int64_t dstart = pd * stride_d - pads[2];
           int64_t dend = dstart + kernel_shape[2] * dilation_d;
+          dend = std::min(dend, depth + pads[5]);
           const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
           y_d[pool_index] = 0;
           int total_elements = 0;
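
The effect of the clamp, using the new 1-D test as a concrete example: for the
last output (ph = 3), hstart = 3 * 3 - 3 = 6 and the unclamped hend would be
6 + 7 = 13, one past the padded extent; clamping to height + pads[1] = 9 + 3 =
12 drops that position, so total_elements becomes 6 instead of 7 and the
average matches torch.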
1 change: 1 addition & 0 deletions onnxruntime/test/onnx/TestCase.cc
@@ -961,6 +961,7 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
{"reduce_prod_empty_set", "unknown version", {}},
{"reduce_sum_empty_set", "unknown version", {}},
{"reduce_sum_square_empty_set_expanded", "unknown version", {}},
{"averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True", "TODO(titaiwang): enable this in the next ONNX release."},
#ifdef ENABLE_TRAINING_CORE
{"adagrad", "not a registered function/op", {}}, // Op not registered.
{"adagrad_multiple", "not a registered function/op", {}}, // Op not registered.
25 changes: 25 additions & 0 deletions onnxruntime/test/providers/cpu/nn/pool_op_test.cc
@@ -1030,6 +1030,31 @@ TEST(PoolTest, AveragePool_19_dilation_2d) {
                      kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider});
 }
 
+TEST(PoolTest, AveragePool_19_ceil_count_include_pad_1d) {
+  // TODO: Unskip when fixed #41968513
+  if (DefaultDmlExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect.";
+  }
+
+  OpTester test("AveragePool", 19);
+
+  test.AddAttribute("auto_pad", "");
+  test.AddAttribute("strides", std::vector<int64_t>{3});
+  test.AddAttribute("pads", vector<int64_t>{3, 3});
+  test.AddAttribute("kernel_shape", vector<int64_t>{7});
+  test.AddAttribute("ceil_mode", (int64_t)1);
+  test.AddAttribute("count_include_pad", (int64_t)1);
+
+  std::vector<float> x_vals = {2.0903f, 4.6493f, 1.6320f, -3.2051f, 4.6975f, 4.7296f, 3.3653f, -1.5815f, -2.3832f, 0.9628f, -1.5899f, -2.6820f, 5.7529f, 7.7346f, -0.8910f, -2.0151f, 0.1313f, -0.5374f};
+  std::vector<int64_t> x_dims = {1, 2, 9};
+  std::vector<int64_t> expected_dims = {1, 2, 4};
+  std::vector<float> expected_vals = {0.73807144f, 2.5655572f, 0.8032287f, -0.09990001f, 0.34911433f, 1.0389f, 1.4536142f, -0.40353334f};
+
+  test.AddInput<float>("X", x_dims, x_vals);
+  test.AddOutput<float>("Y", expected_dims, expected_vals);
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kAclExecutionProvider, kOpenVINOExecutionProvider});
+}
+
 TEST(PoolTest, GlobalAveragePool) {
   OpTester test("GlobalAveragePool");
 
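For reference, the last expected value of the first channel can be verified by
hand under the new semantics: the ph = 3 window covers input indices 6..8
(3.3653, -1.5815, -2.3832) plus three counted padding positions, giving
(3.3653 - 1.5815 - 2.3832) / 6 ≈ -0.0999, which matches expected_vals[3] =
-0.09990001f; the old code would have divided the same sum by 7.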
