Skip to content

Commit

Permalink
Func correct implementation of pooling layers
Browse files Browse the repository at this point in the history
To avoid overflow on accumulators use output types instead of inputs
types.
  • Loading branch information
GiuseppeDiGuglielmo committed Dec 7, 2024
1 parent 1885710 commit c7ac85b
Showing 1 changed file with 64 additions and 103 deletions.
167 changes: 64 additions & 103 deletions hls4ml/templates/catapult/nnet_utils/nnet_pooling.h
Original file line number Diff line number Diff line change
@@ -1,66 +1,47 @@
#ifndef NNET_POOLING_H_
#define NNET_POOLING_H_

#include "nnet_common.h"
#include "nnet_helpers.h"
#include <iostream>

namespace nnet {

// Return the maximum value from an array
template <typename T, int N> T max(T x[N]) {
template <typename T, int N, typename accum_t> accum_t max(T x[N]) {
T y = x[0];
for (int i = 1; i < N; i++) {
y = x[i] > y ? x[i] : y;
}
return y;
}

template <int W, int N> ac_int<W, true> avg(ac_int<W, true> (&x)[N]) {
// Use a wider accumulator than the input to avoid overflow
ac_int<W + ceillog2(N), true> tmp = 0;
for (int i = 0; i < N; i++) {
tmp += x[i];
}
tmp /= N;
// Now cast back to original type
ac_int<W, true> y = tmp;
return tmp;
}

template <int W, int I, int N> ac_fixed<W, I, true> avg(ac_fixed<W, I, true> (&x)[N]) {
// Use a wider accumulator than the input to avoid overflow
ac_fixed<W + ceillog2(N), I + ceillog2(N), true> tmp = 0;
for (int i = 0; i < N; i++) {
tmp += x[i];
}
tmp /= N;
// Now cast back to original type
ac_fixed<W, I, true> y = tmp;
return y;
}

// Return the mean value of an array
template <typename T, int N> T avg(T (&x)[N]) {
T y = 0;
template <typename T, int N, typename accum_t> accum_t avg(T (&x)[N], unsigned length) {
accum_t y = 0;
for (int i = 0; i < N; i++) {
y += x[i];
}
y /= N;
y /= length;
return y;
}

// Enumeration for pooling operation (max, avg, l2norm pooling)
enum Pool_Op { Max, Average }; // L2Norm };
template <typename T, int N, Pool_Op op> T pool_op(T (&x)[N]) {
template <typename T, int N, Pool_Op op, typename accum_t> accum_t pool_op(T (&x)[N], unsigned length) {
switch (op) {
case Max:
return max<T, N>(x);
return max<T, N, accum_t>(x);
case Average:
return avg(x);
return avg<T, N, accum_t>(x, length);
// case L2Norm: return l2norm<T, N>(x);
}
}

template <typename T, int N, Pool_Op op, typename accum_t> accum_t pool_op(T (&x)[N]) {
return pool_op<T, N, op, accum_t>(x, N);
}

template <typename T, Pool_Op op> T pad_val() {
/*---
*- In Tensorflow, pooling ignores the value in the padded cells
Expand Down Expand Up @@ -100,72 +81,61 @@ template <typename CONFIG_T> constexpr int pool_op_limit_1d() {

template <class data_T, class res_T, typename CONFIG_T>
void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONFIG_T::n_out * CONFIG_T::n_filt]) {
constexpr int ce_reuse_factor = CONFIG_T::reuse_factor;
(void)ce_reuse_factor;
//#pragma HLS PIPELINE II=CONFIG_T::reuse_factor

// TODO partition the arrays according to the reuse factor
const int limit = pool_op_limit_1d<CONFIG_T>();
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
//#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_width, \
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
// Add any necessary padding

// Add padding and reduce input width to area covered by pooling function
static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;

for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
// Loop over input image x in steps of stride
for (int ii = 0; ii < restricted_padded_width; ii += CONFIG_T::stride_width) {
unsigned overlap_pixel = 0;
data_T pool[CONFIG_T::pool_width];
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
// Keep track of number of pixels in image vs padding region
unsigned img_overlap = 0;
// Loop over pool window x
for (int jj = 0; jj < CONFIG_T::stride_width; jj++) {
if (ii + jj < CONFIG_T::pad_left || ii + jj >= (full_padded_width - CONFIG_T::pad_right)) {
// Add padding
pool[jj] = pad_val<data_T, CONFIG_T::pool_op>();
if (CONFIG_T::count_pad) {
img_overlap++;
}
} else {
//#pragma HLS ARRAY_PARTITION variable=pool complete dim=0

for (int jj = 0; jj < CONFIG_T::pool_width; jj++) {
if (ii + jj >= CONFIG_T::pad_left && ii + jj < CONFIG_T::n_in + CONFIG_T::pad_left) {
pool[jj] = data[(ii + jj - CONFIG_T::pad_left) * CONFIG_T::n_filt + ff];
img_overlap++;
}
overlap_pixel++;
} else
pool[jj] = pad_val<data_T, CONFIG_T::pool_op>();
}
// do the pooling
// TODO in the case of average pooling, need to reduce width to area of pool window
// not overlapping padding region

int patch_size = CONFIG_T::count_pad ? CONFIG_T::stride_width : overlap_pixel;

res[(ii / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] =
pool_op<data_T, CONFIG_T::pool_width, CONFIG_T::pool_op>(pool);
// If the pool op is Average, the zero-padding needs to be removed from the results
if (CONFIG_T::pool_op == Average) {
data_T rescale = static_cast<data_T>(CONFIG_T::pool_width) / img_overlap;
res[(ii / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] *= rescale;
}
pool_op<data_T, CONFIG_T::pool_width, CONFIG_T::pool_op, typename CONFIG_T::accum_t>(pool, patch_size);
}
}
}

template <class data_T, class res_T, typename CONFIG_T>
void global_pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONFIG_T::n_filt]) {
constexpr int ce_reuse_factor = CONFIG_T::reuse_factor;
(void)ce_reuse_factor;
//#pragma HLS PIPELINE II=CONFIG_T::reuse_factor

assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);
assert(CONFIG_T::pool_width == CONFIG_T::stride_width);

// TODO partition the arrays according to the reuse factor
const int limit = pool_op_limit_1d<CONFIG_T>();
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
//#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_width, \
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit

for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
data_T pool[CONFIG_T::n_in];
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
//#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
for (int jj = 0; jj < CONFIG_T::n_in; jj++) {
pool[jj] = data[jj * CONFIG_T::n_filt + ff];
}
// do the pooling
res[ff] = pool_op<data_T, CONFIG_T::n_in, CONFIG_T::pool_op>(pool);
res[ff] = pool_op<data_T, CONFIG_T::n_in, CONFIG_T::pool_op, typename CONFIG_T::accum_t>(pool);
}
}

Expand Down Expand Up @@ -196,19 +166,18 @@ struct pooling2d_config {
};

template <typename CONFIG_T> constexpr int pool_op_limit() {
return (CONFIG_T::out_height * CONFIG_T::out_width) * CONFIG_T::n_filt / CONFIG_T::reuse_factor;
return DIV_ROUNDUP((CONFIG_T::out_height * CONFIG_T::out_width) * CONFIG_T::n_filt, CONFIG_T::reuse_factor);
}

template <class data_T, class res_T, typename CONFIG_T>
void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_filt],
res_T res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt]) {
constexpr int ce_reuse_factor = CONFIG_T::reuse_factor;
(void)ce_reuse_factor;
//#pragma HLS PIPELINE II=CONFIG_T::reuse_factor

// TODO partition the arrays according to the reuse factor
const int limit = pool_op_limit<CONFIG_T>();
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
//#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
// Add padding and reduce input width to area covered by pooling function
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
Expand All @@ -221,41 +190,34 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
// Loop over input image x in steps of stride
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
// Keep track of number of pixels in image vs padding region
unsigned img_overlap = 0;
//#pragma HLS ARRAY_PARTITION variable=pool complete dim=0

unsigned overlap_pixel = 0;

// Loop over pool window y
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
// Loop over pool window x
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
// Add padding
bool cond1 = ii + kk >= CONFIG_T::pad_top && ii + kk < CONFIG_T::in_height + CONFIG_T::pad_top;
bool cond2 = jj + ll >= CONFIG_T::pad_left && jj + ll < CONFIG_T::in_width + CONFIG_T::pad_left;
if (cond1 && cond2) {
unsigned data_idx =
((ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width + (jj + ll - CONFIG_T::pad_left)) *
CONFIG_T::n_filt +
ff;
pool[kk * CONFIG_T::stride_width + ll] = data[data_idx];
overlap_pixel++;
} else
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
if (CONFIG_T::count_pad) {
img_overlap++;
}
} else {
pool[kk * CONFIG_T::stride_width + ll] =
data[(ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width * CONFIG_T::n_filt +
(jj + ll - CONFIG_T::pad_left) * CONFIG_T::n_filt + ff];
img_overlap++;
}
}
}
// do the pooling
// TODO in the case of average pooling, need to reduce height * width to area of pool window
// not overlapping padding region

int patch_size = CONFIG_T::count_pad ? CONFIG_T::stride_width * CONFIG_T::stride_height : overlap_pixel;

res[(ii / CONFIG_T::stride_height) * CONFIG_T::out_width * CONFIG_T::n_filt +
(jj / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] =
pool_op<data_T, CONFIG_T::pool_height * CONFIG_T::pool_width, CONFIG_T::pool_op>(pool);
// If the pool op is Average, the zero-padding needs to be removed from the results
if (CONFIG_T::pool_op == Average) {
data_T rescale =
static_cast<data_T>(CONFIG_T::pool_height) * static_cast<data_T>(CONFIG_T::pool_width) / img_overlap;
res[(ii / CONFIG_T::stride_height) * CONFIG_T::out_width * CONFIG_T::n_filt +
(jj / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] *= rescale;
}
pool_op<data_T, CONFIG_T::pool_height * CONFIG_T::pool_width, CONFIG_T::pool_op,
typename CONFIG_T::accum_t>(pool, patch_size);
}
}
}
Expand All @@ -264,13 +226,12 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
template <class data_T, class res_T, typename CONFIG_T>
void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_filt],
res_T res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt]) {
constexpr int ce_reuse_factor = CONFIG_T::reuse_factor;
(void)ce_reuse_factor;
//#pragma HLS PIPELINE II=CONFIG_T::reuse_factor

// TODO partition the arrays according to the reuse factor
const int limit = pool_op_limit<CONFIG_T>();
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
//#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
// Add padding and reduce input width to area covered by pooling function
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
Expand All @@ -283,7 +244,7 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
// Loop over input image x in steps of stride
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
//#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
// Keep track of number of pixels in image vs padding region
unsigned img_overlap = 0;
// Loop over pool window y
Expand All @@ -294,9 +255,8 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
// Add padding
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
if (CONFIG_T::count_pad) {
if (CONFIG_T::count_pad)
img_overlap++;
}
} else {
pool[kk * CONFIG_T::stride_width + ll] =
data[(ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width +
Expand All @@ -310,7 +270,8 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
// not overlapping padding region
res[(ii / CONFIG_T::stride_height) * CONFIG_T::out_width + (jj / CONFIG_T::stride_width) +
ff * CONFIG_T::out_height * CONFIG_T::out_width] =
pool_op<data_T, CONFIG_T::pool_height * CONFIG_T::pool_width, CONFIG_T::pool_op>(pool);
pool_op<data_T, CONFIG_T::pool_height * CONFIG_T::pool_width, CONFIG_T::pool_op,
typename CONFIG_T::accum_t>(pool);
// If the pool op is Average, the zero-padding needs to be removed from the results
if (CONFIG_T::pool_op == Average) {
data_T rescale =
Expand All @@ -331,12 +292,11 @@ void global_pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width *
assert(CONFIG_T::pool_width == CONFIG_T::stride_width);
assert(CONFIG_T::pool_height == CONFIG_T::stride_height);

constexpr int ce_reuse_factor = CONFIG_T::reuse_factor;
(void)ce_reuse_factor;
//#pragma HLS PIPELINE II=CONFIG_T::reuse_factor

const int limit = pool_op_limit<CONFIG_T>();
#pragma HLS ALLOCATION instances=pool_op limit=limit function
//#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_width * CONFIG_T::pool_height, \
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit

FiltLoop:
for (int filt = 0; filt < CONFIG_T::n_filt; filt++) {
Expand All @@ -347,7 +307,8 @@ void global_pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width *
pool[i] = data[i * CONFIG_T::n_filt + filt];
}

res[filt] = static_cast<res_T>(pool_op<data_T, CONFIG_T::in_height * CONFIG_T::in_width, CONFIG_T::pool_op>(pool));
res[filt] = static_cast<res_T>(
pool_op<data_T, CONFIG_T::in_height * CONFIG_T::in_width, CONFIG_T::pool_op, typename CONFIG_T::accum_t>(pool));
}
}

Expand Down

0 comments on commit c7ac85b

Please sign in to comment.