Skip to content

Commit

Permalink
OpenVX - GFX942: Fix canny test case (#1450)
Browse files Browse the repository at this point in the history
* goffset

* temp

* debug

* temp

* temp

* temp

* code cleanup

* boundary check

* boundary check

* boundary check

* feedback update

* additional kernels

* fix

* conformance fix

* code cleanup

* code cleanup

* typo fix

* cpu boundary check

---------

Co-authored-by: Kiriti Gowda <[email protected]>
  • Loading branch information
hansely and kiritigowda authored Nov 20, 2024
1 parent 1e5a631 commit e580795
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 27 deletions.
14 changes: 8 additions & 6 deletions amd_openvx/openvx/ago/ago_haf_cpu_canny.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,12 +616,14 @@ int HafCpu_CannyEdgeTrace_U8_U8XY
const ago_coord2d_short_t offs = dir_offsets[i];
vx_int16 x1 = x + offs.x;
vx_int16 y1 = y + offs.y;
vx_uint8 *pDst = pDstImage + y1*dstImageStrideInBytes + x1;
if (*pDst == 127)
{
*pDst |= 0x80; // *pDst = 255
*((unsigned *)pxyStack) = (y1<<16)|x1;
pxyStack++;
if(x1 >= 0 && x1 < dstWidth && y1 >= 0 && y1 < dstHeight) {
vx_uint8 *pDst = pDstImage + y1*dstImageStrideInBytes + x1;
if (*pDst == 127)
{
*pDst |= 0x80; // *pDst = 255
*((unsigned *)pxyStack) = (y1<<16)|x1;
pxyStack++;
}
}
}
}
Expand Down
63 changes: 42 additions & 21 deletions amd_openvx/openvx/hipvx/vision_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ Hip_CannySobel_U16_U8_3x3_L1NORM(uint dstWidth, uint dstHeight,
{ // load 136x18 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 1) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}

bool doExtraLoad = false;
if (ly < 2) {
loffset += 16 * 136;
Expand All @@ -54,7 +57,7 @@ Hip_CannySobel_U16_U8_3x3_L1NORM(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 1) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 18) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -267,7 +270,9 @@ Hip_CannySobel_U16_U8_5x5_L1NORM(uint dstWidth, uint dstHeight,
{ // load 136x20 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 2) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 4) {
loffset += 16 * 136;
Expand All @@ -279,7 +284,7 @@ Hip_CannySobel_U16_U8_5x5_L1NORM(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 2) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 20) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -759,7 +764,9 @@ Hip_CannySobel_U16_U8_7x7_L1NORM(uint dstWidth, uint dstHeight,
{ // load 136x22 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 3) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 6) {
loffset += 16 * 136;
Expand All @@ -771,7 +778,7 @@ Hip_CannySobel_U16_U8_7x7_L1NORM(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 3) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 22) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -1646,7 +1653,9 @@ Hip_CannySobel_U16_U8_3x3_L2NORM(uint dstWidth, uint dstHeight,
{ // load 136x18 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 1) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 2) {
loffset += 16 * 136;
Expand All @@ -1658,7 +1667,7 @@ Hip_CannySobel_U16_U8_3x3_L2NORM(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 1) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 18) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -1871,7 +1880,9 @@ Hip_CannySobel_U16_U8_5x5_L2NORM(uint dstWidth, uint dstHeight,
{ // load 136x20 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 2) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 4) {
loffset += 16 * 136;
Expand All @@ -1883,7 +1894,7 @@ Hip_CannySobel_U16_U8_5x5_L2NORM(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 2) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 20) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -2361,7 +2372,9 @@ Hip_CannySobel_U16_U8_7x7_L2NORM(uint dstWidth, uint dstHeight,
{ // load 136x22 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 3) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 6) {
loffset += 16 * 136;
Expand All @@ -2373,7 +2386,7 @@ Hip_CannySobel_U16_U8_7x7_L2NORM(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 3) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 22) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -3263,7 +3276,7 @@ Hip_CannySuppThreshold_U8XY_U16_3x3(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 1) * srcImageStrideInBytes + ((x - lx) << 3) + 124;
doExtraLoad = (id < 18) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -3715,7 +3728,9 @@ Hip_HarrisSobel_HG3_U8_3x3(uint dstWidth, uint dstHeight,
{ // load 136x18 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 1) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 2) {
loffset += 16 * 136;
Expand All @@ -3727,7 +3742,7 @@ Hip_HarrisSobel_HG3_U8_3x3(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 1) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 18) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -3933,7 +3948,9 @@ Hip_HarrisSobel_HG3_U8_5x5(uint dstWidth, uint dstHeight,
{ // load 136x20 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 2) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 4) {
loffset += 16 * 136;
Expand All @@ -3945,7 +3962,7 @@ Hip_HarrisSobel_HG3_U8_5x5(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 2) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 20) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -4412,7 +4429,9 @@ Hip_HarrisSobel_HG3_U8_7x7(uint dstWidth, uint dstHeight,
{ // load 136x22 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (y - 3) * srcImageStrideInBytes + x - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 6) {
loffset += 16 * 136;
Expand All @@ -4424,7 +4443,7 @@ Hip_HarrisSobel_HG3_U8_7x7(uint dstWidth, uint dstHeight,
goffset = (y - ly + id - 3) * srcImageStrideInBytes + (((x >> 3) - lx) << 3) + 124;
doExtraLoad = (id < 22) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down Expand Up @@ -6297,7 +6316,9 @@ Hip_NonMaxSupp_XY_ANY_3x3(char *pDstList, uint dstListOffset, uint capacityOfLis
{ // load 136x18 bytes into local memory using 16x16 workgroup
int loffset = ly * 136 + (lx << 3);
int goffset = (gy - 1) * srcImageStrideInBytes + (gx << 3) - 4;
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
if (goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
bool doExtraLoad = false;
if (ly < 2) {
loffset += 16 * 136;
Expand All @@ -6309,7 +6330,7 @@ Hip_NonMaxSupp_XY_ANY_3x3(char *pDstList, uint dstListOffset, uint capacityOfLis
goffset = (gy - ly + id - 1) * srcImageStrideInBytes + ((gx - lx) << 3) + 124;
doExtraLoad = (id < 18) ? true : false;
}
if (doExtraLoad) {
if (doExtraLoad && goffset >= 0) {
*((uint2 *)(&lbuf[loffset])) = *((uint2 *)(&pSrcImage[goffset]));
}
__syncthreads();
Expand Down

0 comments on commit e580795

Please sign in to comment.