Skip to content

Commit

Permalink
Fix: Saturating narrowing conversion in NEON
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Oct 27, 2024
1 parent 99d810d commit a32b187
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions include/simsimd/elementwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ SIMSIMD_PUBLIC void simsimd_wsum_u8_neon( //
float16x8_t a_scaled_vec = vmulq_n_f16(a_vec, alpha_f16);
float16x8_t b_scaled_vec = vmulq_n_f16(b_vec, beta_f16);
float16x8_t sum_vec = vaddq_f16(a_scaled_vec, b_scaled_vec);
-uint8x8_t sum_u8_vec = vmovn_u16(vcvtaq_u16_f16(sum_vec));
+uint8x8_t sum_u8_vec = vqmovn_u16(vcvtaq_u16_f16(sum_vec));
vst1_u8(result + i, sum_u8_vec);
}

Expand All @@ -1176,7 +1176,7 @@ SIMSIMD_PUBLIC void simsimd_fma_u8_neon( //
float16x8_t ab_vec = vmulq_f16(a_vec, b_vec);
float16x8_t ab_scaled_vec = vmulq_n_f16(ab_vec, alpha_f16);
float16x8_t sum_vec = vfmaq_n_f16(ab_scaled_vec, c_vec, beta_f16);
-uint8x8_t sum_u8_vec = vmovn_u16(vcvtaq_u16_f16(sum_vec));
+uint8x8_t sum_u8_vec = vqmovn_u16(vcvtaq_u16_f16(sum_vec));
vst1_u8(result + i, sum_u8_vec);
}

Expand All @@ -1200,7 +1200,7 @@ SIMSIMD_PUBLIC void simsimd_wsum_i8_neon( //
float16x8_t a_scaled_vec = vmulq_n_f16(a_vec, alpha_f16);
float16x8_t b_scaled_vec = vmulq_n_f16(b_vec, beta_f16);
float16x8_t sum_vec = vaddq_f16(a_scaled_vec, b_scaled_vec);
-int8x8_t sum_i8_vec = vmovn_s16(vcvtaq_s16_f16(sum_vec));
+int8x8_t sum_i8_vec = vqmovn_s16(vcvtaq_s16_f16(sum_vec));
vst1_s8(result + i, sum_i8_vec);
}

Expand All @@ -1226,7 +1226,7 @@ SIMSIMD_PUBLIC void simsimd_fma_i8_neon( //
float16x8_t ab_vec = vmulq_f16(a_vec, b_vec);
float16x8_t ab_scaled_vec = vmulq_n_f16(ab_vec, alpha_f16);
float16x8_t sum_vec = vfmaq_n_f16(ab_scaled_vec, c_vec, beta_f16);
-int8x8_t sum_i8_vec = vmovn_s16(vcvtaq_s16_f16(sum_vec));
+int8x8_t sum_i8_vec = vqmovn_s16(vcvtaq_s16_f16(sum_vec));
vst1_s8(result + i, sum_i8_vec);
}

Expand Down

0 comments on commit a32b187

Please sign in to comment.