Skip to content

Commit

Permalink
Poly1305 ARM32 NEON: add implementation
Browse files: browse the repository at this point in the history
Add assembly for Poly1305 using ARM32 NEON instruction set.

For Poly1305 ARM32 Base:
  Change name from poly1305_blocks_arm32_16 to poly1305_arm32_blocks_16

poly1305.c:
  ARM32 NEON - buffer up to 4 blocks
  x86_64 - only calculate powers of r once after key is set.
test.c: add Poly1305 testing with multiple update calls.
benchmark: chacha20-poly1305 now uses AAD (additional authenticated data)
  • Loading branch information
SparkiDev committed Jan 9, 2025
1 parent 71b7d0c commit c2b610a
Show file tree
Hide file tree
Showing 7 changed files with 2,024 additions and 16 deletions.
17 changes: 13 additions & 4 deletions wolfcrypt/benchmark/benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,8 @@
#define BENCH_RNG 0x00000001
#define BENCH_SCRYPT 0x00000002

#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM) || \
(defined(HAVE_CHACHA) && defined(HAVE_POLY1305))
/* Define AES_AUTH_ADD_SZ already here, since it's used in the
* static declaration of `bench_Usage_msg1`. */
#if !defined(AES_AUTH_ADD_SZ) && \
Expand Down Expand Up @@ -1945,10 +1946,13 @@ static const char* bench_result_words2[][5] = {
#define BENCH_MIN_RUNTIME_SEC 1.0F
#endif

#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM) || \
(defined(HAVE_CHACHA) && defined(HAVE_POLY1305))
static word32 aesAuthAddSz = AES_AUTH_ADD_SZ;
#endif
#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
#define AES_AUTH_TAG_SZ 16
#define BENCH_CIPHER_ADD AES_AUTH_TAG_SZ
static word32 aesAuthAddSz = AES_AUTH_ADD_SZ;
#if !defined(AES_AAD_OPTIONS_DEFAULT)
#if !defined(NO_MAIN_DRIVER)
#define AES_AAD_OPTIONS_DEFAULT 0x1U
Expand Down Expand Up @@ -6059,15 +6063,19 @@ void bench_chacha20_poly1305_aead(void)
int ret = 0, i, count;
DECLARE_MULTI_VALUE_STATS_VARS()

WC_DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
WC_DECLARE_VAR(authTag, byte, CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE, HEAP_HINT);
WC_ALLOC_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
WC_ALLOC_VAR(authTag, byte, CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE, HEAP_HINT);
XMEMSET(bench_additional, 0, AES_AUTH_ADD_SZ);
XMEMSET(authTag, 0, CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE);

bench_stats_start(&count, &start);
do {
for (i = 0; i < numBlocks; i++) {
ret = wc_ChaCha20Poly1305_Encrypt(bench_key, bench_iv, NULL, 0,
bench_plain, bench_size, bench_cipher, authTag);
ret = wc_ChaCha20Poly1305_Encrypt(bench_key, bench_iv,
bench_additional, aesAuthAddSz, bench_plain, bench_size,
bench_cipher, authTag);
if (ret < 0) {
printf("wc_ChaCha20Poly1305_Encrypt error: %d\n", ret);
goto exit;
Expand All @@ -6089,6 +6097,7 @@ void bench_chacha20_poly1305_aead(void)
exit:

WC_FREE_VAR(authTag, HEAP_HINT);
WC_FREE_VAR(bench_additional, HEAP_HINT);
}
#endif /* HAVE_CHACHA && HAVE_POLY1305 */

Expand Down
48 changes: 45 additions & 3 deletions wolfcrypt/src/poly1305.c
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
#endif
poly1305_setkey_avx(ctx, key);
RESTORE_VECTOR_REGISTERS();
ctx->started = 0;
#elif defined(POLY130564)

/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
Expand Down Expand Up @@ -813,13 +814,49 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
printf("\n");
#endif

#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_THUMB2) && \
!defined(WOLFSSL_ARMASM_NO_NEON)
/* handle leftover */
if (ctx->leftover) {
size_t want = sizeof(ctx->buffer) - ctx->leftover;
if (want > bytes)
want = bytes;

for (i = 0; i < want; i++)
ctx->buffer[ctx->leftover + i] = m[i];
bytes -= (word32)want;
m += want;
ctx->leftover += want;
if (ctx->leftover < sizeof(ctx->buffer)) {
return 0;
}

poly1305_blocks(ctx, ctx->buffer, sizeof(ctx->buffer));
ctx->leftover = 0;
}

/* process full blocks */
if (bytes >= sizeof(ctx->buffer)) {
size_t want = bytes & ~((size_t)POLY1305_BLOCK_SIZE - 1);

poly1305_blocks(ctx, m, want);
m += want;
bytes -= (word32)want;
}

/* store leftover */
if (bytes) {
for (i = 0; i < bytes; i++)
ctx->buffer[ctx->leftover + i] = m[i];
ctx->leftover += bytes;
}
#else
#ifdef USE_INTEL_POLY1305_SPEEDUP
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags)) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);

/* handle leftover */

if (ctx->leftover) {
size_t want = sizeof(ctx->buffer) - ctx->leftover;
if (want > bytes)
Expand All @@ -835,8 +872,10 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
return 0;
}

if (!ctx->started)
if (!ctx->started) {
poly1305_calc_powers_avx2(ctx);
ctx->started = 1;
}
poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
ctx->leftover = 0;
}
Expand All @@ -845,8 +884,10 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
if (bytes >= sizeof(ctx->buffer)) {
size_t want = bytes & ~(sizeof(ctx->buffer) - 1);

if (!ctx->started)
if (!ctx->started) {
poly1305_calc_powers_avx2(ctx);
ctx->started = 1;
}
poly1305_blocks_avx2(ctx, m, want);
m += want;
bytes -= (word32)want;
Expand Down Expand Up @@ -902,6 +943,7 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
ctx->leftover += bytes;
}
}
#endif

return 0;
}
Expand Down
Loading

0 comments on commit c2b610a

Please sign in to comment.