Skip to content

Commit

Permalink
CM7: Simplify Dilithium 769 NTT code
Browse files: browse the repository at this point in the history
  • Loading branch information
dop-amin committed Jan 13, 2025
1 parent 630bf95 commit 5b47a4b
Show file tree
Hide file tree
Showing 3 changed files with 1,169 additions and 854 deletions.
8 changes: 4 additions & 4 deletions example.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1814,6 +1814,7 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 32

r = slothy.config.reserved_regs
r.add("r1")
r = r.union(f"s{i}" for i in range(31)) # reserve FPR
slothy.config.reserved_regs = r

Expand All @@ -1825,13 +1826,12 @@ def core(self, slothy):
slothy.config.variable_size = True
slothy.config.split_heuristic = True
slothy.config.timeout = 360 # Not more than 2min per step
slothy.config.split_heuristic_factor = 1
slothy.config.visualize_expected_performance = False
slothy.config.split_heuristic_factor = 4
slothy.config.split_heuristic_factor = 5
slothy.config.split_heuristic_stepsize = 0.15
slothy.optimize_loop("layer1234_loop")
slothy.optimize_loop("layer1234_loop", forced_loop_type=Arch_Armv7M.BranchLoop)
slothy.config.split_heuristic_optimize_seam = 6
slothy.optimize_loop("layer1234_loop")
slothy.optimize_loop("layer1234_loop", forced_loop_type=Arch_Armv7M.BranchLoop)

slothy.config.outputs = ["r14"]

Expand Down
6 changes: 2 additions & 4 deletions examples/naive/armv7m/ntt_769_dilithium.s
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,7 +146,6 @@ small_ntt_asm_769:
// s24: tmp
// s25: twiddle_ptr
vmov s24, tmp
vmov s25, twiddle_ptr
layer1234_loop:
// load a1, a3, ..., a15
vmov s23, poly
Expand Down Expand Up @@ -251,10 +250,10 @@ small_ntt_asm_769:
uadd16 tmp, poly0, poly1
usub16 twiddle1, poly0, poly1
str.w twiddle1, [poly, #offset]
str.w tmp, [poly], #4 // @slothy:core
str.w tmp, [poly], #4 // @slothy:core // @slothy:before=cmp

vmov tmp, s24
cmp.w poly, tmp
cmp.w poly, tmp // @slothy:id=cmp
bne.w layer1234_loop

sub.w poly, #8*strincr
Expand All @@ -266,7 +265,6 @@ small_ntt_asm_769:

add.w tmp, poly, #strincr2*16
vmov s13, tmp
vmov twiddle_ptr, s25
layer567_loop:
vmov s23, poly
load poly, poly0, poly1, poly2, poly3, #0, #distance2/4, #2*distance2/4, #3*distance2/4
Expand Down
Loading

0 comments on commit 5b47a4b

Please sign in to comment.