Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RISC-V for XuanTie C908 #115

Draft
wants to merge 57 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
ed232aa
feat: add risc-v example
thisisjube Oct 16, 2024
1b710d0
refactor: adjust .gitignore
thisisjube Oct 22, 2024
431d07a
doc: start overview of RISC-V and C908
thisisjube Oct 22, 2024
db22fdd
doc+feat: add notes for RISCV ISA and start adopting riscv model
thisisjube Oct 23, 2024
4eddc96
feat: minimal riscv example works
thisisjube Oct 25, 2024
cfa7107
feat: add Integer Register-Immediate Instructions
thisisjube Oct 25, 2024
8dee162
feat: add integer register-register ops
thisisjube Oct 26, 2024
3a8fbf7
fix: make immediate parsing work, further research
thisisjube Nov 6, 2024
f5eab98
feat: add load/ store instructions
thisisjube Nov 10, 2024
9ca2eb4
fix: add forgotten instructions, fix typos
thisisjube Nov 10, 2024
3ec1841
feat: implement <w>-specifier to indicate a dedicated 32-bit version …
thisisjube Nov 10, 2024
1b8a278
refactor: restructure files and classes
thisisjube Nov 10, 2024
cd1f5c7
fix: add all_subclasses_leaves() to correct class
thisisjube Nov 13, 2024
bda2803
refactor: change all_subclass_leaves from attribute to method
thisisjube Nov 13, 2024
c8213ee
fix: move iter_riscv_instructions() to correct place
thisisjube Nov 13, 2024
a570273
refactor: rename some files
thisisjube Nov 13, 2024
40e11ac
feat: Introduce a factory method to dynamically create instruction cl…
thisisjube Nov 15, 2024
c6b6f3d
feat: add m extension set
thisisjube Nov 18, 2024
1e154db
feat: add <w> for m extension set
thisisjube Nov 18, 2024
1d6511e
feat: add new testing code, add register aliases
thisisjube Nov 18, 2024
8c93b9f
feat: add naive rv64im Dilithium NTTs from PQRV paper
dop-amin Nov 19, 2024
19a88a5
feat: add uArch model
thisisjube Nov 24, 2024
c56821d
feat: optimize ntt_8l_singlissue_plant_rv64im.s
thisisjube Nov 24, 2024
c547d35
feat: further optimizations
thisisjube Nov 24, 2024
3dd91cd
feat: opt
thisisjube Nov 24, 2024
ab36ff4
feat: split ntt_dilithium example
thisisjube Dec 4, 2024
e77d78b
fix: fix wrong instruction defs and register names
thisisjube Dec 4, 2024
45d92ec
feat: first heuristic split optimization
thisisjube Dec 4, 2024
b1fc513
refactor: update copyright headers
thisisjube Dec 4, 2024
274db25
refactor: clean up code, reformat according to PEP-8 style
thisisjube Dec 4, 2024
e336d8c
refactor: move instruction factory to RISCVInstruction class
thisisjube Dec 4, 2024
3a7aad3
doc: update todo
thisisjube Dec 6, 2024
ccd9e47
Merge branch 'main' into riscv
thisisjube Dec 6, 2024
1c30331
refactor: cleanup
thisisjube Dec 6, 2024
551d9ef
fix: missing function renaming after optimization
thisisjube Dec 11, 2024
3fa15be
feat: add timeout for risc-v example
thisisjube Dec 11, 2024
915ff0b
fix: fix typos in XuanTie and rename optimized function for risc-v nt…
thisisjube Dec 11, 2024
5f30e5d
fix: add missing constants in asm code
thisisjube Dec 11, 2024
f46bec4
wip: add vector registers
thisisjube Dec 11, 2024
91bc53e
feat: Add loop parsing to riscv model
dop-amin Dec 12, 2024
136af27
feat: use loop optimization for dilithium
thisisjube Dec 18, 2024
87cf058
fix: typo in examples
Dec 18, 2024
c672e88
fix: loop recognition regex
thisisjube Jan 8, 2025
e143233
fix: repair loop recognition
thisisjube Jan 8, 2025
3828b6b
feat: add new optimized file
Jan 8, 2025
872c493
fix: final (tm) fix for loop recognition
thisisjube Jan 8, 2025
bacebf3
feat: new opt
Jan 8, 2025
65e1f17
wip: try other values for latency and cpi
thisisjube Jan 8, 2025
5b5212b
feat: new opt
Jan 8, 2025
d3c36e4
feat: new opt
thisisjube Jan 8, 2025
96989b7
feat: new opt
Jan 8, 2025
1333acd
wip: update uArch model
thisisjube Jan 10, 2025
514ee1f
wip: new opt
Jan 10, 2025
0afda4e
feat: add a testing file to examine the uArch
thisisjube Jan 10, 2025
da8326c
refactor: refactor poly_basemul test
thisisjube Jan 15, 2025
30cce36
Merge branch 'main' into riscv
thisisjube Jan 15, 2025
e448909
refactor: maintenance after merge
thisisjube Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
**/__pycache__
venv/
.idea
arch-docs/
doc.md
img.png
uarch.md
144 changes: 128 additions & 16 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,17 @@
import slothy.targets.aarch64.apple_m1_firestorm_experimental as Target_AppleM1_firestorm
import slothy.targets.aarch64.apple_m1_icestorm_experimental as Target_AppleM1_icestorm

import slothy.targets.riscv.riscv as RISC_V
import slothy.targets.riscv.xuantie_c908 as Target_XuanTieC908

target_label_dict = {Target_CortexA55: "a55",
Target_CortexA72: "a72",
Target_CortexM7: "m7",
Target_CortexM55r1: "m55",
Target_CortexM85r1: "m85",
Target_AppleM1_firestorm: "m1_firestorm",
Target_AppleM1_icestorm: "m1_icestorm"}
Target_AppleM1_icestorm: "m1_icestorm",
Target_XuanTieC908: "c908"}


class ExampleException(Exception):
Expand Down Expand Up @@ -81,6 +85,8 @@ def __init__(self, infile, name=None, funcname=None, suffix="opt",
subfolder = "aarch64/"
elif self.arch == Arch_Armv7M:
subfolder = "armv7m/"
elif self.arch == RISC_V:
subfolder = "riscv/"
self.infile_full = f"examples/naive/{subfolder}{self.infile}.s"
self.outfile_full = f"examples/opt/{subfolder}{self.outfile}.s"
self.name = name
Expand Down Expand Up @@ -654,6 +660,7 @@ def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
def core(self,slothy):
slothy.config.allow_useless_instructions = True
slothy.fusion_region("start", "end", ssa=False)

class Armv7mExample0(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "armv7m_simple0"
Expand Down Expand Up @@ -703,10 +710,7 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):

def core(self,slothy):
slothy.config.variable_size=True
slothy.optimize_loop("start", forced_loop_type=Arch_Armv7M.SubsLoop)
slothy.config.sw_pipelining.enabled = True
slothy.config.outputs = ["r0", "r1", "r2", "r5", "flags"]
slothy.optimize_loop("start2", forced_loop_type=Arch_Armv7M.BranchLoop)
slothy.optimize_loop("start")

class Armv7mLoopCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
Expand Down Expand Up @@ -741,7 +745,7 @@ def core(self,slothy):
slothy.config.variable_size=True
slothy.config.outputs = ["r6"]
slothy.optimize_loop("start")

class Armv7mLoopVmovCmpForced(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_vmov_cmp_forced"
Expand Down Expand Up @@ -1551,6 +1555,108 @@ def core(self, slothy):
slothy.config.sw_pipelining.optimize_postamble = False
slothy.optimize_loop("flt_radix4_fft_loop_start")

class RISC_VExample0(Example):
def __init__(self, var="", arch=RISC_V, target=Target_XuanTieC908):
name = "riscv_simple0"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Have an actual "simple" and minimalistic example here instead of the full NTT code as this is also part of a separate example.

infile = name

if var != "":
name += f"_{var}"
infile += f"_{var}"
name += f"_{target_label_dict[target]}"

super().__init__(infile, name, rename=True, arch=arch, target=target, funcname="ntt_8l_rv32im")

def core(self,slothy):
slothy.config.variable_size=True
slothy.config.constraints.stalls_first_attempt=32
slothy.config.inputs_are_outputs = True
slothy.config.outputs = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for the current example as it is, there are too many outputs defined here. For example x10, which corresponds to a0, is used as an input in the input assembly and thus automatically gets marked as an output due to slothy.config.inputs_are_outputs = True.

Generally, we should try to keep this list as small as it needs to be.

'x11', 'x12', 'x13', 'x14', 'x15', 'x16', 'x17', 'x18', 'x19',
'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26', 'x27', 'x28',
'x29', 'x30', 'x31']
slothy.optimize(start="mainloop", end="end_label")

class RISC_VExampleLoop0(Example):
    """RISC-V loop example operating on the `riscv_simple_loop0` input.

    The input file name is `riscv_simple_loop0`, extended by `_<var>` when a
    non-empty variant is given. The example name is the input file name
    followed by the label of the selected target.
    """

    def __init__(self, var="", arch=RISC_V, target=Target_XuanTieC908):
        """Derive input file and example name, then defer to `Example`.

        :param var: Optional variant suffix; appended as `_<var>` if non-empty.
        :param arch: Architecture model module passed on to `Example`.
        :param target: Target model module; also selects the name label via
            `target_label_dict`.
        """
        variant = f"_{var}" if var != "" else ""
        infile = "riscv_simple_loop0" + variant
        name = f"{infile}_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target)

    def core(self, slothy):
        """Configure `slothy` and optimize the three loops of the example."""
        cfg = slothy.config
        cfg.variable_size = True
        cfg.inputs_are_outputs = True
        cfg.sw_pipelining.enabled = True

        # The loops are optimized one after another, in source order.
        for loop_label in ("my_loop", "my_loop2", "my_loop3"):
            slothy.optimize_loop(loop_label)

class RISC_V_ntt8l_singleissue_plant_rv64im(Example):
    """RISC-V example for `ntt_dilithium/ntt_8l_singleissue_plant_rv64im`.

    Optimizes the two loops `ntt_8l_rv64im_loop1` and `ntt_8l_rv64im_loop2`
    with software pipelining and the split heuristic enabled.
    """

    def __init__(self, var="", arch=RISC_V, target=Target_XuanTieC908, timeout=None):
        """Derive input file and example name, then defer to `Example`.

        :param var: Optional variant suffix; appended as `_<var>` if non-empty.
        :param arch: Architecture model module passed on to `Example`.
        :param target: Target model module; also selects the name label via
            `target_label_dict`.
        :param timeout: Forwarded unchanged to `Example.__init__`.
        """
        variant = f"_{var}" if var != "" else ""
        stem = "ntt_8l_singleissue_plant_rv64im" + variant
        # The input lives in a subfolder; the example name does not carry it.
        infile = "ntt_dilithium/" + stem
        name = f"{stem}_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target,
                         funcname="ntt_8l_rv64im", timeout=timeout)

    def core(self, slothy):
        """Configure `slothy` and optimize both NTT loops."""
        cfg = slothy.config
        cfg.variable_size = True
        cfg.constraints.stalls_first_attempt = 32
        cfg.inputs_are_outputs = True

        # Extend the current reserved-register collection by x3 and write it
        # back. NOTE(review): x3 is `gp` in the standard RISC-V calling
        # convention; presumably that is why it is reserved -- confirm.
        reserved = cfg.reserved_regs
        reserved += ['x3']
        cfg.reserved_regs = reserved

        cfg.sw_pipelining.enabled = True
        cfg.sw_pipelining.halving_heuristic = True

        # Split-heuristic parameters. An alternative set kept for reference:
        # factor = 10, repeat = 1, stepsize = 0.3.
        cfg.split_heuristic = True
        cfg.split_heuristic_factor = 5
        cfg.split_heuristic_repeat = 2
        cfg.split_heuristic_stepsize = 0.05

        for loop_label in ("ntt_8l_rv64im_loop1", "ntt_8l_rv64im_loop2"):
            slothy.optimize_loop(loop_label)

class RISC_V_poly_basemul_8l_acc_rv64im(Example):
    """RISC-V example for `poly_basemul/poly_basemul_8l_acc_rv64im`.

    Optimizes the single loop `poly_basemul_8l_acc_rv64im_looper`.
    """

    def __init__(self, var="", arch=RISC_V, target=Target_XuanTieC908, timeout=None):
        """Derive input file and example name, then defer to `Example`.

        :param var: Optional variant suffix; appended as `_<var>` if non-empty.
        :param arch: Architecture model module passed on to `Example`.
        :param target: Target model module; also selects the name label via
            `target_label_dict`.
        :param timeout: Forwarded unchanged to `Example.__init__`.
        """
        variant = f"_{var}" if var != "" else ""
        stem = "poly_basemul_8l_acc_rv64im" + variant
        # The input lives in a subfolder; the example name does not carry it.
        infile = "poly_basemul/" + stem
        name = f"{stem}_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target,
                         funcname="poly_basemul_8l_acc_rv64im", timeout=timeout)

    def core(self, slothy):
        """Configure `slothy` and optimize the basemul loop."""
        cfg = slothy.config
        cfg.variable_size = True
        cfg.constraints.stalls_first_attempt = 32
        cfg.inputs_are_outputs = True

        # Extend the current reserved-register collection by x3 and write it
        # back. NOTE(review): x3 is `gp` in the standard RISC-V calling
        # convention; presumably that is why it is reserved -- confirm.
        reserved = cfg.reserved_regs
        reserved += ['x3']
        cfg.reserved_regs = reserved

        slothy.optimize_loop("poly_basemul_8l_acc_rv64im_looper")


#############################################################################################

class ntt_dilithium(Example):
Expand Down Expand Up @@ -1728,7 +1834,7 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7, timeout=Non

def core(self, slothy):
slothy.config.timeout = 300

slothy.config.unsafe_address_offset_fixup = False

slothy.config.outputs = ["r14", "s1", "r12"]
Expand Down Expand Up @@ -1868,7 +1974,7 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7, timeout=Non

def core(self, slothy):
slothy.config.timeout = 180

slothy.config.constraints.stalls_first_attempt = 16
slothy.config.inputs_are_outputs = True
slothy.config.variable_size = True
Expand Down Expand Up @@ -1989,13 +2095,13 @@ def core(self, slothy):
slothy.config.inputs_are_outputs = True
slothy.config.sw_pipelining.enabled = True
slothy.optimize_loop("1")

class Keccak(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7, timeout=None):
name = f"keccakf1600"
infile = name
funcname = "KeccakF1600_StatePermute"


if var != "":
name += f"_{var}"
Expand All @@ -2011,7 +2117,7 @@ def core(self, slothy):
slothy.config.reserved_regs = ["sp", "r13"]
slothy.config.locked_registers = ["sp", "r13"]
slothy.config.unsafe_address_offset_fixup = False

slothy.config.split_heuristic = True
slothy.config.split_heuristic_preprocess_naive_interleaving = True
slothy.config.split_heuristic_repeat = 2
Expand All @@ -2020,17 +2126,17 @@ def core(self, slothy):

if "adomnicai_m7" in self.name:
slothy.config.split_heuristic_factor = 6

slothy.config.outputs = ['hint_spEga0', 'hint_spEge0', 'hint_spEgi0', 'hint_spEgo0', 'hint_spEgu0', 'hint_spEka1', 'hint_spEke1', 'hint_spEki1', 'hint_spEko1', 'hint_spEku1', 'hint_spEma0', 'hint_spEme0', 'hint_spEmi0', 'hint_spEmo0', 'hint_spEmu0', 'hint_spEsa1', 'hint_spEse1', 'hint_spEsi1', 'hint_spEso1', 'hint_spEsu1', 'hint_spEbe0', 'hint_spEbi0', 'hint_spEbo0', 'hint_spEbu0', 'hint_spEba0', 'hint_spEga1', 'hint_spEge1', 'hint_spEgi1', 'hint_spEgo1', 'hint_spEgu1', 'hint_spEka0', 'hint_spEke0', 'hint_spEki0', 'hint_spEko0', 'hint_spEku0', 'hint_spEma1', 'hint_spEme1', 'hint_spEmi1', 'hint_spEmo1', 'hint_spEmu1', 'hint_spEsa0', 'hint_spEse0', 'hint_spEsi0', 'hint_spEso0', 'hint_spEsu0', 'hint_spEbe1', 'hint_spEbi1', 'hint_spEbo1', 'hint_spEbu1', 'hint_spEba1']
slothy.optimize(start="slothy_start_round0", end="slothy_end_round0")
slothy.config.outputs = ['flags', 'hint_r0Aba0', 'hint_r0Aba1', 'hint_r0Abe0', 'hint_r0Abe1', 'hint_r0Abi0', 'hint_r0Abi1', 'hint_r0Abo0', 'hint_r0Abo1', 'hint_r0Abu0', 'hint_r0Abu1', 'hint_r0Aga0', 'hint_r0Aga1', 'hint_r0Age0', 'hint_r0Age1', 'hint_r0Agi0', 'hint_r0Agi1', 'hint_r0Ago0', 'hint_r0Ago1', 'hint_r0Agu0', 'hint_r0Agu1', 'hint_r0Aka0', 'hint_r0Aka1', 'hint_r0Ake0', 'hint_r0Ake1', 'hint_r0Aki0', 'hint_r0Aki1', 'hint_r0Ako0', 'hint_r0Ako1', 'hint_r0Aku0', 'hint_r0Aku1', 'hint_r0Ama0', 'hint_r0Ama1', 'hint_r0Ame0', 'hint_r0Ame1', 'hint_r0Ami0', 'hint_r0Ami1', 'hint_r0Amo0', 'hint_r0Amo1', 'hint_r0Amu0', 'hint_r0Amu1', 'hint_r0Asa0', 'hint_r0Asa1', 'hint_r0Ase0', 'hint_r0Ase1', 'hint_r0Asi0', 'hint_r0Asi1', 'hint_r0Aso0', 'hint_r0Aso1', 'hint_r0Asu0', 'hint_r0Asu1']
slothy.optimize(start="slothy_start_round1", end="slothy_end_round1")
else:
else:
if "xkcp" in self.name:
slothy.config.outputs = ['flags', 'hint_spEba0', 'hint_spEba1', 'hint_spEbe0', 'hint_spEbe1', 'hint_spEbi0', 'hint_spEbi1', 'hint_spEbo0', 'hint_spEbo1', 'hint_spEbu0', 'hint_spEbu1', 'hint_spEga0', 'hint_spEga1', 'hint_spEge0', 'hint_spEge1', 'hint_spEgi0', 'hint_spEgi1', 'hint_spEgo0', 'hint_spEgo1', 'hint_spEgu0', 'hint_spEgu1', 'hint_spEka0', 'hint_spEka1', 'hint_spEke0', 'hint_spEke1', 'hint_spEki0', 'hint_spEki1', 'hint_spEko0', 'hint_spEko1', 'hint_spEku0', 'hint_spEku1', 'hint_spEma0', 'hint_spEma1', 'hint_spEme0', 'hint_spEme1', 'hint_spEmi0', 'hint_spEmi1', 'hint_spEmo0', 'hint_spEmo1', 'hint_spEmu0', 'hint_spEmu1', 'hint_spEsa0', 'hint_spEsa1', 'hint_spEse0', 'hint_spEse1', 'hint_spEsi0', 'hint_spEsi1', 'hint_spEso0', 'hint_spEso1', 'hint_spEsu0', 'hint_spEsu1']
if "adomnicai_m4" in self.name:
slothy.config.outputs = ['flags', 'hint_r0Aba1', 'hint_r0Aka1', 'hint_spEba0', 'hint_spEba1', 'hint_spEbe0', 'hint_spEbe1', 'hint_spEbi0', 'hint_spEbi1', 'hint_spEbo0', 'hint_spEbo1', 'hint_spEbu0', 'hint_spEbu1', 'hint_spEga0', 'hint_spEga1', 'hint_spEge0', 'hint_spEge1', 'hint_spEgi0', 'hint_spEgi1', 'hint_spEgo0', 'hint_spEgo1', 'hint_spEgu0', 'hint_spEgu1', 'hint_spEka0', 'hint_spEka1', 'hint_spEke0', 'hint_spEke1', 'hint_spEki0', 'hint_spEki1', 'hint_spEko0', 'hint_spEko1', 'hint_spEku0', 'hint_spEku1', 'hint_spEma0', 'hint_spEma1', 'hint_spEme0', 'hint_spEme1', 'hint_spEmi0', 'hint_spEmi1', 'hint_spEmo0', 'hint_spEmo1', 'hint_spEmu0', 'hint_spEmu1', 'hint_spEsa0', 'hint_spEsa1', 'hint_spEse0', 'hint_spEse1', 'hint_spEsi0', 'hint_spEsi1', 'hint_spEso0', 'hint_spEso1', 'hint_spEsu0', 'hint_spEsu1', 'hint_spmDa0']

slothy.config.split_heuristic_factor = 22
slothy.config.constraints.stalls_first_attempt = 16

Expand Down Expand Up @@ -2853,7 +2959,13 @@ def main():
fft_floatingpoint_radix4(),
# Fixed point
fft_fixedpoint_radix4(),


# RISC-V
RISC_VExample0(target=Target_XuanTieC908),
RISC_VExampleLoop0(),
RISC_V_ntt8l_singleissue_plant_rv64im(target=Target_XuanTieC908, timeout=300),
RISC_V_poly_basemul_8l_acc_rv64im(target=Target_XuanTieC908),

ntt_dilithium(),
intt_dilithium_123_456_78(),
pointwise_montgomery_dilithium(),
Expand All @@ -2868,7 +2980,7 @@ def main():
pointwise_769_asymmetric_dilithium(),
reduce32_dilithium(),
caddq_dilithium(),

Keccak(var="xkcp"),
Keccak(var="adomnicai_m4"),
Keccak(var="adomnicai_m7"),
Expand Down
Loading