From 7682d24b2440e21e931c15d5b96cb1fd5a8ec97d Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Tue, 24 Dec 2024 04:22:48 +0000 Subject: [PATCH 1/3] initial scaffold for tunable variables Signed-off-by: Jack Luar --- docs/user/FlowVariables.md | 10 ++-- flow/scripts/variables.yaml | 8 +++ tools/AutoTuner/requirements.txt | 1 + tools/AutoTuner/src/autotuner/distributed.py | 58 ++++++-------------- 4 files changed, 33 insertions(+), 44 deletions(-) diff --git a/docs/user/FlowVariables.md b/docs/user/FlowVariables.md index 5d84f396df..f3f20475a2 100644 --- a/docs/user/FlowVariables.md +++ b/docs/user/FlowVariables.md @@ -94,6 +94,7 @@ configuration file. | FILL_CELLS| Fill cells are used to fill empty sites. If not set or empty, fill cell insertion is skipped.| | | | FILL_CONFIG| JSON rule file for metal fill during chip finishing.| | | | FLOORPLAN_DEF| Use the DEF file to initialize floorplan.| | | +| GDS_ALLOW_EMPTY| Regular expression of module names of macros that have no .gds file| | | | GDS_FILES| Path to platform GDS files.| | | | GENERATE_ARTIFACTS_ON_FAILURE| For instance Bazel needs artifacts (.odb and .rpt files) on a failure to allow the user to save hours on re-running the failed step locally, but when working with a Makefile flow, it is more natural to fail the step and leave the user to manually inspect the logs and artifacts directly via the file system. Set to 1 to change the behavior to generate artifacts upon failure to e.g. do a global route. The exit code will still be non-zero on all other failures that aren't covered by the "useful to inspect the artifacts on failure" use-case. Example: just like detailed routing, a global route that fails with congestion, is not a build failure(as in exit code non-zero), it is a successful(as in zero exit code) global route that produce reports detailing the problem. 
Detailed route will not proceed, if there is global routing congestion This allows build systems, such as bazel, to create artifacts for global and detailed route, even if the operation had problems, without having know about the semantics between global and detailed route. Considering that global and detailed route can run for a long time and use a lot of memory, this allows inspecting results on a laptop for a build that ran on a server.| 0| | | GLOBAL_PLACEMENT_ARGS| Use additional tuning parameters during global placement other than default args defined in global_place.tcl.| | | @@ -102,7 +103,7 @@ configuration file. | GPL_ROUTABILITY_DRIVEN| Specifies whether the placer should use routability driven placement.| 1| | | GPL_TIMING_DRIVEN| Specifies whether the placer should use timing driven placement.| 1| | | GUI_TIMING| Load timing information when opening GUI. For large designs, this can be quite time consuming. Useful to disable when investigating non-timing aspects like floorplan, placement, routing, etc.| 1| | -| HOLD_SLACK_MARGIN| Specifies a time margin for the slack when fixing hold violations. This option allows you to overfix or underfix(negative value, terminate retiming before 0 or positive slack). Use min of HOLD_SLACK_MARGIN and 0(default hold slack margin) in floorplan. This avoids overrepair in floorplan for hold by default, but allows skipping hold repair using a negative HOLD_SLACK_MARGIN. Exiting timing repair early is useful in exploration where the .sdc has a fixed clock period at designs target clock period and where HOLD/SETUP_SLACK_MARGIN is used to avoid overrepair(extremely long running times) when exploring different parameter settings.| 0| | +| HOLD_SLACK_MARGIN| Specifies a time margin for the slack when fixing hold violations. This option allows you to overfix or underfix(negative value, terminate retiming before 0 or positive slack). floorplan.tcl uses min of HOLD_SLACK_MARGIN and 0(default hold slack margin). 
This avoids overrepair in floorplan for hold by default, but allows skipping hold repair using a negative HOLD_SLACK_MARGIN. Exiting timing repair early is useful in exploration where the .sdc has a fixed clock period at the design's target clock period and where HOLD/SETUP_SLACK_MARGIN is used to avoid overrepair(extremely long running times) when exploring different parameter settings. When an ideal clock is used, that is before CTS, a clock insertion delay of 0 is used in timing paths. This creates a mismatch between macros that have a .lib file from after CTS, when the clock is propagated. To mitigate this, OpenSTA will subtract the clock insertion delay of macros when calculating timing with an ideal clock, provided that min_clock_tree_path and max_clock_tree_path are in the .lib file, which is the case for macros built with OpenROAD. This is less accurate than if OpenROAD had created a placeholder clock tree for timing estimation purposes prior to CTS. There will inevitably be inaccuracies in the timing calculation prior to CTS. Use a slack margin that is low enough, even negative, to avoid overrepair. Inaccuracies in the timing prior to CTS can also lead to underrepair, but there is no obvious and simple way to avoid underrepair in these cases. Overrepair can lead to excessive runtimes in repair or too much buffering being added, which can present itself as congestion of hold cells or buffer cells. Another use of SETUP/HOLD_SLACK_MARGIN is design parameter exploration when trying to find the minimum clock period for a design. 
The SDC_FILE for a design can be quite complicated and instead of modifying the clock period in the SDC_FILE, which can be non-trivial, the clock period can be fixed at the target frequency and the SETUP/HOLD_SLACK_MARGIN can be swept to find a plausible current minimum clock period.| 0| | | IO_CONSTRAINTS| File path to the IO constraints .tcl file.| | | | IO_PLACER_H| The metal layer on which to place the I/O pins horizontally (top and bottom of the die).| | | | IO_PLACER_V| The metal layer on which to place the I/O pins vertically (sides of the die).| | | @@ -137,13 +138,13 @@ configuration file. | PWR_NETS_VOLTAGES| Used for IR Drop calculation.| | | | RCX_RULES| RC Extraction rules file path.| | | | RECOVER_POWER| Specifies how many percent of paths with positive slacks can be slowed for power savings [0-100].| 0| | -| REMOVE_ABC_BUFFERS| Remove abc buffers from the netlist. If timing repair in floorplanning is taking too long, use a SETUP_HOLD_MARGIN to terminate timing repair early instead of using REMOVE_ABC_BUFFERS or set SKIP_LAST_GAST=1.| | yes| +| REMOVE_ABC_BUFFERS| Remove abc buffers from the netlist. If timing repair in floorplanning is taking too long, use a SETUP/HOLD_SLACK_MARGIN to terminate timing repair early instead of using REMOVE_ABC_BUFFERS or set SKIP_LAST_GASP=1.| | yes| | REMOVE_CELLS_FOR_EQY| String patterns directly passed to write_verilog -remove_cells <> for equivalence checks.| | | | REPAIR_PDN_VIA_LAYER| Remove power grid vias which generate DRC violations after detailed routing.| | | | REPORT_CLOCK_SKEW| Report clock skew as part of reporting metrics, starting at CTS, before which there is no clock skew. 
This metric can be quite time-consuming, so it can be useful to disable.| 1| | | RESYNTH_AREA_RECOVER| Enable re-synthesis for area reclaim.| 0| | | RESYNTH_TIMING_RECOVER| Enable re-synthesis for timing optimization.| 0| | -| ROUTING_LAYER_ADJUSTMENT| Default routing layer adjustment| 0.5| | +| ROUTING_LAYER_ADJUSTMENT| Adjusts routing layer capacities to manage congestion and improve detailed routing. High values ease detailed routing but risk excessive detours and long global routing times, while low values reduce global routing failure but can complicate detailed routing. The global routing running time normally reduces dramatically (entirely design specific, but going from hours to minutes has been observed) when the value is low (such as 0.10). Sometimes, global routing will succeed with lower values and fail with higher values. Exploring results with different values can help shed light on the problem. Start with a too low value, such as 0.10, and bisect to a value that works by doing multiple global routing runs. As a last resort, `make global_route_issue` and using tools/OpenROAD/etc/deltaDebug.py can be useful to debug global routing errors. If there is something specific that is impossible to route, such as a clock line over a macro, global routing will terminate with DRC errors on routes that could have been routed were it not for the specific impossible routes. deltaDebug.py should weed out the possible routes and leave a minimal failing case that pinpoints the problem.| 0.5| | | RTLMP_AREA_WT| Weight for the area of the current floorplan.| 0.1| | | RTLMP_ARGS| Overrides all other RTL macro placer arguments.| | | | RTLMP_BOUNDARY_WT| Weight for the boundary or how far the hard macro clusters are from boundaries.| 50.0| | @@ -167,7 +168,7 @@ configuration file. | SDC_FILE| The path to design constraint (SDC) file.| | | | SDC_GUT| Load design and remove all internal logic before doing synthesis. 
This is useful when creating a mock .lef abstract that has a smaller area than the amount of logic would allow. bazel-orfs uses this to mock SRAMs, for instance.| | | | SEAL_GDS| Seal macro to place around the design.| | | -| SETUP_SLACK_MARGIN| Specifies a time margin for the slack when fixing setup violations. This option allows you to overfix or underfix(negative value, terminate retiming before 0 or positive slack).| 0| | +| SETUP_SLACK_MARGIN| Specifies a time margin for the slack when fixing setup violations. This option allows you to overfix or underfix(negative value, terminate retiming before 0 or positive slack). See HOLD_SLACK_MARGIN for more details.| 0| | | SET_RC_TCL| Metal & Via RC definition file path.| | | | SKIP_CTS_REPAIR_TIMING| Skipping CTS repair, which can take a long time, can be useful in architectural exploration or when getting CI up and running.| | | | SKIP_GATE_CLONING| Do not use gate cloning transform to fix timing violations (default: use gate cloning).| | | @@ -343,6 +344,7 @@ configuration file. ## final variables - [ADDITIONAL_GDS](#ADDITIONAL_GDS) +- [GDS_ALLOW_EMPTY](#GDS_ALLOW_EMPTY) - [GND_NETS_VOLTAGES](#GND_NETS_VOLTAGES) - [MAX_ROUTING_LAYER](#MAX_ROUTING_LAYER) - [MIN_ROUTING_LAYER](#MIN_ROUTING_LAYER) diff --git a/flow/scripts/variables.yaml b/flow/scripts/variables.yaml index 39a1893d66..d4392fbc96 100644 --- a/flow/scripts/variables.yaml +++ b/flow/scripts/variables.yaml @@ -104,6 +104,7 @@ CORE_UTILIZATION: The core utilization percentage (0-100). stages: - floorplan + tunable: 1 CORE_AREA: description: > The core area specified as a list of lower-left and upper-right corners in @@ -111,6 +112,7 @@ CORE_AREA: (X1 Y1 X2 Y2). 
stages: - floorplan + tunable: 1 REPORT_CLOCK_SKEW: description: Report clock skew as part of reporting metrics, starting at CTS, @@ -344,6 +346,7 @@ CELL_PAD_IN_SITES_DETAIL_PLACEMENT: - cts - grt default: 0 + tunable: 1 PLACE_PINS_ARGS: description: > Arguments to place_pins @@ -362,6 +365,7 @@ PLACE_DENSITY_LB_ADDON: description: > Check the lower boundary of the PLACE_DENSITY and add PLACE_DENSITY_LB_ADDON if it exists. + tunable: 1 REPAIR_PDN_VIA_LAYER: description: > Remove power grid vias which generate DRC violations after detailed routing. @@ -657,6 +661,7 @@ CORE_MARGIN: is undefined. stages: - floorplan + tunable: 1 DIE_AREA: description: > The die area specified as a list of lower-left and upper-right corners in @@ -664,6 +669,7 @@ DIE_AREA: (X1 Y1 X2 Y2). stages: - floorplan + tunable: 1 RESYNTH_AREA_RECOVER: description: > Enable re-synthesis for area reclaim. @@ -702,12 +708,14 @@ CTS_CLUSTER_DIAMETER: default: 20 stages: - cts + tunable: 1 CTS_CLUSTER_SIZE: description: > Maximum number of sinks per cluster. default: 50 stages: - cts + tunable: 1 CTS_SNAPSHOT: description: > Creates ODB/SDC files prior to clock net and setup/hold repair. 
diff --git a/tools/AutoTuner/requirements.txt b/tools/AutoTuner/requirements.txt index 5bf65305cc..fb72219420 100644 --- a/tools/AutoTuner/requirements.txt +++ b/tools/AutoTuner/requirements.txt @@ -9,3 +9,4 @@ tensorboard>=2.14.0,<=2.16.2 protobuf==3.20.3 SQLAlchemy==1.4.17 urllib3<=1.26.15 +pyyaml==6.0.1 diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py index 5543f0002a..95f9faf6a4 100644 --- a/tools/AutoTuner/src/autotuner/distributed.py +++ b/tools/AutoTuner/src/autotuner/distributed.py @@ -32,6 +32,7 @@ import glob import subprocess import random +import yaml from datetime import datetime from multiprocessing import cpu_count from subprocess import run @@ -368,42 +369,22 @@ def read_tune_pbt(name, this): return config, sdc_file, fr_file -def parse_flow_variables(): +def parse_tunable_variables(): """ - Parse the flow variables from source - - Code: Makefile `vars` target output - + Parse the tunable variables from variables.yaml TODO: Tests. - - Output: - - flow_variables: set of flow variables """ cur_path = os.path.dirname(os.path.realpath(__file__)) - - # first, generate vars.tcl - makefile_path = os.path.join(cur_path, "../../../../flow/") - initial_path = os.path.abspath(os.getcwd()) - os.chdir(makefile_path) - result = subprocess.run(["make", "vars", f"PLATFORM={args.platform}"]) - if result.returncode != 0: - print(f"[ERROR TUN-0018] Makefile failed with error code {result.returncode}.") - sys.exit(1) - if not os.path.exists("vars.tcl"): - print(f"[ERROR TUN-0019] Makefile did not generate vars.tcl.") - sys.exit(1) - os.chdir(initial_path) - - # for code parsing, you need to parse from both scripts and vars.tcl file. 
- pattern = r"(?:::)?env\((.*?)\)" - files = glob.glob(os.path.join(cur_path, "../../../../flow/scripts/*.tcl")) - files.append(os.path.join(cur_path, "../../../../flow/vars.tcl")) - variables = set() - for file in files: - with open(file) as fp: - matches = re.findall(pattern, fp.read()) - for match in matches: - for variable in match.split("\n"): - variables.add(variable.strip().upper()) + vars_path = os.path.join(cur_path, "../../../../flow/scripts/variables.yaml") + + # Read from variables.yaml and get variables with tunable = 1 + with open(vars_path) as file: + try: + result = yaml.safe_load(file) + except yaml.YAMLError as exc: + print("[ERROR TUN-0018] Error parsing variables.yaml.") + sys.exit(1) + variables = {key for key, value in result.items() if value.get("tunable", 0) == 1} return variables @@ -414,7 +395,7 @@ def parse_config(config, path=os.getcwd()): options = "" sdc = {} fast_route = {} - flow_variables = parse_flow_variables() + flow_variables = parse_tunable_variables() for key, value in config.items(): # Keys that begin with underscore need special handling. if key.startswith("_"): @@ -432,15 +413,12 @@ def parse_config(config, path=os.getcwd()): "[WARNING TUN-0013] Non-flatten the designs are not " "fully supported, ignoring _SYNTH_FLATTEN parameter." ) - # Default case is VAR=VALUE else: - # FIXME there is no robust way to get this metainformation from - # ORFS about the variables, so disable this code for now. 
- + # Default case is VAR=VALUE # Sanity check: ignore all flow variables that are not tunable - # if key not in flow_variables: - # print(f"[ERROR TUN-0017] Variable {key} is not tunable.") - # sys.exit(1) + if key not in flow_variables: + print(f"[ERROR TUN-0017] Variable {key} is not tunable.") + sys.exit(1) options += f" {key}={value}" if bool(sdc): write_sdc(sdc, path) From 4e07ffacf7898bb05de6e8d5d9c484948a428042 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Tue, 24 Dec 2024 04:35:36 +0000 Subject: [PATCH 2/3] fix space Signed-off-by: Jack Luar --- tools/AutoTuner/src/autotuner/distributed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py index 95f9faf6a4..dc07c5723d 100644 --- a/tools/AutoTuner/src/autotuner/distributed.py +++ b/tools/AutoTuner/src/autotuner/distributed.py @@ -418,7 +418,7 @@ def parse_config(config, path=os.getcwd()): # Sanity check: ignore all flow variables that are not tunable if key not in flow_variables: print(f"[ERROR TUN-0017] Variable {key} is not tunable.") - sys.exit(1) + sys.exit(1) options += f" {key}={value}" if bool(sdc): write_sdc(sdc, path) From da475f7c3a95b308db4c3d856ab52b438f84d636 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Wed, 25 Dec 2024 06:05:54 +0000 Subject: [PATCH 3/3] remove unnecessary try/except Signed-off-by: Jack Luar --- tools/AutoTuner/src/autotuner/distributed.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py index dc07c5723d..e46ae7728a 100644 --- a/tools/AutoTuner/src/autotuner/distributed.py +++ b/tools/AutoTuner/src/autotuner/distributed.py @@ -379,11 +379,7 @@ def parse_tunable_variables(): # Read from variables.yaml and get variables with tunable = 1 with open(vars_path) as file: - try: - result = yaml.safe_load(file) - except yaml.YAMLError as exc: - print("[ERROR 
TUN-0018] Error parsing variables.yaml.") - sys.exit(1) + result = yaml.safe_load(file) variables = {key for key, value in result.items() if value.get("tunable", 0) == 1} return variables