Skip to content

Commit

Permalink
Pass slurm job memory to scenario.
Browse files Browse the repository at this point in the history
  • Loading branch information
bojan-karlas committed Feb 27, 2024
1 parent e85b228 commit 999ea0f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 3 deletions.
3 changes: 2 additions & 1 deletion experiments/datascope/experiments/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def run_scenario(
output_path: str = DEFAULT_RESULTS_SCENARIOS_PATH,
no_save: bool = False,
event_server: Optional[str] = None,
job_memory: Optional[str] = None,
**attributes: Any
) -> None:
# If we should continue the execution of an existing scenario, then we should load it.
Expand Down Expand Up @@ -144,7 +145,7 @@ def run_scenario(
client.connect(event_server)
queue = client
pickled_queue = True
runner = get_scenario_runner(queue=queue, pickled_queue=pickled_queue)
runner = get_scenario_runner(queue=queue, pickled_queue=pickled_queue, job_memory=job_memory)

scenario.logger.setLevel(logging.DEBUG)
scenario = runner(scenario)
Expand Down
8 changes: 8 additions & 0 deletions experiments/datascope/experiments/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,14 @@ def main():
help="Address of the event server. If specified, logging and progress events will be streamed to it.",
)

parser_run_scenario.add_argument(
"-m",
"--job-memory",
type=str,
default=None,
help="The amount of memory allowed for a job.",
)

add_dynamic_arguments(
parser=parser_run_scenario,
targets=Scenario.scenarios.values(),
Expand Down
18 changes: 16 additions & 2 deletions experiments/datascope/experiments/scenarios/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from enum import Enum
from glob import glob
from inspect import signature
from io import TextIOBase, StringIO, SEEK_END
from io import TextIOBase, StringIO, BytesIO, SEEK_END
from itertools import product
from logging import Logger
from matplotlib.figure import Figure
Expand Down Expand Up @@ -699,6 +699,7 @@ def get_scenario_runner(
console_log: bool = True,
rerun: bool = False,
pickled_queue: bool = False,
job_memory: Optional[str] = None,
) -> Callable[[Scenario], Scenario]:
def _scenario_runner(scenario: Scenario) -> Scenario:
try:
Expand All @@ -720,6 +721,15 @@ def _scenario_runner(scenario: Scenario) -> Scenario:
ch.setFormatter(formatter)
scenario.logger.addHandler(ch)

        # Quickly consume 70% of the given amount of memory and hold it for 10 seconds.
if job_memory is not None:
suffixes = {"G": 1024**3, "M": 1024**2, "K": 1024}
size = int(float(job_memory[:-1]) * suffixes[job_memory[-1]] * 0.7)
with BytesIO() as buffer:
buffer.write(bytearray(os.urandom(size)))
buffer.seek(0)
time.sleep(10)

if rerun or not scenario.completed:
if queue is not None:
scenario.run(progress_bar=progress_bar, console_log=False)
Expand Down Expand Up @@ -964,7 +974,11 @@ def stop_handler(number, frame):
else:
path = self.save_scenario(scenario)
logpath = os.path.join(path, "slurm.log")
run_command = "python -m datascope.experiments run-scenario -o %s -e %s" % (path, address)
run_command = "python -m datascope.experiments run-scenario -o %s -e %s -m %s" % (
path,
address,
slurm_jobmemory,
)
slurm_command = "sbatch --job-name=%s" % self.id
slurm_command += " --time=%s" % slurm_jobtime
slurm_command += " --mem-per-cpu=%s" % slurm_jobmemory
Expand Down

0 comments on commit 999ea0f

Please sign in to comment.