some clean up of blend_files

NREL · Jul 2, 2024 · 628e103 · 628e103
1 parent 39092bd
commit 628e103
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 48 deletions.
diff --git a/nsrdb/cli.py b/nsrdb/cli.py
@@ -555,26 +555,22 @@ def collect_data_model(ctx, config, verbose=False, pipeline_step=None):
     config['n_chunks'] = config.get('n_chunks', 1)
     config['n_writes'] = config.get('n_writes', 1)
     config['final'] = config.get('final', False)
-    n_files_default = (0, 1, 3, 4, 6)  # all files minus irrad and clearsky
-    i_files = (
-        range(len(NSRDB.OUTS))
-        if config['final']
-        else config.get('collect_files', n_files_default)
-    )
     fnames = sorted(NSRDB.OUTS.keys())
+    min_files = [
+        f for f in fnames if f.split('_')[1] not in ('clearsky', 'irradiance')
+    ]
+    collect_files = fnames if config['final'] else min_files
 
-    if config['final'] and config['n_chunks'] != 1:
-        msg = 'collect-data-model was marked as final but n_chunks != 1'
-        logger.error(msg)
-        raise ValueError(msg)
+    msg = 'collect-data-model was marked as final but n_chunks != 1'
+    assert not (config['final'] and config['n_chunks'] != 1), msg
 
-    for i_chunk, i_fname in itertools.product(
-        range(config['n_chunks']), i_files
+    for i_chunk, fname in itertools.product(
+        range(config['n_chunks']), collect_files
     ):
-        log_id = f'{fnames[i_fname].split("_")[1]}_{i_chunk}'
+        log_id = '_'.join(fname.split('_')[1:-1] + [i_chunk])
         config['i_chunk'] = i_chunk
-        config['i_fname'] = i_fname
-        config['job_name'] = f'{ctx.obj["NAME"]}_{i_fname}_{log_id}'
+        config['i_fname'] = fnames.index(fname)
+        config['job_name'] = f'{ctx.obj["MOD_NAME"]}_{log_id}'
 
         BaseCLI.kickoff_job(
             ctx=ctx,
@@ -612,8 +608,8 @@ def collect_final(ctx, config, verbose=False, pipeline_step=None):
     )
 
     for i_fname, fname in enumerate(sorted(NSRDB.OUTS.keys())):
-        log_id = fname.split('_')[1]
-        config['job_name'] = f'{ctx.obj["NAME"]}_{i_fname}_{log_id}'
+        log_id = '_'.join(fname.split('_')[1:-1])
+        config['job_name'] = f'{ctx.obj["MOD_NAME"]}_{log_id}'
         config['i_fname'] = i_fname
         BaseCLI.kickoff_job(
             ctx=ctx,

diff --git a/nsrdb/nsrdb.py b/nsrdb/nsrdb.py
@@ -452,32 +452,32 @@ def blend_files(kwargs):
         logger = init_logger('nsrdb.cli', stream=True)
         logger.info(f'Blending NSRDB data files with {user_input}')
 
-        all_tags = [
-            'ancillary_a',
-            'ancillary_b',
-            'clearsky',
-            'clouds',
-            'csp',
-            'irradiance',
-            'pv',
-        ]
-
-        cmd = f'python -m nsrdb.blend.cli -n {name}'
-        cmd += '_{tag}'
-        cmd += f' -m {user_input["meta_file"]} -od {out_dir}'
-        cmd += f' -ed {east_dir} -wd {west_dir}'
-        cmd += ' -t "{tag}"'
-        cmd += f' -mc {map_col} -ls {meta_lon}'
-        cmd += f' -cs {user_input["chunk_size"]}'
-        cmd += f' -ld "{log_dir}"'
-        cmd += f' slurm -a {user_input["alloc"]}'
-        cmd += f' -wt {user_input["walltime"]}'
-        cmd += f' -mem {user_input["memory"]}'
-        cmd += f' -sout "{out_dir}/stdout"'
-        cmd += ' -l "--qos=normal"'
+        cmd = (
+            'python -m nsrdb.blend.cli '
+            '-n {name} -m {meta} -od {out_dir} -ed {east_dir} -wd {west_dir} '
+            '-mc {map_col} -ls {lon_seam} -cs {chunk_size} -ld {log_dir} '
+            'slurm -a {alloc} -wt {walltime} -mem {memory} -sout {stdout_path}'
+            ' -l --qos=normal'
+        )
+        cmd = cmd.format(
+            name=name,
+            meta=user_input['meta_file'],
+            out_dir=out_dir,
+            east_dir=east_dir,
+            west_dir=west_dir,
+            map_col=map_col,
+            lon_seam=meta_lon,
+            chunk_size=user_input['chunk_size'],
+            log_dir=log_dir,
+            alloc=user_input['alloc'],
+            walltime=user_input['walltime'],
+            memory=user_input['memory'],
+            stdout_path=f'{out_dir}/stdout',
+        )
+        cmd += ' -t {file_tag}'
 
         if user_input['file_tag'] == 'all':
-            for tag in all_tags:
+            for tag in ['_'.join(k.split('_')[1:-1]) for k in NSRDB.OUTS]:
                 logger.debug(f'Running command: {cmd.format(tag=tag)}')
                 os.system(cmd.format(tag=tag))
         else:

diff --git a/nsrdb/utilities/cli.py b/nsrdb/utilities/cli.py
@@ -117,12 +117,11 @@ def from_config_preflight(
         ctx.obj['VERBOSE'] = verbose
         ctx.obj['OUT_DIR'] = config.get('outdir', status_dir)
         ctx.obj['PIPELINE_STEP'] = pipeline_step or module_name
-        sanitized_mod = module_name.replace('-', '_')
-        ctx.obj['LOG_DIR'] = os.path.join(status_dir, 'logs', sanitized_mod)
+        mod_name = module_name.replace('-', '_')
+        ctx.obj['MOD_NAME'] = mod_name
+        ctx.obj['LOG_DIR'] = os.path.join(status_dir, 'logs', mod_name)
         os.makedirs(ctx.obj['LOG_DIR'], exist_ok=True)
-        name = f'nsrdb_{sanitized_mod}_{os.path.basename(status_dir)}'
-        name = config.get('job_name', name)
-        ctx.obj['NAME'] = name
+        ctx.obj['NAME'] = name = config.get('job_name', mod_name)
         ctx.obj['LOG_FILE'] = config.get(
             'log_file', os.path.join(ctx.obj['LOG_DIR'], name + '.log')
         )
@@ -143,7 +142,7 @@ def from_config_preflight(
         )
 
         init_mult(
-            f'nsrdb_{sanitized_mod}',
+            f'nsrdb_{mod_name}',
             ctx.obj['LOG_DIR'],
             modules=[__name__, 'nsrdb'],
             verbose=verbose,
@@ -506,8 +505,9 @@ def kickoff_multiday(
 
         for doy in doys:
             date = NSRDB.doy_to_datestr(config_dict['year'], doy)
+            log_id = f'{date}_{str(doy).zfill(3)}'
             config_dict['date'] = date
-            config_dict['job_name'] = f'{ctx.obj["NAME"]}_{doy}_{date}'
+            config_dict['job_name'] = f'{ctx.obj["MOD_NAME"]}_{log_id}'
             config_dict['doy'] = doy
 
             cls.kickoff_job(