diff --git a/README.rst b/README.rst
index 95e8a86a..a8c372a1 100755
--- a/README.rst
+++ b/README.rst
@@ -52,21 +52,31 @@ All-Sky.
 Installation
 ============
 
-1. Use conda (anaconda or miniconda with python 3.9) to create an nsrdb
-   environment: ``conda create --name nsrdb python=3.9``
-2. Activate your new conda env: ``conda activate nsrdb``
-3. Follow the steps used in the `pytest actions `_.
+Option 1: Install from PyPI (recommended for analysts)
+------------------------------------------------------
 
-   1) These actions refer to the required repositories needed to run all
-      tests and the commands which should be run from the local location of
-      those repositories
-   2) If you plan to run without MLClouds the step associated with this
-      repository can be skipped.
-4. Test your installation:
+1. Create a new environment: ``conda create --name nsrdb python=3.9``
 
-   1) Start ipython and test the following import:
-      ``from nsrdb.data_model import DataModel``
-   2) Navigate to the tests/ directory and run the command: ``pytest``
+2. Activate environment: ``conda activate nsrdb``
+
+3. Install nsrdb: ``pip install NREL-nsrdb``
+
+Option 2: Clone repo (recommended for developers)
+-------------------------------------------------
+
+1. From the home dir, run ``git clone git@github.com:NREL/nsrdb.git``
+
+2. Create the ``nsrdb`` environment and install the package:
+
+   1) Create a conda env: ``conda create -n nsrdb``
+   2) Activate it: ``conda activate nsrdb``
+   3) ``cd`` into the repo cloned in step 1.
+   4) Prior to running ``pip`` below, make sure you are on the correct
+      branch (install from ``main``!)
+   5) Install ``nsrdb`` and its dependencies by running ``pip install .``
+      (or ``pip install -e .`` if running a dev branch or working on the
+      source code)
+   6) *Optional*: Set up the pre-commit hooks with
+      ``pip install pre-commit`` and ``pre-commit install``
 
-5. If you are a developer, also run ``pre-commit install`` in the directory
-   containing .pre-commit-config.yaml.
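As a quick sanity check after either install option, the import test from the
pre-existing instructions still applies; a minimal sketch (assumes only that
the install succeeded)::

    # verify the install by importing the core processing entry point
    from nsrdb.data_model import DataModel

    print(DataModel)  # should print the class rather than raise ImportError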
 
 NSRDB Versions
@@ -80,34 +90,77 @@
      - Effective Date
      - Data Years*
      - Notes
-   * - 1.0.0
-     - 2015
-     - 2005-2012
-     - Initial release of PSM v1 (no FARMS)
-
-       - Satellite Algorithm for Shortwave Radiation Budget (SASRAB) model
-       - MMAC model for clear sky condition
-       - The DNI for cloud scenes is then computed using the DISC model
-
-   * - 2.0.0
-     - 2016
-     - 1998-2015
-     - Initial release of PSM v2 (use of FARMS, downscaling of ancillary data
-       introduced to account for elevation, NSRDB website distribution
-       developed)
-
-       - Clear sky: REST2, Cloudy sky: NREL FARMS model and DISC model
-       - Climate Forecast System Reanalysis (CFSR) is used for ancillary data
-       - Monthly 0.5º aerosol optical depth (AOD) for 1998-2014 using
-         satellite and ground-based measurements. Monthly results interpolated
-         to daily 4-km AOD data. Daily data calibrated using ground
-         measurements to develop accurate AOD product.
-
+   * - 4.0.0
+     - 5/1/23
+     - 2022
+     - Integrated new FARMS-DNI model.
+   * - 3.2.3
+     - 4/13/23
+     - None
+     - Fixed MERRA interpolation issue #51 and deprecated python 3.7/3.8.
+       Added changes to accommodate pandas v2.0.0.
+   * - 3.2.2
+     - 2/25/2022
+     - 1998-2021
+     - Implemented a model for snowy albedo as a function of temperature from
+       MERRA2 based on the paper "A comparison of simulated and observed
+       fluctuations in summertime Arctic surface albedo" by Becky Ross and
+       John E. Walsh
+   * - 3.2.1
+     - 1/12/2021
+     - 2021
+     - Implemented an algorithm to re-map the parallax and shading corrected
+       cloud coordinates to the nominal GOES coordinate system. This fixes the
+       issue of PC cloud coordinates conflicting with clearsky coordinates.
+       This also fixes the strange pattern that was found in the long term
+       means generated from PC data.
+   * - 3.2.0
+     - 3/17/2021
+     - 2020
+     - Enabled cloud solar shading coordinate adjustment by default, enabled
+       MLClouds machine learning gap fill method for missing cloud properties
+       (cloud fill flag #7)
+   * - 3.1.2
+     - 6/8/2020
+     - 2020
+     - Added feature to adjust cloud coordinates based on solar position and
+       shading geometry.
+   * - 3.1.1
+     - 12/5/2019
+     - 2018+, TMY/TDY/TGY-2018
+     - Complete refactor of TMY processing code.
+   * - 3.1.0
+     - 9/23/2019
+     - 2018+
+     - Complete refactor of NSRDB processing code for NSRDB 2018
+   * - 3.0.6
+     - 4/23/2019
+     - 1998-2017
+     - Missing data for all cloud properties gap filled using heuristics method
+   * - 3.0.5
+     - 4/8/2019
+     - 1998-2017
+     - Cloud pressure attributes and scale/offset fixed for 2016 and 2017
+   * - 3.0.4
+     - 3/29/2019
+     - 1998-2017
+     - Aerosol optical depth patched with physical range from 0 to 3.2
+   * - 3.0.3
+     - 2/25/2019
+     - 1998-2017
+     - Wind data recomputed to fix corrupted data in western extent
+   * - 3.0.2
+     - 2/25/2019
+     - 1998-2017
+     - Air temperature data recomputed from MERRA2 with elevation correction
+   * - 3.0.1
+     - 2018
+     - 2017+
+     - Moved from timeshift of radiation to timeshift of cloud properties.
    * - 3.0.0
      - 2018
      - 1998-2017
      - Initial release of PSM v3
-
        - Hourly AOD (1998-2016) from Modern-Era Retrospective analysis for
          Research and Applications Version 2 (MERRA2).
        - Snow-free Surface Albedo from MODIS (2001-2015), (MCD43GF CMG
@@ -120,74 +173,26 @@
        - Modern-Era Retrospective analysis for Research and Applications,
          Version 2 (MERRA-2) is used for ancillary data (pressure, humidity,
          wind speed etc.)
+   * - 2.0.0
+     - 2016
+     - 1998-2015
+     - Initial release of PSM v2 (use of FARMS, downscaling of ancillary data
+       introduced to account for elevation, NSRDB website distribution
+       developed)
+
+       - Clear sky: REST2, Cloudy sky: NREL FARMS model and DISC model
+       - Climate Forecast System Reanalysis (CFSR) is used for ancillary data
+       - Monthly 0.5º aerosol optical depth (AOD) for 1998-2014 using
+         satellite and ground-based measurements. Monthly results interpolated
+         to daily 4-km AOD data. Daily data calibrated using ground
+         measurements to develop accurate AOD product.
+
+   * - 1.0.0
+     - 2015
+     - 2005-2012
+     - Initial release of PSM v1 (no FARMS)
+
+       - Satellite Algorithm for Shortwave Radiation Budget (SASRAB) model
+       - MMAC model for clear sky condition
+       - The DNI for cloud scenes is then computed using the DISC model
-
-   * - 3.0.1
-     - 2018
-     - 2017+
-     - Moved from timeshift of radiation to timeshift of cloud properties.
-   * - 3.0.2
-     - 2/25/2019
-     - 1998-2017
-     - Air temperature data recomputed from MERRA2 with elevation correction
-   * - 3.0.3
-     - 2/25/2019
-     - 1998-2017
-     - Wind data recomputed to fix corrupted data in western extent
-   * - 3.0.4
-     - 3/29/2019
-     - 1998-2017
-     - Aerosol optical depth patched with physical range from 0 to 3.2
-   * - 3.0.5
-     - 4/8/2019
-     - 1998-2017
-     - Cloud pressure attributes and scale/offset fixed for 2016 and 2017
-   * - 3.0.6
-     - 4/23/2019
-     - 1998-2017
-     - Missing data for all cloud properties gap filled using heuristics method
-   * - 3.1.0
-     - 9/23/2019
-     - 2018+
-     - Complete refactor of NSRDB processing code for NSRDB 2018
-   * - 3.1.1
-     - 12/5/2019
-     - 2018+, TMY/TDY/TGY-2018
-     - Complete refactor of TMY processing code.
-   * - 3.1.2
-     - 6/8/2020
-     - 2020
-     - Added feature to adjust cloud coordinates based on solar position and
-       shading geometry.
-   * - 3.2.0
-     - 3/17/2021
-     - 2020
-     - Enabled cloud solar shading coordinate adjustment by default, enabled
-       MLClouds machine learning gap fill method for missing cloud properties
-       (cloud fill flag #7)
-   * - 3.2.1
-     - 1/12/2021
-     - 2021
-     - Implemented an algorithm to re-map the parallax and shading corrected
-       cloud coordinates to the nominal GOES coordinate system. This fixes the
-       issue of PC cloud coordinates conflicting with clearsky coordinates.
-       This also fixes the strange pattern that was found in the long term
-       means generated from PC data.
-   * - 3.2.2
-     - 2/25/2022
-     - 1998-2021
-     - Implemented a model for snowy albedo as a function of temperature from
-       MERRA2 based on the paper "A comparison of simulated and observed
-       fluctuations in summertime Arctic surface albedo" by Becky Ross and
-       John E. Walsh
-   * - 3.2.3
-     - 4/13/23
-     - None
-     - Fixed MERRA interpolation issue #51 and deprecated python 3.7/3.8.
-       Added changes to accommodate pandas v2.0.0.
-   * - 4.0.0
-     - 5/1/23
-     - 2022
-     - Integrated new FARMS-DNI model.
 
 Recommended Citation
 ====================
diff --git a/nsrdb/aggregation/aggregation.py b/nsrdb/aggregation/aggregation.py
index 50154480..492d1057 100755
--- a/nsrdb/aggregation/aggregation.py
+++ b/nsrdb/aggregation/aggregation.py
@@ -36,14 +36,14 @@
         'tree_file': 'kdtree_nsrdb_meta_2km.pkl',
         'meta_file': 'nsrdb_meta_2km.csv',
         'spatial': '2km',
-        'temporal': '10min',
+        'freq': '10min',
     },
     'conus': {
         'data_sub_dir': 'blended_conus',
         'tree_file': 'kdtree_nsrdb_meta_2km_conus.pkl',
         'meta_file': 'nsrdb_meta_2km_conus.csv',
         'spatial': '2km',
-        'temporal': '5min',
+        'freq': '5min',
     },
     'final': {
         'data_sub_dir': 'nsrdb_4km_30min',
@@ -51,7 +51,7 @@
         'tree_file': 'kdtree_nsrdb_meta_4km.pkl',
         'meta_file': 'nsrdb_meta_4km.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
         'source_priority': ['conus', 'full_disk'],
     },
 }
@@ -63,7 +63,7 @@
         'tree_file': 'kdtree_nsrdb_meta_2km_east.pkl',
         'meta_file': 'nsrdb_meta_2km_east.csv',
         'spatial': '2km',
-        'temporal': '15min',
+        'freq': '15min',
     },
     'final': {
         'data_sub_dir': 'wrf_9km',
@@ -71,7 +71,7 @@
         'tree_file': 'kdtree_wrf_meta_9km.pkl',
         'meta_file': 'wrf_meta_9km.csv',
         'spatial': '9km',
-        'temporal': '15min',
+        'freq': '15min',
     },
 }
 
@@ -82,21 +82,21 @@
         'tree_file': 'kdtree_nsrdb_meta_2km_full.pkl',
         'meta_file': 'nsrdb_meta_2km_full.csv',
         'spatial': '2km',
-        'temporal': '10min',
+        'freq': '10min',
     },
     'conus': {
         'data_sub_dir': 'nsrdb_conus_east_2018_2km_5min/final/',
         'tree_file': 'kdtree_nsrdb_meta_2km_conus.pkl',
         'meta_file': 'nsrdb_meta_2km_conus.csv',
         'spatial': '2km',
-        'temporal': '5min',
+        'freq': '5min',
     },
     'west': {
         'data_sub_dir': 'nsrdb_west_full_2018_4km_30min/final/',
         'tree_file': 'kdtree_nsrdb_meta_4km.pkl',
         'meta_file': 'nsrdb_meta_4km.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
     },
     'final': {
         'data_sub_dir': 'nsrdb_4km_30min',
@@ -104,7 +104,7 @@
         'tree_file': 'kdtree_nsrdb_meta_4km.pkl',
         'meta_file': 'nsrdb_meta_4km.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
     },
 }
 
@@ -115,21 +115,21 @@
         'tree_file': 'kdtree_nsrdb_meta_2km_east.pkl',
         'meta_file': 'nsrdb_meta_2km_east.csv',
         'spatial': '2km',
-        'temporal': '15min',
+        'freq': '15min',
     },
     'west': {
         'data_sub_dir': 'west',
         'tree_file': 'kdtree_west_psm_extent.pkl',
         'meta_file': 'west_psm_extent.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
    },
     'conus': {
         'data_sub_dir': 'conus',
         'tree_file': 'kdtree_nsrdb_meta_2km_conus.pkl',
         'meta_file': 'nsrdb_meta_2km_conus.csv',
         'spatial': '2km',
-        'temporal': '5min',
+        'freq': '5min',
     },
     'final': {
         'data_sub_dir': 'nsrdb_4km_30min',
@@ -137,7 +137,7 @@
         'tree_file': 'kdtree_surfrad_meta.pkl',
         'meta_file': 'surfrad_meta.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
     },
 }
 
@@ -964,7 +964,7 @@ def __init__(
     def parse_data(self):
         """Parse the data input for several useful attributes."""
         self.final_sres = self.data['final']['spatial']
-        self.final_tres = self.data['final']['temporal']
+        self.final_tres = self.data['final']['freq']
         if 'fpath' in self.data['final']:
             self.fout = self.data['final']['fpath']
         else:
@@ -984,7 +984,7 @@ def parse_data(self):
     def preflight(
         self,
-        reqs=('data_sub_dir', 'tree_file', 'meta_file', 'spatial', 'temporal'),
+        reqs=('data_sub_dir', 'tree_file', 'meta_file', 'spatial', 'freq'),
     ):
         """Run validity checks on input data.
 
@@ -1357,7 +1357,7 @@ def add_temporal(self):
         """Get the temporal window sizes for all data sources."""
         for source in self.data_sources:
             w = self._get_temporal_w(
-                self.data[source]['temporal'], self.final_tres
+                self.data[source]['freq'], self.final_tres
             )
             self.data[source]['window'] = w
             logger.info(
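The renamed ``freq`` keys feed ``add_temporal`` above, which converts each
source's time resolution into a window size via ``_get_temporal_w`` (not
shown in this diff). A hypothetical sketch of that ratio logic, assuming
pandas-style frequency strings::

    import pandas as pd

    def temporal_window(source_freq, final_freq):
        """Illustrative only: steps of source data per final timestep,
        e.g. aggregating a 5min source to 30min spans a 6-step window."""
        ratio = pd.Timedelta(final_freq) / pd.Timedelta(source_freq)
        return max(int(ratio), 1)

    assert temporal_window('5min', '30min') == 6
    assert temporal_window('30min', '30min') == 1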
diff --git a/nsrdb/cli.py b/nsrdb/cli.py
index 4e99c831..06c1d6b6 100755
--- a/nsrdb/cli.py
+++ b/nsrdb/cli.py
@@ -194,6 +194,7 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
     """  # noqa: D301
     ctx.ensure_object(dict)
+    init_logger('gaps', log_level='DEBUG')
     ctx.obj['VERBOSE'] = verbose or ctx.obj.get('VERBOSE', False)
     gaps_pipeline(config, cancel, monitor, background)
 
@@ -855,8 +856,7 @@ def _run_or_collect_tmy(
     '-c',
     type=str,
     required=True,
-    help='Path to config file with kwargs for TmyRunner.func(), where func '
-    'is "tmy", "tdy", or "tgy".',
+    help='Path to config file with kwargs for TmyRunner.tmy()',
 )
 @click.option(
     '-v',
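For context, the ``pipeline`` command above is driven the same way the CLI
tests below drive ``create-configs``. A hypothetical invocation via click's
test runner (assumes ``pipeline`` takes a ``-c`` config option like the other
commands, and that a valid ``config_pipeline.json`` exists)::

    from click.testing import CliRunner

    from nsrdb import cli

    runner = CliRunner()
    result = runner.invoke(cli.pipeline, ['-c', './config_pipeline.json'])
    assert result.exit_code == 0, result.output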
diff --git a/nsrdb/create_configs.py b/nsrdb/create_configs.py
index 99c3d7c1..41bc6b14 100755
--- a/nsrdb/create_configs.py
+++ b/nsrdb/create_configs.py
@@ -30,7 +30,6 @@
 DEFAULT_META_DIR = '/projects/pxs/reference_grids/'
 
-
 BASE_KWARGS = {
     'basename': 'nsrdb',
     'out_dir': './',
@@ -38,19 +37,16 @@
     'meta_dir': DEFAULT_META_DIR,
 }
 
-MAIN_KWARGS = {
+BLEND_KWARGS = {
     **BASE_KWARGS,
-    'freq': '30min',
-    'spatial': '4km',
-    'satellite': 'east',
+    'file_tag': 'all',
     'extent': 'full',
+    'main_dir': '../',
 }
 
-BLEND_KWARGS = {
+COLLECT_BLEND_KWARGS = {
     **BASE_KWARGS,
-    'file_tag': 'all',
     'extent': 'full',
-    'main_dir': '../',
 }
 
 AGG_KWARGS = {
@@ -87,6 +83,128 @@ class CreateConfigs:
     COLLECT_AGG_RUN_NAME = '{basename}_{year}_collect_aggregate'
     COLLECT_BLEND_RUN_NAME = '{basename}_{extent}_{year}_collect_blend'
 
+    @classmethod
+    def _init_kwargs(cls, kwargs, default_kwargs):
+        """Initialize config with default kwargs."""
+        msg = f'kwargs must have a "year" key. Received {kwargs}.'
+        assert 'year' in kwargs, msg
+        config = copy.deepcopy(default_kwargs)
+        input_kwargs = copy.deepcopy(kwargs)
+        if 'execution_control' in kwargs:
+            config['execution_control'].update(
+                input_kwargs.pop('execution_control')
+            )
+        config.update(input_kwargs)
+        config['out_dir'] = os.path.abspath(config['out_dir'])
+        os.makedirs(config['out_dir'], exist_ok=True)
+        return config
+
+    @classmethod
+    def _get_res(cls, config):
+        """Get spatiotemporal res for a given year and extent."""
+
+        if config['year'] == 2018:
+            if config['extent'] == 'full':
+                if config['satellite'] == 'east':
+                    spatial = '2km'
+                    freq = '10min'
+                elif config['satellite'] == 'west':
+                    spatial = '4km'
+                    freq = '30min'
+            else:
+                spatial = '2km'
+                freq = '5min'
+
+        elif config['year'] > 2018:
+            spatial = '2km'
+            freq = '10min' if config['extent'] == 'full' else '5min'
+        else:
+            spatial = '4km'
+            freq = '30min'
+        return config.get('spatial', spatial), config.get('freq', freq)
+
+    @classmethod
+    def _get_meta(cls, config, run_type='main'):
+        """Get meta file for a given extent, satellite, and resolution."""
+
+        if 'final_spatial' in config:
+            spatial = config['final_spatial']
+        else:
+            spatial = config.get('spatial', cls._get_res(config)[0])
+
+        meta_file = f'nsrdb_meta_{spatial}'
+
+        if config['year'] > 2017 and 'collect' not in run_type:
+            meta_file += f'_{config["extent"]}'
+
+        if run_type in ('blend', 'aggregate') or 'collect' in run_type:
+            meta_file = os.path.join(config['meta_dir'], f'{meta_file}.csv')
+        else:
+            meta_file += f'_{config["satellite"]}_{config["lon_seam"]}.csv'
+
+        return meta_file
+
+    @classmethod
+    def _get_run_name(cls, config, run_type='main'):
+        """Get name of run for given main run input."""
+        config.update(
+            {k: v for k, v in BASE_KWARGS.items() if k not in config}
+        )
+        pattern_dict = {
+            'main': cls.MAIN_RUN_NAME,
+            'blend': cls.BLEND_RUN_NAME,
+            'aggregate': cls.AGG_RUN_NAME,
+            'collect-aggregate': cls.COLLECT_AGG_RUN_NAME,
+            'collect-blend': cls.COLLECT_BLEND_RUN_NAME,
+        }
+        pattern = pattern_dict[run_type]
+        keys = get_format_keys(pattern)
+        run_config = {k: v for k, v in config.items() if k in keys}
+        if 'spatial' in keys or 'freq' in keys:
+            run_config['spatial'], run_config['freq'] = cls._get_res(
+                run_config
+            )
+        return pattern.format(**run_config)
+
+    @classmethod
+    def _update_run_templates(cls, config):
+        """Replace format keys and dictionary keys in config templates with
+        user input values."""
+
+        logger.info(
+            'Updating NSRDB run templates with config:\n'
+            f'{pprint.pformat(config, indent=2)}'
+        )
+
+        template = (
+            PRE2018_CONFIG_TEMPLATE
+            if int(config['year']) < 2018
+            else POST2017_CONFIG_TEMPLATE
+        )
+        with open(template, encoding='utf-8') as s:
+            s = s.read()
+
+        config_dict = json.loads(str_replace_dict(s, config))
+        config_dict.update(
+            {k: v for k, v in config.items() if k in config_dict}
+        )
+        cls._write_config(
+            config_dict,
+            os.path.join(config['out_dir'], 'config_nsrdb.json'),
+        )
+
+        with open(PIPELINE_CONFIG_TEMPLATE, encoding='utf-8') as s:
+            s = s.read()
+
+        config_dict = json.loads(str_replace_dict(s, config))
+
+        cls._write_config(
+            config_dict,
+            os.path.join(config['out_dir'], 'config_pipeline.json'),
+        )
+
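Worked examples of the ``_get_res`` defaults added above (values follow
directly from the branches shown; illustrative, not a tested API)::

    # pre-2018 years fall back to the coarse 4km/30min grid:
    CreateConfigs._get_res(
        {'year': 2016, 'extent': 'full', 'satellite': 'east'}
    )  # -> ('4km', '30min')

    # post-2018 CONUS runs default to the high-res 2km/5min grid:
    CreateConfigs._get_res(
        {'year': 2020, 'extent': 'conus', 'satellite': 'west'}
    )  # -> ('2km', '5min')

    # user-supplied 'spatial'/'freq' keys always win over the defaults:
    CreateConfigs._get_res(
        {'year': 2020, 'extent': 'conus', 'satellite': 'west',
         'freq': '15min'}
    )  # -> ('2km', '15min')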
     @classmethod
     def main(cls, kwargs):
         """Modify config files with specified parameters
@@ -97,24 +215,13 @@
             Dictionary of parameters including year, basename, satellite,
             extent, freq, spatial, meta_file, doy_range
         """
-        config = copy.deepcopy(MAIN_KWARGS)
-        config.update(kwargs)
-        config['out_dir'] = os.path.abspath(config['out_dir'])
-        os.makedirs(config['out_dir'], exist_ok=True)
-
+        config = cls._init_kwargs(kwargs, BASE_KWARGS)
         extent_tag_map = {'full': 'RadF', 'conus': 'RadC'}
         lon_seam_map = {'full': -105, 'conus': -113}
         config['extent_tag'] = extent_tag_map[config['extent']]
         lon_seam = lon_seam_map[config['extent']]
-
-        if config['year'] != 2018:
-            meta_file = f'nsrdb_meta_{config["spatial"]}'
-
-            if config['year'] > 2018:
-                meta_file += f'_{config["extent"]}'
-
-            meta_file += f'_{config["satellite"]}_{lon_seam}.csv'
-            config['meta_file'] = meta_file
+        config['meta_file'] = cls._get_meta({**config, 'lon_seam': lon_seam})
+        config['spatial'], config['freq'] = cls._get_res(config)
 
         if config.get('doy_range', None) is None:
             if calendar.isleap(config['year']):
@@ -149,10 +256,9 @@ def main_all(cls, kwargs):
         """
         out_dir = os.path.abspath(kwargs.get('out_dir', './'))
         if kwargs['year'] < 2018:
-            full_kws = {'spatial': '4km', 'extent': 'full', 'freq': '30min'}
             kwargs_list = [
-                {**full_kws, 'satellite': 'east'},
-                {**full_kws, 'satellite': 'west'},
+                {'extent': 'full', 'satellite': sat}
+                for sat in ('east', 'west')
             ]
         elif kwargs['year'] == 2018:
             kwargs_list = [
@@ -165,19 +271,19 @@ def main_all(cls, kwargs):
                 },
             ]
         else:
-            full_kws = {'spatial': '2km', 'extent': 'full', 'freq': '10min'}
-            conus_kws = {'extent': 'conus', 'freq': '5min'}
             kwargs_list = [
-                {**full_kws, 'satellite': 'east'},
-                {**full_kws, 'satellite': 'west'},
-                {**conus_kws, 'satellite': 'east'},
-                {**conus_kws, 'satellite': 'west'},
+                {'extent': ex, 'satellite': sat}
+                for ex in ('full', 'conus')
+                for sat in ('east', 'west')
             ]
 
         run_cmd = ''
         for kws in kwargs_list:
-            kwargs.update(kws)
-            run_cmd += f'cd {cls._get_run_name(kwargs)}; bash run.sh; cd ../; '
-            cls.main(kwargs)
+            input_kws = copy.deepcopy(kwargs)
+            input_kws.update(kws)
+            run_cmd += (
+                f'cd {cls._get_run_name(input_kws)}; bash run.sh; cd ../; '
+            )
+            cls.main(input_kws)
 
         run_file = os.path.join(out_dir, 'run.sh')
         with open(run_file, 'w') as f:
@@ -200,60 +306,6 @@ def full(cls, kwargs):
         cls.main_all(kwargs)
         cls.post(kwargs)
 
-    @classmethod
-    def _update_run_templates(cls, config):
-        """Replace format keys and dictionary keys in config templates with
-        user input values."""
-
-        logger.info(
-            'Updating NSRDB run templates with config:\n'
-            f'{pprint.pformat(config, indent=2)}'
-        )
-
-        template = (
-            PRE2018_CONFIG_TEMPLATE
-            if int(config['year']) < 2018
-            else POST2017_CONFIG_TEMPLATE
-        )
-        with open(template, encoding='utf-8') as s:
-            s = s.read()
-
-        config_dict = json.loads(str_replace_dict(s, config))
-        config_dict.update(
-            {k: v for k, v in config.items() if k in config_dict}
-        )
-        cls._write_config(
-            config_dict,
-            os.path.join(config['out_dir'], 'config_nsrdb.json'),
-        )
-
-        with open(PIPELINE_CONFIG_TEMPLATE, encoding='utf-8') as s:
-            s = s.read()
-
-        config_dict = json.loads(str_replace_dict(s, config))
-
-        cls._write_config(
-            config_dict,
-            os.path.join(config['out_dir'], 'config_pipeline.json'),
-        )
-
-    @classmethod
-    def _get_run_name(cls, config, run_type='main'):
-        """Get name of run for given main run input."""
-        config.update(
-            {k: v for k, v in MAIN_KWARGS.items() if k not in config}
-        )
-        pattern_dict = {
-            'main': cls.MAIN_RUN_NAME,
-            'blend': cls.BLEND_RUN_NAME,
-            'aggregate': cls.AGG_RUN_NAME,
-            'collect-aggregate': cls.COLLECT_AGG_RUN_NAME,
-            'collect-blend': cls.COLLECT_BLEND_RUN_NAME,
-        }
-        pattern = pattern_dict[run_type]
-        keys = get_format_keys(pattern)
-        return pattern.format(**{k: v for k, v in config.items() if k in keys})
-
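The refactored ``_get_run_name`` now back-fills ``spatial``/``freq`` through
``_get_res`` instead of trusting caller-supplied values. Assuming
``MAIN_RUN_NAME`` follows the ``{basename}_{satellite}_{extent}_{year}_{spatial}_{freq}``
pattern implied by the test directory names below, a hypothetical resolution::

    CreateConfigs._get_run_name(
        {'year': 2020, 'satellite': 'east', 'extent': 'full'}
    )  # -> 'nsrdb_east_full_2020_2km_10min'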
     @classmethod
     def post(cls, kwargs):
         """Create all post processing config files for blending / aggregation /
@@ -347,7 +399,7 @@ def _get_agg_entry(cls, config, extent):
                     res=config['final_spatial']
                 ),
                 'spatial': f'{config["final_spatial"]}',
-                'temporal': f'{config["final_freq"]}',
+                'freq': f'{config["final_freq"]}',
                 'source_priority': source_priority,
             }
 
@@ -362,7 +414,7 @@ def _get_agg_entry(cls, config, extent):
                     res=config[f'{extent}_spatial'], extent=extent
                 ),
                 'spatial': config[f'{extent}_spatial'],
-                'temporal': config[f'{extent}_freq'],
+                'freq': config[f'{extent}_freq'],
             }
 
     @classmethod
@@ -376,8 +428,7 @@ def _aggregate(cls, kwargs):
             Dictionary with keys specifying the case for which to aggregate
             files
         """
-        config = copy.deepcopy(AGG_KWARGS)
-        config.update(kwargs)
+        config = cls._init_kwargs(kwargs, AGG_KWARGS)
 
         if config['year'] == 2018:
             data = NSRDB_2018
@@ -428,15 +479,7 @@ def _blend(cls, kwargs):
             Dictionary with keys specifying the case for which to blend data
             files
         """
-        config = copy.deepcopy(BLEND_KWARGS)
-        config.update(kwargs)
-
-        if config['year'] > 2017:
-            config['spatial'] = '2km'
-            if config['extent'] == 'full':
-                config['freq'] = '10min'
-            else:
-                config['freq'] = '5min'
+        config = cls._init_kwargs(kwargs, BLEND_KWARGS)
 
         map_col_map = {'full': 'gid_full', 'conus': 'gid_full_conus'}
         config['map_col'] = map_col_map[config['extent']]
@@ -444,13 +487,7 @@ def _blend(cls, kwargs):
         lon_seam_map = {'full': -105, 'conus': -113}
         config['lon_seam'] = lon_seam_map[config['extent']]
 
-        meta_file = f'nsrdb_meta_{config["spatial"]}'
-
-        if config['year'] > 2017:
-            meta_file += f'_{config["extent"]}'
-
-        meta_file += '.csv'
-        config['meta_file'] = os.path.join(config['meta_dir'], meta_file)
+        config['meta_file'] = cls._get_meta(config, run_type='blend')
 
         config['east_dir'] = os.path.join(
             config['main_dir'],
@@ -490,7 +527,7 @@ def _write_config(cls, config, config_file, module_name=None):
             f.write(json.dumps(config, indent=2))
 
         logger.info(
-            f'Created config file: {config_file}:'
+            f'Created config file {config_file}:'
             f'\n{pprint.pformat(config, indent=2)}'
         )
 
@@ -530,12 +567,8 @@ def _collect_blend(cls, kwargs):
             Dictionary with keys specifying the case for blend collection
         """
-        config = copy.deepcopy(BASE_KWARGS)
-        config.update(kwargs)
-
-        config['meta'] = os.path.join(
-            config['meta_dir'], f'nsrdb_meta_{config["spatial"]}.csv'
-        )
+        config = cls._init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
+        config['meta_final'] = cls._get_meta(config, run_type='collect-blend')
         config['collect_dir'] = cls._get_run_name(config, run_type='blend')
         config['collect_tag'] = config['collect_dir'].replace('_blend', '')
         config['fout'] = os.path.join(
@@ -587,15 +620,15 @@ def _collect_aggregate(cls, kwargs):
         kwargs : dict
             Dictionary with keys specifying the case for aggregation
             collection
         """
-        config = copy.deepcopy(COLLECT_AGG_KWARGS)
-        config.update(kwargs)
-
-        meta_file = f'nsrdb_meta_{config["final_spatial"]}.csv'
-        config['meta_final'] = os.path.join(config['meta_dir'], meta_file)
-        collect_dir = f'nsrdb_{config["final_spatial"]}_{config["final_freq"]}'
-        collect_tag = f'{config["basename"]}_'
-        config['collect_dir'] = collect_dir
-        config['collect_tag'] = collect_tag
+        config = cls._init_kwargs(kwargs, COLLECT_AGG_KWARGS)
+
+        config['meta_final'] = cls._get_meta(
+            config, run_type='collect-aggregate'
+        )
+        config['collect_dir'] = (
+            f'nsrdb_{config["final_spatial"]}_{config["final_freq"]}'
+        )
+        config['collect_tag'] = f'{config["basename"]}_'
         config['fout'] = os.path.join(
             f'{config["out_dir"]}',
             f'{config["basename"]}_{config["year"]}.h5',
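The ``_get_meta`` consolidation above makes the meta-file naming
deterministic; two worked examples following the branches shown (paths
illustrative)::

    # blend for a post-2017 CONUS year: resolution comes from _get_res,
    # the extent suffix is appended, and the path is rooted at meta_dir:
    CreateConfigs._get_meta(
        {'year': 2020, 'extent': 'conus', 'satellite': 'east',
         'meta_dir': '/projects/pxs/reference_grids/'},
        run_type='blend',
    )  # -> '/projects/pxs/reference_grids/nsrdb_meta_2km_conus.csv'

    # main run for the same case: satellite and longitude seam are encoded
    # in the file name instead (no meta_dir prefix):
    CreateConfigs._get_meta(
        {'year': 2020, 'extent': 'conus', 'satellite': 'east',
         'lon_seam': -113}
    )  # -> 'nsrdb_meta_2km_conus_east_-113.csv'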
diff --git a/tests/cli/test_agg_cli.py b/tests/cli/test_agg_cli.py
index 9460c792..a22ac36f 100755
--- a/tests/cli/test_agg_cli.py
+++ b/tests/cli/test_agg_cli.py
@@ -61,14 +61,14 @@ def test_agg_cli(runner):
             'tree_file': 'kdtree_surfrad_meta.pkl',
             'meta_file': 'surfrad_meta.csv',
             'spatial': '2km',
-            'temporal': '5min',
+            'freq': '5min',
         },
         'final': {
             'fpath': fpath_out,
             'tree_file': 'kdtree_test_meta_agg.pkl',
             'meta_file': 'test_meta_agg.csv',
             'spatial': '4km',
-            'temporal': '30min',
+            'freq': '30min',
         },
     }
 
diff --git a/tests/cli/test_nsrdb_cli.py b/tests/cli/test_nsrdb_cli.py
index 68244012..77f79848 100755
--- a/tests/cli/test_nsrdb_cli.py
+++ b/tests/cli/test_nsrdb_cli.py
@@ -1,5 +1,6 @@
 """PyTest file for main nsrdb CLI."""
 
+import inspect
 import json
 import os
 import tempfile
@@ -10,7 +11,10 @@
 from rex import safe_json_load
 
 from nsrdb import NSRDB, TESTDATADIR, cli
+from nsrdb.aggregation.aggregation import Manager
+from nsrdb.blend.blend import Blender
 from nsrdb.data_model import DataModel
+from nsrdb.file_handlers.collection import Collector
 from nsrdb.utilities.pytest import execute_pytest
 
 VAR_META = os.path.join(TESTDATADIR, 'nsrdb_vars.csv')
@@ -145,17 +149,66 @@ def modern_config(tmpdir_factory):
     return config_file, pipeline_file
 
 
-def test_cli_create_all_configs(runner):
+def _check_args(post_files, funcs):
+    for file, func in zip(post_files[:-2], funcs):
+        config = safe_json_load(file)
+        config_args = config[next(iter(config.keys()))]
+        arg_spec = inspect.getfullargspec(func)
+        # drop self/cls and the defaulted tail to get required positionals
+        n_defaults = len(arg_spec.defaults or ())
+        args = arg_spec.args[1 : len(arg_spec.args) - n_defaults]
+
+        # not requiring i_chunk in configs since this is defined in
+        # BaseCLI.kickoff_multichunk call
+        if 'i_chunk' in args:
+            args.pop(args.index('i_chunk'))
+        assert all(arg in config_args for arg in args)
+
+
+def test_cli_create_all_configs_2018(runner):
     """Test nsrdb.cli create-configs for main and post modules"""
     with tempfile.TemporaryDirectory() as td:
-        kwargs = {
-            'year': 2020,
-            'out_dir': td,
-            'satellite': 'east',
-            'extent': 'conus',
-            'spatial': '4km',
-            'freq': '5min',
-        }
+        kwargs = {'year': 2018, 'out_dir': td}
         result = runner.invoke(
             cli.create_configs, ['-c', kwargs, '--run_type', 'full']
         )
 
         assert result.exit_code == 0, traceback.print_exception(
             *result.exc_info
         )
 
+        out_dirs = [
+            f'{td}/nsrdb_east_conus_2018_2km_5min',
+            f'{td}/nsrdb_east_full_2018_2km_10min',
+            f'{td}/nsrdb_west_full_2018_4km_30min',
+        ]
+        for out_dir in out_dirs:
+            assert os.path.exists(os.path.join(out_dir, 'config_nsrdb.json'))
+            assert os.path.exists(
+                os.path.join(out_dir, 'config_pipeline.json')
+            )
+            assert os.path.exists(os.path.join(out_dir, 'run.sh'))
+
+        post_files = [
+            f'{td}/post_proc/config_aggregate.json',
+            f'{td}/post_proc/config_collect_aggregate.json',
+            f'{td}/post_proc/config_pipeline_post.json',
+            f'{td}/post_proc/run.sh',
+        ]
+        assert all(os.path.exists(f) for f in post_files)
+
+        funcs = [
+            Manager.run_chunk,
+            Collector.collect_dir,
+        ]
+
+        # make sure configs have all positional args for the corresponding
+        # modules
+        _check_args(post_files, funcs)
+
+
+def test_cli_create_all_configs_post2018(runner):
+    """Test nsrdb.cli create-configs for main and post modules after 2018"""
+    with tempfile.TemporaryDirectory() as td:
+        kwargs = {'year': 2020, 'out_dir': td}
         result = runner.invoke(
             cli.create_configs, ['-c', kwargs, '--run_type', 'full']
         )
@@ -187,6 +240,58 @@
         ]
         assert all(os.path.exists(f) for f in post_files)
 
+        funcs = [
+            Blender.run_full,
+            Blender.run_full,
+            Manager.run_chunk,
+            Collector.collect_dir,
+        ]
+
+        # make sure configs have all positional args for the corresponding
+        # modules
+        _check_args(post_files, funcs)
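The ``_check_args`` helper above keys off ``inspect`` argspecs; a tiny
standalone illustration of the slice, using a hypothetical stand-in
signature::

    import inspect

    def run_chunk(cls, year, extent, n_chunks=2, log_level='INFO'):
        """Stand-in for a module entry point, not the real signature."""

    spec = inspect.getfullargspec(run_chunk)
    required = spec.args[1:len(spec.args) - len(spec.defaults or ())]
    assert required == ['year', 'extent']  # what each config must supply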
+
+
+def test_cli_create_all_configs_pre2018(runner):
+    """Test nsrdb.cli create-configs for main and post modules before 2018"""
+    with tempfile.TemporaryDirectory() as td:
+        kwargs = {'year': 2016, 'out_dir': td}
+        result = runner.invoke(
+            cli.create_configs, ['-c', kwargs, '--run_type', 'full']
+        )
+
+        assert result.exit_code == 0, traceback.print_exception(
+            *result.exc_info
+        )
+
+        out_dirs = [
+            f'{td}/nsrdb_east_full_2016_4km_30min',
+            f'{td}/nsrdb_west_full_2016_4km_30min',
+        ]
+        for out_dir in out_dirs:
+            assert os.path.exists(os.path.join(out_dir, 'config_nsrdb.json'))
+            assert os.path.exists(
+                os.path.join(out_dir, 'config_pipeline.json')
+            )
+            assert os.path.exists(os.path.join(out_dir, 'run.sh'))
+
+        post_files = [
+            f'{td}/post_proc/config_blend.json',
+            f'{td}/post_proc/config_collect_blend.json',
+            f'{td}/post_proc/config_pipeline_post.json',
+            f'{td}/post_proc/run.sh',
+        ]
+        assert all(os.path.exists(f) for f in post_files)
+
+        funcs = [
+            Blender.run_full,
+            Collector.collect_dir,
+        ]
+
+        # make sure configs have all positional args for the corresponding
+        # modules
+        _check_args(post_files, funcs)
+
 
 def test_cli_create_main_configs(runner):
     """Test nsrdb.cli create-configs"""
diff --git a/tests/postproc/test_aggregation.py b/tests/postproc/test_aggregation.py
index a6c0adc1..7356083a 100755
--- a/tests/postproc/test_aggregation.py
+++ b/tests/postproc/test_aggregation.py
@@ -30,7 +30,7 @@
         'tree_file': 'kdtree_surfrad_meta.pkl',
         'meta_file': 'surfrad_meta.csv',
         'spatial': '2km',
-        'temporal': '5min',
+        'freq': '5min',
     },
     'final': {
         'data_sub_dir': 'agg_out',
@@ -38,7 +38,7 @@
         'tree_file': 'kdtree_test_meta_agg.pkl',
         'meta_file': 'test_meta_agg.csv',
         'spatial': '4km',
-        'temporal': '5min',
+        'freq': '5min',
     },
 }
 
@@ -48,7 +48,7 @@
         'tree_file': 'kdtree_surfrad_meta.pkl',
         'meta_file': 'surfrad_meta.csv',
         'spatial': '2km',
-        'temporal': '5min',
+        'freq': '5min',
     },
     'final': {
         'data_sub_dir': 'agg_out',
@@ -56,7 +56,7 @@
         'tree_file': 'kdtree_test_meta_agg.pkl',
         'meta_file': 'test_meta_agg.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
     },
 }
 
@@ -256,14 +256,14 @@ def test_multi_file():
         'tree_file': 'kdtree_surfrad_meta.pkl',
         'meta_file': 'surfrad_meta.csv',
         'spatial': '2km',
-        'temporal': '5min',
+        'freq': '5min',
     },
     'final': {
         'fpath': fpath_out,
         'tree_file': 'kdtree_test_meta_agg.pkl',
         'meta_file': 'test_meta_agg.csv',
         'spatial': '4km',
-        'temporal': '30min',
+        'freq': '30min',
     },
 }