diff --git a/docs/background/gridded_datasets.rst b/docs/background/gridded_datasets.rst new file mode 100644 index 00000000..27913aac --- /dev/null +++ b/docs/background/gridded_datasets.rst @@ -0,0 +1,5 @@ +Gridded Datasets +================ + +Gridded datasets + diff --git a/docs/background/time_chunking.rst b/docs/background/time_chunking.rst new file mode 100644 index 00000000..0d0e9b0a --- /dev/null +++ b/docs/background/time_chunking.rst @@ -0,0 +1,5 @@ +Time Chunking +================ + +Time chunking + diff --git a/docs/develop/developers_guide.rst b/docs/develop/developers_guide.rst index 9f1e3366..f75a0637 100644 --- a/docs/develop/developers_guide.rst +++ b/docs/develop/developers_guide.rst @@ -74,7 +74,7 @@ these instructions: $ conda create --name melodies-monet python=3.9 $ conda activate melodies-monet - $ conda install -y -c conda-forge pyyaml monet monetio netcdf4 wrf-python typer rich pooch jupyterlab + $ conda install -y -c conda-forge pyyaml pandas=1 monet monetio netcdf4 wrf-python typer rich pooch jupyterlab (b) Clone [#clone]_ and link the latest development versions of MONET and MONETIO from GitHub to your conda environment:: diff --git a/docs/index.rst b/docs/index.rst index 8fe2972f..0bb6a0c0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,6 +47,8 @@ MONETIO please refer to: background/supported_analyses background/supported_plots background/supported_stats + background/time_chunking + background/gridded_datasets .. 
toctree:: :hidden: diff --git a/docs/tutorial/installation.rst b/docs/tutorial/installation.rst index 35b7f3dd..2c9c7ccf 100644 --- a/docs/tutorial/installation.rst +++ b/docs/tutorial/installation.rst @@ -34,7 +34,7 @@ First create and activate a conda environment:: Add dependencies from conda-forge:: - $ conda install -y -c conda-forge pyyaml monet monetio netcdf4 wrf-python typer rich pooch + $ conda install -y -c conda-forge pyyaml pandas=1 monet monetio netcdf4 wrf-python typer rich pooch Now, install the stable branch of MELODIES MONET to the environment:: diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml new file mode 100644 index 00000000..618a23de --- /dev/null +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -0,0 +1,44 @@ +analysis: + start_time: '2020-01-01' + end_time: '2020-12-31' + time_interval: 'MS' + output_dir: $HOME/Plots + debug: True + regrid: False + target_grid: $HOME/Data/Grids/cam_grid.nc + time_chunking_with_gridded_data: True + +obs: + + MOD08_M3: + data_format: gridded_eos + datadir: $HOME/Data/MOD08_M3 + obs_type: gridded_data + filename: MOD08_M3.AYYYYDDD.061.*_regrid.nc + regrid: + base_grid: $HOME/Data/Grids/modis_l3_grid.nc + method: bilinear + variables: + AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean: + fillvalue: -9999 + scale: 0.001 + units: none + +model: + + MERRA2: + data_format: netcdf + mod_type: merra2 + datadir: $HOME/Data/MERRA2 + files: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 + regrid: + base_grid: $HOME/Data/Grids/merra2_grid.nc + method: bilinear + variables: + fillvalue: 1.e+15 + scale: 1.0 + units: none + mapping: + MOD08_M3: + TOTEXTTAU: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean + diff --git a/examples/process_gridded_data/process_time_chunking_with_gridded_data.py b/examples/process_gridded_data/process_time_chunking_with_gridded_data.py new 
"""Example driver: step through the analysis period one time chunk at a time.

For every interval produced by ``read_control`` the gridded observations and
models configured in the control file are opened and printed.
"""
from melodies_monet import driver


def _print_loaded(collection):
    """Print a label-keyed collection, then the ``obj`` of each entry."""
    print(collection)
    for label in collection:
        print(collection[label].obj)


an = driver.analysis()
an.control = 'control_time_chunking_with_gridded_data.yaml'
an.read_control()
an.setup_regridders()

for chunk in an.time_intervals:

    print(chunk)

    # Each call replaces the datasets loaded for the previous chunk.
    an.open_obs(time_interval=chunk)
    an.open_models(time_interval=chunk)

    _print_loaded(an.obs)
    _print_loaded(an.models)
import tutorial + from .util import analysis_util + from .util import read_grid_util + + time_chunking_with_gridded_data \ + = 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \ + and control_dict['analysis']['time_chunking_with_gridded_data'] + + if time_chunking_with_gridded_data: + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('obs time chunk %s' % date_str) + obs_vars = analysis_util.get_obs_vars(control_dict) + print(obs_vars) + obs_datasets, filenames = read_grid_util.read_grid_obs( + control_dict, obs_vars, date_str, obs=self.obs) + print(filenames) + self.obj = obs_datasets[self.obs] - if self.file.startswith("example:"): - example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) - files = [tutorial.fetch_example(example_id)] else: - files = sort(glob(self.file)) + if self.file.startswith("example:"): + example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) + files = [tutorial.fetch_example(example_id)] + else: + files = sort(glob(self.file)) - assert len(files) >= 1, "need at least one" + assert len(files) >= 1, "need at least one" - _, extension = os.path.splitext(files[0]) - try: - if extension in ['.nc', '.ncf', '.netcdf', '.nc4']: - if len(glob(self.file)) > 1: - self.obj = xr.open_mfdataset(sort(glob(self.file))) + _, extension = os.path.splitext(files[0]) + try: + if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}: + if len(files) > 1: + self.obj = xr.open_mfdataset(files) + else: + self.obj = xr.open_dataset(files[0]) + elif extension in ['.ict', '.icarrt']: + assert len(files) == 1, "monetio.icarrt.add_data can only read one file" + self.obj = mio.icarrt.add_data(files[0]) else: - self.obj = xr.open_dataset(self.file[0]) - elif extension in ['.ict', '.icarrt']: - self.obj = mio.icarrt.add_data(self.file) - except ValueError: - print('something happened opening file') + raise ValueError(f'extension {extension!r} currently unsupported') + except Exception as e: + print('something happened 
opening file:', e) + return self.mask_and_scale() # mask and scale values from the control values self.filter_obs() @@ -386,7 +407,7 @@ def glob_files(self): if self.file_pm25_str is not None: self.files_pm25 = sort(glob(self.file_pm25_str)) - def open_model_files(self, time_interval=None): + def open_model_files(self, time_interval=None, control_dict=None): """Open the model files, store data in :class:`model` instance attributes, and apply mask and scaling. @@ -405,8 +426,16 @@ def open_model_files(self, time_interval=None): None """ from .util import time_interval_subset as tsub + from .util import analysis_util + from .util import read_grid_util + from .util import regrid_util + print(self.model.lower()) + time_chunking_with_gridded_data \ + = 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \ + and control_dict['analysis']['time_chunking_with_gridded_data'] + self.glob_files() # Calculate species to input into MONET, so works for all mechanisms in wrfchem # I want to expand this for the other models too when add aircraft data. @@ -414,65 +443,68 @@ def open_model_files(self, time_interval=None): for obs_map in self.mapping: list_input_var = list_input_var + list(set(self.mapping[obs_map].keys()) - set(list_input_var)) #Only certain models need this option for speeding up i/o. 
- if 'cmaq' in self.model.lower(): - print('**** Reading CMAQ model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - if self.files_vert is not None: - self.mod_kwargs.update({'fname_vert' : self.files_vert}) - if self.files_surf is not None: - self.mod_kwargs.update({'fname_surf' : self.files_surf}) - if len(self.files) > 1: - self.mod_kwargs.update({'concatenate_forecasts' : True}) - self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'wrfchem' in self.model.lower(): - print('**** Reading WRF-Chem model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'rrfs' in self.model.lower(): - print('**** Reading RRFS-CMAQ model output...') - if self.files_pm25 is not None: - self.mod_kwargs.update({'fname_pm25' : self.files_pm25}) - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'gsdchem' in self.model.lower(): - print('**** Reading GSD-Chem model output...') - if len(self.files) > 1: - self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs) - else: - self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs) - elif 'cesm_fv' in self.model.lower(): - print('**** Reading CESM FV model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs) - # CAM-chem-SE grid or MUSICAv0 - elif 'cesm_se' in self.model.lower(): - print('**** Reading CESM SE model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - if self.scrip_file.startswith("example:"): - from . 
import tutorial - example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:]) - self.scrip_file = tutorial.fetch_example(example_id) - self.mod_kwargs.update({'scrip_file' : self.scrip_file}) - self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs) - #self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs) - #self.obj.monet.scrip = self.obj_scrip - elif 'raqms' in self.model.lower(): - if time_interval is not None: - # fill filelist with subset - file_sublist = tsub.subset_model_filelist(self.files,'%m_%d_%Y_%H','6H',time_interval) - else: - # fill filelist with all files - file_sublist = self.files - if len(self.files) > 1: - self.obj = mio.raqms.open_mfdataset(file_sublist,**self.mod_kwargs) - else: - self.obj = mio.raqms.open_dataset(file_sublist,**self.mod_kwargs) + + if time_chunking_with_gridded_data: + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('model time chunk %s' % date_str) + model_datasets, filenames = read_grid_util.read_grid_models( + control_dict, date_str, model=self.label) + print(filenames) + self.obj = model_datasets[self.label] else: - print('**** Reading Unspecified model output. 
Take Caution...') - if len(self.files) > 1: - self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs) + if 'cmaq' in self.model.lower(): + print('**** Reading CMAQ model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + if self.files_vert is not None: + self.mod_kwargs.update({'fname_vert' : self.files_vert}) + if self.files_surf is not None: + self.mod_kwargs.update({'fname_surf' : self.files_surf}) + if len(self.files) > 1: + self.mod_kwargs.update({'concatenate_forecasts' : True}) + self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'wrfchem' in self.model.lower(): + print('**** Reading WRF-Chem model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'rrfs' in self.model.lower(): + print('**** Reading RRFS-CMAQ model output...') + if self.files_pm25 is not None: + self.mod_kwargs.update({'fname_pm25' : self.files_pm25}) + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'gsdchem' in self.model.lower(): + print('**** Reading GSD-Chem model output...') + if len(self.files) > 1: + self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs) + elif 'cesm_fv' in self.model.lower(): + print('**** Reading CESM FV model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs) + # CAM-chem-SE grid or MUSICAv0 + elif 'cesm_se' in self.model.lower(): + print('**** Reading CESM SE model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + if self.scrip_file.startswith("example:"): + from . 
import tutorial + example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:]) + self.scrip_file = tutorial.fetch_example(example_id) + self.mod_kwargs.update({'scrip_file' : self.scrip_file}) + self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs) + #self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs) + #self.obj.monet.scrip = self.obj_scrip + elif 'raqms' in self.model.lower(): + if len(self.files) > 1: + self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) else: - self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs) + print('**** Reading Unspecified model output. Take Caution...') + if len(self.files) > 1: + self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs) self.mask_and_scale() def mask_and_scale(self): @@ -535,7 +567,12 @@ def __init__(self): self.debug = False self.save = None self.read = None - + self.time_chunking_with_gridded_data = False # Default to False + self.regrid = False # Default to False + self.target_grid = None + self.obs_regridders = None + self.model_regridders = None + def __repr__(self): return ( f"{type(self).__name__}(\n" @@ -605,6 +642,14 @@ def read_control(self, control=None): if 'read' in self.control_dict['analysis'].keys(): self.read = self.control_dict['analysis']['read'] + # set time_chunking_with_gridded_data option, regrid option, and target_grid + if 'time_chunking_with_gridded_data' in self.control_dict['analysis'].keys(): + self.time_chunking_with_gridded_data = self.control_dict['analysis']['time_chunking_with_gridded_data'] + if 'regrid' in self.control_dict['analysis'].keys(): + self.regrid = self.control_dict['analysis']['regrid'] + if 'target_grid' in self.control_dict['analysis'].keys(): + self.target_grid = self.control_dict['analysis']['target_grid'] + # generate time intervals 
def setup_regridders(self):
    """Create the xesmf regridders for the obs and model groups.

    Does nothing unless ``self.regrid`` is true.  Otherwise one dictionary
    of regridders is built per configuration group that is present in
    ``self.control_dict`` and stored in ``self.obs_regridders`` and
    ``self.model_regridders`` respectively.

    Returns
    -------
    None
    """
    if not self.regrid:
        return

    # Imported only when regridding is requested.
    from .util import regrid_util

    # Guard on the group being present, as open_obs does for 'obs', so a
    # control file without one of the groups does not raise KeyError.
    if 'obs' in self.control_dict:
        self.obs_regridders = regrid_util.setup_regridder(
            self.control_dict, config_group='obs')
    if 'model' in self.control_dict:
        self.model_regridders = regrid_util.setup_regridder(
            self.control_dict, config_group='model')
# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration
# SPDX-License-Identifier: Apache-2.0
#
import os
import pytest
from datetime import datetime

from melodies_monet.util import analysis_util


def test_fill_date_template():
    """Date tokens in a template are replaced by the matching date fields."""
    # A fixed date keeps the test independent of the day it runs on; the
    # single-digit month and day also exercise the zero padding.
    date = datetime(2020, 3, 5)
    date_str = date.strftime('%Y-%m-%b-%d-%j')

    template_str = 'Year YYYY, Month MM, Month Name M_ABBR, Day DD'
    filled_str = analysis_util.fill_date_template(template_str, date_str)
    assert filled_str == date.strftime(
        'Year %Y, Month %m, Month Name %b, Day %d')

    template_str = 'Year YYYY, Julian Day DDD'
    filled_str = analysis_util.fill_date_template(template_str, date_str)
    assert filled_str == date.strftime('Year %Y, Julian Day %j')


def test_find_file(tmpdir):
    """A pattern matching exactly one file returns that file's full path."""
    datadir = str(tmpdir)
    test_file = os.path.join(datadir, 'test.txt')
    # Create an empty file; the context manager closes the handle.
    with open(test_file, 'w'):
        pass

    filename = analysis_util.find_file(datadir, 'test*')
    assert filename == test_file
# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration
# SPDX-License-Identifier: Apache-2.0
#

import os
import logging
import re
from glob import glob


def fill_date_template(template_str, date_str):
    """
    Replace date template parameters with values from date string

    The tokens YYYY, MM, M_ABBR, DD and DDD are substituted in a single
    left-to-right pass with the longer tokens tried first, so DDD is never
    mistaken for DD and a template may combine any of the tokens.

    Parameters
        template_str (str): template string
        date_str (str yyyy-mm-m_abbr-dd-ddd): date string

    Returns
        template_str (str): filled template string

    Raises
        ValueError: if date_str does not have five '-' separated fields
    """
    fields = date_str.split('-')
    if len(fields) != 5:
        raise ValueError(
            'date string %r is not of the form yyyy-mm-m_abbr-dd-ddd'
            % date_str)

    values = dict(zip(('YYYY', 'MM', 'M_ABBR', 'DD', 'DDD'), fields))

    # Alternation order matters: M_ABBR before MM, and DDD before DD.
    return re.sub(
        r'M_ABBR|YYYY|DDD|MM|DD',
        lambda match: values[match.group(0)],
        template_str)


def find_file(datadir, filestr):
    """
    Find the single file in a data directory matching a glob pattern

    Parameters
        datadir (str): data directory, environment variables are expanded
        filestr (str): filename glob pattern (shell-style wildcards)

    Returns
        filename (str): complete path of matching filename in data directory

    Raises
        FileNotFoundError: if no file matches the pattern
        RuntimeError: if more than one file matches the pattern
    """
    logger = logging.getLogger(__name__)

    pattern = os.path.join(os.path.expandvars(datadir), filestr)
    matches = sorted(glob(pattern))

    if not matches:
        raise FileNotFoundError('no file matches for %s' % pattern)
    if len(matches) > 1:
        raise RuntimeError(
            'more than one file match %s: %s' % (pattern, ', '.join(matches)))

    filename = matches[0]
    logger.info(filename)

    return filename


def get_obs_vars(config):
    """
    Get subset of obs variables from model to obs variable mapping

    Variables are accumulated over all models, so an obs set that is
    mapped by several models keeps the variables requested by each of them.

    Parameters
        config (dict): configuration dictionary

    Returns
        obs_vars_subset (dict of dict):
            nested dictionary keyed by obs set name and obs variable name
    """
    obs_vars_subset = {}

    for model_config in config['model'].values():

        # A model without a (non-empty) mapping contributes nothing.
        mapping = model_config.get('mapping') or {}

        for obs_name, var_map in mapping.items():
            obs_vars = config['obs'][obs_name]['variables']
            # setdefault keeps entries already collected from other models.
            subset = obs_vars_subset.setdefault(obs_name, {})

            for obs_var in (var_map or {}).values():
                subset[obs_var] = obs_vars[obs_var]

    return obs_vars_subset
# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration
# SPDX-License-Identifier: Apache-2.0
#
import os
import logging
import xarray as xr
from monetio.sat._gridded_eos_mm import read_gridded_eos

from .analysis_util import fill_date_template, find_file


def read_grid_models(config, date_str, model=None):
    """
    Read grid data models

    Parameters
        config (dict): configuration dictionary
        date_str (str yyyy-mm-m_abbr-dd-ddd): date string
        model: specific model to read, optional; if not specified all
            models in config['model'] will be read

    Returns
        model_datasets (dict of xr.Dataset): dictionary of model datasets
        filenames (dict of str): dictionary of filenames
    """
    model_datasets = {}
    filenames = {}

    model_list = [model] if model is not None else list(config['model'])

    for model_name in model_list:

        model_config = config['model'][model_name]
        filestr = fill_date_template(model_config['files'], date_str)
        filename = find_file(model_config['datadir'], filestr)

        model_datasets[model_name] = xr.open_dataset(filename)
        filenames[model_name] = filename

    return model_datasets, filenames


def read_grid_obs(config, obs_vars, date_str, obs=None):
    """
    Read grid data obs

    Files of data_format 'gridded_eos' with an '.hdf' extension are read
    with read_gridded_eos and additionally written next to the source file
    as netCDF; every other file is opened directly with xarray.

    Parameters
        config (dict): configuration dictionary
        obs_vars (dict of dict): nested dictionary keyed by obs set name and obs variable name
        date_str (str yyyy-mm-m_abbr-dd-ddd): date string
        obs: specific observation to read, optional; if not specified all
            obs in obs_vars will be read

    Returns
        obs_datasets (dict of xr.Dataset): dictionary of obs datasets
        filenames (dict of str): dictionary of filenames (the file found
            on disk, i.e. the '.hdf' file when a conversion took place)
    """
    logger = logging.getLogger(__name__)

    obs_datasets = {}
    filenames = {}

    obs_list = [obs] if obs is not None else list(obs_vars)

    for obs_name in obs_list:

        obs_config = config['obs'][obs_name]
        data_format = obs_config['data_format']
        filestr = fill_date_template(obs_config['filename'], date_str)
        filename = find_file(obs_config['datadir'], filestr)

        file_root, file_extension = os.path.splitext(filename)

        if data_format == 'gridded_eos' and file_extension == '.hdf':
            ds_obs = read_gridded_eos(filename, obs_vars[obs_name])
            # Swap only the final extension; a plain str.replace would also
            # rewrite '.hdf' appearing in a directory name.
            filename_nc = file_root + '.nc'
            logger.info('writing %s', filename_nc)
            ds_obs.to_netcdf(filename_nc)
        else:
            ds_obs = xr.open_dataset(filename)

        obs_datasets[obs_name] = ds_obs
        filenames[obs_name] = filename

    return obs_datasets, filenames
# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration
# SPDX-License-Identifier: Apache-2.0
#

"""
file: regrid_util.py
"""
import os


def setup_regridder(config, config_group='obs'):
    """
    Setup regridder for observations or model

    One regridder is created per entry of config[config_group], mapping the
    entry's 'regrid' -> 'base_grid' onto the analysis 'target_grid' with the
    entry's 'regrid' -> 'method'.

    Parameters
        config (dict): configuration dictionary
        config_group (str): configuration group to process, 'obs' or 'model'

    Returns
        regridder (dict of xe.Regridder): dictionary of regridder instances
            keyed by entry name

    Raises
        ImportError: if xesmf is not installed
    """
    try:
        import xesmf as xe
    except ImportError as e:
        raise ImportError('regrid_util: xesmf module not found') from e
    # Imported here, like xesmf, so filename_regrid stays importable
    # without the gridding stack.
    import xarray as xr

    target_file = os.path.expandvars(config['analysis']['target_grid'])
    ds_target = xr.open_dataset(target_file)

    regridder_dict = {}

    for name, group_config in config[config_group].items():
        regrid_config = group_config['regrid']
        base_file = os.path.expandvars(regrid_config['base_grid'])
        ds_base = xr.open_dataset(base_file)
        regridder_dict[name] = xe.Regridder(
            ds_base, ds_target, regrid_config['method'])

    return regridder_dict


def filename_regrid(filename, regridder):
    """
    Construct modified filename for regridded dataset

    '_regrid' is inserted in front of the last '.nc' of the file's base
    name ('x.nc' -> 'x_regrid.nc', 'x.nc4' -> 'x_regrid.nc4').  Directory
    components and earlier '.nc' occurrences are left untouched, and a
    name without '.nc' is returned unchanged.

    Parameters
        filename (str): filename of dataset
        regridder (xe.Regridder): regridder instance (currently unused)

    Returns
        filename_regrid (str): filename of regridded dataset
    """
    basename = os.path.basename(filename)
    stem, marker, suffix = basename.rpartition('.nc')

    if not marker:
        return filename

    # Keep the directory part exactly as given, without normalizing it.
    directory = filename[:len(filename) - len(basename)]

    return directory + stem + '_regrid.nc' + suffix