From dc70fb1201af5f3ffd3e6fdb9b436bab77c86ec4 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Wed, 11 Jan 2023 12:20:42 -0700 Subject: [PATCH 01/64] Start on analysis_util module. --- melodies_monet/util/analysis_util.py | 54 ++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 melodies_monet/util/analysis_util.py diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py new file mode 100644 index 00000000..4fbea90d --- /dev/null +++ b/melodies_monet/util/analysis_util.py @@ -0,0 +1,54 @@ +import os +import logging +from glob import glob + + +def fill_date_template(template_str, date_str): + """ + Replace date template parameters with values from date string + + Parameters + template_str (str): template string + date_str (str yyyy-mm-m_abbr-dd-ddd): date string + + Returns + None + """ + + yyyy_str, mm_str, m_abbr_str, dd_str, ddd_str \ + = tuple(date_str.split('-')) + + if 'DDD' in template_str: + return template_str.replace( + 'YYYY', yyyy_str).replace( + 'DDD', ddd_str) + else: + return template_str.replace( + 'YYYY', yyyy_str).replace( + 'MM', mm_str).replace( + 'M_ABBR', m_abbr_str).replace( + 'DD', dd_str) + + +def find_file(datadir, filestr): + """ + Parameters + datadir (str): data directory + filestr (str): filename regular expression + + Returns + filename (str): complete path of matching filename in data directory + """ + + pattern = os.path.join(os.path.expandvars(datadir), filestr) + files = glob(pattern) + + if len(files) == 0: + raise Exception('no file matches for %s' % pattern) + if len(files) > 1: + raise Exception('more than one file match %s' % pattern) + + filename = files[0] + logging.info(filename) + + return filename From c3eb5d7abe96ac727283c55816e73f11ebfaa78b Mon Sep 17 00:00:00 2001 From: dwfncar Date: Wed, 11 Jan 2023 15:51:54 -0700 Subject: [PATCH 02/64] Start on test module for analysis_util. --- melodies_monet/tests/test_analysis_util.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 melodies_monet/tests/test_analysis_util.py diff --git a/melodies_monet/tests/test_analysis_util.py b/melodies_monet/tests/test_analysis_util.py new file mode 100644 index 00000000..5f549ac5 --- /dev/null +++ b/melodies_monet/tests/test_analysis_util.py @@ -0,0 +1,13 @@ +# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration +# SPDX-License-Identifier: Apache-2.0 +# +import pytest + +from pathlib import Path + +from melodies_monet import util + + +def test_find_file(tmpdir): + + save_dir = Path(tmpdir) From 903625b7a0c675eeca27bd8ae1c6e366f9f1195f Mon Sep 17 00:00:00 2001 From: dwfncar Date: Wed, 11 Jan 2023 17:45:48 -0700 Subject: [PATCH 03/64] Add test_find_file. --- melodies_monet/tests/test_analysis_util.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/melodies_monet/tests/test_analysis_util.py b/melodies_monet/tests/test_analysis_util.py index 5f549ac5..aab9f2b4 100644 --- a/melodies_monet/tests/test_analysis_util.py +++ b/melodies_monet/tests/test_analysis_util.py @@ -1,13 +1,19 @@ # Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration # SPDX-License-Identifier: Apache-2.0 # +import os import pytest -from pathlib import Path - -from melodies_monet import util +from melodies_monet.util import analysis_util def test_find_file(tmpdir): - save_dir = Path(tmpdir) + print(tmpdir) + + test_file = os.path.join(tmpdir, 'test.txt') + f = open(test_file, 'w') + f.close() + + filename = analysis_util.find_file(tmpdir, 'test*') + print(filename) From d9737fe7504aa3b97c3f6862c035244719aacc9b Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 12 Jan 2023 09:46:30 -0700 Subject: [PATCH 04/64] Added test_find_file. --- melodies_monet/tests/test_analysis_util.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/melodies_monet/tests/test_analysis_util.py b/melodies_monet/tests/test_analysis_util.py index aab9f2b4..dbeccf52 100644 --- a/melodies_monet/tests/test_analysis_util.py +++ b/melodies_monet/tests/test_analysis_util.py @@ -9,11 +9,9 @@ def test_find_file(tmpdir): - print(tmpdir) - test_file = os.path.join(tmpdir, 'test.txt') f = open(test_file, 'w') f.close() filename = analysis_util.find_file(tmpdir, 'test*') - print(filename) + assert(filename == test_file) From f62bbcb51067e261112cd666629f62d46e572872 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 12 Jan 2023 10:38:00 -0700 Subject: [PATCH 05/64] Added regrid_util module. --- melodies_monet/util/regrid_util.py | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 melodies_monet/util/regrid_util.py diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py new file mode 100644 index 00000000..3789377f --- /dev/null +++ b/melodies_monet/util/regrid_util.py @@ -0,0 +1,32 @@ +# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration +# SPDX-License-Identifier: Apache-2.0 +# + +""" +file: regrid_util.py +""" + +import xarray as xr +import xesmf as xe + + +def setup_obs_regridder(config): + """ + Setup regridder for observations + + Parameters + config (dict): configuration dictionary + + Returns + regridder (xe.Regridder): regridder instance + ds_target (xr.Dataset): target grid dataset + """ + + base_file = os.path.expandvars(config['obs']['regrid']['base_grid']) + ds_base = xr.open_dataset(base_file) + target_file = os.path.expandvars(config['obs']['regrid']['target_grid']) + ds_target = xr.open_dataset(target_file) + + regridder = xe.Regridder(ds_base, ds_target, config['obs']['regrid']['method']) + + return regridder, ds_target From 6ca17608f8744dab7663bf2c0a0a11841d6e984f Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 12 Jan 2023 12:40:31 -0700 Subject: [PATCH 06/64] Added test_fill_date_template. --- melodies_monet/tests/test_analysis_util.py | 14 ++++++++++++++ melodies_monet/util/analysis_util.py | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/melodies_monet/tests/test_analysis_util.py b/melodies_monet/tests/test_analysis_util.py index dbeccf52..f869e118 100644 --- a/melodies_monet/tests/test_analysis_util.py +++ b/melodies_monet/tests/test_analysis_util.py @@ -3,10 +3,23 @@ # import os import pytest +from datetime import datetime from melodies_monet.util import analysis_util +def test_fill_date_template(): + + date = datetime.now() + date_str = date.strftime('%Y-%m-%b-%d-%j') + print(date_str) + + template_str = 'Year YYYY, Month MM, Month Name M_ABBR, Day DD' + filled_str = analysis_util.fill_date_template(template_str, date_str) + print(filled_str) + assert(filled_str == date.strftime('Year %Y, Month %m, Month Name %b, Day %d')) + + def test_find_file(tmpdir): test_file = os.path.join(tmpdir, 'test.txt') @@ -14,4 +27,5 @@ def test_find_file(tmpdir): f.close() filename = analysis_util.find_file(tmpdir, 'test*') + print(filename) assert(filename == test_file) diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py index 4fbea90d..975b1339 100644 --- a/melodies_monet/util/analysis_util.py +++ b/melodies_monet/util/analysis_util.py @@ -12,7 +12,7 @@ def fill_date_template(template_str, date_str): date_str (str yyyy-mm-m_abbr-dd-ddd): date string Returns - None + template_str (str): filled template string """ yyyy_str, mm_str, m_abbr_str, dd_str, ddd_str \ @@ -39,6 +39,7 @@ def find_file(datadir, filestr): Returns filename (str): complete path of matching filename in data directory """ + logger = logging.getLogger(__name__) pattern = os.path.join(os.path.expandvars(datadir), filestr) files = glob(pattern) @@ -49,6 +50,6 @@ def find_file(datadir, filestr): raise Exception('more than one file match %s' % pattern) filename = files[0] - logging.info(filename) + logger.info(filename) return filename From 5ca09ddc7a5cd4730e322ca233c120d7d1f0c84c Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 12 Jan 2023 14:38:14 -0700 Subject: [PATCH 07/64] Test julian day in fill_date_template. --- melodies_monet/tests/test_analysis_util.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/melodies_monet/tests/test_analysis_util.py b/melodies_monet/tests/test_analysis_util.py index f869e118..43d61951 100644 --- a/melodies_monet/tests/test_analysis_util.py +++ b/melodies_monet/tests/test_analysis_util.py @@ -19,6 +19,11 @@ def test_fill_date_template(): print(filled_str) assert(filled_str == date.strftime('Year %Y, Month %m, Month Name %b, Day %d')) + template_str = 'Year YYYY, Julian Day DDD' + filled_str = analysis_util.fill_date_template(template_str, date_str) + print(filled_str) + assert(filled_str == date.strftime('Year %Y, Julian Day %j')) + def test_find_file(tmpdir): From 5f9a6518880355a6d02e4cc1b7f908484962fda8 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Fri, 13 Jan 2023 11:02:34 -0700 Subject: [PATCH 08/64] Start on script process_grid_data. --- examples/process_grid_data/process_grid_data.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 examples/process_grid_data/process_grid_data.py diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py new file mode 100644 index 00000000..179fcea1 --- /dev/null +++ b/examples/process_grid_data/process_grid_data.py @@ -0,0 +1,16 @@ +from melodies_monet import driver + +an = driver.analysis() +an.control = 'control_grid_data.yaml' +an.read_control() + +for time_interval in an.time_intervals: + + an.open_models(time_interval=time_interval) + an.open_obs(time_interval=time_interval) + an.pair_data() + +# an.concat_pairs() + +# an.plotting() +# an.stats() From 7486d3dd9181bfc02d3c2cda57bbada8cddee0b8 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Fri, 13 Jan 2023 11:26:35 -0700 Subject: [PATCH 09/64] Start on control YAML for process_grid_data example. --- examples/process_grid_data/control_grid_data.yaml | 6 ++++++ examples/process_grid_data/process_grid_data.py | 8 +++++--- 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 examples/process_grid_data/control_grid_data.yaml diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml new file mode 100644 index 00000000..ed3ab86e --- /dev/null +++ b/examples/process_grid_data/control_grid_data.yaml @@ -0,0 +1,6 @@ +analysis: + start_time: '2022-01-01' + end_time: '2022-12-31' + time_interval: 'D' + output_dir: $HOME/Plots + debug: True diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index 179fcea1..243f888e 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -6,9 +6,11 @@ for time_interval in an.time_intervals: - an.open_models(time_interval=time_interval) - an.open_obs(time_interval=time_interval) - an.pair_data() + print(time_interval) + + # an.open_models(time_interval=time_interval) + # an.open_obs(time_interval=time_interval) + # an.pair_data() # an.concat_pairs() From 8cd3efa1ddaaa57ec71b86149c1862c5601a2233 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 16 Jan 2023 10:33:15 -0700 Subject: [PATCH 10/64] Added analysis.setup_regridders. --- examples/process_grid_data/process_grid_data.py | 1 + melodies_monet/driver.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index 243f888e..e53e02e3 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -3,6 +3,7 @@ an = driver.analysis() an.control = 'control_grid_data.yaml' an.read_control() +an.setup_regridders() for time_interval in an.time_intervals: diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 3651ca3f..2c8fa492 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -432,6 +432,8 @@ def __init__(self): self.debug = False self.save = None self.read = None + self.obs_regridder = None + self.obs_target = None def __repr__(self): return ( @@ -572,6 +574,9 @@ def read_analysis(self): elif self.read[attr]['method']=='netcdf': read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='netcdf', attr=attr) + def setup_regridders(self): + from .util import regrid_util + def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` object for each of them, populating the :attr:`models` dict. From 51e5d3afa6f6b5b06d5a70a151c4e417c2254bf6 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 16 Jan 2023 13:42:13 -0700 Subject: [PATCH 11/64] Added analysis.setup_regridders. --- melodies_monet/driver.py | 7 +++++++ melodies_monet/util/regrid_util.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 2c8fa492..f3b704eb 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -575,7 +575,14 @@ def read_analysis(self): read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='netcdf', attr=attr) def setup_regridders(self): + """Create an obs xesmf.Regridder from base and target grids specified in the control_dict + + Returns + ------- + None + """ from .util import regrid_util + self.obs_regridder, self.obs_target = regrid_util.setup_obs_regridder(self.control_dict) def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index 3789377f..01d58e58 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -5,7 +5,7 @@ """ file: regrid_util.py """ - +import os import xarray as xr import xesmf as xe From 74e8a7717caf014501729dcc070233b1773a5cf7 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 28 Jan 2023 11:30:26 -0700 Subject: [PATCH 12/64] Start on read_grid_util module. --- melodies_monet/util/read_grid_util.py | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 melodies_monet/util/read_grid_util.py diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py new file mode 100644 index 00000000..b6c21152 --- /dev/null +++ b/melodies_monet/util/read_grid_util.py @@ -0,0 +1,32 @@ +import logging +import xarray as xr +from monetio.sat._gridded_eos_mm import read_gridded_eos + +from analysis_util import fill_date_template, find_file + + +def read_grid_models(config, date_str): + """ + Read grid data models + + Parameters + config (dict): configuration dictionary + date_str (str yyyy-mm-m_abbr-dd-ddd): date string + + Returns + model_datasets (dict of xr.Dataset): dictionary of model datasets + """ + + model_datasets = dict() + + for model_name in config['model']: + + datadir = config['model'][model_name]['datadir'] + filestr = config['model'][model_name]['filestr'] + filestr = fill_date_template( + config['model'][model_name]['filestr'], date_str) + filename = find_file(datadir, filestr) + + model_datasets[model_name] = xr.open_dataset(filename) + + return model_datasets From c2dd2472ac46e4b19877cac770920dfefc7ba2fc Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 29 Jan 2023 09:57:24 -0700 Subject: [PATCH 13/64] Added get_obs_vars. --- melodies_monet/util/read_grid_util.py | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index b6c21152..390909dd 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -5,6 +5,34 @@ from analysis_util import fill_date_template, find_file +def get_obs_vars(config): + """ + Get subset of obs variables from model to obs variable mapping + + Parameters + config (dict): configuration dictionary + + Returns + obs_vars_subset (dict of dicts): + nested dictionary keyed by obs set name and obs variable name + """ + obs_vars_subset = dict() + + for model_name in config['model']: + + mapping = config['model'][model_name]['mapping'] + + for obs_name in mapping: + obs_vars = config['obs'][obs_name]['variables'] + obs_vars_subset[obs_name] = dict() + + for model_var in mapping[obs_name]: + obs_var = mapping[obs_name][model_var] + obs_vars_subset[obs_name][obs_var] = obs_vars[obs_var] + + return obs_vars_subset + + def read_grid_models(config, date_str): """ Read grid data models From 39e31e8f2e7348d7fb8eeedeba8dcb2fc5153d78 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 29 Jan 2023 10:11:46 -0700 Subject: [PATCH 14/64] Move get_obs_vars to analysis_util module. --- melodies_monet/util/analysis_util.py | 28 +++++++++++++++++++++++++++ melodies_monet/util/read_grid_util.py | 28 --------------------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py index 975b1339..eb3604d1 100644 --- a/melodies_monet/util/analysis_util.py +++ b/melodies_monet/util/analysis_util.py @@ -53,3 +53,31 @@ def find_file(datadir, filestr): logger.info(filename) return filename + + +def get_obs_vars(config): + """ + Get subset of obs variables from model to obs variable mapping + + Parameters + config (dict): configuration dictionary + + Returns + obs_vars_subset (dict of dicts): + nested dictionary keyed by obs set name and obs variable name + """ + obs_vars_subset = dict() + + for model_name in config['model']: + + mapping = config['model'][model_name]['mapping'] + + for obs_name in mapping: + obs_vars = config['obs'][obs_name]['variables'] + obs_vars_subset[obs_name] = dict() + + for model_var in mapping[obs_name]: + obs_var = mapping[obs_name][model_var] + obs_vars_subset[obs_name][obs_var] = obs_vars[obs_var] + + return obs_vars_subset diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 390909dd..b6c21152 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -5,34 +5,6 @@ from analysis_util import fill_date_template, find_file -def get_obs_vars(config): - """ - Get subset of obs variables from model to obs variable mapping - - Parameters - config (dict): configuration dictionary - - Returns - obs_vars_subset (dict of dicts): - nested dictionary keyed by obs set name and obs variable name - """ - obs_vars_subset = dict() - - for model_name in config['model']: - - mapping = config['model'][model_name]['mapping'] - - for obs_name in mapping: - obs_vars = config['obs'][obs_name]['variables'] - obs_vars_subset[obs_name] = dict() - - for model_var in mapping[obs_name]: - obs_var = mapping[obs_name][model_var] - obs_vars_subset[obs_name][obs_var] = obs_vars[obs_var] - - return obs_vars_subset - - def read_grid_models(config, date_str): """ Read grid data models From a7f198846da11dd2e9d49e2d3dc31e78315693bb Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 29 Jan 2023 10:32:51 -0700 Subject: [PATCH 15/64] Added read_grid_util.read_grid_obs. --- melodies_monet/util/read_grid_util.py | 47 ++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index b6c21152..d590fad9 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -16,7 +16,6 @@ def read_grid_models(config, date_str): Returns model_datasets (dict of xr.Dataset): dictionary of model datasets """ - model_datasets = dict() for model_name in config['model']: @@ -30,3 +29,49 @@ def read_grid_models(config, date_str): model_datasets[model_name] = xr.open_dataset(filename) return model_datasets + + +def read_grid_obs(config, obs_vars, date_str): + """ + Read grid data obs + + Parameters + config (dict): configuration dictionary + obs_vars (dict of dict): + nested dictionary keyed by obs set name and obs variable name + date_str (str yyyy-mm-m_abbr-dd-ddd): date string + + Returns + obs_datasets (dict of xr.Dataset): dictionary of obs datasets + """ + obs_datasets = dict() + + yyyy_str, mm_str, m_abbr_str, dd_str, ddd_str \ + = tuple(date_str.split('-')) + + for obs_name in obs_vars: + + data_format = config['obs'][obs_name]['data_format'] + datadir = config['obs'][obs_name]['datadir'] + filestr = fill_date_template( + config['obs'][obs_name]['filestr'], date_str) + filename = find_file(datadir, filestr) + + file_extension = os.path.splitext(filename)[1] + + if data_format == 'gridded_eos': + if file_extension == '.hdf': + ds_obs = read_gridded_eos( + filename, obs_vars[obs_name]) + filename_nc = filename.replace('.hdf', '.nc') + logging.info('writing ' + filename_nc) + ds_obs.to_netcdf(filename_nc) + else: + ds_obs = xr.open_dataset(filename) + else: + ds_obs = xr.open_dataset(filename) + + obs_datasets[obs_name] = ds_obs + + return obs_datasets + From a12f70913440194dc1124378959b3a4a04178ba8 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 30 Jan 2023 11:43:12 -0700 Subject: [PATCH 16/64] Added grid_data config flag in analysis block. --- melodies_monet/driver.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index f3b704eb..5d896aa6 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -432,6 +432,7 @@ def __init__(self): self.debug = False self.save = None self.read = None + self.grid_data = None self.obs_regridder = None self.obs_target = None @@ -501,6 +502,10 @@ def read_control(self, control=None): if 'read' in self.control_dict['analysis'].keys(): self.read = self.control_dict['analysis']['read'] + # set grid_data option + if 'grid_data' in self.control_dict['analysis'].keys(): + self.grid_data = self.control_dict['analysis'] + # generate time intervals for time chunking if 'time_interval' in self.control_dict['analysis'].keys(): time_stamps = pd.date_range( From 9f5770cdb7676ffb36a4188eea840fe05254fe7e Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 30 Jan 2023 13:09:45 -0700 Subject: [PATCH 17/64] Added grid_data control flag. --- examples/process_grid_data/control_grid_data.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index ed3ab86e..a445766d 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -2,5 +2,6 @@ analysis: start_time: '2022-01-01' end_time: '2022-12-31' time_interval: 'D' + grid_data: True output_dir: $HOME/Plots debug: True From e81593f4848bebbb7248a464cbfa687952c1b96b Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 9 Feb 2023 11:18:17 -0700 Subject: [PATCH 18/64] Added model and obs control blocks. --- .../process_grid_data/control_grid_data.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index a445766d..96c8816d 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -5,3 +5,28 @@ analysis: grid_data: True output_dir: $HOME/Plots debug: True + +obs: + MOD08_M3: + data_format: gridded_eos + datadir: $HOME/Data/MOD08_M3_nc + filestr: MOD08_M3.AYYYYDDD.061.*_regrid.nc + variables: + AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean: + fillvalue: -9999 + scale: 0.001 + units: none + +model: + MERRA2: + data_format: netcdf + datadir: $HOME/Data/MERRA2_TOTEXTTAU + filestr: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 + variables: + fillvalue: 1.e+15 + scale: 1.0 + units: none + mapping: + MOD08_M3: + TOTEXTTAU: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean + From 8e3d8d7f746918cf55a41ede8a3719c63697e431 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 9 Feb 2023 11:55:05 -0700 Subject: [PATCH 19/64] Switch to monthly data. --- examples/process_grid_data/control_grid_data.yaml | 2 +- examples/process_grid_data/process_grid_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index 96c8816d..2f8f57bc 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -1,7 +1,7 @@ analysis: start_time: '2022-01-01' end_time: '2022-12-31' - time_interval: 'D' + time_interval: 'MS' grid_data: True output_dir: $HOME/Plots debug: True diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index e53e02e3..9312ecb2 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -3,7 +3,7 @@ an = driver.analysis() an.control = 'control_grid_data.yaml' an.read_control() -an.setup_regridders() +# an.setup_regridders() for time_interval in an.time_intervals: From 7f135b18602a7989ea2f7e4034117c8f38bad870 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 9 Feb 2023 12:39:48 -0700 Subject: [PATCH 20/64] Construct date_str from time_interval in analysis.open_obs. --- examples/process_grid_data/control_grid_data.yaml | 2 +- examples/process_grid_data/process_grid_data.py | 2 +- melodies_monet/driver.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index 2f8f57bc..007cd158 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -6,7 +6,7 @@ analysis: output_dir: $HOME/Plots debug: True -obs: +grid_obs: MOD08_M3: data_format: gridded_eos datadir: $HOME/Data/MOD08_M3_nc diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index 9312ecb2..c39b4353 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -9,8 +9,8 @@ print(time_interval) + an.open_obs(time_interval=time_interval) # an.open_models(time_interval=time_interval) - # an.open_obs(time_interval=time_interval) # an.pair_data() # an.concat_pairs() diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 5d896aa6..59b26398 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -676,6 +676,10 @@ def open_obs(self, time_interval=None): o.open_obs(time_interval=time_interval) self.obs[o.label] = o + if 'grid_obs' in self.control_dict: + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('grid_obs reading %s' % date_str) + def pair_data(self, time_interval=None): """Pair all observations and models in the analysis class (i.e., those listed in the input yaml file) together, From 9ea5d1466073ac2d7237f7c334651ebd4c94d250 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 9 Feb 2023 12:57:20 -0700 Subject: [PATCH 21/64] Use analysis_util.get_obs_vars in analysis.open_obs. --- melodies_monet/driver.py | 4 ++++ melodies_monet/util/analysis_util.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 59b26398..80315e33 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -663,6 +663,8 @@ def open_obs(self, time_interval=None): ------- None """ + from .util import analysis_util + if 'obs' in self.control_dict: for obs in self.control_dict['obs']: o = observation() @@ -679,6 +681,8 @@ def open_obs(self, time_interval=None): if 'grid_obs' in self.control_dict: date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('grid_obs reading %s' % date_str) + obs_vars = analysis_util.get_obs_vars(self.control_dict) + print(obs_vars) def pair_data(self, time_interval=None): """Pair all observations and models in the analysis class diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py index eb3604d1..a4fc715c 100644 --- a/melodies_monet/util/analysis_util.py +++ b/melodies_monet/util/analysis_util.py @@ -63,7 +63,7 @@ def get_obs_vars(config): config (dict): configuration dictionary Returns - obs_vars_subset (dict of dicts): + obs_vars_subset (dict of dict): nested dictionary keyed by obs set name and obs variable name """ obs_vars_subset = dict() @@ -73,7 +73,7 @@ def get_obs_vars(config): mapping = config['model'][model_name]['mapping'] for obs_name in mapping: - obs_vars = config['obs'][obs_name]['variables'] + obs_vars = config['grid_obs'][obs_name]['variables'] obs_vars_subset[obs_name] = dict() for model_var in mapping[obs_name]: From 9d1253853bf7785dfeeb8fc96165f004fddae82b Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 9 Feb 2023 17:25:28 -0700 Subject: [PATCH 22/64] Call read_grid_util.read_grid_obs in analysis.open_obs. --- examples/process_grid_data/control_grid_data.yaml | 4 ++-- melodies_monet/driver.py | 4 ++++ melodies_monet/util/read_grid_util.py | 9 +++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index 007cd158..a4932964 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -1,6 +1,6 @@ analysis: - start_time: '2022-01-01' - end_time: '2022-12-31' + start_time: '2020-01-01' + end_time: '2020-12-31' time_interval: 'MS' grid_data: True output_dir: $HOME/Plots diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 80315e33..d3291eb6 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -664,6 +664,7 @@ def open_obs(self, time_interval=None): None """ from .util import analysis_util + from .util import read_grid_util if 'obs' in self.control_dict: for obs in self.control_dict['obs']: @@ -683,6 +684,9 @@ def open_obs(self, time_interval=None): print('grid_obs reading %s' % date_str) obs_vars = analysis_util.get_obs_vars(self.control_dict) print(obs_vars) + obs_datasets = read_grid_util.read_grid_obs(self.control_dict, + obs_vars, date_str) + print(obs_datasets) def pair_data(self, time_interval=None): """Pair all observations and models in the analysis class diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index d590fad9..433ce2f8 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -1,8 +1,9 @@ +import os import logging import xarray as xr from monetio.sat._gridded_eos_mm import read_gridded_eos -from analysis_util import fill_date_template, find_file +from .analysis_util import fill_date_template, find_file def read_grid_models(config, date_str): @@ -51,10 +52,10 @@ def read_grid_obs(config, obs_vars, date_str): for obs_name in obs_vars: - data_format = config['obs'][obs_name]['data_format'] - datadir = config['obs'][obs_name]['datadir'] + data_format = config['grid_obs'][obs_name]['data_format'] + datadir = config['grid_obs'][obs_name]['datadir'] filestr = fill_date_template( - config['obs'][obs_name]['filestr'], date_str) + config['grid_obs'][obs_name]['filestr'], date_str) filename = find_file(datadir, filestr) file_extension = os.path.splitext(filename)[1] From 3e01b5f61d452c8f1f96f86ff0271a00d10c1706 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 9 Feb 2023 18:09:12 -0700 Subject: [PATCH 23/64] In open_obs, grid_obs block, assign observation object attributes. --- melodies_monet/driver.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index d3291eb6..79e0537b 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -687,6 +687,13 @@ def open_obs(self, time_interval=None): obs_datasets = read_grid_util.read_grid_obs(self.control_dict, obs_vars, date_str) print(obs_datasets) + for obs in obs_vars: + o = observation() + o.obs = obs + o.label = obs + o.obs_type = 'grid_obs' + o.obj = obs_datasets[obs] + self.obs[o.label] = o def pair_data(self, time_interval=None): """Pair all observations and models in the analysis class From 1de4221f81c5069e1efdf821df2c828413b03b23 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Fri, 10 Feb 2023 18:33:47 -0700 Subject: [PATCH 24/64] Added open grid models. --- examples/process_grid_data/control_grid_data.yaml | 2 +- examples/process_grid_data/process_grid_data.py | 6 +----- melodies_monet/driver.py | 12 ++++++++++-- melodies_monet/util/analysis_util.py | 4 ++-- melodies_monet/util/read_grid_util.py | 8 ++++---- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index a4932964..24f86dfd 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -17,7 +17,7 @@ grid_obs: scale: 0.001 units: none -model: +grid_model: MERRA2: data_format: netcdf datadir: $HOME/Data/MERRA2_TOTEXTTAU diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index c39b4353..496155d9 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -10,10 +10,6 @@ print(time_interval) an.open_obs(time_interval=time_interval) - # an.open_models(time_interval=time_interval) - # an.pair_data() - -# an.concat_pairs() + an.open_models(time_interval=time_interval) # an.plotting() -# an.stats() diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 79e0537b..f8a854c0 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -648,6 +648,14 @@ def open_models(self, time_interval=None): m.open_model_files(time_interval=time_interval) self.models[m.label] = m + if 'grid_model' in self.control_dict: + from .util import read_grid_util + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('grid_model reading %s' % date_str) + model_datasets = read_grid_util.read_grid_models( + self.control_dict, date_str) + print(model_datasets) + def open_obs(self, time_interval=None): """Open all observations listed in the input yaml file and create an :class:`observation` instance for each of them, @@ -684,8 +692,8 @@ def open_obs(self, time_interval=None): print('grid_obs reading %s' % date_str) obs_vars = analysis_util.get_obs_vars(self.control_dict) print(obs_vars) - obs_datasets = read_grid_util.read_grid_obs(self.control_dict, - obs_vars, date_str) + obs_datasets = read_grid_util.read_grid_obs( + self.control_dict, obs_vars, date_str) print(obs_datasets) for obs in obs_vars: o = observation() diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py index a4fc715c..1f7283b1 100644 --- a/melodies_monet/util/analysis_util.py +++ b/melodies_monet/util/analysis_util.py @@ -68,9 +68,9 @@ def get_obs_vars(config): """ obs_vars_subset = dict() - for model_name in config['model']: + for model_name in config['grid_model']: - mapping = config['model'][model_name]['mapping'] + mapping = config['grid_model'][model_name]['mapping'] for obs_name in mapping: obs_vars = config['grid_obs'][obs_name]['variables'] diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 433ce2f8..94bdde85 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -19,12 +19,12 @@ def read_grid_models(config, date_str): """ model_datasets = dict() - for model_name in config['model']: + for model_name in config['grid_model']: - datadir = config['model'][model_name]['datadir'] - filestr = config['model'][model_name]['filestr'] + datadir = config['grid_model'][model_name]['datadir'] + filestr = config['grid_model'][model_name]['filestr'] filestr = fill_date_template( - config['model'][model_name]['filestr'], date_str) + config['grid_model'][model_name]['filestr'], date_str) filename = find_file(datadir, filestr) model_datasets[model_name] = xr.open_dataset(filename) From 907d487f1b9d45c201209b9004c1789da802f65f Mon Sep 17 00:00:00 2001 From: dwfncar Date: Fri, 10 Feb 2023 18:49:46 -0700 Subject: [PATCH 25/64] Populate analysis.models. --- examples/process_grid_data/process_grid_data.py | 3 +++ melodies_monet/driver.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index 496155d9..4765f8f1 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -12,4 +12,7 @@ an.open_obs(time_interval=time_interval) an.open_models(time_interval=time_interval) +print(an.obs) +print(an.models) + # an.plotting() diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index f8a854c0..f919be7b 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -655,6 +655,14 @@ def open_models(self, time_interval=None): model_datasets = read_grid_util.read_grid_models( self.control_dict, date_str) print(model_datasets) + for mod in model_datasets: + m = model() + m.label = mod + if 'variables' in self.control_dict['grid_model'][mod].keys(): + m.variable_dict \ + = self.control_dict['grid_model'][mod]['variables'] + m.obj = model_datasets[mod] + self.models[m.label] = m def open_obs(self, time_interval=None): """Open all observations listed in the input yaml file and create an From 4adb4a5a81699ced256c7189e9f5af2b886a85de Mon Sep 17 00:00:00 2001 From: dwfncar Date: Fri, 10 Feb 2023 18:53:58 -0700 Subject: [PATCH 26/64] Print obs and model objs. --- examples/process_grid_data/process_grid_data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index 4765f8f1..cc9b68ea 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -12,7 +12,11 @@ an.open_obs(time_interval=time_interval) an.open_models(time_interval=time_interval) -print(an.obs) -print(an.models) + print(an.obs) + for obs in an.obs: + print(an.obs[obs].obj) + print(an.models) + for mod in an.models: + print(an.models[mod].obj) # an.plotting() From 210d56f384a399515c163a2010ebfdb4cd7c98c2 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 12:37:42 -0700 Subject: [PATCH 27/64] Use analysis grid_data flag. --- .../process_grid_data/control_grid_data.yaml | 4 ++-- melodies_monet/driver.py | 20 +++++++++---------- melodies_monet/util/analysis_util.py | 6 +++--- melodies_monet/util/read_grid_util.py | 14 ++++++------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index 24f86dfd..a2ed2a52 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -6,7 +6,7 @@ analysis: output_dir: $HOME/Plots debug: True -grid_obs: +obs: MOD08_M3: data_format: gridded_eos datadir: $HOME/Data/MOD08_M3_nc @@ -17,7 +17,7 @@ grid_obs: scale: 0.001 units: none -grid_model: +model: MERRA2: data_format: netcdf datadir: $HOME/Data/MERRA2_TOTEXTTAU diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index f919be7b..107e506b 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -432,7 +432,7 @@ def __init__(self): self.debug = False self.save = None self.read = None - self.grid_data = None + self.grid_data = False # Default to False self.obs_regridder = None self.obs_target = None @@ -602,7 +602,7 @@ def open_models(self, time_interval=None): ------- None """ - if 'model' in self.control_dict: + if ('model' in self.control_dict) and (not self.grid_data): # open each model for mod in self.control_dict['model']: # create a new model instance @@ -648,19 +648,19 @@ def open_models(self, time_interval=None): m.open_model_files(time_interval=time_interval) self.models[m.label] = m - if 'grid_model' in self.control_dict: + if ('model' in self.control_dict) and self.grid_data: from .util import read_grid_util date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('grid_model reading %s' % date_str) + print('model reading %s' % date_str) model_datasets = read_grid_util.read_grid_models( self.control_dict, date_str) print(model_datasets) for mod in model_datasets: m = model() m.label = mod - if 'variables' in self.control_dict['grid_model'][mod].keys(): + if 'variables' in self.control_dict['model'][mod].keys(): m.variable_dict \ - = self.control_dict['grid_model'][mod]['variables'] + = self.control_dict['model'][mod]['variables'] m.obj = model_datasets[mod] self.models[m.label] = m @@ -682,7 +682,7 @@ def open_obs(self, time_interval=None): from .util import analysis_util from .util import read_grid_util - if 'obs' in self.control_dict: + if ('obs' in self.control_dict) and (not self.grid_data): for obs in self.control_dict['obs']: o = observation() o.obs = obs @@ -695,9 +695,9 @@ def open_obs(self, time_interval=None): o.open_obs(time_interval=time_interval) self.obs[o.label] = o - if 'grid_obs' in self.control_dict: + if ('obs' in self.control_dict) and self.grid_data: date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('grid_obs reading %s' % date_str) + print('obs reading %s' % date_str) obs_vars = analysis_util.get_obs_vars(self.control_dict) print(obs_vars) obs_datasets = read_grid_util.read_grid_obs( @@ -707,7 +707,7 @@ def open_obs(self, time_interval=None): o = observation() o.obs = obs o.label = obs - o.obs_type = 'grid_obs' + o.obs_type = 'grid_data' o.obj = obs_datasets[obs] self.obs[o.label] = o diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py index 1f7283b1..8ce78a0d 100644 --- a/melodies_monet/util/analysis_util.py +++ b/melodies_monet/util/analysis_util.py @@ -68,12 +68,12 @@ def get_obs_vars(config): """ obs_vars_subset = dict() - for model_name in config['grid_model']: + for model_name in config['model']: - mapping = config['grid_model'][model_name]['mapping'] + mapping = config['model'][model_name]['mapping'] for obs_name in mapping: - obs_vars = config['grid_obs'][obs_name]['variables'] + obs_vars = config['obs'][obs_name]['variables'] obs_vars_subset[obs_name] = dict() for model_var in mapping[obs_name]: diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 94bdde85..ad6da925 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -19,12 +19,12 @@ def read_grid_models(config, date_str): """ model_datasets = dict() - for model_name in config['grid_model']: + for model_name in config['model']: - datadir = config['grid_model'][model_name]['datadir'] - filestr = config['grid_model'][model_name]['filestr'] + datadir = config['model'][model_name]['datadir'] + filestr = config['model'][model_name]['filestr'] filestr = fill_date_template( - config['grid_model'][model_name]['filestr'], date_str) + config['model'][model_name]['filestr'], date_str) filename = find_file(datadir, filestr) model_datasets[model_name] = xr.open_dataset(filename) @@ -52,10 +52,10 @@ def read_grid_obs(config, obs_vars, date_str): for obs_name in obs_vars: - data_format = config['grid_obs'][obs_name]['data_format'] - datadir = config['grid_obs'][obs_name]['datadir'] + data_format = config['obs'][obs_name]['data_format'] + datadir = config['obs'][obs_name]['datadir'] filestr = fill_date_template( - config['grid_obs'][obs_name]['filestr'], date_str) + config['obs'][obs_name]['filestr'], date_str) filename = find_file(datadir, filestr) file_extension = os.path.splitext(filename)[1] From 8143f1e6241e0ed2d55c22ec570b77070803a0db Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 13:07:00 -0700 Subject: [PATCH 28/64] Added observation type grid_data. --- melodies_monet/driver.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 107e506b..46962dde 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -659,8 +659,9 @@ def open_models(self, time_interval=None): m = model() m.label = mod if 'variables' in self.control_dict['model'][mod].keys(): - m.variable_dict \ - = self.control_dict['model'][mod]['variables'] + m.variable_dict = self.control_dict['model'][mod]['variables'] + if 'plot_kwargs' in self.control_dict['model'][mod].keys(): + m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] m.obj = model_datasets[mod] self.models[m.label] = m @@ -707,7 +708,7 @@ def open_obs(self, time_interval=None): o = observation() o.obs = obs o.label = obs - o.obs_type = 'grid_data' + o.type = 'grid_data' o.obj = obs_datasets[obs] self.obs[o.label] = o From 13182e7ae9eaf8ce875ed2b75248ba0d98766cbf Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 13:20:05 -0700 Subject: [PATCH 29/64] Added model name. --- melodies_monet/driver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 46962dde..78c7a289 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -657,7 +657,9 @@ def open_models(self, time_interval=None): print(model_datasets) for mod in model_datasets: m = model() + m.model = mod m.label = mod + m.mapping = self.control_dict['model'][mod]['mapping'] if 'variables' in self.control_dict['model'][mod].keys(): m.variable_dict = self.control_dict['model'][mod]['variables'] if 'plot_kwargs' in self.control_dict['model'][mod].keys(): From 5f995450fd215c3f8709dba78441f265958f9240 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 14:26:49 -0700 Subject: [PATCH 30/64] Return filename from read_grid_util readers. --- melodies_monet/driver.py | 6 ++++-- melodies_monet/util/read_grid_util.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 78c7a289..09e12383 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -652,13 +652,14 @@ def open_models(self, time_interval=None): from .util import read_grid_util date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('model reading %s' % date_str) - model_datasets = read_grid_util.read_grid_models( + filename, model_datasets = read_grid_util.read_grid_models( self.control_dict, date_str) print(model_datasets) for mod in model_datasets: m = model() m.model = mod m.label = mod + m.file_str = filename m.mapping = self.control_dict['model'][mod]['mapping'] if 'variables' in self.control_dict['model'][mod].keys(): m.variable_dict = self.control_dict['model'][mod]['variables'] @@ -703,13 +704,14 @@ def open_obs(self, time_interval=None): print('obs reading %s' % date_str) obs_vars = analysis_util.get_obs_vars(self.control_dict) print(obs_vars) - obs_datasets = read_grid_util.read_grid_obs( + filename, obs_datasets = read_grid_util.read_grid_obs( self.control_dict, obs_vars, date_str) print(obs_datasets) for obs in obs_vars: o = observation() o.obs = obs o.label = obs + o.file = filename o.type = 'grid_data' o.obj = obs_datasets[obs] self.obs[o.label] = o diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index ad6da925..c3fc0345 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -29,7 +29,7 @@ def read_grid_models(config, date_str): model_datasets[model_name] = xr.open_dataset(filename) - return model_datasets + return filename, model_datasets def read_grid_obs(config, obs_vars, date_str): @@ -74,5 +74,5 @@ def read_grid_obs(config, obs_vars, date_str): obs_datasets[obs_name] = ds_obs - return obs_datasets + return filename, obs_datasets From e3445d7d4b97dae332d2b6b1d330c89fea6d85e8 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 15:16:48 -0700 Subject: [PATCH 31/64] Renamed to setup_regridder. --- melodies_monet/driver.py | 6 ++++-- melodies_monet/util/regrid_util.py | 13 +++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 09e12383..8ecece79 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -434,7 +434,8 @@ def __init__(self): self.read = None self.grid_data = False # Default to False self.obs_regridder = None - self.obs_target = None + self.model_regridder = None + self.target_grid = None def __repr__(self): return ( @@ -587,7 +588,8 @@ def setup_regridders(self): None """ from .util import regrid_util - self.obs_regridder, self.obs_target = regrid_util.setup_obs_regridder(self.control_dict) + self.obs_regridder, self.target_grid \ + = regrid_util.setup_regridder(self.control_dict) def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index 01d58e58..a2993bda 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -10,9 +10,9 @@ import xesmf as xe -def setup_obs_regridder(config): +def setup_regridder(config, config_type='obs'): """ - Setup regridder for observations + Setup regridder for observations or model Parameters config (dict): configuration dictionary @@ -22,11 +22,12 @@ def setup_obs_regridder(config): ds_target (xr.Dataset): target grid dataset """ - base_file = os.path.expandvars(config['obs']['regrid']['base_grid']) - ds_base = xr.open_dataset(base_file) - target_file = os.path.expandvars(config['obs']['regrid']['target_grid']) + base_file = os.path.expandvars(config[config_type]['regrid']['base_grid']) + config_typeds_base = xr.open_dataset(base_file) + target_file = os.path.expandvars(config[config_type]['regrid']['target_grid']) ds_target = xr.open_dataset(target_file) - regridder = xe.Regridder(ds_base, ds_target, config['obs']['regrid']['method']) + regridder = xe.Regridder(ds_base, ds_target, config[config_type]['regrid']['method']) return regridder, ds_target + From 6e83b651a76e32485b7bbc011c4ea81486097160 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 15:37:59 -0700 Subject: [PATCH 32/64] Added regrid control. --- examples/process_grid_data/control_grid_data.yaml | 11 ++++++++++- examples/process_grid_data/process_grid_data.py | 3 --- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index a2ed2a52..0c47f534 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -2,15 +2,20 @@ analysis: start_time: '2020-01-01' end_time: '2020-12-31' time_interval: 'MS' - grid_data: True output_dir: $HOME/Plots debug: True + grid_data: True + target_grid: $HOME/Data/Grids/cam_grid.nc obs: + MOD08_M3: data_format: gridded_eos datadir: $HOME/Data/MOD08_M3_nc filestr: MOD08_M3.AYYYYDDD.061.*_regrid.nc + regrid: + base_grid: $HOME/Data/Grids/modis_l3_grid.nc + method: bilinear variables: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean: fillvalue: -9999 @@ -18,10 +23,14 @@ obs: units: none model: + MERRA2: data_format: netcdf datadir: $HOME/Data/MERRA2_TOTEXTTAU filestr: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 + regrid: + base_grid: $HOME/Data/Grids/merra2_grid.nc + method: bilinear variables: fillvalue: 1.e+15 scale: 1.0 diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index cc9b68ea..f2e2a058 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -3,7 +3,6 @@ an = driver.analysis() an.control = 'control_grid_data.yaml' an.read_control() -# an.setup_regridders() for time_interval in an.time_intervals: @@ -18,5 +17,3 @@ print(an.models) for mod in an.models: print(an.models[mod].obj) - -# an.plotting() From 75a7d04817e0548bcbc6424ed589b3893473f327 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 17:44:19 -0700 Subject: [PATCH 33/64] Rework setup_regridders to return regridder instance for each obs set or model. --- melodies_monet/driver.py | 14 ++++++++------ melodies_monet/util/regrid_util.py | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 8ecece79..749c2b95 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -433,9 +433,9 @@ def __init__(self): self.save = None self.read = None self.grid_data = False # Default to False - self.obs_regridder = None - self.model_regridder = None self.target_grid = None + self.obs_regridders = None + self.model_regridders = None def __repr__(self): return ( @@ -503,9 +503,11 @@ def read_control(self, control=None): if 'read' in self.control_dict['analysis'].keys(): self.read = self.control_dict['analysis']['read'] - # set grid_data option + # set grid_data option and target_grid if 'grid_data' in self.control_dict['analysis'].keys(): - self.grid_data = self.control_dict['analysis'] + self.grid_data = self.control_dict['analysis']['grid_data'] + if 'target_grid' in self.control_dict['analysis'].keys(): + self.target_grid = self.control_dict['analysis']['target_grid'] # generate time intervals for time chunking if 'time_interval' in self.control_dict['analysis'].keys(): @@ -588,8 +590,8 @@ def setup_regridders(self): None """ from .util import regrid_util - self.obs_regridder, self.target_grid \ - = regrid_util.setup_regridder(self.control_dict) + self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') + self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index a2993bda..95a93097 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -18,16 +18,19 @@ def setup_regridder(config, config_type='obs'): config (dict): configuration dictionary Returns - regridder (xe.Regridder): regridder instance - ds_target (xr.Dataset): target grid dataset + regridder (dict of xe.Regridder): dictionary of regridder instances """ - - base_file = os.path.expandvars(config[config_type]['regrid']['base_grid']) - config_typeds_base = xr.open_dataset(base_file) - target_file = os.path.expandvars(config[config_type]['regrid']['target_grid']) + target_file = os.path.expandvars(config['analysis']['target_grid']) ds_target = xr.open_dataset(target_file) - regridder = xe.Regridder(ds_base, ds_target, config[config_type]['regrid']['method']) + regridder_dict = dict() + + for name in config[config_type]: + base_file = os.path.expandvars(config[config_type][name]['regrid']['base_grid']) + ds_base = xr.open_dataset(base_file) + method = config[config_type][name]['regrid']['method'] + regridder = xe.Regridder(ds_base, ds_target, method) + regridder_dict[name] = regridder - return regridder, ds_target + return regridder_dict From e0a040e8e52e1898b2eb593719b6e058c033581b Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 17:48:48 -0700 Subject: [PATCH 34/64] Call setup_regridders. --- examples/process_grid_data/control_grid_data.yaml | 1 + examples/process_grid_data/process_grid_data.py | 2 ++ melodies_monet/driver.py | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index 0c47f534..95005951 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -5,6 +5,7 @@ analysis: output_dir: $HOME/Plots debug: True grid_data: True + regrid: True target_grid: $HOME/Data/Grids/cam_grid.nc obs: diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index f2e2a058..86c7ecae 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -4,6 +4,8 @@ an.control = 'control_grid_data.yaml' an.read_control() +an.setup_regridders() + for time_interval in an.time_intervals: print(time_interval) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 749c2b95..b720c043 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -433,6 +433,7 @@ def __init__(self): self.save = None self.read = None self.grid_data = False # Default to False + self.regrid = False # Default to False self.target_grid = None self.obs_regridders = None self.model_regridders = None @@ -590,8 +591,9 @@ def setup_regridders(self): None """ from .util import regrid_util - self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') - self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') + if self.regrid: + self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') + self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` From 03db6d6a0a21dd1357dc4c03115434bf9837f762 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 17:56:26 -0700 Subject: [PATCH 35/64] Make regridders. --- examples/process_grid_data/process_grid_data.py | 1 - melodies_monet/driver.py | 7 ++++++- melodies_monet/util/regrid_util.py | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_grid_data/process_grid_data.py index 86c7ecae..0bc86cb1 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_grid_data/process_grid_data.py @@ -3,7 +3,6 @@ an = driver.analysis() an.control = 'control_grid_data.yaml' an.read_control() - an.setup_regridders() for time_interval in an.time_intervals: diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index b720c043..f199aa24 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -504,9 +504,11 @@ def read_control(self, control=None): if 'read' in self.control_dict['analysis'].keys(): self.read = self.control_dict['analysis']['read'] - # set grid_data option and target_grid + # set grid_data option, regrid option, and target_grid if 'grid_data' in self.control_dict['analysis'].keys(): self.grid_data = self.control_dict['analysis']['grid_data'] + if 'regrid' in self.control_dict['analysis'].keys(): + self.regrid = self.control_dict['analysis']['regrid'] if 'target_grid' in self.control_dict['analysis'].keys(): self.target_grid = self.control_dict['analysis']['target_grid'] @@ -591,9 +593,12 @@ def setup_regridders(self): None """ from .util import regrid_util + print('setup_regridders') if self.regrid: self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') + print(self.obs_regridders) self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') + print(self.model_regridders) def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index 95a93097..a2b0b20f 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -10,7 +10,7 @@ import xesmf as xe -def setup_regridder(config, config_type='obs'): +def setup_regridder(config, config_group='obs'): """ Setup regridder for observations or model @@ -25,10 +25,10 @@ def setup_regridder(config, config_type='obs'): regridder_dict = dict() - for name in config[config_type]: - base_file = os.path.expandvars(config[config_type][name]['regrid']['base_grid']) + for name in config[config_group]: + base_file = os.path.expandvars(config[config_group][name]['regrid']['base_grid']) ds_base = xr.open_dataset(base_file) - method = config[config_type][name]['regrid']['method'] + method = config[config_group][name]['regrid']['method'] regridder = xe.Regridder(ds_base, ds_target, method) regridder_dict[name] = regridder From bc4f9553d26b3f66fc7852278e720430186123e1 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sat, 11 Feb 2023 19:05:22 -0700 Subject: [PATCH 36/64] Save. --- melodies_monet/driver.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index f199aa24..1ca501e8 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -593,12 +593,9 @@ def setup_regridders(self): None """ from .util import regrid_util - print('setup_regridders') if self.regrid: self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') - print(self.obs_regridders) self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') - print(self.model_regridders) def open_models(self, time_interval=None): """Open all models listed in the input yaml file and create a :class:`model` From 660d0a8bf0387f174c8da4103011456689e8c423 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 12 Feb 2023 12:53:59 -0700 Subject: [PATCH 37/64] Regrid obs. --- examples/process_grid_data/control_grid_data.yaml | 2 +- melodies_monet/driver.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index 95005951..dc371cdf 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -5,7 +5,7 @@ analysis: output_dir: $HOME/Plots debug: True grid_data: True - regrid: True + regrid: False target_grid: $HOME/Data/Grids/cam_grid.nc obs: diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 1ca501e8..b0b56472 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -721,7 +721,12 @@ def open_obs(self, time_interval=None): o.label = obs o.file = filename o.type = 'grid_data' - o.obj = obs_datasets[obs] + ds_obs = obs_datasets[obs] + if self.regrid: + o.obj = self.obs_regridders[obs](ds_obs) + # o.obj.to_netcdf(filename_regrid) + else: + o.obj = ds_obs self.obs[o.label] = o def pair_data(self, time_interval=None): From 5f741a4888a4d19ffe5d482296a456b4f29ca5e0 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 12 Feb 2023 13:03:09 -0700 Subject: [PATCH 38/64] Regrid models. --- melodies_monet/driver.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index b0b56472..4dcab7d7 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -673,7 +673,12 @@ def open_models(self, time_interval=None): m.variable_dict = self.control_dict['model'][mod]['variables'] if 'plot_kwargs' in self.control_dict['model'][mod].keys(): m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] - m.obj = model_datasets[mod] + ds_model = model_datasets[mod] + if self.regrid: + m.obj = self.model_regridders[mod](ds_model) + # m.obj.to_netcdf(filename_regrid) + else: + m.obj = ds_model self.models[m.label] = m def open_obs(self, time_interval=None): From b7a0b1fd7f4c255ae0b8ed60a9c88b722bfe60a9 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 12 Feb 2023 13:29:20 -0700 Subject: [PATCH 39/64] Added regrid_util.filename_regrid. --- melodies_monet/util/regrid_util.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index a2b0b20f..6de32f6f 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -34,3 +34,19 @@ def setup_regridder(config, config_group='obs'): return regridder_dict + +def filename_regrid(filename, regridder): + """ + Construct modified filename for regridded dataset + + Parameters + filename (str): filename of dataset + regridder (xe.Regridder): regridder instance + + Returns + filename_regrid (str): filename of regridded dataset + """ + filename_regrid = filename.replace('.nc', '_regrid.nc') + + return filename_regrid + From 15023d443f6f66e60acb5bca4b6c8294e96853ca Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 12 Feb 2023 13:36:24 -0700 Subject: [PATCH 40/64] Construct regrid filename before output of regridded dataset. --- melodies_monet/driver.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 4dcab7d7..faf73de4 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -675,8 +675,9 @@ def open_models(self, time_interval=None): m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] ds_model = model_datasets[mod] if self.regrid: - m.obj = self.model_regridders[mod](ds_model) - # m.obj.to_netcdf(filename_regrid) + regridder = self.model_regridders[mod] + m.obj = regridder(ds_model) + m.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) else: m.obj = ds_model self.models[m.label] = m @@ -698,6 +699,7 @@ def open_obs(self, time_interval=None): """ from .util import analysis_util from .util import read_grid_util + from .util import regrid_util if ('obs' in self.control_dict) and (not self.grid_data): for obs in self.control_dict['obs']: @@ -728,8 +730,9 @@ def open_obs(self, time_interval=None): o.type = 'grid_data' ds_obs = obs_datasets[obs] if self.regrid: - o.obj = self.obs_regridders[obs](ds_obs) - # o.obj.to_netcdf(filename_regrid) + regridder = self.obs_regridders[obs] + o.obj = regridder(ds_obs) + o.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) else: o.obj = ds_obs self.obs[o.label] = o From 4a22b5dd99b50716d2828ef81e2b0292de053925 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Sun, 12 Feb 2023 17:24:46 -0700 Subject: [PATCH 41/64] Start on gridded dataset docs. --- docs/background/gridded_datasets.rst | 5 +++++ docs/index.rst | 1 + 2 files changed, 6 insertions(+) create mode 100644 docs/background/gridded_datasets.rst diff --git a/docs/background/gridded_datasets.rst b/docs/background/gridded_datasets.rst new file mode 100644 index 00000000..27913aac --- /dev/null +++ b/docs/background/gridded_datasets.rst @@ -0,0 +1,5 @@ +Gridded Datasets +================ + +Gridded datasets + diff --git a/docs/index.rst b/docs/index.rst index 8fe2972f..4a5c15c9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,6 +47,7 @@ MONETIO please refer to: background/supported_analyses background/supported_plots background/supported_stats + background/gridded_datasets .. toctree:: :hidden: From eebb0ab96d2645e5aae32e4c42278c0d66aa7794 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 15 Jun 2023 12:56:02 -0600 Subject: [PATCH 42/64] Added file docs/background/time_chunking.rst. --- docs/background/time_chunking.rst | 5 +++++ docs/index.rst | 1 + 2 files changed, 6 insertions(+) create mode 100644 docs/background/time_chunking.rst diff --git a/docs/background/time_chunking.rst b/docs/background/time_chunking.rst new file mode 100644 index 00000000..0d0e9b0a --- /dev/null +++ b/docs/background/time_chunking.rst @@ -0,0 +1,5 @@ +Time Chunking +================ + +Time chunking + diff --git a/docs/index.rst b/docs/index.rst index 4a5c15c9..0bb6a0c0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,6 +47,7 @@ MONETIO please refer to: background/supported_analyses background/supported_plots background/supported_stats + background/time_chunking background/gridded_datasets .. toctree:: From 34ed45491c07d79df293f7d822f02028e8a7d6f0 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 22 Jun 2023 12:27:33 -0600 Subject: [PATCH 43/64] Renamed config option grid_data to longer time_chunking_with_gridded_data. --- .../process_grid_data/control_grid_data.yaml | 2 +- melodies_monet/driver.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_grid_data/control_grid_data.yaml index dc371cdf..6e586d86 100644 --- a/examples/process_grid_data/control_grid_data.yaml +++ b/examples/process_grid_data/control_grid_data.yaml @@ -4,9 +4,9 @@ analysis: time_interval: 'MS' output_dir: $HOME/Plots debug: True - grid_data: True regrid: False target_grid: $HOME/Data/Grids/cam_grid.nc + time_chunking_with_gridded_data: True obs: diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 7df52fa7..cbea3fea 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -458,7 +458,7 @@ def __init__(self): self.debug = False self.save = None self.read = None - self.grid_data = False # Default to False + self.time_chunking_with_gridded_data = False # Default to False self.regrid = False # Default to False self.target_grid = None self.obs_regridders = None @@ -533,9 +533,9 @@ def read_control(self, control=None): if 'read' in self.control_dict['analysis'].keys(): self.read = self.control_dict['analysis']['read'] - # set grid_data option, regrid option, and target_grid - if 'grid_data' in self.control_dict['analysis'].keys(): - self.grid_data = self.control_dict['analysis']['grid_data'] + # set time_chunking_with_gridded_data option, regrid option, and target_grid + if 'time_chunking_with_gridded_data' in self.control_dict['analysis'].keys(): + self.time_chunking_with_gridded_data = self.control_dict['analysis']['time_chunking_with_gridded_data'] if 'regrid' in self.control_dict['analysis'].keys(): self.regrid = self.control_dict['analysis']['regrid'] if 'target_grid' in self.control_dict['analysis'].keys(): @@ -639,7 +639,7 @@ def open_models(self, time_interval=None): ------- None """ - if ('model' in self.control_dict) and (not self.grid_data): + if ('model' in self.control_dict) and (not self.time_chunking_with_gridded_data): # open each model for mod in self.control_dict['model']: # create a new model instance @@ -710,7 +710,7 @@ def open_models(self, time_interval=None): m.open_model_files(time_interval=time_interval) self.models[m.label] = m - if ('model' in self.control_dict) and self.grid_data: + if ('model' in self.control_dict) and self.time_chunking_with_gridded_data: from .util import read_grid_util date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('model reading %s' % date_str) @@ -755,7 +755,7 @@ def open_obs(self, time_interval=None): from .util import read_grid_util from .util import regrid_util - if ('obs' in self.control_dict) and (not self.grid_data): + if ('obs' in self.control_dict) and (not self.time_chunking_with_gridded_data): for obs in self.control_dict['obs']: o = observation() o.obs = obs @@ -770,7 +770,7 @@ def open_obs(self, time_interval=None): o.open_obs(time_interval=time_interval) self.obs[o.label] = o - if ('obs' in self.control_dict) and self.grid_data: + if ('obs' in self.control_dict) and self.time_chunking_with_gridded_data: date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('obs reading %s' % date_str) obs_vars = analysis_util.get_obs_vars(self.control_dict) @@ -783,7 +783,7 @@ def open_obs(self, time_interval=None): o.obs = obs o.label = obs o.file = filename - o.type = 'grid_data' + o.type = 'gridded_data' ds_obs = obs_datasets[obs] if self.regrid: regridder = self.obs_regridders[obs] From 3ef410e2390393fce78bd02ed3d3238c1908eb0f Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 22 Jun 2023 12:31:31 -0600 Subject: [PATCH 44/64] Moved time chunking with gridded data example to process_gridded_data subdir. --- .../control_time_chunking_with_gridded_data.yaml} | 0 .../process_time_chunking_with_gridded_data.py} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/{process_grid_data/control_grid_data.yaml => process_gridded_data/control_time_chunking_with_gridded_data.yaml} (100%) rename examples/{process_grid_data/process_grid_data.py => process_gridded_data/process_time_chunking_with_gridded_data.py} (87%) diff --git a/examples/process_grid_data/control_grid_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml similarity index 100% rename from examples/process_grid_data/control_grid_data.yaml rename to examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml diff --git a/examples/process_grid_data/process_grid_data.py b/examples/process_gridded_data/process_time_chunking_with_gridded_data.py similarity index 87% rename from examples/process_grid_data/process_grid_data.py rename to examples/process_gridded_data/process_time_chunking_with_gridded_data.py index 0bc86cb1..9224a652 100644 --- a/examples/process_grid_data/process_grid_data.py +++ b/examples/process_gridded_data/process_time_chunking_with_gridded_data.py @@ -1,7 +1,7 @@ from melodies_monet import driver an = driver.analysis() -an.control = 'control_grid_data.yaml' +an.control = 'control_time_chunking_with_gridded_data.yaml' an.read_control() an.setup_regridders() From e56d43c7d5024b16965b15090a742fa092604771 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 29 Jun 2023 12:16:37 -0600 Subject: [PATCH 45/64] Use filename in config instead of filestr. --- melodies_monet/driver_time_chunking.py | 1604 ++++++++++++++++++++++++ melodies_monet/util/read_grid_util.py | 5 +- 2 files changed, 1606 insertions(+), 3 deletions(-) create mode 100644 melodies_monet/driver_time_chunking.py diff --git a/melodies_monet/driver_time_chunking.py b/melodies_monet/driver_time_chunking.py new file mode 100644 index 00000000..cbea3fea --- /dev/null +++ b/melodies_monet/driver_time_chunking.py @@ -0,0 +1,1604 @@ +# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration +# SPDX-License-Identifier: Apache-2.0 +# +""" +Drive the entire analysis package via the :class:`analysis` class. +""" +import monetio as mio +import monet as m +import os +import xarray as xr +import pandas as pd +import numpy as np +import datetime + +# from util import write_ncf + +__all__ = ( + "pair", + "observation", + "model", + "analysis", +) + + +class pair: + """The pair class. + + The pair class pairs model data + directly with observational data along time and space. + """ + + def __init__(self): + """Initialize a :class:`pair` object.""" + self.type = 'pt_sfc' + self.radius_of_influence = 1e6 + self.obs = None + self.model = None + self.model_vars = None + self.obs_vars = None + self.filename = None + + def __repr__(self): + return ( + f"{type(self).__name__}(\n" + f" type={self.type!r},\n" + f" radius_of_influence={self.radius_of_influence!r},\n" + f" obs={self.obs!r},\n" + f" model={self.model!r},\n" + f" model_vars={self.model_vars!r},\n" + f" obs_vars={self.obs_vars!r},\n" + f" filename={self.filename!r},\n" + ")" + ) + + def fix_paired_xarray(self, dset): + """Reformat the paired dataset. + + Parameters + ---------- + dset : xarray.Dataset + + Returns + ------- + xarray.Dataset + Reformatted paired dataset. + """ + # first convert to dataframe + df = dset.to_dataframe().reset_index(drop=True) + + # now get just the single site index + dfpsite = df.rename({'siteid': 'x'}, axis=1).drop_duplicates(subset=['x']) + columns = dfpsite.columns # all columns + site_columns = [ + 'latitude', + 'longitude', + 'x', + 'site', + 'msa_code', + 'cmsa_name', + 'epa_region', + 'state_name', + 'msa_name', + 'site', + 'utcoffset', + ] # only columns for single site identificaiton + + # site only xarray obj (no time dependence) + dfps = dfpsite.loc[:, columns[columns.isin(site_columns)]].set_index(['x']).to_xarray() # single column index + + # now pivot df and convert back to xarray using only non site_columns + site_columns.remove('x') # need to keep x to merge later + dfx = df.loc[:, df.columns[~df.columns.isin(site_columns)]].rename({'siteid': 'x'}, axis=1).set_index(['time', 'x']).to_xarray() + + # merge the time dependent and time independent + out = xr.merge([dfx, dfps]) + + # reset x index and add siteid back to the xarray object + if ~pd.api.types.is_numeric_dtype(out.x): + siteid = out.x.values + out['x'] = range(len(siteid)) + out['siteid'] = (('x'), siteid) + + return out + + +class observation: + """The observation class. + + A class with information and data from an observational dataset. + """ + + def __init__(self): + """Initialize an :class:`observation` object.""" + self.obs = None + self.label = None + self.file = None + self.obj = None + """The data object (:class:`pandas.DataFrame` or :class:`xarray.Dataset`).""" + self.type = 'pt_src' + self.data_proc = None + self.variable_dict = None + + def __repr__(self): + return ( + f"{type(self).__name__}(\n" + f" obs={self.obs!r},\n" + f" label={self.label!r},\n" + f" file={self.file!r},\n" + f" obj={repr(self.obj) if self.obj is None else '...'},\n" + f" type={self.type!r},\n" + f" type={self.data_proc!r},\n" + f" variable_dict={self.variable_dict!r},\n" + ")" + ) + + def open_obs(self, time_interval=None): + """Open the observational data, store data in observation pair, + and apply mask and scaling. + + Parameters + __________ + time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] + If not None, restrict obs to datetime range spanned by time interval [start, end]. + + Returns + ------- + None + """ + from glob import glob + from numpy import sort + from . import tutorial + + if self.file.startswith("example:"): + example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) + files = [tutorial.fetch_example(example_id)] + else: + files = sort(glob(self.file)) + + assert len(files) >= 1, "need at least one" + + _, extension = os.path.splitext(files[0]) + try: + if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}: + if len(files) > 1: + self.obj = xr.open_mfdataset(files) + else: + self.obj = xr.open_dataset(files[0]) + elif extension in ['.ict', '.icarrt']: + assert len(files) == 1, "monetio.icarrt.add_data can only read one file" + self.obj = mio.icarrt.add_data(files[0]) + else: + raise ValueError(f'extension {extension!r} currently unsupported') + except Exception as e: + print('something happened opening file:', e) + return + + self.mask_and_scale() # mask and scale values from the control values + self.filter_obs() + + def filter_obs(self): + """Filter observations based on filter_dict. + + Returns + ------- + None + """ + if self.data_proc is not None: + if 'filter_dict' in self.data_proc: + filter_dict = self.data_proc['filter_dict'] + for column in filter_dict.keys(): + filter_vals = filter_dict[column]['value'] + filter_op = filter_dict[column]['oper'] + if filter_op == 'isin': + self.obj = self.obj.where(self.obj[column].isin(filter_vals),drop=True) + elif filter_op == 'isnotin': + self.obj = self.obj.where(~self.obj[column].isin(filter_vals),drop=True) + elif filter_op == '==': + self.obj = self.obj.where(self.obj[column] == filter_vals,drop=True) + elif filter_op == '>': + self.obj = self.obj.where(self.obj[column] > filter_vals,drop=True) + elif filter_op == '<': + self.obj = self.obj.where(self.obj[column] < filter_vals,drop=True) + elif filter_op == '>=': + self.obj = self.obj.where(self.obj[column] >= filter_vals,drop=True) + elif filter_op == '<=': + self.obj = self.obj.where(self.obj[column] <= filter_vals,drop=True) + elif filter_op == '!=': + self.obj = self.obj.where(self.obj[column] != filter_vals,drop=True) + else: + raise ValueError(f'Filter operation {filter_op!r} is not supported') + + def mask_and_scale(self): + """Mask and scale observations, including unit conversions and setting + detection limits. + + Returns + ------- + None + """ + vars = self.obj.data_vars + if self.variable_dict is not None: + for v in vars: + if v in self.variable_dict: + d = self.variable_dict[v] + # Apply removal of min, max, and nan on the units in the obs file first. + if 'obs_min' in d: + self.obj[v].data = self.obj[v].where(self.obj[v] >= d['obs_min']) + if 'obs_max' in d: + self.obj[v].data = self.obj[v].where(self.obj[v] <= d['obs_max']) + if 'nan_value' in d: + self.obj[v].data = self.obj[v].where(self.obj[v] != d['nan_value']) + # Then apply a correction if needed for the units. + if 'unit_scale' in d: + scale = d['unit_scale'] + else: + scale = 1 + if 'unit_scale_method' in d: + if d['unit_scale_method'] == '*': + self.obj[v].data *= scale + elif d['unit_scale_method'] == '/': + self.obj[v].data /= scale + elif d['unit_scale_method'] == '+': + self.obj[v].data += scale + elif d['unit_scale_method'] == '-': + self.obj[v].data += -1 * scale + + def obs_to_df(self): + """Convert and reformat observation object (:attr:`obj`) to dataframe. + + Returns + ------- + None + """ + self.obj = self.obj.to_dataframe().reset_index().drop(['x', 'y'], axis=1) + + +class model: + """The model class. + + A class with information and data from model results. + """ + + def __init__(self): + """Initialize a :class:`model` object.""" + self.model = None + self.radius_of_influence = None + self.mod_kwargs = {} + self.file_str = None + self.files = None + self.file_vert_str = None + self.files_vert = None + self.file_surf_str = None + self.files_surf = None + self.file_pm25_str = None + self.files_pm25 = None + self.label = None + self.obj = None + self.mapping = None + self.variable_dict = None + self.plot_kwargs = None + self.proj = None + + def __repr__(self): + return ( + f"{type(self).__name__}(\n" + f" model={self.model!r},\n" + f" radius_of_influence={self.radius_of_influence!r},\n" + f" mod_kwargs={self.mod_kwargs!r},\n" + f" file_str={self.file_str!r},\n" + f" label={self.label!r},\n" + f" obj={repr(self.obj) if self.obj is None else '...'},\n" + f" mapping={self.mapping!r},\n" + f" label={self.label!r},\n" + " ...\n" + ")" + ) + + def glob_files(self): + """Convert the model file location string read in by the yaml file + into a list of files containing all model data. + + Returns + ------- + None + """ + from numpy import sort # TODO: maybe use `sorted` for this + from glob import glob + from . import tutorial + + print(self.file_str) + if self.file_str.startswith("example:"): + example_id = ":".join(s.strip() for s in self.file_str.split(":")[1:]) + self.files = [tutorial.fetch_example(example_id)] + else: + self.files = sort(glob(self.file_str)) + + if self.file_vert_str is not None: + self.files_vert = sort(glob(self.file_vert_str)) + if self.file_surf_str is not None: + self.files_surf = sort(glob(self.file_surf_str)) + if self.file_pm25_str is not None: + self.files_pm25 = sort(glob(self.file_pm25_str)) + + def open_model_files(self, time_interval=None): + """Open the model files, store data in :class:`model` instance attributes, + and apply mask and scaling. + + Models supported are cmaq, wrfchem, rrfs, and gsdchem. + If a model is not supported, MELODIES-MONET will try to open + the model data using a generic reader. If you wish to include new + models, add the new model option to this module. + + Parameters + __________ + time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] + If not None, restrict models to datetime range spanned by time interval [start, end]. + + Returns + ------- + None + """ + + self.glob_files() + # Calculate species to input into MONET, so works for all mechanisms in wrfchem + # I want to expand this for the other models too when add aircraft data. + list_input_var = [] + for obs_map in self.mapping: + list_input_var = list_input_var + list(set(self.mapping[obs_map].keys()) - set(list_input_var)) + #Only certain models need this option for speeding up i/o. + if 'cmaq' in self.model.lower(): + print('**** Reading CMAQ model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + if self.files_vert is not None: + self.mod_kwargs.update({'fname_vert' : self.files_vert}) + if self.files_surf is not None: + self.mod_kwargs.update({'fname_surf' : self.files_surf}) + if len(self.files) > 1: + self.mod_kwargs.update({'concatenate_forecasts' : True}) + self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'wrfchem' in self.model.lower(): + print('**** Reading WRF-Chem model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'rrfs' in self.model.lower(): + print('**** Reading RRFS-CMAQ model output...') + if self.files_pm25 is not None: + self.mod_kwargs.update({'fname_pm25' : self.files_pm25}) + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'gsdchem' in self.model.lower(): + print('**** Reading GSD-Chem model output...') + if len(self.files) > 1: + self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs) + elif 'cesm_fv' in self.model.lower(): + print('**** Reading CESM FV model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs) + # CAM-chem-SE grid or MUSICAv0 + elif 'cesm_se' in self.model.lower(): + print('**** Reading CESM SE model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + if self.scrip_file.startswith("example:"): + from . import tutorial + example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:]) + self.scrip_file = tutorial.fetch_example(example_id) + self.mod_kwargs.update({'scrip_file' : self.scrip_file}) + self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs) + #self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs) + #self.obj.monet.scrip = self.obj_scrip + elif 'raqms' in self.model.lower(): + if len(self.files) > 1: + self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) + else: + print('**** Reading Unspecified model output. Take Caution...') + if len(self.files) > 1: + self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs) + self.mask_and_scale() + + def mask_and_scale(self): + """Mask and scale model data including unit conversions. + + Returns + ------- + None + """ + vars = self.obj.data_vars + if self.variable_dict is not None: + for v in vars: + if v in self.variable_dict: + d = self.variable_dict[v] + if 'unit_scale' in d: + scale = d['unit_scale'] + else: + scale = 1 + if 'unit_scale_method' in d: + if d['unit_scale_method'] == '*': + self.obj[v].data *= scale + elif d['unit_scale_method'] == '/': + self.obj[v].data /= scale + elif d['unit_scale_method'] == '+': + self.obj[v].data += scale + elif d['unit_scale_method'] == '-': + self.obj[v].data += -1 * scale + +class analysis: + """The analysis class. + + The analysis class is the highest + level class and stores all information about the analysis. It reads + and stores information from the input yaml file and defines + overarching analysis information like the start and end time, which + models and observations to pair, etc. + """ + + def __init__(self): + """Initialize an :class:`analysis` object.""" + self.control = 'control.yaml' + self.control_dict = None + self.models = {} + """dict : Models, set by :meth:`open_models`.""" + self.obs = {} + """dict : Observations, set by :meth:`open_obs`.""" + self.paired = {} + """dict : Paired data, set by :meth:`pair_data`.""" + self.start_time = None + self.end_time = None + self.time_intervals = None + self.download_maps = True # Default to True + self.output_dir = None + self.output_dir_save = None + self.output_dir_read = None + self.debug = False + self.save = None + self.read = None + self.time_chunking_with_gridded_data = False # Default to False + self.regrid = False # Default to False + self.target_grid = None + self.obs_regridders = None + self.model_regridders = None + + def __repr__(self): + return ( + f"{type(self).__name__}(\n" + f" control={self.control!r},\n" + f" control_dict={repr(self.control_dict) if self.control_dict is None else '...'},\n" + f" models={self.models!r},\n" + f" obs={self.obs!r},\n" + f" paired={self.paired!r},\n" + f" start_time={self.start_time!r},\n" + f" end_time={self.end_time!r},\n" + f" time_intervals={self.time_intervals!r},\n" + f" download_maps={self.download_maps!r},\n" + f" output_dir={self.output_dir!r},\n" + f" output_dir_save={self.output_dir_save!r},\n" + f" output_dir_read={self.output_dir_read!r},\n" + f" debug={self.debug!r},\n" + f" save={self.save!r},\n" + f" read={self.read!r},\n" + ")" + ) + + def read_control(self, control=None): + """Read the input yaml file, + updating various :class:`analysis` instance attributes. + + Parameters + ---------- + control : str + Input yaml file path. + If provided, :attr:`control` will be set to this value. + + Returns + ------- + None + """ + import yaml + + if control is not None: + self.control = control + + with open(self.control, 'r') as stream: + self.control_dict = yaml.safe_load(stream) + + # set analysis time + self.start_time = pd.Timestamp(self.control_dict['analysis']['start_time']) + self.end_time = pd.Timestamp(self.control_dict['analysis']['end_time']) + if 'output_dir' in self.control_dict['analysis'].keys(): + self.output_dir = os.path.expandvars( + self.control_dict['analysis']['output_dir']) + else: + raise Exception('output_dir was not specified and is required. Please set analysis.output_dir in the control file.') + if 'output_dir_save' in self.control_dict['analysis'].keys(): + self.output_dir_save = os.path.expandvars( + self.control_dict['analysis']['output_dir_save']) + else: + self.output_dir_save=self.output_dir + if 'output_dir_read' in self.control_dict['analysis'].keys(): + if self.control_dict['analysis']['output_dir_read'] is not None: + self.output_dir_read = os.path.expandvars( + self.control_dict['analysis']['output_dir_read']) + else: + self.output_dir_read=self.output_dir + + self.debug = self.control_dict['analysis']['debug'] + if 'save' in self.control_dict['analysis'].keys(): + self.save = self.control_dict['analysis']['save'] + if 'read' in self.control_dict['analysis'].keys(): + self.read = self.control_dict['analysis']['read'] + + # set time_chunking_with_gridded_data option, regrid option, and target_grid + if 'time_chunking_with_gridded_data' in self.control_dict['analysis'].keys(): + self.time_chunking_with_gridded_data = self.control_dict['analysis']['time_chunking_with_gridded_data'] + if 'regrid' in self.control_dict['analysis'].keys(): + self.regrid = self.control_dict['analysis']['regrid'] + if 'target_grid' in self.control_dict['analysis'].keys(): + self.target_grid = self.control_dict['analysis']['target_grid'] + + # generate time intervals for time chunking + if 'time_interval' in self.control_dict['analysis'].keys(): + time_stamps = pd.date_range( + start=self.start_time, end=self.end_time, + freq=self.control_dict['analysis']['time_interval']) + # if (end_time - start_time) is not an integer multiple + # of freq, append end_time to time_stamps + if time_stamps[-1] < pd.Timestamp(self.end_time): + time_stamps = time_stamps.append( + pd.DatetimeIndex([self.end_time])) + self.time_intervals \ + = [[time_stamps[n], time_stamps[n+1]] + for n in range(len(time_stamps)-1)] + + # Enable Dask progress bars? (default: false) + enable_dask_progress_bars = self.control_dict["analysis"].get( + "enable_dask_progress_bars", False) + if enable_dask_progress_bars: + from dask.diagnostics import ProgressBar + + ProgressBar().register() + else: + from dask.callbacks import Callback + + Callback.active = set() + + def save_analysis(self): + """Save all analysis attributes listed in analysis section of input yaml file. + + Returns + ------- + None + """ + if self.save is not None: + # Loop over each possible attr type (models, obs and paired) + for attr in self.save: + if self.save[attr]['method']=='pkl': + from .util.write_util import write_pkl + write_pkl(obj=getattr(self,attr), output_name=os.path.join(self.output_dir_save,self.save[attr]['output_name'])) + + elif self.save[attr]['method']=='netcdf': + from .util.write_util import write_analysis_ncf + # save either all groups or selected groups + if self.save[attr]['data']=='all': + if 'prefix' in self.save[attr]: + write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save, + fn_prefix=self.save[attr]['prefix']) + else: + write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save) + else: + if 'prefix' in self.save[attr]: + write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save, + fn_prefix=self.save[attr]['prefix'], keep_groups=self.save[attr]['data']) + else: + write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save, + keep_groups=self.save[attr]['data']) + + def read_analysis(self): + """Read all previously saved analysis attributes listed in analysis section of input yaml file. + + Returns + ------- + None + """ + if self.read is not None: + # Loop over each possible attr type (models, obs and paired) + from .util.read_util import read_saved_data + for attr in self.read: + if self.read[attr]['method']=='pkl': + read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='pkl', attr=attr) + elif self.read[attr]['method']=='netcdf': + read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='netcdf', attr=attr) + + def setup_regridders(self): + """Create an obs xesmf.Regridder from base and target grids specified in the control_dict + + Returns + ------- + None + """ + from .util import regrid_util + if self.regrid: + self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') + self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') + + def open_models(self, time_interval=None): + """Open all models listed in the input yaml file and create a :class:`model` + object for each of them, populating the :attr:`models` dict. + + Parameters + __________ + time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] + If not None, restrict models to datetime range spanned by time interval [start, end]. + + Returns + ------- + None + """ + if ('model' in self.control_dict) and (not self.time_chunking_with_gridded_data): + # open each model + for mod in self.control_dict['model']: + # create a new model instance + m = model() + # this is the model type (ie cmaq, rapchem, gsdchem etc) + m.model = self.control_dict['model'][mod]['mod_type'] + # set the model label in the dictionary and model class instance + if 'radius_of_influence' in self.control_dict['model'][mod].keys(): + m.radius_of_influence = self.control_dict['model'][mod]['radius_of_influence'] + else: + m.radius_of_influence = 1e6 + if 'mod_kwargs' in self.control_dict['model'][mod].keys(): + m.mod_kwargs = self.control_dict['model'][mod]['mod_kwargs'] + m.label = mod + # create file string (note this can include hot strings) + m.file_str = os.path.expandvars( + self.control_dict['model'][mod]['files']) + if 'files_vert' in self.control_dict['model'][mod].keys(): + m.file_vert_str = os.path.expandvars( + self.control_dict['model'][mod]['files_vert']) + if 'files_surf' in self.control_dict['model'][mod].keys(): + m.file_surf_str = os.path.expandvars( + self.control_dict['model'][mod]['files_surf']) + if 'files_pm25' in self.control_dict['model'][mod].keys(): + m.file_pm25_str = os.path.expandvars( + self.control_dict['model'][mod]['files_pm25']) + # create mapping + m.mapping = self.control_dict['model'][mod]['mapping'] + # add variable dict + if 'variables' in self.control_dict['model'][mod].keys(): + m.variable_dict = self.control_dict['model'][mod]['variables'] + if 'plot_kwargs' in self.control_dict['model'][mod].keys(): + m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] + + # unstructured grid check + if m.model in ['cesm_se']: + if 'scrip_file' in self.control_dict['model'][mod].keys(): + m.scrip_file = self.control_dict['model'][mod]['scrip_file'] + else: + raise ValueError( '"Scrip_file" must be provided for unstructured grid output!' ) + + # maybe set projection + proj_in = self.control_dict['model'][mod].get("projection") + if proj_in == "None": + print( + f"NOTE: model.{mod}.projection is {proj_in!r} (str), " + "but we assume you want `None` (Python null sentinel). " + "To avoid this warning, " + "update your control file to remove the projection setting " + "or set to `~` or `null` if you want null value in YAML." + ) + proj_in = None + if proj_in is not None: + if isinstance(proj_in, str) and proj_in.startswith("model:"): + m.proj = proj_in + elif isinstance(proj_in, str) and proj_in.startswith("ccrs."): + import cartopy.crs as ccrs + m.proj = eval(proj_in) + else: + import cartopy.crs as ccrs + + if isinstance(proj_in, ccrs.Projection): + m.proj = proj_in + else: + m.proj = ccrs.Projection(proj_in) + + # open the model + m.open_model_files(time_interval=time_interval) + self.models[m.label] = m + + if ('model' in self.control_dict) and self.time_chunking_with_gridded_data: + from .util import read_grid_util + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('model reading %s' % date_str) + filename, model_datasets = read_grid_util.read_grid_models( + self.control_dict, date_str) + print(model_datasets) + for mod in model_datasets: + m = model() + m.model = mod + m.label = mod + m.file_str = filename + m.mapping = self.control_dict['model'][mod]['mapping'] + if 'variables' in self.control_dict['model'][mod].keys(): + m.variable_dict = self.control_dict['model'][mod]['variables'] + if 'plot_kwargs' in self.control_dict['model'][mod].keys(): + m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] + ds_model = model_datasets[mod] + if self.regrid: + regridder = self.model_regridders[mod] + m.obj = regridder(ds_model) + m.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) + else: + m.obj = ds_model + self.models[m.label] = m + + def open_obs(self, time_interval=None): + """Open all observations listed in the input yaml file and create an + :class:`observation` instance for each of them, + populating the :attr:`obs` dict. + + Parameters + __________ + time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] + If not None, restrict obs to datetime range spanned by time interval [start, end]. + + + Returns + ------- + None + """ + from .util import analysis_util + from .util import read_grid_util + from .util import regrid_util + + if ('obs' in self.control_dict) and (not self.time_chunking_with_gridded_data): + for obs in self.control_dict['obs']: + o = observation() + o.obs = obs + o.label = obs + o.obs_type = self.control_dict['obs'][obs]['obs_type'] + if 'data_proc' in self.control_dict['obs'][obs].keys(): + o.data_proc = self.control_dict['obs'][obs]['data_proc'] + o.file = os.path.expandvars( + self.control_dict['obs'][obs]['filename']) + if 'variables' in self.control_dict['obs'][obs].keys(): + o.variable_dict = self.control_dict['obs'][obs]['variables'] + o.open_obs(time_interval=time_interval) + self.obs[o.label] = o + + if ('obs' in self.control_dict) and self.time_chunking_with_gridded_data: + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('obs reading %s' % date_str) + obs_vars = analysis_util.get_obs_vars(self.control_dict) + print(obs_vars) + filename, obs_datasets = read_grid_util.read_grid_obs( + self.control_dict, obs_vars, date_str) + print(obs_datasets) + for obs in obs_vars: + o = observation() + o.obs = obs + o.label = obs + o.file = filename + o.type = 'gridded_data' + ds_obs = obs_datasets[obs] + if self.regrid: + regridder = self.obs_regridders[obs] + o.obj = regridder(ds_obs) + o.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) + else: + o.obj = ds_obs + self.obs[o.label] = o + + def pair_data(self, time_interval=None): + """Pair all observations and models in the analysis class + (i.e., those listed in the input yaml file) together, + populating the :attr:`paired` dict. + + Parameters + __________ + time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] + If not None, restrict pairing to datetime range spanned by time interval [start, end]. + + + Returns + ------- + None + """ + pairs = {} # TODO: unused + for model_label in self.models: + mod = self.models[model_label] + # Now we have the models we need to loop through the mapping table for each network and pair the data + # each paired dataset will be output to a netcdf file with 'model_label_network.nc' + for obs_to_pair in mod.mapping.keys(): + # get the variables to pair from the model data (ie don't pair all data) + keys = [key for key in mod.mapping[obs_to_pair].keys()] + obs_vars = [mod.mapping[obs_to_pair][key] for key in keys] + + # unstructured grid check - lon/lat variables should be explicitly added + # in addition to comparison variables + if mod.obj.attrs.get("mio_scrip_file", False): + lonlat_list = [ 'lon', 'lat', 'longitude', 'latitude', 'Longitude', 'Latitude' ] + for ll in lonlat_list: + if ll in mod.obj.data_vars: + keys += [ll] + model_obj = mod.obj[keys] + + ## TODO: add in ability for simple addition of variables from + + # simplify the objs object with the correct mapping variables + obs = self.obs[obs_to_pair] + + # pair the data + # if pt_sfc (surface point network or monitor) + if obs.obs_type.lower() == 'pt_sfc': + # convert this to pandas dataframe unless already done because second time paired this obs + if not isinstance(obs.obj, pd.DataFrame): + obs.obs_to_df() + #Check if z dim is larger than 1. If so select, the first level as all models read through + #MONETIO will be reordered such that the first level is the level nearest to the surface. + try: + if model_obj.sizes['z'] > 1: + # Select only the surface values to pair with obs. + model_obj = model_obj.isel(z=0).expand_dims('z',axis=1) + except KeyError as e: + raise Exception("MONET requires an altitude dimension named 'z'") from e + # now combine obs with + paired_data = model_obj.monet.combine_point(obs.obj, radius_of_influence=mod.radius_of_influence, suffix=mod.label) + if self.debug: + print('After pairing: ', paired_data) + # this outputs as a pandas dataframe. Convert this to xarray obj + p = pair() + p.obs = obs.label + p.model = mod.label + p.model_vars = keys + p.obs_vars = obs_vars + p.filename = '{}_{}.nc'.format(p.obs, p.model) + p.obj = paired_data.monet._df_to_da() + label = "{}_{}".format(p.obs, p.model) + self.paired[label] = p + p.obj = p.fix_paired_xarray(dset=p.obj) + # write_util.write_ncf(p.obj,p.filename) # write out to file + # TODO: add other network types / data types where (ie flight, satellite etc) + + def concat_pairs(self): + """Read and concatenate all observation and model time interval pair data, + populating the :attr:`paired` dict. + + Returns + ------- + None + """ + pass + + ### TODO: Create the plotting driver (most complicated one) + # def plotting(self): + def plotting(self): + """Cycle through all the plotting groups (e.g., plot_grp1) listed in + the input yaml file and create the plots. + + This routine loops over all the domains and + model/obs pairs specified in the plotting group (``.control_dict['plots']``) + for all the variables specified in the mapping dictionary listed in + :attr:`paired`. + + Creates plots stored in the file location specified by output_dir + in the analysis section of the yaml file. + + Returns + ------- + None + """ + import matplotlib.pyplot as plt + + from .plots import surfplots as splots, savefig + + # Disable figure count warning + initial_max_fig = plt.rcParams["figure.max_open_warning"] + plt.rcParams["figure.max_open_warning"] = 0 + + # first get the plotting dictionary from the yaml file + plot_dict = self.control_dict['plots'] + # Calculate any items that do not need to recalculate each loop. + startdatename = str(datetime.datetime.strftime(self.start_time, '%Y-%m-%d_%H')) + enddatename = str(datetime.datetime.strftime(self.end_time, '%Y-%m-%d_%H')) + # now we are going to loop through each plot_group (note we can have multiple plot groups) + # a plot group can have + # 1) a singular plot type + # 2) multiple paired datasets or model datasets depending on the plot type + # 3) kwargs for creating the figure ie size and marker (note the default for obs is 'x') + for grp, grp_dict in plot_dict.items(): + pair_labels = grp_dict['data'] + # get the plot type + plot_type = grp_dict['type'] + + # first get the observational obs labels + pair1 = self.paired[list(self.paired.keys())[0]] + obs_vars = pair1.obs_vars + + # loop through obs variables + for obsvar in obs_vars: + # Loop also over the domain types. So can easily create several overview and zoomed in plots. + domain_types = grp_dict['domain_type'] + domain_names = grp_dict['domain_name'] + for domain in range(len(domain_types)): + domain_type = domain_types[domain] + domain_name = domain_names[domain] + + # Then loop through each of the pairs to add to the plot. + for p_index, p_label in enumerate(pair_labels): + p = self.paired[p_label] + # find the pair model label that matches the obs var + index = p.obs_vars.index(obsvar) + modvar = p.model_vars[index] + + # Adjust the modvar as done in pairing script, if the species name in obs and model are the same. + if obsvar == modvar: + modvar = modvar + '_new' + + # convert to dataframe + pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"]) + + # Select only the analysis time window. + pairdf_all = pairdf_all.loc[self.start_time : self.end_time] + + # Determine the default plotting colors. + if 'default_plot_kwargs' in grp_dict.keys(): + if self.models[p.model].plot_kwargs is not None: + plot_dict = {**grp_dict['default_plot_kwargs'], **self.models[p.model].plot_kwargs} + else: + plot_dict = {**grp_dict['default_plot_kwargs'], **splots.calc_default_colors(p_index)} + obs_dict = grp_dict['default_plot_kwargs'] + else: + if self.models[p.model].plot_kwargs is not None: + plot_dict = self.models[p.model].plot_kwargs.copy() + else: + plot_dict = splots.calc_default_colors(p_index).copy() + obs_dict = None + + # Determine figure_kwargs and text_kwargs + if 'fig_kwargs' in grp_dict.keys(): + fig_dict = grp_dict['fig_kwargs'] + else: + fig_dict = None + if 'text_kwargs' in grp_dict.keys(): + text_dict = grp_dict['text_kwargs'] + else: + text_dict = None + + # Read in some plotting specifications stored with observations. + if self.obs[p.obs].variable_dict is not None: + if obsvar in self.obs[p.obs].variable_dict.keys(): + obs_plot_dict = self.obs[p.obs].variable_dict[obsvar].copy() + else: + obs_plot_dict = {} + else: + obs_plot_dict = {} + + # Specify ylabel if noted in yaml file. + if 'ylabel_plot' in obs_plot_dict.keys(): + use_ylabel = obs_plot_dict['ylabel_plot'] + else: + use_ylabel = None + + # Determine if set axis values or use defaults + if grp_dict['data_proc']['set_axis'] == True: + if obs_plot_dict: # Is not null + set_yaxis = True + else: + print('Warning: variables dict for ' + obsvar + ' not provided, so defaults used') + set_yaxis = False + else: + set_yaxis = False + + # Determine to calculate mean values or percentile + if 'percentile_opt' in obs_plot_dict.keys(): + use_percentile = obs_plot_dict['percentile_opt'] + else: + use_percentile = None + + # Determine outname + outname = "{}.{}.{}.{}.{}.{}.{}".format(grp, plot_type, obsvar, startdatename, enddatename, domain_type, domain_name) + + # Query selected points if applicable + if domain_type != 'all': + pairdf_all.query(domain_type + ' == ' + '"' + domain_name + '"', inplace=True) + + # Query with filter options + if 'filter_dict' in grp_dict['data_proc'] and 'filter_string' in grp_dict['data_proc']: + raise Exception("""For plot group: {}, only one of filter_dict and filter_string can be specified.""".format(grp)) + elif 'filter_dict' in grp_dict['data_proc']: + filter_dict = grp_dict['data_proc']['filter_dict'] + for column in filter_dict.keys(): + filter_vals = filter_dict[column]['value'] + filter_op = filter_dict[column]['oper'] + if filter_op == 'isin': + pairdf_all.query(f'{column} == {filter_vals}', inplace=True) + elif filter_op == 'isnotin': + pairdf_all.query(f'{column} != {filter_vals}', inplace=True) + else: + pairdf_all.query(f'{column} {filter_op} {filter_vals}', inplace=True) + elif 'filter_string' in grp_dict['data_proc']: + pairdf_all.query(grp_dict['data_proc']['filter_string'], inplace=True) + + # Drop sites with greater than X percent NAN values + if 'rem_obs_by_nan_pct' in grp_dict['data_proc']: + grp_var = grp_dict['data_proc']['rem_obs_by_nan_pct']['group_var'] + pct_cutoff = grp_dict['data_proc']['rem_obs_by_nan_pct']['pct_cutoff'] + + if grp_dict['data_proc']['rem_obs_by_nan_pct']['times'] == 'hourly': + # Select only hours at the hour + hourly_pairdf_all = pairdf_all.reset_index().loc[pairdf_all.reset_index()['time'].dt.minute==0,:] + + # calculate total obs count, obs count with nan removed, and nan percent for each group + grp_fullcount = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) + grp_nonan_count = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values + else: + # calculate total obs count, obs count with nan removed, and nan percent for each group + grp_fullcount = pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) + grp_nonan_count = pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values + + grp_pct_nan = 100 - grp_nonan_count.div(grp_fullcount,axis=0)*100 + + # make list of sites meeting condition and select paired data by this by this + grp_select = grp_pct_nan.query(obsvar + ' < ' + str(pct_cutoff)).reset_index() + pairdf_all = pairdf_all.loc[pairdf_all[grp_var].isin(grp_select[grp_var].values)] + + # Drop NaNs + if grp_dict['data_proc']['rem_obs_nan'] == True: + # I removed drop=True in reset_index in order to keep 'time' as a column. + pairdf = pairdf_all.reset_index().dropna(subset=[modvar, obsvar]) + else: + print('Warning: set rem_obs_nan = True for regulatory metrics') + pairdf = pairdf_all.reset_index().dropna(subset=[modvar]) + + # JianHe: do we need provide a warning if pairdf is empty (no valid obsdata) for specific subdomain? + if pairdf.empty or pairdf[obsvar].isnull().all(): + print('Warning: no valid obs found for '+domain_name) + continue + + # JianHe: Determine if calculate regulatory values + cal_reg = obs_plot_dict.get('regulatory', False) + + if cal_reg: + # Reset use_ylabel for regulatory calculations + if 'ylabel_reg_plot' in obs_plot_dict.keys(): + use_ylabel = obs_plot_dict['ylabel_reg_plot'] + else: + use_ylabel = None + + df2 = ( + pairdf.copy() + .groupby("siteid") + .resample('H', on='time_local') + .mean() + .reset_index() + ) + + if obsvar == 'PM2.5': + pairdf_reg = splots.make_24hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) + elif obsvar == 'OZONE': + pairdf_reg = splots.make_8hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) + else: + print('Warning: no regulatory calculations found for ' + obsvar + '. Skipping plot.') + del df2 + continue + del df2 + if len(pairdf_reg[obsvar+'_reg']) == 0: + print('No valid data for '+obsvar+'_reg. Skipping plot.') + continue + else: + # Reset outname for regulatory options + outname = "{}.{}.{}.{}.{}.{}.{}".format(grp, plot_type, obsvar+'_reg', startdatename, enddatename, domain_type, domain_name) + else: + pairdf_reg = None + + if plot_type.lower() == 'spatial_bias': + if use_percentile is None: + outname = outname+'.mean' + else: + outname = outname+'.p'+'{:02d}'.format(use_percentile) + + if self.output_dir is not None: + outname = self.output_dir + '/' + outname # Extra / just in case. + + # Types of plots + if plot_type.lower() == 'timeseries': + if set_yaxis == True: + if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')): + vmin = obs_plot_dict['vmin_plot'] + vmax = obs_plot_dict['vmax_plot'] + else: + print('Warning: vmin_plot and vmax_plot not specified for ' + obsvar + ', so default used.') + vmin = None + vmax = None + else: + vmin = None + vmax = None + # Select time to use as index. + pairdf = pairdf.set_index(grp_dict['data_proc']['ts_select_time']) + a_w = grp_dict['data_proc']['ts_avg_window'] + if p_index == 0: + # First plot the observations. + ax = splots.make_timeseries( + pairdf, + pairdf_reg, + column=obsvar, + label=p.obs, + avg_window=a_w, + ylabel=use_ylabel, + vmin=vmin, + vmax=vmax, + domain_type=domain_type, + domain_name=domain_name, + plot_dict=obs_dict, + fig_dict=fig_dict, + text_dict=text_dict, + debug=self.debug + ) + # For all p_index plot the model. + ax = splots.make_timeseries( + pairdf, + pairdf_reg, + column=modvar, + label=p.model, + ax=ax, + avg_window=a_w, + ylabel=use_ylabel, + vmin=vmin, + vmax=vmax, + domain_type=domain_type, + domain_name=domain_name, + plot_dict=plot_dict, + text_dict=text_dict, + debug=self.debug + ) + # At the end save the plot. + if p_index == len(pair_labels) - 1: + savefig(outname + '.png', logo_height=150) + del (ax, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear axis for next plot. + if plot_type.lower() == 'boxplot': + if set_yaxis == True: + if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')): + vmin = obs_plot_dict['vmin_plot'] + vmax = obs_plot_dict['vmax_plot'] + else: + print('Warning: vmin_plot and vmax_plot not specified for ' + obsvar + ', so default used.') + vmin = None + vmax = None + else: + vmin = None + vmax = None + # First for p_index = 0 create the obs box plot data array. + if p_index == 0: + comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=obsvar, + label=p.obs, plot_dict=obs_dict) + # Then add the models to this dataarray. + comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=modvar, label=p.model, + plot_dict=plot_dict, comb_bx=comb_bx, + label_bx=label_bx) + # For the last p_index make the plot. + if p_index == len(pair_labels) - 1: + splots.make_boxplot( + comb_bx, + label_bx, + ylabel=use_ylabel, + vmin=vmin, + vmax=vmax, + outname=outname, + domain_type=domain_type, + domain_name=domain_name, + plot_dict=obs_dict, + fig_dict=fig_dict, + text_dict=text_dict, + debug=self.debug + ) + #Clear info for next plot. + del (comb_bx, label_bx, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) + elif plot_type.lower() == 'taylor': + if set_yaxis == True: + if 'ty_scale' in obs_plot_dict.keys(): + ty_scale = obs_plot_dict['ty_scale'] + else: + print('Warning: ty_scale not specified for ' + obsvar + ', so default used.') + ty_scale = 1.5 # Use default + else: + ty_scale = 1.5 # Use default + if p_index == 0: + # Plot initial obs/model + dia = splots.make_taylor( + pairdf, + pairdf_reg, + column_o=obsvar, + label_o=p.obs, + column_m=modvar, + label_m=p.model, + ylabel=use_ylabel, + ty_scale=ty_scale, + domain_type=domain_type, + domain_name=domain_name, + plot_dict=plot_dict, + fig_dict=fig_dict, + text_dict=text_dict, + debug=self.debug + ) + else: + # For the rest, plot on top of dia + dia = splots.make_taylor( + pairdf, + pairdf_reg, + column_o=obsvar, + label_o=p.obs, + column_m=modvar, + label_m=p.model, + dia=dia, + ylabel=use_ylabel, + ty_scale=ty_scale, + domain_type=domain_type, + domain_name=domain_name, + plot_dict=plot_dict, + text_dict=text_dict, + debug=self.debug + ) + # At the end save the plot. + if p_index == len(pair_labels) - 1: + savefig(outname + '.png', logo_height=70) + del (dia, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. + elif plot_type.lower() == 'spatial_bias': + if set_yaxis == True: + if 'vdiff_plot' in obs_plot_dict.keys(): + vdiff = obs_plot_dict['vdiff_plot'] + else: + print('Warning: vdiff_plot not specified for ' + obsvar + ', so default used.') + vdiff = None + else: + vdiff = None + # p_label needs to be added to the outname for this plot + outname = "{}.{}".format(outname, p_label) + splots.make_spatial_bias( + pairdf, + pairdf_reg, + column_o=obsvar, + label_o=p.obs, + column_m=modvar, + label_m=p.model, + ylabel=use_ylabel, + ptile=use_percentile, + vdiff=vdiff, + outname=outname, + domain_type=domain_type, + domain_name=domain_name, + fig_dict=fig_dict, + text_dict=text_dict, + debug=self.debug + ) + del (fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. + elif plot_type.lower() == 'spatial_bias_exceedance': + if cal_reg: + if set_yaxis == True: + if 'vdiff_reg_plot' in obs_plot_dict.keys(): + vdiff = obs_plot_dict['vdiff_reg_plot'] + else: + print('Warning: vdiff_reg_plot not specified for ' + obsvar + ', so default used.') + vdiff = None + else: + vdiff = None + + # p_label needs to be added to the outname for this plot + outname = "{}.{}".format(outname, p_label) + splots.make_spatial_bias_exceedance( + pairdf_reg, + column_o=obsvar+'_reg', + label_o=p.obs, + column_m=modvar+'_reg', + label_m=p.model, + ylabel=use_ylabel, + vdiff=vdiff, + outname=outname, + domain_type=domain_type, + domain_name=domain_name, + fig_dict=fig_dict, + text_dict=text_dict, + debug=self.debug + ) + del (fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. + else: + print('Warning: spatial_bias_exceedance plot only works when regulatory=True.') + # JianHe: need updates to include regulatory option for overlay plots + elif plot_type.lower() == 'spatial_overlay': + if set_yaxis == True: + if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot', 'nlevels_plot')): + vmin = obs_plot_dict['vmin_plot'] + vmax = obs_plot_dict['vmax_plot'] + nlevels = obs_plot_dict['nlevels_plot'] + elif all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')): + vmin = obs_plot_dict['vmin_plot'] + vmax = obs_plot_dict['vmax_plot'] + nlevels = None + else: + print('Warning: vmin_plot and vmax_plot not specified for ' + obsvar + ', so default used.') + vmin = None + vmax = None + nlevels = None + else: + vmin = None + vmax = None + nlevels = None + #Check if z dim is larger than 1. If so select, the first level as all models read through + #MONETIO will be reordered such that the first level is the level nearest to the surface. + # Create model slice and select time window for spatial plots + try: + self.models[p.model].obj.sizes['z'] + if self.models[p.model].obj.sizes['z'] > 1: #Select only surface values. + vmodel = self.models[p.model].obj.isel(z=0).expand_dims('z',axis=1).loc[ + dict(time=slice(self.start_time, self.end_time))] + else: + vmodel = self.models[p.model].obj.loc[dict(time=slice(self.start_time, self.end_time))] + except KeyError as e: + raise Exception("MONET requires an altitude dimension named 'z'") from e + + # Determine proj to use for spatial plots + proj = splots.map_projection(self.models[p.model]) + # p_label needs to be added to the outname for this plot + outname = "{}.{}".format(outname, p_label) + # For just the spatial overlay plot, you do not use the model data from the pair file + # So get the variable name again since pairing one could be _new. + # JianHe: only make overplay plots for non-regulatory variables for now + if not cal_reg: + splots.make_spatial_overlay( + pairdf, + vmodel, + column_o=obsvar, + label_o=p.obs, + column_m=p.model_vars[index], + label_m=p.model, + ylabel=use_ylabel, + vmin=vmin, + vmax=vmax, + nlevels=nlevels, + proj=proj, + outname=outname, + domain_type=domain_type, + domain_name=domain_name, + fig_dict=fig_dict, + text_dict=text_dict, + debug=self.debug + ) + else: + print('Warning: Spatial overlay plots are not available yet for regulatory metrics.') + + del (fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. + + # Restore figure count warning + plt.rcParams["figure.max_open_warning"] = initial_max_fig + + def stats(self): + """Calculate statistics specified in the input yaml file. + + This routine loops over all the domains and model/obs pairs for all the variables + specified in the mapping dictionary listed in :attr:`paired`. + + Creates a csv file storing the statistics and optionally a figure + visualizing the table. + + Returns + ------- + None + """ + from .stats import proc_stats as proc_stats + from .plots import surfplots as splots + + # first get the stats dictionary from the yaml file + stat_dict = self.control_dict['stats'] + # Calculate general items + startdatename = str(datetime.datetime.strftime(self.start_time, '%Y-%m-%d_%H')) + enddatename = str(datetime.datetime.strftime(self.end_time, '%Y-%m-%d_%H')) + stat_list = stat_dict['stat_list'] + # Determine stat_grp full name + stat_fullname_ns = proc_stats.produce_stat_dict(stat_list=stat_list, spaces=False) + stat_fullname_s = proc_stats.produce_stat_dict(stat_list=stat_list, spaces=True) + pair_labels = stat_dict['data'] + + # Determine rounding + if 'round_output' in stat_dict.keys(): + round_output = stat_dict['round_output'] + else: + round_output = 3 + + # Then loop over all the observations + # first get the observational obs labels + pair1 = self.paired[list(self.paired.keys())[0]] + obs_vars = pair1.obs_vars + for obsvar in obs_vars: + # Read in some plotting specifications stored with observations. + if self.obs[pair1.obs].variable_dict is not None: + if obsvar in self.obs[pair1.obs].variable_dict.keys(): + obs_plot_dict = self.obs[pair1.obs].variable_dict[obsvar] + else: + obs_plot_dict = {} + else: + obs_plot_dict = {} + + # JianHe: Determine if calculate regulatory values + cal_reg = obs_plot_dict.get('regulatory', False) + + # Next loop over all of the domains. + # Loop also over the domain types. + domain_types = stat_dict['domain_type'] + domain_names = stat_dict['domain_name'] + for domain in range(len(domain_types)): + domain_type = domain_types[domain] + domain_name = domain_names[domain] + + # The tables and text files will be output at this step in loop. + # Create an empty pandas dataarray. + df_o_d = pd.DataFrame() + # Determine outname + if cal_reg: + outname = "{}.{}.{}.{}.{}.{}".format('stats', obsvar+'_reg', domain_type, domain_name, startdatename, enddatename) + else: + outname = "{}.{}.{}.{}.{}.{}".format('stats', obsvar, domain_type, domain_name, startdatename, enddatename) + + # Determine plotting kwargs + if 'output_table_kwargs' in stat_dict.keys(): + out_table_kwargs = stat_dict['output_table_kwargs'] + else: + out_table_kwargs = None + + # Add Stat ID and FullName to pandas dictionary. + df_o_d['Stat_ID'] = stat_list + df_o_d['Stat_FullName'] = stat_fullname_ns + + # Specify title for stat plots. + if cal_reg: + if 'ylabel_reg_plot' in obs_plot_dict.keys(): + title = obs_plot_dict['ylabel_reg_plot'] + ': ' + domain_type + ' ' + domain_name + else: + title = obsvar + '_reg: ' + domain_type + ' ' + domain_name + else: + if 'ylabel_plot' in obs_plot_dict.keys(): + title = obs_plot_dict['ylabel_plot'] + ': ' + domain_type + ' ' + domain_name + else: + title = obsvar + ': ' + domain_type + ' ' + domain_name + + # Finally Loop through each of the pairs + for p_label in pair_labels: + p = self.paired[p_label] + # Create an empty list to store the stat_var + p_stat_list = [] + + # Loop through each of the stats + for stat_grp in stat_list: + + # find the pair model label that matches the obs var + index = p.obs_vars.index(obsvar) + modvar = p.model_vars[index] + + # Adjust the modvar as done in pairing script, if the species name in obs and model are the same. + if obsvar == modvar: + modvar = modvar + '_new' + + # convert to dataframe + pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"]) + + # Select only the analysis time window. + pairdf_all = pairdf_all.loc[self.start_time : self.end_time] + + # Query selected points if applicable + if domain_type != 'all': + pairdf_all.query(domain_type + ' == ' + '"' + domain_name + '"', inplace=True) + + # Query with filter options + if 'data_proc' in stat_dict: + if 'filter_dict' in stat_dict['data_proc'] and 'filter_string' in stat_dict['data_proc']: + raise Exception("For statistics, only one of filter_dict and filter_string can be specified.") + elif 'filter_dict' in stat_dict['data_proc']: + filter_dict = stat_dict['data_proc']['filter_dict'] + for column in filter_dict.keys(): + filter_vals = filter_dict[column]['value'] + filter_op = filter_dict[column]['oper'] + if filter_op == 'isin': + pairdf_all.query(f'{column} == {filter_vals}', inplace=True) + elif filter_op == 'isnotin': + pairdf_all.query(f'{column} != {filter_vals}', inplace=True) + else: + pairdf_all.query(f'{column} {filter_op} {filter_vals}', inplace=True) + elif 'filter_string' in stat_dict['data_proc']: + pairdf_all.query(stat_dict['data_proc']['filter_string'], inplace=True) + + # Drop sites with greater than X percent NAN values + if 'data_proc' in stat_dict: + if 'rem_obs_by_nan_pct' in stat_dict['data_proc']: + grp_var = stat_dict['data_proc']['rem_obs_by_nan_pct']['group_var'] + pct_cutoff = stat_dict['data_proc']['rem_obs_by_nan_pct']['pct_cutoff'] + + if stat_dict['data_proc']['rem_obs_by_nan_pct']['times'] == 'hourly': + # Select only hours at the hour + hourly_pairdf_all = pairdf_all.reset_index().loc[pairdf_all.reset_index()['time'].dt.minute==0,:] + + # calculate total obs count, obs count with nan removed, and nan percent for each group + grp_fullcount = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) + grp_nonan_count = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values + else: + # calculate total obs count, obs count with nan removed, and nan percent for each group + grp_fullcount = pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) + grp_nonan_count = pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values + + grp_pct_nan = 100 - grp_nonan_count.div(grp_fullcount,axis=0)*100 + + # make list of sites meeting condition and select paired data by this by this + grp_select = grp_pct_nan.query(obsvar + ' < ' + str(pct_cutoff)).reset_index() + pairdf_all = pairdf_all.loc[pairdf_all[grp_var].isin(grp_select[grp_var].values)] + + # Drop NaNs for model and observations in all cases. + pairdf = pairdf_all.reset_index().dropna(subset=[modvar, obsvar]) + + # JianHe: do we need provide a warning if pairdf is empty (no valid obsdata) for specific subdomain? + if pairdf[obsvar].isnull().all() or pairdf.empty: + print('Warning: no valid obs found for '+domain_name) + p_stat_list.append('NaN') + continue + + if cal_reg: + # Process regulatory values + df2 = ( + pairdf.copy() + .groupby("siteid") + .resample('H', on='time_local') + .mean() + .reset_index() + ) + + if obsvar == 'PM2.5': + pairdf_reg = splots.make_24hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) + elif obsvar == 'OZONE': + pairdf_reg = splots.make_8hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) + else: + print('Warning: no regulatory calculations found for ' + obsvar + '. Setting stat calculation to NaN.') + del df2 + p_stat_list.append('NaN') + continue + del df2 + if len(pairdf_reg[obsvar+'_reg']) == 0: + print('No valid data for '+obsvar+'_reg. Setting stat calculation to NaN.') + p_stat_list.append('NaN') + continue + else: + # Drop NaNs for model and observations in all cases. + pairdf2 = pairdf_reg.reset_index().dropna(subset=[modvar+'_reg', obsvar+'_reg']) + + # Create empty list for all dom + # Calculate statistic and append to list + if obsvar == 'WD': # Use separate calculations for WD + p_stat_list.append(proc_stats.calc(pairdf, stat=stat_grp, obsvar=obsvar, modvar=modvar, wind=True)) + else: + if cal_reg: + p_stat_list.append(proc_stats.calc(pairdf2, stat=stat_grp, obsvar=obsvar+'_reg', modvar=modvar+'_reg', wind=False)) + else: + p_stat_list.append(proc_stats.calc(pairdf, stat=stat_grp, obsvar=obsvar, modvar=modvar, wind=False)) + + # Save the stat to a dataarray + df_o_d[p_label] = p_stat_list + + if self.output_dir is not None: + outname = self.output_dir + '/' + outname # Extra / just in case. + + # Save the pandas dataframe to a txt file + # Save rounded output + df_o_d = df_o_d.round(round_output) + df_o_d.to_csv(path_or_buf=outname + '.csv', index=False) + + if stat_dict['output_table'] == True: + # Output as a table graphic too. + # Change to use the name with full spaces. + df_o_d['Stat_FullName'] = stat_fullname_s + + proc_stats.create_table(df_o_d.drop(columns=['Stat_ID']), + outname=outname, + title=title, + out_table_kwargs=out_table_kwargs, + debug=self.debug + ) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index c3fc0345..4f42b6ca 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -22,9 +22,8 @@ def read_grid_models(config, date_str): for model_name in config['model']: datadir = config['model'][model_name]['datadir'] - filestr = config['model'][model_name]['filestr'] filestr = fill_date_template( - config['model'][model_name]['filestr'], date_str) + config['model'][model_name]['filename'], date_str) filename = find_file(datadir, filestr) model_datasets[model_name] = xr.open_dataset(filename) @@ -55,7 +54,7 @@ def read_grid_obs(config, obs_vars, date_str): data_format = config['obs'][obs_name]['data_format'] datadir = config['obs'][obs_name]['datadir'] filestr = fill_date_template( - config['obs'][obs_name]['filestr'], date_str) + config['obs'][obs_name]['filename'], date_str) filename = find_file(datadir, filestr) file_extension = os.path.splitext(filename)[1] From 57a1216f9e46faf24903db7a928ed3832d9295f8 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 29 Jun 2023 12:17:16 -0600 Subject: [PATCH 46/64] Use filename in config instead of filestr. --- .../control_time_chunking_with_gridded_data.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml index 6e586d86..35d8042d 100644 --- a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -13,7 +13,8 @@ obs: MOD08_M3: data_format: gridded_eos datadir: $HOME/Data/MOD08_M3_nc - filestr: MOD08_M3.AYYYYDDD.061.*_regrid.nc + obs_type: gridded_data + filename: MOD08_M3.AYYYYDDD.061.*_regrid.nc regrid: base_grid: $HOME/Data/Grids/modis_l3_grid.nc method: bilinear From 5b520b5caf8f35e8e90e97761360436a1ba2383c Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 29 Jun 2023 12:43:39 -0600 Subject: [PATCH 47/64] Refactoring time chunking option in drive. --- ...ntrol_time_chunking_with_gridded_data.yaml | 2 +- melodies_monet/driver.py | 105 ++++++------------ ...r_time_chunking.py => driver_prototype.py} | 0 melodies_monet/util/read_grid_util.py | 4 +- 4 files changed, 38 insertions(+), 73 deletions(-) rename melodies_monet/{driver_time_chunking.py => driver_prototype.py} (100%) diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml index 35d8042d..57b87592 100644 --- a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -29,7 +29,7 @@ model: MERRA2: data_format: netcdf datadir: $HOME/Data/MERRA2_TOTEXTTAU - filestr: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 + filename: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 regrid: base_grid: $HOME/Data/Grids/merra2_grid.nc method: bilinear diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index cbea3fea..78785738 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -133,7 +133,7 @@ def __repr__(self): ")" ) - def open_obs(self, time_interval=None): + def open_obs(self, time_interval=None, control_dict=None): """Open the observational data, store data in observation pair, and apply mask and scaling. @@ -149,30 +149,42 @@ def open_obs(self, time_interval=None): from glob import glob from numpy import sort from . import tutorial + from .util import analysis_util + from .util import read_grid_util + + time_chunking_with_gridded_data \ + = 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \ + and control_dict['analysis']['time_chunking_with_gridded_data'] + + if time_chunking_with_gridded_data: + + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('obs reading %s' % date_str) - if self.file.startswith("example:"): - example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) - files = [tutorial.fetch_example(example_id)] else: - files = sort(glob(self.file)) + if self.file.startswith("example:"): + example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) + files = [tutorial.fetch_example(example_id)] + else: + files = sort(glob(self.file)) - assert len(files) >= 1, "need at least one" + assert len(files) >= 1, "need at least one" - _, extension = os.path.splitext(files[0]) - try: - if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}: - if len(files) > 1: - self.obj = xr.open_mfdataset(files) + _, extension = os.path.splitext(files[0]) + try: + if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}: + if len(files) > 1: + self.obj = xr.open_mfdataset(files) + else: + self.obj = xr.open_dataset(files[0]) + elif extension in ['.ict', '.icarrt']: + assert len(files) == 1, "monetio.icarrt.add_data can only read one file" + self.obj = mio.icarrt.add_data(files[0]) else: - self.obj = xr.open_dataset(files[0]) - elif extension in ['.ict', '.icarrt']: - assert len(files) == 1, "monetio.icarrt.add_data can only read one file" - self.obj = mio.icarrt.add_data(files[0]) - else: - raise ValueError(f'extension {extension!r} currently unsupported') - except Exception as e: - print('something happened opening file:', e) - return + raise ValueError(f'extension {extension!r} currently unsupported') + except Exception as e: + print('something happened opening file:', e) + return self.mask_and_scale() # mask and scale values from the control values self.filter_obs() @@ -639,7 +651,7 @@ def open_models(self, time_interval=None): ------- None """ - if ('model' in self.control_dict) and (not self.time_chunking_with_gridded_data): + if 'model' in self.control_dict: # open each model for mod in self.control_dict['model']: # create a new model instance @@ -710,31 +722,6 @@ def open_models(self, time_interval=None): m.open_model_files(time_interval=time_interval) self.models[m.label] = m - if ('model' in self.control_dict) and self.time_chunking_with_gridded_data: - from .util import read_grid_util - date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('model reading %s' % date_str) - filename, model_datasets = read_grid_util.read_grid_models( - self.control_dict, date_str) - print(model_datasets) - for mod in model_datasets: - m = model() - m.model = mod - m.label = mod - m.file_str = filename - m.mapping = self.control_dict['model'][mod]['mapping'] - if 'variables' in self.control_dict['model'][mod].keys(): - m.variable_dict = self.control_dict['model'][mod]['variables'] - if 'plot_kwargs' in self.control_dict['model'][mod].keys(): - m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] - ds_model = model_datasets[mod] - if self.regrid: - regridder = self.model_regridders[mod] - m.obj = regridder(ds_model) - m.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) - else: - m.obj = ds_model - self.models[m.label] = m def open_obs(self, time_interval=None): """Open all observations listed in the input yaml file and create an @@ -755,7 +742,7 @@ def open_obs(self, time_interval=None): from .util import read_grid_util from .util import regrid_util - if ('obs' in self.control_dict) and (not self.time_chunking_with_gridded_data): + if 'obs' in self.control_dict: for obs in self.control_dict['obs']: o = observation() o.obs = obs @@ -767,31 +754,9 @@ def open_obs(self, time_interval=None): self.control_dict['obs'][obs]['filename']) if 'variables' in self.control_dict['obs'][obs].keys(): o.variable_dict = self.control_dict['obs'][obs]['variables'] - o.open_obs(time_interval=time_interval) + o.open_obs(time_interval=time_interval, control_dict=self.control_dict) self.obs[o.label] = o - if ('obs' in self.control_dict) and self.time_chunking_with_gridded_data: - date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('obs reading %s' % date_str) - obs_vars = analysis_util.get_obs_vars(self.control_dict) - print(obs_vars) - filename, obs_datasets = read_grid_util.read_grid_obs( - self.control_dict, obs_vars, date_str) - print(obs_datasets) - for obs in obs_vars: - o = observation() - o.obs = obs - o.label = obs - o.file = filename - o.type = 'gridded_data' - ds_obs = obs_datasets[obs] - if self.regrid: - regridder = self.obs_regridders[obs] - o.obj = regridder(ds_obs) - o.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) - else: - o.obj = ds_obs - self.obs[o.label] = o def pair_data(self, time_interval=None): """Pair all observations and models in the analysis class diff --git a/melodies_monet/driver_time_chunking.py b/melodies_monet/driver_prototype.py similarity index 100% rename from melodies_monet/driver_time_chunking.py rename to melodies_monet/driver_prototype.py diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 4f42b6ca..a20a6269 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -28,7 +28,7 @@ def read_grid_models(config, date_str): model_datasets[model_name] = xr.open_dataset(filename) - return filename, model_datasets + return model_datasets def read_grid_obs(config, obs_vars, date_str): @@ -73,5 +73,5 @@ def read_grid_obs(config, obs_vars, date_str): obs_datasets[obs_name] = ds_obs - return filename, obs_datasets + return obs_datasets From a2b29938f8169e1271d87dc3f38181d8bc898092 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 10 Jul 2023 11:08:12 -0600 Subject: [PATCH 48/64] Revised data paths. --- .../control_time_chunking_with_gridded_data.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml index 57b87592..cc3c8c8a 100644 --- a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -12,7 +12,7 @@ obs: MOD08_M3: data_format: gridded_eos - datadir: $HOME/Data/MOD08_M3_nc + datadir: $HOME/Data/MOD08_M3 obs_type: gridded_data filename: MOD08_M3.AYYYYDDD.061.*_regrid.nc regrid: @@ -28,8 +28,8 @@ model: MERRA2: data_format: netcdf - datadir: $HOME/Data/MERRA2_TOTEXTTAU - filename: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 + datadir: $HOME/Data/MERRA2 + filename: subset_MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_regrid.nc4 regrid: base_grid: $HOME/Data/Grids/merra2_grid.nc method: bilinear From b880b4bd4f9692d9760347a13cc3202892ebbe06 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Wed, 12 Jul 2023 11:18:02 -0600 Subject: [PATCH 49/64] Return filename from read_grid_util.read_grid_obs. --- melodies_monet/driver.py | 5 +++++ melodies_monet/util/read_grid_util.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 78785738..7c21aa11 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -161,6 +161,11 @@ def open_obs(self, time_interval=None, control_dict=None): date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('obs reading %s' % date_str) + obs_vars = analysis_util.get_obs_vars(self.control_dict) + print(obs_vars) + filename, obs_datasets = read_grid_util.read_grid_obs( + self.control_dict, obs_vars, date_str) + else: if self.file.startswith("example:"): example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index a20a6269..4f42b6ca 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -28,7 +28,7 @@ def read_grid_models(config, date_str): model_datasets[model_name] = xr.open_dataset(filename) - return model_datasets + return filename, model_datasets def read_grid_obs(config, obs_vars, date_str): @@ -73,5 +73,5 @@ def read_grid_obs(config, obs_vars, date_str): obs_datasets[obs_name] = ds_obs - return obs_datasets + return filename, obs_datasets From b4c67975c9f3cf43ff20ad01f75db0b929d6275e Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 24 Jul 2023 14:34:35 -0600 Subject: [PATCH 50/64] Extract the self.obs dataset from the dictionary of datasets from read_grid_obs. Need to add option to read_grid_obs to read only a designated dataset. --- melodies_monet/driver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 7c21aa11..604fcf8c 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -161,10 +161,13 @@ def open_obs(self, time_interval=None, control_dict=None): date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('obs reading %s' % date_str) - obs_vars = analysis_util.get_obs_vars(self.control_dict) + obs_vars = analysis_util.get_obs_vars(control_dict) print(obs_vars) + # Need the option for read_grid_obs to read a single dataset filename, obs_datasets = read_grid_util.read_grid_obs( - self.control_dict, obs_vars, date_str) + control_dict, obs_vars, date_str) + self.obj = obs_datasets[self.obs] + print(self.obj) else: if self.file.startswith("example:"): From 70f1655a05681bb03efc8bb432d637cd5a296882 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 24 Jul 2023 14:39:53 -0600 Subject: [PATCH 51/64] Added mod_type entry. --- .../control_time_chunking_with_gridded_data.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml index cc3c8c8a..edac1ca9 100644 --- a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -28,6 +28,7 @@ model: MERRA2: data_format: netcdf + mod_type: merra2 datadir: $HOME/Data/MERRA2 filename: subset_MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_regrid.nc4 regrid: From 8235eeb3f6e851751d85c963f003b418d8ae25c0 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Tue, 25 Jul 2023 10:58:36 -0600 Subject: [PATCH 52/64] Added control_dict optional arg to model.open_model_files. --- ...ntrol_time_chunking_with_gridded_data.yaml | 2 +- melodies_monet/driver.py | 21 +++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml index edac1ca9..d7c9eb67 100644 --- a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -30,7 +30,7 @@ model: data_format: netcdf mod_type: merra2 datadir: $HOME/Data/MERRA2 - filename: subset_MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_regrid.nc4 + files: subset_MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_regrid.nc4 regrid: base_grid: $HOME/Data/Grids/merra2_grid.nc method: bilinear diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 604fcf8c..6bf85e34 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -341,7 +341,7 @@ def glob_files(self): if self.file_pm25_str is not None: self.files_pm25 = sort(glob(self.file_pm25_str)) - def open_model_files(self, time_interval=None): + def open_model_files(self, time_interval=None, control_dict=None): """Open the model files, store data in :class:`model` instance attributes, and apply mask and scaling. @@ -360,6 +360,10 @@ def open_model_files(self, time_interval=None): None """ + time_chunking_with_gridded_data \ + = 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \ + and control_dict['analysis']['time_chunking_with_gridded_data'] + self.glob_files() # Calculate species to input into MONET, so works for all mechanisms in wrfchem # I want to expand this for the other models too when add aircraft data. @@ -414,6 +418,19 @@ def open_model_files(self, time_interval=None): self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs) else: self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) + elif time_chunking_with_gridded_data: + print('model time chunking') + """ + date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') + print('obs reading %s' % date_str) + obs_vars = analysis_util.get_obs_vars(control_dict) + print(obs_vars) + # Need the option for read_grid_obs to read a single dataset + filename, obs_datasets = read_grid_util.read_grid_obs( + control_dict, obs_vars, date_str) + self.obj = obs_datasets[self.obs] + print(self.obj) + """ else: print('**** Reading Unspecified model output. Take Caution...') if len(self.files) > 1: @@ -727,7 +744,7 @@ def open_models(self, time_interval=None): m.proj = ccrs.Projection(proj_in) # open the model - m.open_model_files(time_interval=time_interval) + m.open_model_files(time_interval=time_interval, control_dict=self.control_dict) self.models[m.label] = m From 9574748a7e55d337d17f0c3744d0681147cfe186 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Tue, 25 Jul 2023 12:49:24 -0600 Subject: [PATCH 53/64] Added time_chunking_with_gridded_data option in open_model_files. --- ...ontrol_time_chunking_with_gridded_data.yaml | 2 +- melodies_monet/driver.py | 18 ++++++++---------- melodies_monet/util/read_grid_util.py | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml index d7c9eb67..618a23de 100644 --- a/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml +++ b/examples/process_gridded_data/control_time_chunking_with_gridded_data.yaml @@ -30,7 +30,7 @@ model: data_format: netcdf mod_type: merra2 datadir: $HOME/Data/MERRA2 - files: subset_MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_regrid.nc4 + files: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4 regrid: base_grid: $HOME/Data/Grids/merra2_grid.nc method: bilinear diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 6bf85e34..6a35892b 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -359,6 +359,9 @@ def open_model_files(self, time_interval=None, control_dict=None): ------- None """ + from .util import analysis_util + from .util import read_grid_util + from .util import regrid_util time_chunking_with_gridded_data \ = 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \ @@ -419,18 +422,13 @@ def open_model_files(self, time_interval=None, control_dict=None): else: self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) elif time_chunking_with_gridded_data: - print('model time chunking') - """ date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('obs reading %s' % date_str) - obs_vars = analysis_util.get_obs_vars(control_dict) - print(obs_vars) - # Need the option for read_grid_obs to read a single dataset - filename, obs_datasets = read_grid_util.read_grid_obs( - control_dict, obs_vars, date_str) - self.obj = obs_datasets[self.obs] + print('model reading %s' % date_str) + # Need the option for read_grid_models to read a single dataset + filename, model_datasets = read_grid_util.read_grid_models( + control_dict, date_str) + self.obj = model_datasets[self.model] print(self.obj) - """ else: print('**** Reading Unspecified model output. Take Caution...') if len(self.files) > 1: diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 4f42b6ca..7a69da4b 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -23,7 +23,7 @@ def read_grid_models(config, date_str): datadir = config['model'][model_name]['datadir'] filestr = fill_date_template( - config['model'][model_name]['filename'], date_str) + config['model'][model_name]['files'], date_str) filename = find_file(datadir, filestr) model_datasets[model_name] = xr.open_dataset(filename) From 19bda7c06a6399fc6741cc098f2d2b99ce0f0947 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Wed, 26 Jul 2023 08:31:22 -0600 Subject: [PATCH 54/64] Added time_chunking_with_gridded_data block in open_model_files. --- melodies_monet/driver.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 6a35892b..bb1a8bd8 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -157,10 +157,8 @@ def open_obs(self, time_interval=None, control_dict=None): and control_dict['analysis']['time_chunking_with_gridded_data'] if time_chunking_with_gridded_data: - date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('obs reading %s' % date_str) - + print('obs time chunk %s' % date_str) obs_vars = analysis_util.get_obs_vars(control_dict) print(obs_vars) # Need the option for read_grid_obs to read a single dataset @@ -423,11 +421,12 @@ def open_model_files(self, time_interval=None, control_dict=None): self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) elif time_chunking_with_gridded_data: date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('model reading %s' % date_str) + print('model time chunk %s' % date_str) # Need the option for read_grid_models to read a single dataset filename, model_datasets = read_grid_util.read_grid_models( control_dict, date_str) - self.obj = model_datasets[self.model] + print(self.label) + self.obj = model_datasets[self.label] print(self.obj) else: print('**** Reading Unspecified model output. Take Caution...') From b82d77e5061590cbf2fcd016e52f77af0353d129 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Wed, 26 Jul 2023 08:52:10 -0600 Subject: [PATCH 55/64] Removed filename retrun in read_grid_util.read_grid_models. --- melodies_monet/driver.py | 4 ++-- melodies_monet/util/read_grid_util.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index bb1a8bd8..de910e4f 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -162,7 +162,7 @@ def open_obs(self, time_interval=None, control_dict=None): obs_vars = analysis_util.get_obs_vars(control_dict) print(obs_vars) # Need the option for read_grid_obs to read a single dataset - filename, obs_datasets = read_grid_util.read_grid_obs( + obs_datasets = read_grid_util.read_grid_obs( control_dict, obs_vars, date_str) self.obj = obs_datasets[self.obs] print(self.obj) @@ -423,7 +423,7 @@ def open_model_files(self, time_interval=None, control_dict=None): date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('model time chunk %s' % date_str) # Need the option for read_grid_models to read a single dataset - filename, model_datasets = read_grid_util.read_grid_models( + model_datasets = read_grid_util.read_grid_models( control_dict, date_str) print(self.label) self.obj = model_datasets[self.label] diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 7a69da4b..aa4f42bd 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -28,7 +28,7 @@ def read_grid_models(config, date_str): model_datasets[model_name] = xr.open_dataset(filename) - return filename, model_datasets + return model_datasets def read_grid_obs(config, obs_vars, date_str): @@ -73,5 +73,5 @@ def read_grid_obs(config, obs_vars, date_str): obs_datasets[obs_name] = ds_obs - return filename, obs_datasets + return obs_datasets From b5ac1bf159773e72df01201af4080b2d926aacb5 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 27 Jul 2023 12:43:55 -0600 Subject: [PATCH 56/64] Updated utils/read_grid_util to allow for single model or obs to be read. --- melodies_monet/driver.py | 4 ++-- melodies_monet/util/read_grid_util.py | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index de910e4f..33acbc4c 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -163,7 +163,7 @@ def open_obs(self, time_interval=None, control_dict=None): print(obs_vars) # Need the option for read_grid_obs to read a single dataset obs_datasets = read_grid_util.read_grid_obs( - control_dict, obs_vars, date_str) + control_dict, obs_vars, date_str, obs=self.obs) self.obj = obs_datasets[self.obs] print(self.obj) @@ -424,7 +424,7 @@ def open_model_files(self, time_interval=None, control_dict=None): print('model time chunk %s' % date_str) # Need the option for read_grid_models to read a single dataset model_datasets = read_grid_util.read_grid_models( - control_dict, date_str) + control_dict, date_str, model=self.label) print(self.label) self.obj = model_datasets[self.label] print(self.obj) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index aa4f42bd..3df9bd3b 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -6,20 +6,25 @@ from .analysis_util import fill_date_template, find_file -def read_grid_models(config, date_str): +def read_grid_models(config, date_str, model=None): """ Read grid data models Parameters config (dict): configuration dictionary date_str (str yyyy-mm-m_abbr-dd-ddd): date string + model: specific model to read Returns model_datasets (dict of xr.Dataset): dictionary of model datasets """ model_datasets = dict() + if model is not None: + model_list = [model] + else: + model_list = config['model'] - for model_name in config['model']: + for model_name in model_list: datadir = config['model'][model_name]['datadir'] filestr = fill_date_template( @@ -31,7 +36,7 @@ def read_grid_models(config, date_str): return model_datasets -def read_grid_obs(config, obs_vars, date_str): +def read_grid_obs(config, obs_vars, date_str, obs=None): """ Read grid data obs @@ -40,16 +45,21 @@ def read_grid_obs(config, obs_vars, date_str): obs_vars (dict of dict): nested dictionary keyed by obs set name and obs variable name date_str (str yyyy-mm-m_abbr-dd-ddd): date string + obs: specific observation to read Returns obs_datasets (dict of xr.Dataset): dictionary of obs datasets """ obs_datasets = dict() + if obs is not None: + obs_list = [obs] + else: + obs_list = obs_vars.keys() yyyy_str, mm_str, m_abbr_str, dd_str, ddd_str \ = tuple(date_str.split('-')) - for obs_name in obs_vars: + for obs_name in obs_list: data_format = config['obs'][obs_name]['data_format'] datadir = config['obs'][obs_name]['datadir'] From 9f8a60936179d825c73ed7f29115b2e748a78465 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 27 Jul 2023 12:46:54 -0600 Subject: [PATCH 57/64] Updated utils/read_grid_util to allow for single model or obs to be read. --- melodies_monet/util/read_grid_util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 3df9bd3b..a2326ed2 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -14,6 +14,7 @@ def read_grid_models(config, date_str, model=None): config (dict): configuration dictionary date_str (str yyyy-mm-m_abbr-dd-ddd): date string model: specific model to read + optional, if not specified all model in config['models'] will be read Returns model_datasets (dict of xr.Dataset): dictionary of model datasets @@ -46,6 +47,7 @@ def read_grid_obs(config, obs_vars, date_str, obs=None): nested dictionary keyed by obs set name and obs variable name date_str (str yyyy-mm-m_abbr-dd-ddd): date string obs: specific observation to read + optional, if not specified all obs in obs_vars will be read Returns obs_datasets (dict of xr.Dataset): dictionary of obs datasets From 9fe5274c51e30ffdc2843052a6086bcbd4ea98ac Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 27 Jul 2023 18:47:00 -0600 Subject: [PATCH 58/64] Return filenames in read_grid_util readers. --- melodies_monet/driver.py | 11 ++++------- melodies_monet/util/read_grid_util.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 33acbc4c..5ce443d1 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -161,11 +161,10 @@ def open_obs(self, time_interval=None, control_dict=None): print('obs time chunk %s' % date_str) obs_vars = analysis_util.get_obs_vars(control_dict) print(obs_vars) - # Need the option for read_grid_obs to read a single dataset - obs_datasets = read_grid_util.read_grid_obs( + obs_datasets, filenames = read_grid_util.read_grid_obs( control_dict, obs_vars, date_str, obs=self.obs) + print(filenames) self.obj = obs_datasets[self.obs] - print(self.obj) else: if self.file.startswith("example:"): @@ -422,12 +421,10 @@ def open_model_files(self, time_interval=None, control_dict=None): elif time_chunking_with_gridded_data: date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('model time chunk %s' % date_str) - # Need the option for read_grid_models to read a single dataset - model_datasets = read_grid_util.read_grid_models( + model_datasets, filenames = read_grid_util.read_grid_models( control_dict, date_str, model=self.label) - print(self.label) + print(filenames) self.obj = model_datasets[self.label] - print(self.obj) else: print('**** Reading Unspecified model output. Take Caution...') if len(self.files) > 1: diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index a2326ed2..a3c1fc69 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -18,8 +18,11 @@ def read_grid_models(config, date_str, model=None): Returns model_datasets (dict of xr.Dataset): dictionary of model datasets + filenames (dict of str): dictionary of filenames """ model_datasets = dict() + filenames = dict() + if model is not None: model_list = [model] else: @@ -33,8 +36,9 @@ def read_grid_models(config, date_str, model=None): filename = find_file(datadir, filestr) model_datasets[model_name] = xr.open_dataset(filename) + filenames[model_name] = filename - return model_datasets + return model_datasets, filenames def read_grid_obs(config, obs_vars, date_str, obs=None): @@ -51,8 +55,11 @@ def read_grid_obs(config, obs_vars, date_str, obs=None): Returns obs_datasets (dict of xr.Dataset): dictionary of obs datasets + filenames (dict of str): dictionary of filenames """ obs_datasets = dict() + filenames = dict() + if obs is not None: obs_list = [obs] else: @@ -84,6 +91,7 @@ def read_grid_obs(config, obs_vars, date_str, obs=None): ds_obs = xr.open_dataset(filename) obs_datasets[obs_name] = ds_obs + filenames[obs_name] = filename - return obs_datasets + return obs_datasets, filenames From bd505b8da1d3277479c96fb58b9675bd53455181 Mon Sep 17 00:00:00 2001 From: dwfncar Date: Mon, 31 Jul 2023 11:18:42 -0600 Subject: [PATCH 59/64] Removed prototype version. --- melodies_monet/driver_prototype.py | 1604 ---------------------------- 1 file changed, 1604 deletions(-) delete mode 100644 melodies_monet/driver_prototype.py diff --git a/melodies_monet/driver_prototype.py b/melodies_monet/driver_prototype.py deleted file mode 100644 index cbea3fea..00000000 --- a/melodies_monet/driver_prototype.py +++ /dev/null @@ -1,1604 +0,0 @@ -# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration -# SPDX-License-Identifier: Apache-2.0 -# -""" -Drive the entire analysis package via the :class:`analysis` class. -""" -import monetio as mio -import monet as m -import os -import xarray as xr -import pandas as pd -import numpy as np -import datetime - -# from util import write_ncf - -__all__ = ( - "pair", - "observation", - "model", - "analysis", -) - - -class pair: - """The pair class. - - The pair class pairs model data - directly with observational data along time and space. - """ - - def __init__(self): - """Initialize a :class:`pair` object.""" - self.type = 'pt_sfc' - self.radius_of_influence = 1e6 - self.obs = None - self.model = None - self.model_vars = None - self.obs_vars = None - self.filename = None - - def __repr__(self): - return ( - f"{type(self).__name__}(\n" - f" type={self.type!r},\n" - f" radius_of_influence={self.radius_of_influence!r},\n" - f" obs={self.obs!r},\n" - f" model={self.model!r},\n" - f" model_vars={self.model_vars!r},\n" - f" obs_vars={self.obs_vars!r},\n" - f" filename={self.filename!r},\n" - ")" - ) - - def fix_paired_xarray(self, dset): - """Reformat the paired dataset. - - Parameters - ---------- - dset : xarray.Dataset - - Returns - ------- - xarray.Dataset - Reformatted paired dataset. - """ - # first convert to dataframe - df = dset.to_dataframe().reset_index(drop=True) - - # now get just the single site index - dfpsite = df.rename({'siteid': 'x'}, axis=1).drop_duplicates(subset=['x']) - columns = dfpsite.columns # all columns - site_columns = [ - 'latitude', - 'longitude', - 'x', - 'site', - 'msa_code', - 'cmsa_name', - 'epa_region', - 'state_name', - 'msa_name', - 'site', - 'utcoffset', - ] # only columns for single site identificaiton - - # site only xarray obj (no time dependence) - dfps = dfpsite.loc[:, columns[columns.isin(site_columns)]].set_index(['x']).to_xarray() # single column index - - # now pivot df and convert back to xarray using only non site_columns - site_columns.remove('x') # need to keep x to merge later - dfx = df.loc[:, df.columns[~df.columns.isin(site_columns)]].rename({'siteid': 'x'}, axis=1).set_index(['time', 'x']).to_xarray() - - # merge the time dependent and time independent - out = xr.merge([dfx, dfps]) - - # reset x index and add siteid back to the xarray object - if ~pd.api.types.is_numeric_dtype(out.x): - siteid = out.x.values - out['x'] = range(len(siteid)) - out['siteid'] = (('x'), siteid) - - return out - - -class observation: - """The observation class. - - A class with information and data from an observational dataset. - """ - - def __init__(self): - """Initialize an :class:`observation` object.""" - self.obs = None - self.label = None - self.file = None - self.obj = None - """The data object (:class:`pandas.DataFrame` or :class:`xarray.Dataset`).""" - self.type = 'pt_src' - self.data_proc = None - self.variable_dict = None - - def __repr__(self): - return ( - f"{type(self).__name__}(\n" - f" obs={self.obs!r},\n" - f" label={self.label!r},\n" - f" file={self.file!r},\n" - f" obj={repr(self.obj) if self.obj is None else '...'},\n" - f" type={self.type!r},\n" - f" type={self.data_proc!r},\n" - f" variable_dict={self.variable_dict!r},\n" - ")" - ) - - def open_obs(self, time_interval=None): - """Open the observational data, store data in observation pair, - and apply mask and scaling. - - Parameters - __________ - time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] - If not None, restrict obs to datetime range spanned by time interval [start, end]. - - Returns - ------- - None - """ - from glob import glob - from numpy import sort - from . import tutorial - - if self.file.startswith("example:"): - example_id = ":".join(s.strip() for s in self.file.split(":")[1:]) - files = [tutorial.fetch_example(example_id)] - else: - files = sort(glob(self.file)) - - assert len(files) >= 1, "need at least one" - - _, extension = os.path.splitext(files[0]) - try: - if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}: - if len(files) > 1: - self.obj = xr.open_mfdataset(files) - else: - self.obj = xr.open_dataset(files[0]) - elif extension in ['.ict', '.icarrt']: - assert len(files) == 1, "monetio.icarrt.add_data can only read one file" - self.obj = mio.icarrt.add_data(files[0]) - else: - raise ValueError(f'extension {extension!r} currently unsupported') - except Exception as e: - print('something happened opening file:', e) - return - - self.mask_and_scale() # mask and scale values from the control values - self.filter_obs() - - def filter_obs(self): - """Filter observations based on filter_dict. - - Returns - ------- - None - """ - if self.data_proc is not None: - if 'filter_dict' in self.data_proc: - filter_dict = self.data_proc['filter_dict'] - for column in filter_dict.keys(): - filter_vals = filter_dict[column]['value'] - filter_op = filter_dict[column]['oper'] - if filter_op == 'isin': - self.obj = self.obj.where(self.obj[column].isin(filter_vals),drop=True) - elif filter_op == 'isnotin': - self.obj = self.obj.where(~self.obj[column].isin(filter_vals),drop=True) - elif filter_op == '==': - self.obj = self.obj.where(self.obj[column] == filter_vals,drop=True) - elif filter_op == '>': - self.obj = self.obj.where(self.obj[column] > filter_vals,drop=True) - elif filter_op == '<': - self.obj = self.obj.where(self.obj[column] < filter_vals,drop=True) - elif filter_op == '>=': - self.obj = self.obj.where(self.obj[column] >= filter_vals,drop=True) - elif filter_op == '<=': - self.obj = self.obj.where(self.obj[column] <= filter_vals,drop=True) - elif filter_op == '!=': - self.obj = self.obj.where(self.obj[column] != filter_vals,drop=True) - else: - raise ValueError(f'Filter operation {filter_op!r} is not supported') - - def mask_and_scale(self): - """Mask and scale observations, including unit conversions and setting - detection limits. - - Returns - ------- - None - """ - vars = self.obj.data_vars - if self.variable_dict is not None: - for v in vars: - if v in self.variable_dict: - d = self.variable_dict[v] - # Apply removal of min, max, and nan on the units in the obs file first. - if 'obs_min' in d: - self.obj[v].data = self.obj[v].where(self.obj[v] >= d['obs_min']) - if 'obs_max' in d: - self.obj[v].data = self.obj[v].where(self.obj[v] <= d['obs_max']) - if 'nan_value' in d: - self.obj[v].data = self.obj[v].where(self.obj[v] != d['nan_value']) - # Then apply a correction if needed for the units. - if 'unit_scale' in d: - scale = d['unit_scale'] - else: - scale = 1 - if 'unit_scale_method' in d: - if d['unit_scale_method'] == '*': - self.obj[v].data *= scale - elif d['unit_scale_method'] == '/': - self.obj[v].data /= scale - elif d['unit_scale_method'] == '+': - self.obj[v].data += scale - elif d['unit_scale_method'] == '-': - self.obj[v].data += -1 * scale - - def obs_to_df(self): - """Convert and reformat observation object (:attr:`obj`) to dataframe. - - Returns - ------- - None - """ - self.obj = self.obj.to_dataframe().reset_index().drop(['x', 'y'], axis=1) - - -class model: - """The model class. - - A class with information and data from model results. - """ - - def __init__(self): - """Initialize a :class:`model` object.""" - self.model = None - self.radius_of_influence = None - self.mod_kwargs = {} - self.file_str = None - self.files = None - self.file_vert_str = None - self.files_vert = None - self.file_surf_str = None - self.files_surf = None - self.file_pm25_str = None - self.files_pm25 = None - self.label = None - self.obj = None - self.mapping = None - self.variable_dict = None - self.plot_kwargs = None - self.proj = None - - def __repr__(self): - return ( - f"{type(self).__name__}(\n" - f" model={self.model!r},\n" - f" radius_of_influence={self.radius_of_influence!r},\n" - f" mod_kwargs={self.mod_kwargs!r},\n" - f" file_str={self.file_str!r},\n" - f" label={self.label!r},\n" - f" obj={repr(self.obj) if self.obj is None else '...'},\n" - f" mapping={self.mapping!r},\n" - f" label={self.label!r},\n" - " ...\n" - ")" - ) - - def glob_files(self): - """Convert the model file location string read in by the yaml file - into a list of files containing all model data. - - Returns - ------- - None - """ - from numpy import sort # TODO: maybe use `sorted` for this - from glob import glob - from . import tutorial - - print(self.file_str) - if self.file_str.startswith("example:"): - example_id = ":".join(s.strip() for s in self.file_str.split(":")[1:]) - self.files = [tutorial.fetch_example(example_id)] - else: - self.files = sort(glob(self.file_str)) - - if self.file_vert_str is not None: - self.files_vert = sort(glob(self.file_vert_str)) - if self.file_surf_str is not None: - self.files_surf = sort(glob(self.file_surf_str)) - if self.file_pm25_str is not None: - self.files_pm25 = sort(glob(self.file_pm25_str)) - - def open_model_files(self, time_interval=None): - """Open the model files, store data in :class:`model` instance attributes, - and apply mask and scaling. - - Models supported are cmaq, wrfchem, rrfs, and gsdchem. - If a model is not supported, MELODIES-MONET will try to open - the model data using a generic reader. If you wish to include new - models, add the new model option to this module. - - Parameters - __________ - time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] - If not None, restrict models to datetime range spanned by time interval [start, end]. - - Returns - ------- - None - """ - - self.glob_files() - # Calculate species to input into MONET, so works for all mechanisms in wrfchem - # I want to expand this for the other models too when add aircraft data. - list_input_var = [] - for obs_map in self.mapping: - list_input_var = list_input_var + list(set(self.mapping[obs_map].keys()) - set(list_input_var)) - #Only certain models need this option for speeding up i/o. - if 'cmaq' in self.model.lower(): - print('**** Reading CMAQ model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - if self.files_vert is not None: - self.mod_kwargs.update({'fname_vert' : self.files_vert}) - if self.files_surf is not None: - self.mod_kwargs.update({'fname_surf' : self.files_surf}) - if len(self.files) > 1: - self.mod_kwargs.update({'concatenate_forecasts' : True}) - self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'wrfchem' in self.model.lower(): - print('**** Reading WRF-Chem model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'rrfs' in self.model.lower(): - print('**** Reading RRFS-CMAQ model output...') - if self.files_pm25 is not None: - self.mod_kwargs.update({'fname_pm25' : self.files_pm25}) - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'gsdchem' in self.model.lower(): - print('**** Reading GSD-Chem model output...') - if len(self.files) > 1: - self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs) - else: - self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs) - elif 'cesm_fv' in self.model.lower(): - print('**** Reading CESM FV model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs) - # CAM-chem-SE grid or MUSICAv0 - elif 'cesm_se' in self.model.lower(): - print('**** Reading CESM SE model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - if self.scrip_file.startswith("example:"): - from . import tutorial - example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:]) - self.scrip_file = tutorial.fetch_example(example_id) - self.mod_kwargs.update({'scrip_file' : self.scrip_file}) - self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs) - #self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs) - #self.obj.monet.scrip = self.obj_scrip - elif 'raqms' in self.model.lower(): - if len(self.files) > 1: - self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs) - else: - self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) - else: - print('**** Reading Unspecified model output. Take Caution...') - if len(self.files) > 1: - self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs) - else: - self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs) - self.mask_and_scale() - - def mask_and_scale(self): - """Mask and scale model data including unit conversions. - - Returns - ------- - None - """ - vars = self.obj.data_vars - if self.variable_dict is not None: - for v in vars: - if v in self.variable_dict: - d = self.variable_dict[v] - if 'unit_scale' in d: - scale = d['unit_scale'] - else: - scale = 1 - if 'unit_scale_method' in d: - if d['unit_scale_method'] == '*': - self.obj[v].data *= scale - elif d['unit_scale_method'] == '/': - self.obj[v].data /= scale - elif d['unit_scale_method'] == '+': - self.obj[v].data += scale - elif d['unit_scale_method'] == '-': - self.obj[v].data += -1 * scale - -class analysis: - """The analysis class. - - The analysis class is the highest - level class and stores all information about the analysis. It reads - and stores information from the input yaml file and defines - overarching analysis information like the start and end time, which - models and observations to pair, etc. - """ - - def __init__(self): - """Initialize an :class:`analysis` object.""" - self.control = 'control.yaml' - self.control_dict = None - self.models = {} - """dict : Models, set by :meth:`open_models`.""" - self.obs = {} - """dict : Observations, set by :meth:`open_obs`.""" - self.paired = {} - """dict : Paired data, set by :meth:`pair_data`.""" - self.start_time = None - self.end_time = None - self.time_intervals = None - self.download_maps = True # Default to True - self.output_dir = None - self.output_dir_save = None - self.output_dir_read = None - self.debug = False - self.save = None - self.read = None - self.time_chunking_with_gridded_data = False # Default to False - self.regrid = False # Default to False - self.target_grid = None - self.obs_regridders = None - self.model_regridders = None - - def __repr__(self): - return ( - f"{type(self).__name__}(\n" - f" control={self.control!r},\n" - f" control_dict={repr(self.control_dict) if self.control_dict is None else '...'},\n" - f" models={self.models!r},\n" - f" obs={self.obs!r},\n" - f" paired={self.paired!r},\n" - f" start_time={self.start_time!r},\n" - f" end_time={self.end_time!r},\n" - f" time_intervals={self.time_intervals!r},\n" - f" download_maps={self.download_maps!r},\n" - f" output_dir={self.output_dir!r},\n" - f" output_dir_save={self.output_dir_save!r},\n" - f" output_dir_read={self.output_dir_read!r},\n" - f" debug={self.debug!r},\n" - f" save={self.save!r},\n" - f" read={self.read!r},\n" - ")" - ) - - def read_control(self, control=None): - """Read the input yaml file, - updating various :class:`analysis` instance attributes. - - Parameters - ---------- - control : str - Input yaml file path. - If provided, :attr:`control` will be set to this value. - - Returns - ------- - None - """ - import yaml - - if control is not None: - self.control = control - - with open(self.control, 'r') as stream: - self.control_dict = yaml.safe_load(stream) - - # set analysis time - self.start_time = pd.Timestamp(self.control_dict['analysis']['start_time']) - self.end_time = pd.Timestamp(self.control_dict['analysis']['end_time']) - if 'output_dir' in self.control_dict['analysis'].keys(): - self.output_dir = os.path.expandvars( - self.control_dict['analysis']['output_dir']) - else: - raise Exception('output_dir was not specified and is required. Please set analysis.output_dir in the control file.') - if 'output_dir_save' in self.control_dict['analysis'].keys(): - self.output_dir_save = os.path.expandvars( - self.control_dict['analysis']['output_dir_save']) - else: - self.output_dir_save=self.output_dir - if 'output_dir_read' in self.control_dict['analysis'].keys(): - if self.control_dict['analysis']['output_dir_read'] is not None: - self.output_dir_read = os.path.expandvars( - self.control_dict['analysis']['output_dir_read']) - else: - self.output_dir_read=self.output_dir - - self.debug = self.control_dict['analysis']['debug'] - if 'save' in self.control_dict['analysis'].keys(): - self.save = self.control_dict['analysis']['save'] - if 'read' in self.control_dict['analysis'].keys(): - self.read = self.control_dict['analysis']['read'] - - # set time_chunking_with_gridded_data option, regrid option, and target_grid - if 'time_chunking_with_gridded_data' in self.control_dict['analysis'].keys(): - self.time_chunking_with_gridded_data = self.control_dict['analysis']['time_chunking_with_gridded_data'] - if 'regrid' in self.control_dict['analysis'].keys(): - self.regrid = self.control_dict['analysis']['regrid'] - if 'target_grid' in self.control_dict['analysis'].keys(): - self.target_grid = self.control_dict['analysis']['target_grid'] - - # generate time intervals for time chunking - if 'time_interval' in self.control_dict['analysis'].keys(): - time_stamps = pd.date_range( - start=self.start_time, end=self.end_time, - freq=self.control_dict['analysis']['time_interval']) - # if (end_time - start_time) is not an integer multiple - # of freq, append end_time to time_stamps - if time_stamps[-1] < pd.Timestamp(self.end_time): - time_stamps = time_stamps.append( - pd.DatetimeIndex([self.end_time])) - self.time_intervals \ - = [[time_stamps[n], time_stamps[n+1]] - for n in range(len(time_stamps)-1)] - - # Enable Dask progress bars? (default: false) - enable_dask_progress_bars = self.control_dict["analysis"].get( - "enable_dask_progress_bars", False) - if enable_dask_progress_bars: - from dask.diagnostics import ProgressBar - - ProgressBar().register() - else: - from dask.callbacks import Callback - - Callback.active = set() - - def save_analysis(self): - """Save all analysis attributes listed in analysis section of input yaml file. - - Returns - ------- - None - """ - if self.save is not None: - # Loop over each possible attr type (models, obs and paired) - for attr in self.save: - if self.save[attr]['method']=='pkl': - from .util.write_util import write_pkl - write_pkl(obj=getattr(self,attr), output_name=os.path.join(self.output_dir_save,self.save[attr]['output_name'])) - - elif self.save[attr]['method']=='netcdf': - from .util.write_util import write_analysis_ncf - # save either all groups or selected groups - if self.save[attr]['data']=='all': - if 'prefix' in self.save[attr]: - write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save, - fn_prefix=self.save[attr]['prefix']) - else: - write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save) - else: - if 'prefix' in self.save[attr]: - write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save, - fn_prefix=self.save[attr]['prefix'], keep_groups=self.save[attr]['data']) - else: - write_analysis_ncf(obj=getattr(self,attr), output_dir=self.output_dir_save, - keep_groups=self.save[attr]['data']) - - def read_analysis(self): - """Read all previously saved analysis attributes listed in analysis section of input yaml file. - - Returns - ------- - None - """ - if self.read is not None: - # Loop over each possible attr type (models, obs and paired) - from .util.read_util import read_saved_data - for attr in self.read: - if self.read[attr]['method']=='pkl': - read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='pkl', attr=attr) - elif self.read[attr]['method']=='netcdf': - read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='netcdf', attr=attr) - - def setup_regridders(self): - """Create an obs xesmf.Regridder from base and target grids specified in the control_dict - - Returns - ------- - None - """ - from .util import regrid_util - if self.regrid: - self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') - self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') - - def open_models(self, time_interval=None): - """Open all models listed in the input yaml file and create a :class:`model` - object for each of them, populating the :attr:`models` dict. - - Parameters - __________ - time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] - If not None, restrict models to datetime range spanned by time interval [start, end]. - - Returns - ------- - None - """ - if ('model' in self.control_dict) and (not self.time_chunking_with_gridded_data): - # open each model - for mod in self.control_dict['model']: - # create a new model instance - m = model() - # this is the model type (ie cmaq, rapchem, gsdchem etc) - m.model = self.control_dict['model'][mod]['mod_type'] - # set the model label in the dictionary and model class instance - if 'radius_of_influence' in self.control_dict['model'][mod].keys(): - m.radius_of_influence = self.control_dict['model'][mod]['radius_of_influence'] - else: - m.radius_of_influence = 1e6 - if 'mod_kwargs' in self.control_dict['model'][mod].keys(): - m.mod_kwargs = self.control_dict['model'][mod]['mod_kwargs'] - m.label = mod - # create file string (note this can include hot strings) - m.file_str = os.path.expandvars( - self.control_dict['model'][mod]['files']) - if 'files_vert' in self.control_dict['model'][mod].keys(): - m.file_vert_str = os.path.expandvars( - self.control_dict['model'][mod]['files_vert']) - if 'files_surf' in self.control_dict['model'][mod].keys(): - m.file_surf_str = os.path.expandvars( - self.control_dict['model'][mod]['files_surf']) - if 'files_pm25' in self.control_dict['model'][mod].keys(): - m.file_pm25_str = os.path.expandvars( - self.control_dict['model'][mod]['files_pm25']) - # create mapping - m.mapping = self.control_dict['model'][mod]['mapping'] - # add variable dict - if 'variables' in self.control_dict['model'][mod].keys(): - m.variable_dict = self.control_dict['model'][mod]['variables'] - if 'plot_kwargs' in self.control_dict['model'][mod].keys(): - m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] - - # unstructured grid check - if m.model in ['cesm_se']: - if 'scrip_file' in self.control_dict['model'][mod].keys(): - m.scrip_file = self.control_dict['model'][mod]['scrip_file'] - else: - raise ValueError( '"Scrip_file" must be provided for unstructured grid output!' ) - - # maybe set projection - proj_in = self.control_dict['model'][mod].get("projection") - if proj_in == "None": - print( - f"NOTE: model.{mod}.projection is {proj_in!r} (str), " - "but we assume you want `None` (Python null sentinel). " - "To avoid this warning, " - "update your control file to remove the projection setting " - "or set to `~` or `null` if you want null value in YAML." - ) - proj_in = None - if proj_in is not None: - if isinstance(proj_in, str) and proj_in.startswith("model:"): - m.proj = proj_in - elif isinstance(proj_in, str) and proj_in.startswith("ccrs."): - import cartopy.crs as ccrs - m.proj = eval(proj_in) - else: - import cartopy.crs as ccrs - - if isinstance(proj_in, ccrs.Projection): - m.proj = proj_in - else: - m.proj = ccrs.Projection(proj_in) - - # open the model - m.open_model_files(time_interval=time_interval) - self.models[m.label] = m - - if ('model' in self.control_dict) and self.time_chunking_with_gridded_data: - from .util import read_grid_util - date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('model reading %s' % date_str) - filename, model_datasets = read_grid_util.read_grid_models( - self.control_dict, date_str) - print(model_datasets) - for mod in model_datasets: - m = model() - m.model = mod - m.label = mod - m.file_str = filename - m.mapping = self.control_dict['model'][mod]['mapping'] - if 'variables' in self.control_dict['model'][mod].keys(): - m.variable_dict = self.control_dict['model'][mod]['variables'] - if 'plot_kwargs' in self.control_dict['model'][mod].keys(): - m.plot_kwargs = self.control_dict['model'][mod]['plot_kwargs'] - ds_model = model_datasets[mod] - if self.regrid: - regridder = self.model_regridders[mod] - m.obj = regridder(ds_model) - m.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) - else: - m.obj = ds_model - self.models[m.label] = m - - def open_obs(self, time_interval=None): - """Open all observations listed in the input yaml file and create an - :class:`observation` instance for each of them, - populating the :attr:`obs` dict. - - Parameters - __________ - time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] - If not None, restrict obs to datetime range spanned by time interval [start, end]. - - - Returns - ------- - None - """ - from .util import analysis_util - from .util import read_grid_util - from .util import regrid_util - - if ('obs' in self.control_dict) and (not self.time_chunking_with_gridded_data): - for obs in self.control_dict['obs']: - o = observation() - o.obs = obs - o.label = obs - o.obs_type = self.control_dict['obs'][obs]['obs_type'] - if 'data_proc' in self.control_dict['obs'][obs].keys(): - o.data_proc = self.control_dict['obs'][obs]['data_proc'] - o.file = os.path.expandvars( - self.control_dict['obs'][obs]['filename']) - if 'variables' in self.control_dict['obs'][obs].keys(): - o.variable_dict = self.control_dict['obs'][obs]['variables'] - o.open_obs(time_interval=time_interval) - self.obs[o.label] = o - - if ('obs' in self.control_dict) and self.time_chunking_with_gridded_data: - date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') - print('obs reading %s' % date_str) - obs_vars = analysis_util.get_obs_vars(self.control_dict) - print(obs_vars) - filename, obs_datasets = read_grid_util.read_grid_obs( - self.control_dict, obs_vars, date_str) - print(obs_datasets) - for obs in obs_vars: - o = observation() - o.obs = obs - o.label = obs - o.file = filename - o.type = 'gridded_data' - ds_obs = obs_datasets[obs] - if self.regrid: - regridder = self.obs_regridders[obs] - o.obj = regridder(ds_obs) - o.obj.to_netcdf(regrid_util.filename_regrid(filename, regridder)) - else: - o.obj = ds_obs - self.obs[o.label] = o - - def pair_data(self, time_interval=None): - """Pair all observations and models in the analysis class - (i.e., those listed in the input yaml file) together, - populating the :attr:`paired` dict. - - Parameters - __________ - time_interval (optional, default None) : [pandas.Timestamp, pandas.Timestamp] - If not None, restrict pairing to datetime range spanned by time interval [start, end]. - - - Returns - ------- - None - """ - pairs = {} # TODO: unused - for model_label in self.models: - mod = self.models[model_label] - # Now we have the models we need to loop through the mapping table for each network and pair the data - # each paired dataset will be output to a netcdf file with 'model_label_network.nc' - for obs_to_pair in mod.mapping.keys(): - # get the variables to pair from the model data (ie don't pair all data) - keys = [key for key in mod.mapping[obs_to_pair].keys()] - obs_vars = [mod.mapping[obs_to_pair][key] for key in keys] - - # unstructured grid check - lon/lat variables should be explicitly added - # in addition to comparison variables - if mod.obj.attrs.get("mio_scrip_file", False): - lonlat_list = [ 'lon', 'lat', 'longitude', 'latitude', 'Longitude', 'Latitude' ] - for ll in lonlat_list: - if ll in mod.obj.data_vars: - keys += [ll] - model_obj = mod.obj[keys] - - ## TODO: add in ability for simple addition of variables from - - # simplify the objs object with the correct mapping variables - obs = self.obs[obs_to_pair] - - # pair the data - # if pt_sfc (surface point network or monitor) - if obs.obs_type.lower() == 'pt_sfc': - # convert this to pandas dataframe unless already done because second time paired this obs - if not isinstance(obs.obj, pd.DataFrame): - obs.obs_to_df() - #Check if z dim is larger than 1. If so select, the first level as all models read through - #MONETIO will be reordered such that the first level is the level nearest to the surface. - try: - if model_obj.sizes['z'] > 1: - # Select only the surface values to pair with obs. - model_obj = model_obj.isel(z=0).expand_dims('z',axis=1) - except KeyError as e: - raise Exception("MONET requires an altitude dimension named 'z'") from e - # now combine obs with - paired_data = model_obj.monet.combine_point(obs.obj, radius_of_influence=mod.radius_of_influence, suffix=mod.label) - if self.debug: - print('After pairing: ', paired_data) - # this outputs as a pandas dataframe. Convert this to xarray obj - p = pair() - p.obs = obs.label - p.model = mod.label - p.model_vars = keys - p.obs_vars = obs_vars - p.filename = '{}_{}.nc'.format(p.obs, p.model) - p.obj = paired_data.monet._df_to_da() - label = "{}_{}".format(p.obs, p.model) - self.paired[label] = p - p.obj = p.fix_paired_xarray(dset=p.obj) - # write_util.write_ncf(p.obj,p.filename) # write out to file - # TODO: add other network types / data types where (ie flight, satellite etc) - - def concat_pairs(self): - """Read and concatenate all observation and model time interval pair data, - populating the :attr:`paired` dict. - - Returns - ------- - None - """ - pass - - ### TODO: Create the plotting driver (most complicated one) - # def plotting(self): - def plotting(self): - """Cycle through all the plotting groups (e.g., plot_grp1) listed in - the input yaml file and create the plots. - - This routine loops over all the domains and - model/obs pairs specified in the plotting group (``.control_dict['plots']``) - for all the variables specified in the mapping dictionary listed in - :attr:`paired`. - - Creates plots stored in the file location specified by output_dir - in the analysis section of the yaml file. - - Returns - ------- - None - """ - import matplotlib.pyplot as plt - - from .plots import surfplots as splots, savefig - - # Disable figure count warning - initial_max_fig = plt.rcParams["figure.max_open_warning"] - plt.rcParams["figure.max_open_warning"] = 0 - - # first get the plotting dictionary from the yaml file - plot_dict = self.control_dict['plots'] - # Calculate any items that do not need to recalculate each loop. - startdatename = str(datetime.datetime.strftime(self.start_time, '%Y-%m-%d_%H')) - enddatename = str(datetime.datetime.strftime(self.end_time, '%Y-%m-%d_%H')) - # now we are going to loop through each plot_group (note we can have multiple plot groups) - # a plot group can have - # 1) a singular plot type - # 2) multiple paired datasets or model datasets depending on the plot type - # 3) kwargs for creating the figure ie size and marker (note the default for obs is 'x') - for grp, grp_dict in plot_dict.items(): - pair_labels = grp_dict['data'] - # get the plot type - plot_type = grp_dict['type'] - - # first get the observational obs labels - pair1 = self.paired[list(self.paired.keys())[0]] - obs_vars = pair1.obs_vars - - # loop through obs variables - for obsvar in obs_vars: - # Loop also over the domain types. So can easily create several overview and zoomed in plots. - domain_types = grp_dict['domain_type'] - domain_names = grp_dict['domain_name'] - for domain in range(len(domain_types)): - domain_type = domain_types[domain] - domain_name = domain_names[domain] - - # Then loop through each of the pairs to add to the plot. - for p_index, p_label in enumerate(pair_labels): - p = self.paired[p_label] - # find the pair model label that matches the obs var - index = p.obs_vars.index(obsvar) - modvar = p.model_vars[index] - - # Adjust the modvar as done in pairing script, if the species name in obs and model are the same. - if obsvar == modvar: - modvar = modvar + '_new' - - # convert to dataframe - pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"]) - - # Select only the analysis time window. - pairdf_all = pairdf_all.loc[self.start_time : self.end_time] - - # Determine the default plotting colors. - if 'default_plot_kwargs' in grp_dict.keys(): - if self.models[p.model].plot_kwargs is not None: - plot_dict = {**grp_dict['default_plot_kwargs'], **self.models[p.model].plot_kwargs} - else: - plot_dict = {**grp_dict['default_plot_kwargs'], **splots.calc_default_colors(p_index)} - obs_dict = grp_dict['default_plot_kwargs'] - else: - if self.models[p.model].plot_kwargs is not None: - plot_dict = self.models[p.model].plot_kwargs.copy() - else: - plot_dict = splots.calc_default_colors(p_index).copy() - obs_dict = None - - # Determine figure_kwargs and text_kwargs - if 'fig_kwargs' in grp_dict.keys(): - fig_dict = grp_dict['fig_kwargs'] - else: - fig_dict = None - if 'text_kwargs' in grp_dict.keys(): - text_dict = grp_dict['text_kwargs'] - else: - text_dict = None - - # Read in some plotting specifications stored with observations. - if self.obs[p.obs].variable_dict is not None: - if obsvar in self.obs[p.obs].variable_dict.keys(): - obs_plot_dict = self.obs[p.obs].variable_dict[obsvar].copy() - else: - obs_plot_dict = {} - else: - obs_plot_dict = {} - - # Specify ylabel if noted in yaml file. - if 'ylabel_plot' in obs_plot_dict.keys(): - use_ylabel = obs_plot_dict['ylabel_plot'] - else: - use_ylabel = None - - # Determine if set axis values or use defaults - if grp_dict['data_proc']['set_axis'] == True: - if obs_plot_dict: # Is not null - set_yaxis = True - else: - print('Warning: variables dict for ' + obsvar + ' not provided, so defaults used') - set_yaxis = False - else: - set_yaxis = False - - # Determine to calculate mean values or percentile - if 'percentile_opt' in obs_plot_dict.keys(): - use_percentile = obs_plot_dict['percentile_opt'] - else: - use_percentile = None - - # Determine outname - outname = "{}.{}.{}.{}.{}.{}.{}".format(grp, plot_type, obsvar, startdatename, enddatename, domain_type, domain_name) - - # Query selected points if applicable - if domain_type != 'all': - pairdf_all.query(domain_type + ' == ' + '"' + domain_name + '"', inplace=True) - - # Query with filter options - if 'filter_dict' in grp_dict['data_proc'] and 'filter_string' in grp_dict['data_proc']: - raise Exception("""For plot group: {}, only one of filter_dict and filter_string can be specified.""".format(grp)) - elif 'filter_dict' in grp_dict['data_proc']: - filter_dict = grp_dict['data_proc']['filter_dict'] - for column in filter_dict.keys(): - filter_vals = filter_dict[column]['value'] - filter_op = filter_dict[column]['oper'] - if filter_op == 'isin': - pairdf_all.query(f'{column} == {filter_vals}', inplace=True) - elif filter_op == 'isnotin': - pairdf_all.query(f'{column} != {filter_vals}', inplace=True) - else: - pairdf_all.query(f'{column} {filter_op} {filter_vals}', inplace=True) - elif 'filter_string' in grp_dict['data_proc']: - pairdf_all.query(grp_dict['data_proc']['filter_string'], inplace=True) - - # Drop sites with greater than X percent NAN values - if 'rem_obs_by_nan_pct' in grp_dict['data_proc']: - grp_var = grp_dict['data_proc']['rem_obs_by_nan_pct']['group_var'] - pct_cutoff = grp_dict['data_proc']['rem_obs_by_nan_pct']['pct_cutoff'] - - if grp_dict['data_proc']['rem_obs_by_nan_pct']['times'] == 'hourly': - # Select only hours at the hour - hourly_pairdf_all = pairdf_all.reset_index().loc[pairdf_all.reset_index()['time'].dt.minute==0,:] - - # calculate total obs count, obs count with nan removed, and nan percent for each group - grp_fullcount = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) - grp_nonan_count = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values - else: - # calculate total obs count, obs count with nan removed, and nan percent for each group - grp_fullcount = pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) - grp_nonan_count = pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values - - grp_pct_nan = 100 - grp_nonan_count.div(grp_fullcount,axis=0)*100 - - # make list of sites meeting condition and select paired data by this by this - grp_select = grp_pct_nan.query(obsvar + ' < ' + str(pct_cutoff)).reset_index() - pairdf_all = pairdf_all.loc[pairdf_all[grp_var].isin(grp_select[grp_var].values)] - - # Drop NaNs - if grp_dict['data_proc']['rem_obs_nan'] == True: - # I removed drop=True in reset_index in order to keep 'time' as a column. - pairdf = pairdf_all.reset_index().dropna(subset=[modvar, obsvar]) - else: - print('Warning: set rem_obs_nan = True for regulatory metrics') - pairdf = pairdf_all.reset_index().dropna(subset=[modvar]) - - # JianHe: do we need provide a warning if pairdf is empty (no valid obsdata) for specific subdomain? - if pairdf.empty or pairdf[obsvar].isnull().all(): - print('Warning: no valid obs found for '+domain_name) - continue - - # JianHe: Determine if calculate regulatory values - cal_reg = obs_plot_dict.get('regulatory', False) - - if cal_reg: - # Reset use_ylabel for regulatory calculations - if 'ylabel_reg_plot' in obs_plot_dict.keys(): - use_ylabel = obs_plot_dict['ylabel_reg_plot'] - else: - use_ylabel = None - - df2 = ( - pairdf.copy() - .groupby("siteid") - .resample('H', on='time_local') - .mean() - .reset_index() - ) - - if obsvar == 'PM2.5': - pairdf_reg = splots.make_24hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) - elif obsvar == 'OZONE': - pairdf_reg = splots.make_8hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) - else: - print('Warning: no regulatory calculations found for ' + obsvar + '. Skipping plot.') - del df2 - continue - del df2 - if len(pairdf_reg[obsvar+'_reg']) == 0: - print('No valid data for '+obsvar+'_reg. Skipping plot.') - continue - else: - # Reset outname for regulatory options - outname = "{}.{}.{}.{}.{}.{}.{}".format(grp, plot_type, obsvar+'_reg', startdatename, enddatename, domain_type, domain_name) - else: - pairdf_reg = None - - if plot_type.lower() == 'spatial_bias': - if use_percentile is None: - outname = outname+'.mean' - else: - outname = outname+'.p'+'{:02d}'.format(use_percentile) - - if self.output_dir is not None: - outname = self.output_dir + '/' + outname # Extra / just in case. - - # Types of plots - if plot_type.lower() == 'timeseries': - if set_yaxis == True: - if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')): - vmin = obs_plot_dict['vmin_plot'] - vmax = obs_plot_dict['vmax_plot'] - else: - print('Warning: vmin_plot and vmax_plot not specified for ' + obsvar + ', so default used.') - vmin = None - vmax = None - else: - vmin = None - vmax = None - # Select time to use as index. - pairdf = pairdf.set_index(grp_dict['data_proc']['ts_select_time']) - a_w = grp_dict['data_proc']['ts_avg_window'] - if p_index == 0: - # First plot the observations. - ax = splots.make_timeseries( - pairdf, - pairdf_reg, - column=obsvar, - label=p.obs, - avg_window=a_w, - ylabel=use_ylabel, - vmin=vmin, - vmax=vmax, - domain_type=domain_type, - domain_name=domain_name, - plot_dict=obs_dict, - fig_dict=fig_dict, - text_dict=text_dict, - debug=self.debug - ) - # For all p_index plot the model. - ax = splots.make_timeseries( - pairdf, - pairdf_reg, - column=modvar, - label=p.model, - ax=ax, - avg_window=a_w, - ylabel=use_ylabel, - vmin=vmin, - vmax=vmax, - domain_type=domain_type, - domain_name=domain_name, - plot_dict=plot_dict, - text_dict=text_dict, - debug=self.debug - ) - # At the end save the plot. - if p_index == len(pair_labels) - 1: - savefig(outname + '.png', logo_height=150) - del (ax, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear axis for next plot. - if plot_type.lower() == 'boxplot': - if set_yaxis == True: - if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')): - vmin = obs_plot_dict['vmin_plot'] - vmax = obs_plot_dict['vmax_plot'] - else: - print('Warning: vmin_plot and vmax_plot not specified for ' + obsvar + ', so default used.') - vmin = None - vmax = None - else: - vmin = None - vmax = None - # First for p_index = 0 create the obs box plot data array. - if p_index == 0: - comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=obsvar, - label=p.obs, plot_dict=obs_dict) - # Then add the models to this dataarray. - comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=modvar, label=p.model, - plot_dict=plot_dict, comb_bx=comb_bx, - label_bx=label_bx) - # For the last p_index make the plot. - if p_index == len(pair_labels) - 1: - splots.make_boxplot( - comb_bx, - label_bx, - ylabel=use_ylabel, - vmin=vmin, - vmax=vmax, - outname=outname, - domain_type=domain_type, - domain_name=domain_name, - plot_dict=obs_dict, - fig_dict=fig_dict, - text_dict=text_dict, - debug=self.debug - ) - #Clear info for next plot. - del (comb_bx, label_bx, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) - elif plot_type.lower() == 'taylor': - if set_yaxis == True: - if 'ty_scale' in obs_plot_dict.keys(): - ty_scale = obs_plot_dict['ty_scale'] - else: - print('Warning: ty_scale not specified for ' + obsvar + ', so default used.') - ty_scale = 1.5 # Use default - else: - ty_scale = 1.5 # Use default - if p_index == 0: - # Plot initial obs/model - dia = splots.make_taylor( - pairdf, - pairdf_reg, - column_o=obsvar, - label_o=p.obs, - column_m=modvar, - label_m=p.model, - ylabel=use_ylabel, - ty_scale=ty_scale, - domain_type=domain_type, - domain_name=domain_name, - plot_dict=plot_dict, - fig_dict=fig_dict, - text_dict=text_dict, - debug=self.debug - ) - else: - # For the rest, plot on top of dia - dia = splots.make_taylor( - pairdf, - pairdf_reg, - column_o=obsvar, - label_o=p.obs, - column_m=modvar, - label_m=p.model, - dia=dia, - ylabel=use_ylabel, - ty_scale=ty_scale, - domain_type=domain_type, - domain_name=domain_name, - plot_dict=plot_dict, - text_dict=text_dict, - debug=self.debug - ) - # At the end save the plot. - if p_index == len(pair_labels) - 1: - savefig(outname + '.png', logo_height=70) - del (dia, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. - elif plot_type.lower() == 'spatial_bias': - if set_yaxis == True: - if 'vdiff_plot' in obs_plot_dict.keys(): - vdiff = obs_plot_dict['vdiff_plot'] - else: - print('Warning: vdiff_plot not specified for ' + obsvar + ', so default used.') - vdiff = None - else: - vdiff = None - # p_label needs to be added to the outname for this plot - outname = "{}.{}".format(outname, p_label) - splots.make_spatial_bias( - pairdf, - pairdf_reg, - column_o=obsvar, - label_o=p.obs, - column_m=modvar, - label_m=p.model, - ylabel=use_ylabel, - ptile=use_percentile, - vdiff=vdiff, - outname=outname, - domain_type=domain_type, - domain_name=domain_name, - fig_dict=fig_dict, - text_dict=text_dict, - debug=self.debug - ) - del (fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. - elif plot_type.lower() == 'spatial_bias_exceedance': - if cal_reg: - if set_yaxis == True: - if 'vdiff_reg_plot' in obs_plot_dict.keys(): - vdiff = obs_plot_dict['vdiff_reg_plot'] - else: - print('Warning: vdiff_reg_plot not specified for ' + obsvar + ', so default used.') - vdiff = None - else: - vdiff = None - - # p_label needs to be added to the outname for this plot - outname = "{}.{}".format(outname, p_label) - splots.make_spatial_bias_exceedance( - pairdf_reg, - column_o=obsvar+'_reg', - label_o=p.obs, - column_m=modvar+'_reg', - label_m=p.model, - ylabel=use_ylabel, - vdiff=vdiff, - outname=outname, - domain_type=domain_type, - domain_name=domain_name, - fig_dict=fig_dict, - text_dict=text_dict, - debug=self.debug - ) - del (fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. - else: - print('Warning: spatial_bias_exceedance plot only works when regulatory=True.') - # JianHe: need updates to include regulatory option for overlay plots - elif plot_type.lower() == 'spatial_overlay': - if set_yaxis == True: - if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot', 'nlevels_plot')): - vmin = obs_plot_dict['vmin_plot'] - vmax = obs_plot_dict['vmax_plot'] - nlevels = obs_plot_dict['nlevels_plot'] - elif all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')): - vmin = obs_plot_dict['vmin_plot'] - vmax = obs_plot_dict['vmax_plot'] - nlevels = None - else: - print('Warning: vmin_plot and vmax_plot not specified for ' + obsvar + ', so default used.') - vmin = None - vmax = None - nlevels = None - else: - vmin = None - vmax = None - nlevels = None - #Check if z dim is larger than 1. If so select, the first level as all models read through - #MONETIO will be reordered such that the first level is the level nearest to the surface. - # Create model slice and select time window for spatial plots - try: - self.models[p.model].obj.sizes['z'] - if self.models[p.model].obj.sizes['z'] > 1: #Select only surface values. - vmodel = self.models[p.model].obj.isel(z=0).expand_dims('z',axis=1).loc[ - dict(time=slice(self.start_time, self.end_time))] - else: - vmodel = self.models[p.model].obj.loc[dict(time=slice(self.start_time, self.end_time))] - except KeyError as e: - raise Exception("MONET requires an altitude dimension named 'z'") from e - - # Determine proj to use for spatial plots - proj = splots.map_projection(self.models[p.model]) - # p_label needs to be added to the outname for this plot - outname = "{}.{}".format(outname, p_label) - # For just the spatial overlay plot, you do not use the model data from the pair file - # So get the variable name again since pairing one could be _new. - # JianHe: only make overplay plots for non-regulatory variables for now - if not cal_reg: - splots.make_spatial_overlay( - pairdf, - vmodel, - column_o=obsvar, - label_o=p.obs, - column_m=p.model_vars[index], - label_m=p.model, - ylabel=use_ylabel, - vmin=vmin, - vmax=vmax, - nlevels=nlevels, - proj=proj, - outname=outname, - domain_type=domain_type, - domain_name=domain_name, - fig_dict=fig_dict, - text_dict=text_dict, - debug=self.debug - ) - else: - print('Warning: Spatial overlay plots are not available yet for regulatory metrics.') - - del (fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) #Clear info for next plot. - - # Restore figure count warning - plt.rcParams["figure.max_open_warning"] = initial_max_fig - - def stats(self): - """Calculate statistics specified in the input yaml file. - - This routine loops over all the domains and model/obs pairs for all the variables - specified in the mapping dictionary listed in :attr:`paired`. - - Creates a csv file storing the statistics and optionally a figure - visualizing the table. - - Returns - ------- - None - """ - from .stats import proc_stats as proc_stats - from .plots import surfplots as splots - - # first get the stats dictionary from the yaml file - stat_dict = self.control_dict['stats'] - # Calculate general items - startdatename = str(datetime.datetime.strftime(self.start_time, '%Y-%m-%d_%H')) - enddatename = str(datetime.datetime.strftime(self.end_time, '%Y-%m-%d_%H')) - stat_list = stat_dict['stat_list'] - # Determine stat_grp full name - stat_fullname_ns = proc_stats.produce_stat_dict(stat_list=stat_list, spaces=False) - stat_fullname_s = proc_stats.produce_stat_dict(stat_list=stat_list, spaces=True) - pair_labels = stat_dict['data'] - - # Determine rounding - if 'round_output' in stat_dict.keys(): - round_output = stat_dict['round_output'] - else: - round_output = 3 - - # Then loop over all the observations - # first get the observational obs labels - pair1 = self.paired[list(self.paired.keys())[0]] - obs_vars = pair1.obs_vars - for obsvar in obs_vars: - # Read in some plotting specifications stored with observations. - if self.obs[pair1.obs].variable_dict is not None: - if obsvar in self.obs[pair1.obs].variable_dict.keys(): - obs_plot_dict = self.obs[pair1.obs].variable_dict[obsvar] - else: - obs_plot_dict = {} - else: - obs_plot_dict = {} - - # JianHe: Determine if calculate regulatory values - cal_reg = obs_plot_dict.get('regulatory', False) - - # Next loop over all of the domains. - # Loop also over the domain types. - domain_types = stat_dict['domain_type'] - domain_names = stat_dict['domain_name'] - for domain in range(len(domain_types)): - domain_type = domain_types[domain] - domain_name = domain_names[domain] - - # The tables and text files will be output at this step in loop. - # Create an empty pandas dataarray. - df_o_d = pd.DataFrame() - # Determine outname - if cal_reg: - outname = "{}.{}.{}.{}.{}.{}".format('stats', obsvar+'_reg', domain_type, domain_name, startdatename, enddatename) - else: - outname = "{}.{}.{}.{}.{}.{}".format('stats', obsvar, domain_type, domain_name, startdatename, enddatename) - - # Determine plotting kwargs - if 'output_table_kwargs' in stat_dict.keys(): - out_table_kwargs = stat_dict['output_table_kwargs'] - else: - out_table_kwargs = None - - # Add Stat ID and FullName to pandas dictionary. - df_o_d['Stat_ID'] = stat_list - df_o_d['Stat_FullName'] = stat_fullname_ns - - # Specify title for stat plots. - if cal_reg: - if 'ylabel_reg_plot' in obs_plot_dict.keys(): - title = obs_plot_dict['ylabel_reg_plot'] + ': ' + domain_type + ' ' + domain_name - else: - title = obsvar + '_reg: ' + domain_type + ' ' + domain_name - else: - if 'ylabel_plot' in obs_plot_dict.keys(): - title = obs_plot_dict['ylabel_plot'] + ': ' + domain_type + ' ' + domain_name - else: - title = obsvar + ': ' + domain_type + ' ' + domain_name - - # Finally Loop through each of the pairs - for p_label in pair_labels: - p = self.paired[p_label] - # Create an empty list to store the stat_var - p_stat_list = [] - - # Loop through each of the stats - for stat_grp in stat_list: - - # find the pair model label that matches the obs var - index = p.obs_vars.index(obsvar) - modvar = p.model_vars[index] - - # Adjust the modvar as done in pairing script, if the species name in obs and model are the same. - if obsvar == modvar: - modvar = modvar + '_new' - - # convert to dataframe - pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"]) - - # Select only the analysis time window. - pairdf_all = pairdf_all.loc[self.start_time : self.end_time] - - # Query selected points if applicable - if domain_type != 'all': - pairdf_all.query(domain_type + ' == ' + '"' + domain_name + '"', inplace=True) - - # Query with filter options - if 'data_proc' in stat_dict: - if 'filter_dict' in stat_dict['data_proc'] and 'filter_string' in stat_dict['data_proc']: - raise Exception("For statistics, only one of filter_dict and filter_string can be specified.") - elif 'filter_dict' in stat_dict['data_proc']: - filter_dict = stat_dict['data_proc']['filter_dict'] - for column in filter_dict.keys(): - filter_vals = filter_dict[column]['value'] - filter_op = filter_dict[column]['oper'] - if filter_op == 'isin': - pairdf_all.query(f'{column} == {filter_vals}', inplace=True) - elif filter_op == 'isnotin': - pairdf_all.query(f'{column} != {filter_vals}', inplace=True) - else: - pairdf_all.query(f'{column} {filter_op} {filter_vals}', inplace=True) - elif 'filter_string' in stat_dict['data_proc']: - pairdf_all.query(stat_dict['data_proc']['filter_string'], inplace=True) - - # Drop sites with greater than X percent NAN values - if 'data_proc' in stat_dict: - if 'rem_obs_by_nan_pct' in stat_dict['data_proc']: - grp_var = stat_dict['data_proc']['rem_obs_by_nan_pct']['group_var'] - pct_cutoff = stat_dict['data_proc']['rem_obs_by_nan_pct']['pct_cutoff'] - - if stat_dict['data_proc']['rem_obs_by_nan_pct']['times'] == 'hourly': - # Select only hours at the hour - hourly_pairdf_all = pairdf_all.reset_index().loc[pairdf_all.reset_index()['time'].dt.minute==0,:] - - # calculate total obs count, obs count with nan removed, and nan percent for each group - grp_fullcount = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) - grp_nonan_count = hourly_pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values - else: - # calculate total obs count, obs count with nan removed, and nan percent for each group - grp_fullcount = pairdf_all[[grp_var,obsvar]].groupby(grp_var).size().rename({0:obsvar}) - grp_nonan_count = pairdf_all[[grp_var,obsvar]].groupby(grp_var).count() # counts only non NA values - - grp_pct_nan = 100 - grp_nonan_count.div(grp_fullcount,axis=0)*100 - - # make list of sites meeting condition and select paired data by this by this - grp_select = grp_pct_nan.query(obsvar + ' < ' + str(pct_cutoff)).reset_index() - pairdf_all = pairdf_all.loc[pairdf_all[grp_var].isin(grp_select[grp_var].values)] - - # Drop NaNs for model and observations in all cases. - pairdf = pairdf_all.reset_index().dropna(subset=[modvar, obsvar]) - - # JianHe: do we need provide a warning if pairdf is empty (no valid obsdata) for specific subdomain? - if pairdf[obsvar].isnull().all() or pairdf.empty: - print('Warning: no valid obs found for '+domain_name) - p_stat_list.append('NaN') - continue - - if cal_reg: - # Process regulatory values - df2 = ( - pairdf.copy() - .groupby("siteid") - .resample('H', on='time_local') - .mean() - .reset_index() - ) - - if obsvar == 'PM2.5': - pairdf_reg = splots.make_24hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) - elif obsvar == 'OZONE': - pairdf_reg = splots.make_8hr_regulatory(df2,[obsvar,modvar]).rename(index=str,columns={obsvar+'_y':obsvar+'_reg',modvar+'_y':modvar+'_reg'}) - else: - print('Warning: no regulatory calculations found for ' + obsvar + '. Setting stat calculation to NaN.') - del df2 - p_stat_list.append('NaN') - continue - del df2 - if len(pairdf_reg[obsvar+'_reg']) == 0: - print('No valid data for '+obsvar+'_reg. Setting stat calculation to NaN.') - p_stat_list.append('NaN') - continue - else: - # Drop NaNs for model and observations in all cases. - pairdf2 = pairdf_reg.reset_index().dropna(subset=[modvar+'_reg', obsvar+'_reg']) - - # Create empty list for all dom - # Calculate statistic and append to list - if obsvar == 'WD': # Use separate calculations for WD - p_stat_list.append(proc_stats.calc(pairdf, stat=stat_grp, obsvar=obsvar, modvar=modvar, wind=True)) - else: - if cal_reg: - p_stat_list.append(proc_stats.calc(pairdf2, stat=stat_grp, obsvar=obsvar+'_reg', modvar=modvar+'_reg', wind=False)) - else: - p_stat_list.append(proc_stats.calc(pairdf, stat=stat_grp, obsvar=obsvar, modvar=modvar, wind=False)) - - # Save the stat to a dataarray - df_o_d[p_label] = p_stat_list - - if self.output_dir is not None: - outname = self.output_dir + '/' + outname # Extra / just in case. - - # Save the pandas dataframe to a txt file - # Save rounded output - df_o_d = df_o_d.round(round_output) - df_o_d.to_csv(path_or_buf=outname + '.csv', index=False) - - if stat_dict['output_table'] == True: - # Output as a table graphic too. - # Change to use the name with full spaces. - df_o_d['Stat_FullName'] = stat_fullname_s - - proc_stats.create_table(df_o_d.drop(columns=['Stat_ID']), - outname=outname, - title=title, - out_table_kwargs=out_table_kwargs, - debug=self.debug - ) From 018be13e332729620149d52bb69c505611b0530f Mon Sep 17 00:00:00 2001 From: David Fillmore Date: Mon, 25 Sep 2023 16:43:38 +0000 Subject: [PATCH 60/64] Moved xesmf import into setup_regridder, with exception handler. --- melodies_monet/util/regrid_util.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index 6de32f6f..1ccd2517 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -7,7 +7,6 @@ """ import os import xarray as xr -import xesmf as xe def setup_regridder(config, config_group='obs'): @@ -20,6 +19,12 @@ def setup_regridder(config, config_group='obs'): Returns regridder (dict of xe.Regridder): dictionary of regridder instances """ + try: + import xesmf as xe + except ImportError as e: + print('regrid_util: xesmf module not found') + raise + target_file = os.path.expandvars(config['analysis']['target_grid']) ds_target = xr.open_dataset(target_file) From e3e09a164dc37bf19a094b68d8d0015b24dcff22 Mon Sep 17 00:00:00 2001 From: David Fillmore Date: Mon, 25 Sep 2023 16:54:19 +0000 Subject: [PATCH 61/64] Reformatted docstrings in read_grid_util.py. --- melodies_monet/util/read_grid_util.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index a3c1fc69..08e8d281 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -13,8 +13,7 @@ def read_grid_models(config, date_str, model=None): Parameters config (dict): configuration dictionary date_str (str yyyy-mm-m_abbr-dd-ddd): date string - model: specific model to read - optional, if not specified all model in config['models'] will be read + model: specific model to read optional, if not specified all models in config['models'] will be read Returns model_datasets (dict of xr.Dataset): dictionary of model datasets @@ -47,11 +46,9 @@ def read_grid_obs(config, obs_vars, date_str, obs=None): Parameters config (dict): configuration dictionary - obs_vars (dict of dict): - nested dictionary keyed by obs set name and obs variable name + obs_vars (dict of dict): nested dictionary keyed by obs set name and obs variable name date_str (str yyyy-mm-m_abbr-dd-ddd): date string - obs: specific observation to read - optional, if not specified all obs in obs_vars will be read + obs: specific observation to read, optional, if not specified all obs in obs_vars will be read Returns obs_datasets (dict of xr.Dataset): dictionary of obs datasets From 2d0f8200037b1bfdbdf8090999fc23ae6f176e9d Mon Sep 17 00:00:00 2001 From: David Fillmore Date: Tue, 26 Sep 2023 21:06:05 +0000 Subject: [PATCH 62/64] Moved time_chunking_with_gridded_data option to top i open_model_files. --- melodies_monet/driver.py | 107 ++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index 5ce443d1..52086c30 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -371,54 +371,9 @@ def open_model_files(self, time_interval=None, control_dict=None): for obs_map in self.mapping: list_input_var = list_input_var + list(set(self.mapping[obs_map].keys()) - set(list_input_var)) #Only certain models need this option for speeding up i/o. - if 'cmaq' in self.model.lower(): - print('**** Reading CMAQ model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - if self.files_vert is not None: - self.mod_kwargs.update({'fname_vert' : self.files_vert}) - if self.files_surf is not None: - self.mod_kwargs.update({'fname_surf' : self.files_surf}) - if len(self.files) > 1: - self.mod_kwargs.update({'concatenate_forecasts' : True}) - self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'wrfchem' in self.model.lower(): - print('**** Reading WRF-Chem model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'rrfs' in self.model.lower(): - print('**** Reading RRFS-CMAQ model output...') - if self.files_pm25 is not None: - self.mod_kwargs.update({'fname_pm25' : self.files_pm25}) - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) - elif 'gsdchem' in self.model.lower(): - print('**** Reading GSD-Chem model output...') - if len(self.files) > 1: - self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs) - else: - self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs) - elif 'cesm_fv' in self.model.lower(): - print('**** Reading CESM FV model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs) - # CAM-chem-SE grid or MUSICAv0 - elif 'cesm_se' in self.model.lower(): - print('**** Reading CESM SE model output...') - self.mod_kwargs.update({'var_list' : list_input_var}) - if self.scrip_file.startswith("example:"): - from . import tutorial - example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:]) - self.scrip_file = tutorial.fetch_example(example_id) - self.mod_kwargs.update({'scrip_file' : self.scrip_file}) - self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs) - #self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs) - #self.obj.monet.scrip = self.obj_scrip - elif 'raqms' in self.model.lower(): - if len(self.files) > 1: - self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs) - else: - self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) - elif time_chunking_with_gridded_data: + + # move above + if time_chunking_with_gridded_data: date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j') print('model time chunk %s' % date_str) model_datasets, filenames = read_grid_util.read_grid_models( @@ -426,11 +381,59 @@ def open_model_files(self, time_interval=None, control_dict=None): print(filenames) self.obj = model_datasets[self.label] else: - print('**** Reading Unspecified model output. Take Caution...') - if len(self.files) > 1: - self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs) + if 'cmaq' in self.model.lower(): + print('**** Reading CMAQ model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + if self.files_vert is not None: + self.mod_kwargs.update({'fname_vert' : self.files_vert}) + if self.files_surf is not None: + self.mod_kwargs.update({'fname_surf' : self.files_surf}) + if len(self.files) > 1: + self.mod_kwargs.update({'concatenate_forecasts' : True}) + self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'wrfchem' in self.model.lower(): + print('**** Reading WRF-Chem model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'rrfs' in self.model.lower(): + print('**** Reading RRFS-CMAQ model output...') + if self.files_pm25 is not None: + self.mod_kwargs.update({'fname_pm25' : self.files_pm25}) + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs) + elif 'gsdchem' in self.model.lower(): + print('**** Reading GSD-Chem model output...') + if len(self.files) > 1: + self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs) + elif 'cesm_fv' in self.model.lower(): + print('**** Reading CESM FV model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs) + # CAM-chem-SE grid or MUSICAv0 + elif 'cesm_se' in self.model.lower(): + print('**** Reading CESM SE model output...') + self.mod_kwargs.update({'var_list' : list_input_var}) + if self.scrip_file.startswith("example:"): + from . import tutorial + example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:]) + self.scrip_file = tutorial.fetch_example(example_id) + self.mod_kwargs.update({'scrip_file' : self.scrip_file}) + self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs) + #self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs) + #self.obj.monet.scrip = self.obj_scrip + elif 'raqms' in self.model.lower(): + if len(self.files) > 1: + self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs) else: - self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs) + print('**** Reading Unspecified model output. Take Caution...') + if len(self.files) > 1: + self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs) + else: + self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs) self.mask_and_scale() def mask_and_scale(self): From e288e401b639cb723c4aaea04c74253e600cd6da Mon Sep 17 00:00:00 2001 From: dwfncar Date: Thu, 5 Oct 2023 18:36:58 -0600 Subject: [PATCH 63/64] Add copyright headers. --- melodies_monet/util/analysis_util.py | 4 ++++ melodies_monet/util/read_grid_util.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/melodies_monet/util/analysis_util.py b/melodies_monet/util/analysis_util.py index 8ce78a0d..7c888ae2 100644 --- a/melodies_monet/util/analysis_util.py +++ b/melodies_monet/util/analysis_util.py @@ -1,3 +1,7 @@ +# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration +# SPDX-License-Identifier: Apache-2.0 +# + import os import logging from glob import glob diff --git a/melodies_monet/util/read_grid_util.py b/melodies_monet/util/read_grid_util.py index 08e8d281..d54c7b8c 100644 --- a/melodies_monet/util/read_grid_util.py +++ b/melodies_monet/util/read_grid_util.py @@ -1,3 +1,6 @@ +# Copyright (C) 2022 National Center for Atmospheric Research and National Oceanic and Atmospheric Administration +# SPDX-License-Identifier: Apache-2.0 +# import os import logging import xarray as xr From 2d85449cb706265268f36902aebfd36648664656 Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 23 Oct 2023 16:42:21 -0600 Subject: [PATCH 64/64] Add pandas pin to install commands --- docs/develop/developers_guide.rst | 2 +- docs/tutorial/installation.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/develop/developers_guide.rst b/docs/develop/developers_guide.rst index 9f1e3366..f75a0637 100644 --- a/docs/develop/developers_guide.rst +++ b/docs/develop/developers_guide.rst @@ -74,7 +74,7 @@ these instructions: $ conda create --name melodies-monet python=3.9 $ conda activate melodies-monet - $ conda install -y -c conda-forge pyyaml monet monetio netcdf4 wrf-python typer rich pooch jupyterlab + $ conda install -y -c conda-forge pyyaml pandas=1 monet monetio netcdf4 wrf-python typer rich pooch jupyterlab (b) Clone [#clone]_ and link the latest development versions of MONET and MONETIO from GitHub to your conda environment:: diff --git a/docs/tutorial/installation.rst b/docs/tutorial/installation.rst index 35b7f3dd..2c9c7ccf 100644 --- a/docs/tutorial/installation.rst +++ b/docs/tutorial/installation.rst @@ -34,7 +34,7 @@ First create and activate a conda environment:: Add dependencies from conda-forge:: - $ conda install -y -c conda-forge pyyaml monet monetio netcdf4 wrf-python typer rich pooch + $ conda install -y -c conda-forge pyyaml pandas=1 monet monetio netcdf4 wrf-python typer rich pooch Now, install the stable branch of MELODIES MONET to the environment::