From 9d8db0f4e8fbf1c8e1f85153fadb2369e8ac48f2 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 17 Dec 2024 13:04:04 +0000 Subject: [PATCH 1/9] hack to make maps with collocated data objects --- pyaerocom/aeroval/modelmaps_engine.py | 45 +++++++++++++++++++++------ 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index 2028622b8..ee89e526c 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -1,9 +1,10 @@ +import glob import logging import os import xarray as xr -from pyaerocom import GriddedData, TsType, const, __version__ +from pyaerocom import GriddedData, TsType, const, __version__, ColocatedData from pyaerocom.aeroval._processing_base import DataImporter, ProcessingEngine from pyaerocom.aeroval.modelmaps_helpers import ( calc_contour_json, @@ -236,15 +237,35 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag var (str): variable name """ - try: - data = self.read_gridded_obsdata(model_name, var) - except EntryNotAvailable: + if self.cfg.processing_opts.only_json: # we have colocated data try: - data = self._read_model_data(model_name, var) - except Exception as e: - raise ModelVarNotAvailable( - f"Cannot read data for model {model_name} (variable {var}): {e}" + preprocessed_coldata_dir = self.cfg.model_cfg.get_entry(model_name).model_data_dir + mask = f"{preprocessed_coldata_dir}/*.nc" + except KeyError: + preprocessed_coldata_dir = self.cfg.obs_cfg.get_entry(model_name).coldata_dir + mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc" + file_to_convert = glob.glob(mask) + if len(file_to_convert) != 1: + raise ValueError( + "Can only handle one colocated data object for plotting for a given (model, obs, var). " + "Note that when providing a colocated data object, it must be provided via the model_data_dir arugment in a ModelEntry instance. " + "It must also be provided via the coldata_dir argument in the ObsEntry instance. " + "Additionally, note that the coldatadir does not contain the model_name at the end of the directory, " + "whereas the coldata_dir does not." ) + coldata = ColocatedData(data=file_to_convert[0]) + data = coldata.data.sel(data_source=model_name) + # data = GriddedData(data.to_iris()) + else: + try: + data = self.read_gridded_obsdata(model_name, var) + except EntryNotAvailable: + try: + data = self._read_model_data(model_name, var) + except Exception as e: + raise ModelVarNotAvailable( + f"Cannot read data for model {model_name} (variable {var}): {e}" + ) var_ranges_defaults = self.cfg.var_scale_colmap @@ -255,7 +276,8 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag cmapinfo = var_ranges_defaults["default"] varinfo = VarinfoWeb(var, cmap=cmapinfo["colmap"], cmap_bins=cmapinfo["scale"]) - data = self._check_dimensions(data) + if not self.cfg.processing_opts.only_json: + data = self._check_dimensions(data) outdir = self.cfg.path_manager.get_json_output_dirs()["contour/overlay"] @@ -266,7 +288,10 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag if tst < freq: raise TemporalResolutionError(f"need {freq} or higher, got{tst}") elif tst > freq: - data = data.resample_time(str(freq)) + if isinstance(data, GriddedData): + data = data.resample_time(str(freq)) + elif isinstance(data, xr.DataArray): + data = data.resample(time=str(freq)[0].capitalize()).mean() data.check_unit() From fa075e9ee0c11ad2266f762288e2166fc63c9c29 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Mon, 6 Jan 2025 13:34:35 +0000 Subject: [PATCH 2/9] can make plots --- pyaerocom/aeroval/modelmaps_engine.py | 23 ++++++++++++++++++----- pyaerocom/aeroval/modelmaps_helpers.py | 12 +++++++++++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index ee89e526c..a56be2778 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -255,6 +255,9 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag ) coldata = ColocatedData(data=file_to_convert[0]) data = coldata.data.sel(data_source=model_name) + data = data.drop_vars("data_source") + data = data.transpose("time", "latitude", "longitude") + data = data.sortby(["latitude", "longitude"]) # data = GriddedData(data.to_iris()) else: try: @@ -293,10 +296,10 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag elif isinstance(data, xr.DataArray): data = data.resample(time=str(freq)[0].capitalize()).mean() - data.check_unit() - tst = _jsdate_list(data) - data = data.to_xarray().load() + if isinstance(data, GriddedData): + data.check_unit() + data = data.to_xarray().load() files = [] if self.cfg.processing_opts.only_model_maps: @@ -505,7 +508,12 @@ def _check_ts_for_only_model_maps( ) timeseries[model_name].setdefault("obs_var", var) - timeseries[model_name].setdefault("obs_unit", data.units) + if isinstance(data, GriddedData): + timeseries[model_name].setdefault("obs_unit", data.units) + elif isinstance(data, xr.DataArray): + timeseries[name].setdefault("obs_unit", data.var_units[1]) + else: + raise ValueError("Can not determine obs units") timeseries[model_name].setdefault("obs_name", name) timeseries[model_name].setdefault( "var_name_web", self.cfg.obs_cfg.get_web_interface_name(name) @@ -553,7 +561,12 @@ def _check_ts_for_only_model_maps( timeseries[name].setdefault("station_name", "ALL") timeseries[name].setdefault("pyaerocom_version", __version__) timeseries[name].setdefault("mod_var", var) - timeseries[name].setdefault("mod_unit", data.units) + if isinstance(data, GriddedData): + timeseries[name].setdefault("mod_unit", data.units) + elif isinstance(data, xr.DataArray): + timeseries[name].setdefault("mod_unit", data.var_units[0]) + else: + raise ValueError("Can not determine model units") timeseries[name].setdefault("model_name", name) timeseries[name].setdefault("mod_freq_src", maps_freq) diff --git a/pyaerocom/aeroval/modelmaps_helpers.py b/pyaerocom/aeroval/modelmaps_helpers.py index 35eb4bf32..6d615264d 100644 --- a/pyaerocom/aeroval/modelmaps_helpers.py +++ b/pyaerocom/aeroval/modelmaps_helpers.py @@ -7,12 +7,14 @@ from seaborn import color_palette import io import xarray +import pandas as pd try: from geojsoncontour import contourf_to_geojson except ModuleNotFoundError: contourf_to_geojson = None +from pyaerocom import GriddedData from pyaerocom.aeroval.coldatatojson_helpers import _get_jsdate from pyaerocom.helpers import make_datetime_index from pyaerocom.tstype import TsType @@ -25,7 +27,15 @@ def _jsdate_list(data): tst = TsType(data.ts_type) - idx = make_datetime_index(data.start, data.stop, tst.to_pandas_freq()) + if isinstance(data, GriddedData): + start_yr = data.start + stop_yr = data.stop + elif isinstance(data, xarray.DataArray): + start_yr = pd.Timestamp(data.time.min().values).year + stop_yr = pd.Timestamp(data.time.max().values).year + else: + raise ValueError("data not correct type") + idx = make_datetime_index(start_yr, stop_yr, tst.to_pandas_freq()) return _get_jsdate(idx.values).tolist() From 98cc60fa142799dae351861b6c867e3f1f2c2679 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 09:14:08 +0000 Subject: [PATCH 3/9] got a point where augustin needs to change some things on web side --- pyaerocom/aeroval/modelmaps_engine.py | 16 ++++++++++++++-- pyaerocom/aeroval/modelmaps_helpers.py | 12 ++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index a56be2778..61b8758a7 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -12,6 +12,7 @@ _jsdate_list, CONTOUR, OVERLAY, + search_directory_recursively_for_netcdf_filenames_containing_strings, ) from pyaerocom.aeroval.json_utils import round_floats from pyaerocom.colocation.colocator import Colocator @@ -241,10 +242,21 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag try: preprocessed_coldata_dir = self.cfg.model_cfg.get_entry(model_name).model_data_dir mask = f"{preprocessed_coldata_dir}/*.nc" + file_to_convert = glob.glob(mask) except KeyError: preprocessed_coldata_dir = self.cfg.obs_cfg.get_entry(model_name).coldata_dir mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc" - file_to_convert = glob.glob(mask) + matching_files = ( + search_directory_recursively_for_netcdf_filenames_containing_strings( + directory=preprocessed_coldata_dir, strings=[model_name, var] + ) + ) + + if len(matching_files) > 1: + logger.info( + f"Found more than one colocated data file for {model_name=} {var=}. Using the first one found - this theoretically should be consistent across files." + ) + file_to_convert = matching_files[:1] if len(file_to_convert) != 1: raise ValueError( "Can only handle one colocated data object for plotting for a given (model, obs, var). " @@ -511,7 +523,7 @@ def _check_ts_for_only_model_maps( if isinstance(data, GriddedData): timeseries[model_name].setdefault("obs_unit", data.units) elif isinstance(data, xr.DataArray): - timeseries[name].setdefault("obs_unit", data.var_units[1]) + timeseries[model_name].setdefault("obs_unit", data.var_units[1]) else: raise ValueError("Can not determine obs units") timeseries[model_name].setdefault("obs_name", name) diff --git a/pyaerocom/aeroval/modelmaps_helpers.py b/pyaerocom/aeroval/modelmaps_helpers.py index 6d615264d..96f325b37 100644 --- a/pyaerocom/aeroval/modelmaps_helpers.py +++ b/pyaerocom/aeroval/modelmaps_helpers.py @@ -8,6 +8,8 @@ import io import xarray import pandas as pd +import glob +import os try: from geojsoncontour import contourf_to_geojson @@ -153,3 +155,13 @@ def plot_overlay_pixel_maps( plt.close("all") return image + + +def search_directory_recursively_for_netcdf_filenames_containing_strings(directory, strings): + matching_files = [] + # Use glob to find all NetCDF files recursively + for nc_file in glob.iglob(os.path.join(directory, "**", "*.nc"), recursive=True): + # Check if all specified strings are in the filename + if all(s in os.path.basename(nc_file) for s in strings): + matching_files.append(nc_file) + return matching_files From 73f878d3a2bdd0a260bbbc3854dcf61e704c2e1a Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 10:19:51 +0000 Subject: [PATCH 4/9] rm dead code and checks against None --- pyaerocom/aeroval/modelmaps_engine.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index 61b8758a7..a060ccafd 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -270,7 +270,6 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag data = data.drop_vars("data_source") data = data.transpose("time", "latitude", "longitude") data = data.sortby(["latitude", "longitude"]) - # data = GriddedData(data.to_iris()) else: try: data = self.read_gridded_obsdata(model_name, var) @@ -559,8 +558,8 @@ def _check_ts_for_only_model_maps( if ( name in timeseries - and timeseries[name].get(maps_freq + "_mod", False) is not None - and timeseries[name].get(maps_freq + "_obs", False) is not None + and timeseries[name].get(maps_freq + "_mod", False) + and timeseries[name].get(maps_freq + "_obs", False) ): continue From 6b200c6c02e2725d855236cdb0baaf3e19d65665 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 10:45:09 +0000 Subject: [PATCH 5/9] use right_menu in ModelMapsSetup --- pyaerocom/aeroval/setup_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index b266dd2ad..936954fe6 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -129,7 +129,7 @@ class ModelMapsSetup(BaseModel): maps_freq: Literal["hourly", "daily", "monthly", "yearly", "coarsest"] = "coarsest" plot_types: dict[str, str | set[str]] | set[str] = {CONTOUR} boundaries: BoundingBox = BoundingBox(west=-180, east=180, north=90, south=-90) - map_observations_only_in_right_menu: bool = False + right_menu: tuple[str, ...] | None = None overlay_save_format: Literal["webp", "png"] = "webp" @field_validator("plot_types") From b68ebfcae29385cb099fad22fbedd55e38a2614b Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 11:52:16 +0000 Subject: [PATCH 6/9] glob.escape preprocessed_coldata_dir --- pyaerocom/aeroval/modelmaps_engine.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index a060ccafd..f3952c00d 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -240,11 +240,15 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag if self.cfg.processing_opts.only_json: # we have colocated data try: - preprocessed_coldata_dir = self.cfg.model_cfg.get_entry(model_name).model_data_dir + preprocessed_coldata_dir = glob.escape( + self.cfg.model_cfg.get_entry(model_name).model_data_dir + ) mask = f"{preprocessed_coldata_dir}/*.nc" file_to_convert = glob.glob(mask) except KeyError: - preprocessed_coldata_dir = self.cfg.obs_cfg.get_entry(model_name).coldata_dir + preprocessed_coldata_dir = glob.escape( + self.cfg.obs_cfg.get_entry(model_name).coldata_dir + ) mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc" matching_files = ( search_directory_recursively_for_netcdf_filenames_containing_strings( From bff237948b1b857c16659c17a2c969ab432e2a57 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 12:19:48 +0000 Subject: [PATCH 7/9] find_netcdf_files and _process_only_json --- pyaerocom/aeroval/modelmaps_engine.py | 77 ++++++++++++++------------ pyaerocom/aeroval/modelmaps_helpers.py | 4 +- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index f3952c00d..b48e8eb43 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -12,7 +12,7 @@ _jsdate_list, CONTOUR, OVERLAY, - search_directory_recursively_for_netcdf_filenames_containing_strings, + find_netcdf_files, ) from pyaerocom.aeroval.json_utils import round_floats from pyaerocom.colocation.colocator import Colocator @@ -239,41 +239,7 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag """ if self.cfg.processing_opts.only_json: # we have colocated data - try: - preprocessed_coldata_dir = glob.escape( - self.cfg.model_cfg.get_entry(model_name).model_data_dir - ) - mask = f"{preprocessed_coldata_dir}/*.nc" - file_to_convert = glob.glob(mask) - except KeyError: - preprocessed_coldata_dir = glob.escape( - self.cfg.obs_cfg.get_entry(model_name).coldata_dir - ) - mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc" - matching_files = ( - search_directory_recursively_for_netcdf_filenames_containing_strings( - directory=preprocessed_coldata_dir, strings=[model_name, var] - ) - ) - - if len(matching_files) > 1: - logger.info( - f"Found more than one colocated data file for {model_name=} {var=}. Using the first one found - this theoretically should be consistent across files." - ) - file_to_convert = matching_files[:1] - if len(file_to_convert) != 1: - raise ValueError( - "Can only handle one colocated data object for plotting for a given (model, obs, var). " - "Note that when providing a colocated data object, it must be provided via the model_data_dir arugment in a ModelEntry instance. " - "It must also be provided via the coldata_dir argument in the ObsEntry instance. " - "Additionally, note that the coldatadir does not contain the model_name at the end of the directory, " - "whereas the coldata_dir does not." - ) - coldata = ColocatedData(data=file_to_convert[0]) - data = coldata.data.sel(data_source=model_name) - data = data.drop_vars("data_source") - data = data.transpose("time", "latitude", "longitude") - data = data.sortby(["latitude", "longitude"]) + data = self._process_only_json(model_name, var) else: try: data = self.read_gridded_obsdata(model_name, var) @@ -598,3 +564,42 @@ def _check_ts_for_only_model_maps( raise ValueError( f"{name=} not is not in either {self.cfg.obs_cfg.keylist()=} nor {self.cfg.model_cfg.keylist()=}" ) + + def _process_only_json(self, model_name, var): + """Process data from ColocatedData for overlay map for if only_json = True.""" + try: + preprocessed_coldata_dir = glob.escape( + self.cfg.model_cfg.get_entry(model_name).model_data_dir + ) + mask = f"{preprocessed_coldata_dir}/*.nc" + file_to_convert = glob.glob(mask) + except KeyError: + preprocessed_coldata_dir = glob.escape( + self.cfg.obs_cfg.get_entry(model_name).coldata_dir + ) + mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc" + matching_files = find_netcdf_files( + directory=preprocessed_coldata_dir, strings=[model_name, var] + ) + + if len(matching_files) > 1: + logger.info( + f"Found more than one colocated data file for {model_name=} {var=}. Using the first one found - this theoretically should be consistent across files." + ) + file_to_convert = matching_files[:1] + + if len(file_to_convert) != 1: + raise ValueError( + "Can only handle one colocated data object for plotting for a given (model, obs, var). " + "Note that when providing a colocated data object, it must be provided via the model_data_dir argument in a ModelEntry instance. " + "It must also be provided via the coldata_dir argument in the ObsEntry instance. " + "Additionally, note that the coldatadir does not contain the model_name at the end of the directory, " + "whereas the coldata_dir does not." + ) + + coldata = ColocatedData(data=file_to_convert[0]) + data = coldata.data.sel(data_source=model_name) + data = data.drop_vars("data_source") + data = data.transpose("time", "latitude", "longitude") + data = data.sortby(["latitude", "longitude"]) + return data diff --git a/pyaerocom/aeroval/modelmaps_helpers.py b/pyaerocom/aeroval/modelmaps_helpers.py index 96f325b37..309556d55 100644 --- a/pyaerocom/aeroval/modelmaps_helpers.py +++ b/pyaerocom/aeroval/modelmaps_helpers.py @@ -157,10 +157,10 @@ def plot_overlay_pixel_maps( return image -def search_directory_recursively_for_netcdf_filenames_containing_strings(directory, strings): +def find_netcdf_files(directory, strings): matching_files = [] # Use glob to find all NetCDF files recursively - for nc_file in glob.iglob(os.path.join(directory, "**", "*.nc"), recursive=True): + for nc_file in glob.escape(glob.iglob(os.path.join(directory, "**", "*.nc"), recursive=True)): # Check if all specified strings are in the filename if all(s in os.path.basename(nc_file) for s in strings): matching_files.append(nc_file) From 216c282d622abb87d5124056203c06de6707f556 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 12:38:42 +0000 Subject: [PATCH 8/9] fix glob.escape in find_netcdf_files --- pyaerocom/aeroval/modelmaps_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/aeroval/modelmaps_helpers.py b/pyaerocom/aeroval/modelmaps_helpers.py index 309556d55..ac0744277 100644 --- a/pyaerocom/aeroval/modelmaps_helpers.py +++ b/pyaerocom/aeroval/modelmaps_helpers.py @@ -160,7 +160,7 @@ def plot_overlay_pixel_maps( def find_netcdf_files(directory, strings): matching_files = [] # Use glob to find all NetCDF files recursively - for nc_file in glob.escape(glob.iglob(os.path.join(directory, "**", "*.nc"), recursive=True)): + for nc_file in glob.iglob(os.path.join(glob.escape(directory), "**", "*.nc"), recursive=True): # Check if all specified strings are in the filename if all(s in os.path.basename(nc_file) for s in strings): matching_files.append(nc_file) From 2291cb19fa7c43ef6c92de1740da4f0d4f538a56 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Tue, 7 Jan 2025 13:13:17 +0000 Subject: [PATCH 9/9] # pragma: no cover --- pyaerocom/aeroval/modelmaps_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/aeroval/modelmaps_engine.py b/pyaerocom/aeroval/modelmaps_engine.py index b48e8eb43..35485d89d 100644 --- a/pyaerocom/aeroval/modelmaps_engine.py +++ b/pyaerocom/aeroval/modelmaps_engine.py @@ -565,7 +565,7 @@ def _check_ts_for_only_model_maps( f"{name=} not is not in either {self.cfg.obs_cfg.keylist()=} nor {self.cfg.model_cfg.keylist()=}" ) - def _process_only_json(self, model_name, var): + def _process_only_json(self, model_name, var): # pragma: no cover """Process data from ColocatedData for overlay map for if only_json = True.""" try: preprocessed_coldata_dir = glob.escape(