Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model maps with ColocatedData objects #1470

Merged
merged 9 commits into from
Jan 7, 2025
94 changes: 76 additions & 18 deletions pyaerocom/aeroval/modelmaps_engine.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import glob
import logging
import os
import xarray as xr


from pyaerocom import GriddedData, TsType, const, __version__
from pyaerocom import GriddedData, TsType, const, __version__, ColocatedData
from pyaerocom.aeroval._processing_base import DataImporter, ProcessingEngine
from pyaerocom.aeroval.modelmaps_helpers import (
calc_contour_json,
plot_overlay_pixel_maps,
_jsdate_list,
CONTOUR,
OVERLAY,
find_netcdf_files,
)
from pyaerocom.aeroval.json_utils import round_floats
from pyaerocom.colocation.colocator import Colocator
Expand Down Expand Up @@ -236,15 +238,18 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag
var (str): variable name
"""

try:
data = self.read_gridded_obsdata(model_name, var)
except EntryNotAvailable:
if self.cfg.processing_opts.only_json: # we have colocated data
data = self._process_only_json(model_name, var)
else:
try:
data = self._read_model_data(model_name, var)
except Exception as e:
raise ModelVarNotAvailable(
f"Cannot read data for model {model_name} (variable {var}): {e}"
)
data = self.read_gridded_obsdata(model_name, var)
except EntryNotAvailable:
try:
data = self._read_model_data(model_name, var)
except Exception as e:
raise ModelVarNotAvailable(
f"Cannot read data for model {model_name} (variable {var}): {e}"
)

var_ranges_defaults = self.cfg.var_scale_colmap

Expand All @@ -255,7 +260,8 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag
cmapinfo = var_ranges_defaults["default"]
varinfo = VarinfoWeb(var, cmap=cmapinfo["colmap"], cmap_bins=cmapinfo["scale"])

data = self._check_dimensions(data)
if not self.cfg.processing_opts.only_json:
data = self._check_dimensions(data)

outdir = self.cfg.path_manager.get_json_output_dirs()["contour/overlay"]
thorbjoernl marked this conversation as resolved.
Show resolved Hide resolved

Expand All @@ -266,12 +272,15 @@ def _process_overlay_map_var(self, model_name, var, reanalyse_existing): # prag
if tst < freq:
raise TemporalResolutionError(f"need {freq} or higher, got{tst}")
elif tst > freq:
data = data.resample_time(str(freq))

data.check_unit()
if isinstance(data, GriddedData):
data = data.resample_time(str(freq))
elif isinstance(data, xr.DataArray):
data = data.resample(time=str(freq)[0].capitalize()).mean()

tst = _jsdate_list(data)
data = data.to_xarray().load()
if isinstance(data, GriddedData):
data.check_unit()
data = data.to_xarray().load()
files = []

if self.cfg.processing_opts.only_model_maps:
Expand Down Expand Up @@ -480,7 +489,12 @@ def _check_ts_for_only_model_maps(
)

timeseries[model_name].setdefault("obs_var", var)
timeseries[model_name].setdefault("obs_unit", data.units)
if isinstance(data, GriddedData):
timeseries[model_name].setdefault("obs_unit", data.units)
elif isinstance(data, xr.DataArray):
timeseries[model_name].setdefault("obs_unit", data.var_units[1])
else:
raise ValueError("Can not determine obs units")
timeseries[model_name].setdefault("obs_name", name)
timeseries[model_name].setdefault(
"var_name_web", self.cfg.obs_cfg.get_web_interface_name(name)
Expand Down Expand Up @@ -514,8 +528,8 @@ def _check_ts_for_only_model_maps(

if (
name in timeseries
and timeseries[name].get(maps_freq + "_mod", False) is not None
and timeseries[name].get(maps_freq + "_obs", False) is not None
and timeseries[name].get(maps_freq + "_mod", False)
and timeseries[name].get(maps_freq + "_obs", False)
):
continue

Expand All @@ -528,7 +542,12 @@ def _check_ts_for_only_model_maps(
timeseries[name].setdefault("station_name", "ALL")
timeseries[name].setdefault("pyaerocom_version", __version__)
timeseries[name].setdefault("mod_var", var)
timeseries[name].setdefault("mod_unit", data.units)
if isinstance(data, GriddedData):
timeseries[name].setdefault("mod_unit", data.units)
elif isinstance(data, xr.DataArray):
timeseries[name].setdefault("mod_unit", data.var_units[0])
else:
raise ValueError("Can not determine model units")
timeseries[name].setdefault("model_name", name)
timeseries[name].setdefault("mod_freq_src", maps_freq)

Expand All @@ -545,3 +564,42 @@ def _check_ts_for_only_model_maps(
raise ValueError(
f"{name=} not is not in either {self.cfg.obs_cfg.keylist()=} nor {self.cfg.model_cfg.keylist()=}"
)

def _process_only_json(self, model_name, var): # pragma: no cover
"""Process data from ColocatedData for overlay map for if only_json = True."""
try:
preprocessed_coldata_dir = glob.escape(
self.cfg.model_cfg.get_entry(model_name).model_data_dir
)
mask = f"{preprocessed_coldata_dir}/*.nc"
file_to_convert = glob.glob(mask)
except KeyError:
preprocessed_coldata_dir = glob.escape(
self.cfg.obs_cfg.get_entry(model_name).coldata_dir
)
mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc"
matching_files = find_netcdf_files(
directory=preprocessed_coldata_dir, strings=[model_name, var]
)

if len(matching_files) > 1:
logger.info(
f"Found more than one colocated data file for {model_name=} {var=}. Using the first one found - this theoretically should be consistent across files."
)
file_to_convert = matching_files[:1]

if len(file_to_convert) != 1:
raise ValueError(
"Can only handle one colocated data object for plotting for a given (model, obs, var). "
"Note that when providing a colocated data object, it must be provided via the model_data_dir argument in a ModelEntry instance. "
"It must also be provided via the coldata_dir argument in the ObsEntry instance. "
"Additionally, note that the coldatadir does not contain the model_name at the end of the directory, "
"whereas the coldata_dir does not."
)

coldata = ColocatedData(data=file_to_convert[0])
data = coldata.data.sel(data_source=model_name)
data = data.drop_vars("data_source")
data = data.transpose("time", "latitude", "longitude")
data = data.sortby(["latitude", "longitude"])
return data
24 changes: 23 additions & 1 deletion pyaerocom/aeroval/modelmaps_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
from seaborn import color_palette
import io
import xarray
import pandas as pd
import glob
import os

try:
from geojsoncontour import contourf_to_geojson
except ModuleNotFoundError:
contourf_to_geojson = None

from pyaerocom import GriddedData
from pyaerocom.aeroval.coldatatojson_helpers import _get_jsdate
from pyaerocom.helpers import make_datetime_index
from pyaerocom.tstype import TsType
Expand All @@ -25,7 +29,15 @@

def _jsdate_list(data):
tst = TsType(data.ts_type)
idx = make_datetime_index(data.start, data.stop, tst.to_pandas_freq())
if isinstance(data, GriddedData):
start_yr = data.start
stop_yr = data.stop
elif isinstance(data, xarray.DataArray):
start_yr = pd.Timestamp(data.time.min().values).year
stop_yr = pd.Timestamp(data.time.max().values).year

Check warning on line 37 in pyaerocom/aeroval/modelmaps_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/modelmaps_helpers.py#L35-L37

Added lines #L35 - L37 were not covered by tests
else:
raise ValueError("data not correct type")

Check warning on line 39 in pyaerocom/aeroval/modelmaps_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/modelmaps_helpers.py#L39

Added line #L39 was not covered by tests
idx = make_datetime_index(start_yr, stop_yr, tst.to_pandas_freq())
return _get_jsdate(idx.values).tolist()


Expand Down Expand Up @@ -143,3 +155,13 @@
plt.close("all")

return image


def find_netcdf_files(directory, strings):
matching_files = []

Check warning on line 161 in pyaerocom/aeroval/modelmaps_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/modelmaps_helpers.py#L161

Added line #L161 was not covered by tests
# Use glob to find all NetCDF files recursively
for nc_file in glob.iglob(os.path.join(glob.escape(directory), "**", "*.nc"), recursive=True):

Check warning on line 163 in pyaerocom/aeroval/modelmaps_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/modelmaps_helpers.py#L163

Added line #L163 was not covered by tests
# Check if all specified strings are in the filename
if all(s in os.path.basename(nc_file) for s in strings):
matching_files.append(nc_file)
return matching_files

Check warning on line 167 in pyaerocom/aeroval/modelmaps_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/modelmaps_helpers.py#L165-L167

Added lines #L165 - L167 were not covered by tests
2 changes: 1 addition & 1 deletion pyaerocom/aeroval/setup_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class ModelMapsSetup(BaseModel):
maps_freq: Literal["hourly", "daily", "monthly", "yearly", "coarsest"] = "coarsest"
plot_types: dict[str, str | set[str]] | set[str] = {CONTOUR}
boundaries: BoundingBox = BoundingBox(west=-180, east=180, north=90, south=-90)
map_observations_only_in_right_menu: bool = False
right_menu: tuple[str, ...] | None = None
overlay_save_format: Literal["webp", "png"] = "webp"

@field_validator("plot_types")
Expand Down
Loading