Not able to read the GEFS-Aerosol grib2 data #160

Open
bbakernoaa opened this issue Nov 7, 2024 · 0 comments · May be fixed by #161
bbakernoaa commented Nov 7, 2024

I have been trying to use cfgrib and add its localConcepts to read the NOAA GEFS-Aerosol data, though I'm still struggling a little with that (in particular with the total column mass density of aerosols). A sample file can be found here. How GRIB handles aerosol data is fairly tricky: it requires many additional tables, and many variables share the same "shortName". I would greatly appreciate any help with this, as it would really help to have a tool that properly handles the air composition data put out by the NWS.

In [16]: xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFixedSurface':10})
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[16], line 1
----> 1 xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFixedSurface':10})

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/api.py:566, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    554 decoders = _resolve_decoders_kwargs(
    555     decode_cf,
    556     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    562     decode_coords=decode_coords,
    563 )
    565 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 566 backend_ds = backend.open_dataset(
    567     filename_or_obj,
    568     drop_variables=drop_variables,
    569     **decoders,
    570     **kwargs,
    571 )
    572 ds = _dataset_from_backend_dataset(
    573     backend_ds,
    574     filename_or_obj,
   (...)
    584     **kwargs,
    585 )
    586 return ds

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:82, in GribBackendEntrypoint.open_dataset(self, filename, drop_variables, filters)
     76     file_index = pd.DataFrame(f._index)
     78 # parse grib2io _index to dataframe and acquire non-geo possible dims
     79 # (scalar coord when not dim due to squeeze) parse_grib_index applies
     80 # filters to index and expands metadata based on product definition
     81 # template number
---> 82 file_index, non_geo_dims = parse_grib_index(file_index, filters)
     84 # Divide up records by variable
     85 frames, cube, extra_geo = make_variables(file_index, filename, non_geo_dims)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:383, in parse_grib_index(index, filters)
    381 for k, v in filters.items():
    382     if k not in index.columns:
--> 383         kwarg = {k:index.msg.apply(lambda msg: getattr(msg, k))}
    384         index = index.assign(**kwarg)
    385     # adopt parts of xarray's sel logic  so that filters behave similarly
    386     # allowed to filter to nothing to make empty dataset

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/series.py:4924, in Series.apply(self, func, convert_dtype, args, by_row, **kwargs)
   4789 def apply(
   4790     self,
   4791     func: AggFuncType,
   (...)
   4796     **kwargs,
   4797 ) -> DataFrame | Series:
   4798     """
   4799     Invoke function on values of Series.
   4800 
   (...)
   4915     dtype: float64
   4916     """
   4917     return SeriesApply(
   4918         self,
   4919         func,
   4920         convert_dtype=convert_dtype,
   4921         by_row=by_row,
   4922         args=args,
   4923         kwargs=kwargs,
-> 4924     ).apply()

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/apply.py:1427, in SeriesApply.apply(self)
   1424     return self.apply_compat()
   1426 # self.func is Callable
-> 1427 return self.apply_standard()

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/apply.py:1507, in SeriesApply.apply_standard(self)
   1501 # row-wise access
   1502 # apply doesn't have a `na_action` keyword and for backward compat reasons
   1503 # we need to give `na_action="ignore"` for categorical data.
   1504 # TODO: remove the `na_action="ignore"` when that default has been changed in
   1505 #  Categorical (GH51645).
   1506 action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
-> 1507 mapped = obj._map_values(
   1508     mapper=curried, na_action=action, convert=self.convert_dtype
   1509 )
   1511 if len(mapped) and isinstance(mapped[0], ABCSeries):
   1512     # GH#43986 Need to do list(mapped) in order to get treated as nested
   1513     #  See also GH#25959 regarding EA support
   1514     return obj._constructor_expanddim(list(mapped), index=obj.index)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/base.py:921, in IndexOpsMixin._map_values(self, mapper, na_action, convert)
    918 if isinstance(arr, ExtensionArray):
    919     return arr.map(mapper, na_action=na_action)
--> 921 return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/algorithms.py:1743, in map_array(arr, mapper, na_action, convert)
   1741 values = arr.astype(object, copy=False)
   1742 if na_action is None:
-> 1743     return lib.map_infer(values, mapper, convert=convert)
   1744 else:
   1745     return lib.map_infer_mask(
   1746         values, mapper, mask=isna(values).view(np.uint8), convert=convert
   1747     )

File lib.pyx:2972, in pandas._libs.lib.map_infer()

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:383, in parse_grib_index.<locals>.<lambda>(msg)
    381 for k, v in filters.items():
    382     if k not in index.columns:
--> 383         kwarg = {k:index.msg.apply(lambda msg: getattr(msg, k))}
    384         index = index.assign(**kwarg)
    385     # adopt parts of xarray's sel logic  so that filters behave similarly
    386     # allowed to filter to nothing to make empty dataset

AttributeError: 'Msg' object has no attribute 'typeOfFixedSurface'
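
The filter key name was the problem here. A minimal way to check which level-related attributes a message actually exposes (a sketch, assuming grib2io.open can be used as a context manager and iterated to yield message objects) is:

import grib2io

# Inspect the first message for attribute names related to the fixed surface.
# 'typeOfFirstFixedSurface' shows up in this list; 'typeOfFixedSurface' does not.
with grib2io.open('gefs.chem.t00z.a2d_0p25.f000.grib2') as f:
    msg = next(iter(f))
    print([name for name in dir(msg) if 'FixedSurface' in name])

Retrying with typeOfFirstFixedSurface (which does exist on the messages) then fails further along: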

In [17]: xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFirstFixedSurface':10})
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[17], line 1
----> 1 xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFirstFixedSurface':10})

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/api.py:566, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    554 decoders = _resolve_decoders_kwargs(
    555     decode_cf,
    556     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    562     decode_coords=decode_coords,
    563 )
    565 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 566 backend_ds = backend.open_dataset(
    567     filename_or_obj,
    568     drop_variables=drop_variables,
    569     **decoders,
    570     **kwargs,
    571 )
    572 ds = _dataset_from_backend_dataset(
    573     backend_ds,
    574     filename_or_obj,
   (...)
    584     **kwargs,
    585 )
    586 return ds

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:93, in GribBackendEntrypoint.open_dataset(self, filename, drop_variables, filters)
     91 ds = xr.Dataset()
     92 for var_df in frames:
---> 93     da = build_da_without_coords(var_df, cube, filename)
     94     ds[da.name] = da
     96 # assign coords from the cube; the cube prevents datarrays with
     97 # different shapes

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:526, in build_da_without_coords(index, cube, filename)
    524 data = indexing.LazilyIndexedArray(data)
    525 if len(dim_names) != len(data.shape):
--> 526     raise ValueError(
    527         "different number of dimensions on data "
    528         f"and dims: {len(data.shape)} vs {len(dim_names)}\n"
    529         "Grib2 messages could not be formed into a data cube; "
    530         "It's possible extra messages exist along a non-accounted for dimension based on PDTN\n"
    531         "It might be possible to get around this by applying a filter on the non-accounted for dimension"
    532         )
    533 da = xr.DataArray(data, dims=dim_names)
    535 da.encoding['original_shape'] = data.shape

ValueError: different number of dimensions on data and dims: 3 vs 2
Grib2 messages could not be formed into a data cube; It's possible extra messages exist along a non-accounted for dimension based on PDTN
It might be possible to get around this by applying a filter on the non-accounted for dimension
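
Following the suggestion in the error message, one way to look for the non-accounted-for dimension is to count how many records land on the same variable/level combination (a sketch, assuming grib2io.open is iterable and that messages expose shortName, pdtn, and typeOfFirstFixedSurface attributes):

import collections
import grib2io

# Count records per (shortName, PDT number, level type); the attribute names
# used here are assumptions about grib2io's message API. Any count > 1 means
# several messages are stacked along an extra, PDT-specific dimension
# (for the aerosol templates this is typically the aerosol type/size).
counts = collections.Counter()
with grib2io.open('gefs.chem.t00z.a2d_0p25.f000.grib2') as f:
    for msg in f:
        counts[(msg.shortName, msg.pdtn, msg.typeOfFirstFixedSurface)] += 1

for key, n in sorted(counts.items()):
    if n > 1:
        print(key, n)

If that shows, for example, several aerosol types per shortName, then adding a second entry to filters for whichever PDT-specific key distinguishes those records (the exact key name depends on how grib2io decodes the aerosol templates, so I have not guessed one here) might let open_dataset build a consistent cube.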