Not able to read the GEFS-Aerosol grib2 data #160

Open
bbakernoaa opened this issue Nov 7, 2024 · 0 comments · May be fixed by #161
bbakernoaa commented Nov 7, 2024

I have been trying to use cfgrib and add its localConcepts to read the NOAA GEFS-Aerosol data, though I'm still struggling a little with that (in particular with the total column mass density of aerosols). A sample file can be found here. How GRIB handles aerosol data is fairly tricky: it requires many additional tables, and many variables share the same "shortName". I would greatly appreciate any help with this, as it would really help to have a tool that properly handles the air composition data put out by the NWS.

In [16]: xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFixedSurface':10})
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[16], line 1
----> 1 xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFixedSurface':10})

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/api.py:566, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    554 decoders = _resolve_decoders_kwargs(
    555     decode_cf,
    556     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    562     decode_coords=decode_coords,
    563 )
    565 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 566 backend_ds = backend.open_dataset(
    567     filename_or_obj,
    568     drop_variables=drop_variables,
    569     **decoders,
    570     **kwargs,
    571 )
    572 ds = _dataset_from_backend_dataset(
    573     backend_ds,
    574     filename_or_obj,
   (...)
    584     **kwargs,
    585 )
    586 return ds

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:82, in GribBackendEntrypoint.open_dataset(self, filename, drop_variables, filters)
     76     file_index = pd.DataFrame(f._index)
     78 # parse grib2io _index to dataframe and acquire non-geo possible dims
     79 # (scalar coord when not dim due to squeeze) parse_grib_index applies
     80 # filters to index and expands metadata based on product definition
     81 # template number
---> 82 file_index, non_geo_dims = parse_grib_index(file_index, filters)
     84 # Divide up records by variable
     85 frames, cube, extra_geo = make_variables(file_index, filename, non_geo_dims)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:383, in parse_grib_index(index, filters)
    381 for k, v in filters.items():
    382     if k not in index.columns:
--> 383         kwarg = {k:index.msg.apply(lambda msg: getattr(msg, k))}
    384         index = index.assign(**kwarg)
    385     # adopt parts of xarray's sel logic  so that filters behave similarly
    386     # allowed to filter to nothing to make empty dataset

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/series.py:4924, in Series.apply(self, func, convert_dtype, args, by_row, **kwargs)
   4789 def apply(
   4790     self,
   4791     func: AggFuncType,
   (...)
   4796     **kwargs,
   4797 ) -> DataFrame | Series:
   4798     """
   4799     Invoke function on values of Series.
   4800 
   (...)
   4915     dtype: float64
   4916     """
   4917     return SeriesApply(
   4918         self,
   4919         func,
   4920         convert_dtype=convert_dtype,
   4921         by_row=by_row,
   4922         args=args,
   4923         kwargs=kwargs,
-> 4924     ).apply()

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/apply.py:1427, in SeriesApply.apply(self)
   1424     return self.apply_compat()
   1426 # self.func is Callable
-> 1427 return self.apply_standard()

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/apply.py:1507, in SeriesApply.apply_standard(self)
   1501 # row-wise access
   1502 # apply doesn't have a `na_action` keyword and for backward compat reasons
   1503 # we need to give `na_action="ignore"` for categorical data.
   1504 # TODO: remove the `na_action="ignore"` when that default has been changed in
   1505 #  Categorical (GH51645).
   1506 action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
-> 1507 mapped = obj._map_values(
   1508     mapper=curried, na_action=action, convert=self.convert_dtype
   1509 )
   1511 if len(mapped) and isinstance(mapped[0], ABCSeries):
   1512     # GH#43986 Need to do list(mapped) in order to get treated as nested
   1513     #  See also GH#25959 regarding EA support
   1514     return obj._constructor_expanddim(list(mapped), index=obj.index)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/base.py:921, in IndexOpsMixin._map_values(self, mapper, na_action, convert)
    918 if isinstance(arr, ExtensionArray):
    919     return arr.map(mapper, na_action=na_action)
--> 921 return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/pandas/core/algorithms.py:1743, in map_array(arr, mapper, na_action, convert)
   1741 values = arr.astype(object, copy=False)
   1742 if na_action is None:
-> 1743     return lib.map_infer(values, mapper, convert=convert)
   1744 else:
   1745     return lib.map_infer_mask(
   1746         values, mapper, mask=isna(values).view(np.uint8), convert=convert
   1747     )

File lib.pyx:2972, in pandas._libs.lib.map_infer()

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:383, in parse_grib_index.<locals>.<lambda>(msg)
    381 for k, v in filters.items():
    382     if k not in index.columns:
--> 383         kwarg = {k:index.msg.apply(lambda msg: getattr(msg, k))}
    384         index = index.assign(**kwarg)
    385     # adopt parts of xarray's sel logic  so that filters behave similarly
    386     # allowed to filter to nothing to make empty dataset

AttributeError: 'Msg' object has no attribute 'typeOfFixedSurface'
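
The filter key name was the problem here. A minimal way to check which level-related attributes a message actually exposes (a sketch, assuming grib2io.open can be used as a context manager and iterated to yield message objects) is:

import grib2io

# Inspect the first message for attribute names related to the fixed surface.
# 'typeOfFirstFixedSurface' shows up in this list; 'typeOfFixedSurface' does not.
with grib2io.open('gefs.chem.t00z.a2d_0p25.f000.grib2') as f:
    msg = next(iter(f))
    print([name for name in dir(msg) if 'FixedSurface' in name])

Retrying with typeOfFirstFixedSurface (which does exist on the messages) then fails further along: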

In [17]: xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFirstFixedSurface':10})
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[17], line 1
----> 1 xr.open_dataset('gefs.chem.t00z.a2d_0p25.f000.grib2', engine='grib2io', filters={'typeOfFirstFixedSurface':10})

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/xarray/backends/api.py:566, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    554 decoders = _resolve_decoders_kwargs(
    555     decode_cf,
    556     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    562     decode_coords=decode_coords,
    563 )
    565 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 566 backend_ds = backend.open_dataset(
    567     filename_or_obj,
    568     drop_variables=drop_variables,
    569     **decoders,
    570     **kwargs,
    571 )
    572 ds = _dataset_from_backend_dataset(
    573     backend_ds,
    574     filename_or_obj,
   (...)
    584     **kwargs,
    585 )
    586 return ds

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:93, in GribBackendEntrypoint.open_dataset(self, filename, drop_variables, filters)
     91 ds = xr.Dataset()
     92 for var_df in frames:
---> 93     da = build_da_without_coords(var_df, cube, filename)
     94     ds[da.name] = da
     96 # assign coords from the cube; the cube prevents datarrays with
     97 # different shapes

File ~/miniforge3/envs/dev/lib/python3.12/site-packages/grib2io/xarray_backend.py:526, in build_da_without_coords(index, cube, filename)
    524 data = indexing.LazilyIndexedArray(data)
    525 if len(dim_names) != len(data.shape):
--> 526     raise ValueError(
    527         "different number of dimensions on data "
    528         f"and dims: {len(data.shape)} vs {len(dim_names)}\n"
    529         "Grib2 messages could not be formed into a data cube; "
    530         "It's possible extra messages exist along a non-accounted for dimension based on PDTN\n"
    531         "It might be possible to get around this by applying a filter on the non-accounted for dimension"
    532         )
    533 da = xr.DataArray(data, dims=dim_names)
    535 da.encoding['original_shape'] = data.shape

ValueError: different number of dimensions on data and dims: 3 vs 2
Grib2 messages could not be formed into a data cube; It's possible extra messages exist along a non-accounted for dimension based on PDTN
It might be possible to get around this by applying a filter on the non-accounted for dimension
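
Following the suggestion in the error message, one way to look for the non-accounted-for dimension is to count how many records land on the same variable/level combination (a sketch, assuming grib2io.open is iterable and that messages expose shortName, pdtn, and typeOfFirstFixedSurface attributes):

import collections
import grib2io

# Count records per (shortName, PDT number, level type); the attribute names
# used here are assumptions about grib2io's message API. Any count > 1 means
# several messages are stacked along an extra, PDT-specific dimension
# (for the aerosol templates this is typically the aerosol type/size).
counts = collections.Counter()
with grib2io.open('gefs.chem.t00z.a2d_0p25.f000.grib2') as f:
    for msg in f:
        counts[(msg.shortName, msg.pdtn, msg.typeOfFirstFixedSurface)] += 1

for key, n in sorted(counts.items()):
    if n > 1:
        print(key, n)

If that shows, for example, several aerosol types per shortName, then adding a second entry to filters for whichever PDT-specific key distinguishes those records (the exact key name depends on how grib2io decodes the aerosol templates, so I have not guessed one here) might let open_dataset build a consistent cube.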