Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve timestamp adjustment function in Glorys obc generation workflow #137

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions tools/boundary/boundary.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,8 @@ def add_coords(self, ds):
def regrid_velocity(
self, usource, vsource,
method='nearest_s2d', periodic=False, write=True,
flood=False, fill='b', xdim='lon', ydim='lat', zdim='z', rotate=True, **kwargs):
flood=False, fill='b', xdim='lon', ydim='lat', zdim='z', rotate=True,
time_attrs=None, time_encoding=None, **kwargs):
"""Interpolate velocity onto segment and (optionally) write to file.

Args:
Expand Down Expand Up @@ -607,6 +608,12 @@ def regrid_velocity(

ds_uv = self.rename_dims(ds_uv)

# Restore time attributes and encoding
if time_attrs:
ds_uv['time'].attrs = time_attrs
if time_encoding:
ds_uv['time'].encoding = time_encoding

if write:
self.to_netcdf(ds_uv, 'uv', **kwargs)

Expand All @@ -616,7 +623,8 @@ def regrid_tracer(
self, tsource,
method='nearest_s2d', periodic=False, write=True,
flood=False, fill='b', xdim='lon', ydim='lat', zdim='z',
regrid_suffix='t', source_var=None, **kwargs):
regrid_suffix='t', source_var=None,
time_attrs=None, time_encoding=None, **kwargs):
"""Regrid a tracer onto segment and (optionally) write to file.

Args:
Expand Down Expand Up @@ -685,6 +693,12 @@ def regrid_tracer(

tdest = self.rename_dims(tdest)
tdest = tdest.rename({name: f'{name}_{self.segstr}'})

# Restore time attributes and encoding
if time_attrs:
tdest['time'].attrs = time_attrs
if time_encoding:
tdest['time'].encoding = time_encoding

if write:
self.to_netcdf(tdest, name, **kwargs)
Expand Down
56 changes: 44 additions & 12 deletions tools/boundary/write_glorys_boundary_daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from os import path

import xarray
import numpy as np
import yaml
from boundary import Segment

Expand All @@ -53,18 +54,24 @@ def write_day(date, glorys_dir, segments, variables, output_prefix):
return

glorys = (
xarray.open_dataset(file_path)
xarray.open_dataset(file_path, decode_times=False)
.rename({'latitude': 'lat', 'longitude': 'lon', 'depth': 'z'})
)

# Capture time attributes and encoding
time_attrs = glorys['time'].attrs if 'time' in glorys.coords else None
time_encoding = glorys['time'].encoding if 'time' in glorys.coords else None

for segment in segments:
for variable in variables:
if variable == 'uv':
print(f"Processing {segment.border} {variable}")
segment.regrid_velocity(glorys['uo'], glorys['vo'], suffix=f"{date:%Y%m%d}", flood=False)
segment.regrid_velocity(glorys['uo'], glorys['vo'], suffix=f"{date:%Y%m%d}", flood=False,
time_attrs=time_attrs, time_encoding=time_encoding )
elif variable in ['thetao', 'so', 'zos']:
print(f"Processing {segment.border} {variable}")
segment.regrid_tracer(glorys[variable], suffix=f"{date:%Y%m%d}", flood=False)
segment.regrid_tracer(glorys[variable], suffix=f"{date:%Y%m%d}", flood=False,
time_attrs=time_attrs, time_encoding=time_encoding)

def concatenate_files(nsegments, output_dir, variables, ncrcat_names, first_date, last_date, adjust_timestamps=False):
"""Concatenate annual files using ncrcat."""
Expand Down Expand Up @@ -93,16 +100,41 @@ def concatenate_files(nsegments, output_dir, variables, ncrcat_names, first_date
adjust_file_timestamps(output_file)

def adjust_file_timestamps(file_path):
    """Snap the first and last timestamps of a netCDF file to whole values, in place.

    The file is opened with ``decode_times=False`` so the time coordinate is
    manipulated as raw numbers, keeping the original units/calendar attributes
    and encoding intact so downstream tools decode it exactly as before.

    NOTE(review): flooring/ceiling the *raw* values only corresponds to
    "start of day" / "end of day" when the time units are days
    (e.g. ``"days since ..."``) — confirm the units attribute of the inputs.

    Args:
        file_path (str): Path to the netCDF file to adjust. The file is
            rewritten at the same path.
    """
    with xarray.open_dataset(file_path, decode_times=False) as ds:
        # Pull everything into memory so nothing is lazily backed by the
        # file handle once the context manager closes it below.
        ds.load()

        if 'time' in ds:
            time = ds['time']
            time_attrs = time.attrs        # preserve units/calendar metadata
            time_encoding = time.encoding  # preserve on-disk numeric format
            time_values = time.values.copy()

            # Only adjust when first and last records are distinct entries.
            if len(time_values) > 1:
                time_values[0] = np.floor(time_values[0])
                time_values[-1] = np.ceil(time_values[-1])

                # Rebuild the coordinate so the original attributes survive
                # the assignment.
                new_time = xarray.DataArray(
                    time_values,
                    dims=time.dims,
                    attrs=time_attrs,
                    name='time',
                )
                ds = ds.assign_coords(time=new_time)

                # assign_coords drops encoding; reapply it explicitly.
                ds['time'].encoding = time_encoding

    # Write only after the context manager has released the source file:
    # overwriting a path whose read handle is still open can fail with
    # HDF5/netCDF file locking errors or truncate the input mid-read.
    ds.to_netcdf(file_path)
    print(f"Timestamps adjusted for {file_path}")

Expand Down
Loading