Skip to content

Commit

Permalink
Add _join method + CI pipeline (#4)
Browse files Browse the repository at this point in the history
* method to join paths while recognizing clouds

* generalize _join to act like os.path.join; write a quick test

* add CI to test this

* strange gha bug

* ability to manually trigger

* debug this yaml file

* remove dispatch

* windows now working
  • Loading branch information
timothyas authored Dec 8, 2023
1 parent 939a6b6 commit 9e9c239
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 6 deletions.
95 changes: 95 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# This uses actions:
# checkout: https://github.com/actions/checkout
# cache: https://github.com/actions/cache
# codecov-action: https://github.com/codecov/codecov-action

name: CI
on:
  push:
    branches:
      - "*"
  pull_request:
    branches:
      - "*"
  # Manual trigger. Workflow inputs must live under "inputs:" with a
  # description and a type; a bare option list is invalid workflow syntax
  # and GitHub Actions rejects the file.
  workflow_dispatch:
    inputs:
      logLevel:
        description: "Log level for the manual run"
        required: false
        default: "info"
        type: choice
        options:
          - info
          - warning
          - debug


# Cancel in-flight runs for the same branch/ref when a new one starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build:
    name: Build (${{ matrix.python-version }} | ${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # Windows needs a different init shell for micromamba, so it runs
        # in its own job below.
        os: ["ubuntu-latest", "macos-latest"]
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v3
      - name: Create conda environment
        uses: mamba-org/setup-micromamba@v1
        with:
          cache-downloads: true
          cache-environment: true
          micromamba-version: "latest"
          environment-file: environment.yaml
          create-args: python=${{ matrix.python-version }}
          init-shell: bash
      - name: Install ufs2arco
        shell: bash -l {0}
        run: |
          python -V
          python -c "import setuptools; print(setuptools.__version__)"
          python -m pip install -e . --no-deps
      - name: Run Unit Tests
        shell: bash -l {0}
        run: |
          python -V
          coverage run --rcfile=coverage.toml -m pytest --verbose
      - name: Get coverage report
        shell: bash -l {0}
        run: |
          coverage report -m ; coverage xml
  windows-build:
    name: Build (${{ matrix.python-version }} | ${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["windows-latest"]
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v3
      - name: Create conda environment
        uses: mamba-org/setup-micromamba@v1
        with:
          cache-downloads: true
          cache-environment: true
          micromamba-version: "latest"
          environment-file: environment.yaml
          create-args: python=${{ matrix.python-version }}
          init-shell: powershell
      - name: Install ufs2arco
        shell: pwsh
        run: |
          python -V
          python -c "import setuptools; print(setuptools.__version__)"
          python -m pip install -e . --no-deps
      - name: Run Unit Tests
        shell: pwsh
        run: |
          python -V
          coverage run --rcfile=coverage.toml -m pytest --verbose
      - name: Get coverage report
        shell: pwsh
        run: |
          coverage report -m ; coverage xml
5 changes: 5 additions & 0 deletions coverage.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[tool.coverage.run]
omit = [
"tests/*.py",
"**/__init__.py",
]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ source="https://github.com/NOAA-PSL/ufs2arco"
documentation="https://ufs2arco.readthedocs.io/en/latest/"

[build-system]
requires = ["setuptools", "setuptools-scm"]
requires = ["setuptools>=64.0.0", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
Expand Down
84 changes: 84 additions & 0 deletions tests/config-replay.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
FV3Dataset:
file_prefixes:
- bfg_
- sfg_

path_out : "gcs://noaa-ufs-gefsv13replay/ufs-hr1/0.25-degree/03h-freq/zarr/"
coords_path_out: "gcs://noaa-ufs-gefsv13replay/ufs-hr1/0.25-degree/coordinates/zarr/"
forecast_hours : [0, 3]

chunks_in:
# estimated 37MB per chunk (the full 3D field)
time : 1
pfull : 127
grid_yt : 768
grid_xt : 1536

chunks_out:
time : 1
pfull : 127
grid_yt : 768
grid_xt : 1536

coords:
- phalf
- pfull
- grid_xt
- grid_yt
- ak
- bk

data_vars:
# 3D atmospheric vars
- tmp
- ugrd
- vgrd
- delz
- dzdt
- dpres
- spfh
- o3mr
# 3D land vars
- soilt1
- soilt2
- soilt3
- soilt4
- soill1
- soill2
- soill3
- soill4
- soilw1
- soilw2
- soilw3
- soilw4
# 2D vars
- snod
- prateb_ave
- pressfc
- weasd
- f10m
- sfcr
# Surface forcing vars
- land
- vtype
- sotyp
- veg
- icec
- tmpsfc
# For TOA solar radiation
- dswrf_avetoa
- ulwrf_avetoa
- uswrf_avetoa
# Ease comparison
- tmp2m
- ugrd10m
- vgrd10m
# For cloudy assimilation
- clwmr
- grle
- icmr
- rwmr
- snmr
# For future operational implementation
- ntrnc
- nicp
21 changes: 21 additions & 0 deletions tests/test_ufsdataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest

from os.path import join, dirname

from ufs2arco import FV3Dataset


@pytest.mark.parametrize(
    "prefix", ["gcs://", "s3://", "https://", "/contrib", "/scratch/"]
)
def test_join_cloud(prefix):
    """Verify FV3Dataset._join joins cloud URIs with "/" and defers
    local paths to os.path.join.

    Args:
        prefix (str): leading path component; cloud schemes contain "://"
    """

    # PEP 8 (E731): use a def, not a lambda bound to a name.
    # Stand-in for the path function FV3Dataset requires at construction.
    def dummy_path(p):
        return str(p)

    fname = join(dirname(__file__), "config-replay.yaml")
    ufs = FV3Dataset(dummy_path, fname)

    path = ufs._join(prefix, "directory", "fv3.zarr")
    if "://" in prefix:
        # cloud paths must use "/" regardless of the host OS separator
        assert path == f"{prefix}directory/fv3.zarr"
    else:
        # local paths follow the platform's os.path.join semantics
        assert path == join(prefix, "directory", "fv3.zarr")
31 changes: 26 additions & 5 deletions ufs2arco/ufsdataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from pathlib import Path
from os import path
from os.path import join
from collections.abc import Iterable
from typing import Dict, List, Callable
import fsspec
Expand Down Expand Up @@ -50,15 +49,15 @@ class UFSDataset:
@property
def data_path(self):
"""Where to write forecast data variables to"""
return str(Path(self.path_out) / self.zarr_name)
return self._join(self.path_out, self.zarr_name)

@property
def coords_path(self) -> str:
"""Where to write static coordinates to"""
if self.coords_path_out is None:
return str(Path(self.path_out) / "coordinates" / self.zarr_name)
return self._join(self.path_out, "coordinates", self.zarr_name)
else:
return str(Path(self.coords_path_out) / self.zarr_name)
return self._join(self.coords_path_out, self.zarr_name)

@property
def default_open_dataset_kwargs(self) -> Dict:
Expand Down Expand Up @@ -365,3 +364,25 @@ def _time2ftime(time, cycles):
},
)
return xftime

@staticmethod
def _join(a, *p):
"""System independent join operation"""
clouds = ("gcs://", "s3://", "https://")
if any(x in a for x in clouds) or any(any(x in this_path for x in clouds) for this_path in p):
try:
assert isinstance(a, str) and all(isinstance(this_path, str) for this_path in p)
except:
raise TypeError(f"For cloud storage, paths need to be strings.")

path = a
join_char = "/" if a[-1] != "/" else ""
for this_path in p:
path += join_char
path += this_path
join_char = "/" if this_path[-1] != "/" else ""

return path

else:
return join(a, *p)

0 comments on commit 9e9c239

Please sign in to comment.