Skip to content

Commit

Permalink
Implement fs copy from HTTP (ufs-community#669)
Browse files Browse the repository at this point in the history
  • Loading branch information
maddenp-noaa authored Dec 10, 2024
1 parent a95fa0b commit 2eacebf
Show file tree
Hide file tree
Showing 15 changed files with 383 additions and 79 deletions.
6 changes: 4 additions & 2 deletions docs/sections/user_guide/cli/tools/fs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ The ``uw`` mode for handling filesystem items (files and directories).

The ``copy`` action stages files in a target directory by copying files. Any ``KEY`` positional arguments are used to navigate, in the order given, from the top of the config to the :ref:`file block <files_yaml>`.

Source paths prefixed with ``http://`` or ``https://`` will be copied from their upstream network locations to the local filesystem.

.. literalinclude:: fs/copy-help.cmd
:emphasize-lines: 1
.. literalinclude:: fs/copy-help.out
Expand All @@ -23,7 +25,7 @@ The ``copy`` action stages files in a target directory by copying files. Any ``K
Examples
^^^^^^^^

Given ``copy-config.yaml`` containing
Given ``copy-config.yaml`` containing a mapping from local-filesystem destination paths to source paths

.. literalinclude:: fs/copy-config.yaml
:language: yaml
Expand All @@ -32,7 +34,7 @@ Given ``copy-config.yaml`` containing
.. literalinclude:: fs/copy-exec.out
:language: text

Here, ``foo`` and ``bar`` are copies of their respective source files.
Here, ``foo`` and ``bar`` are copies of their respective local-filesystem source files, and ``gpl`` is a copy of the upstream network source.

The ``--cycle`` and ``--leadtime`` options can be used to make Python ``datetime`` and ``timedelta`` objects, respectively, available for use in Jinja2 expressions in the config. For example:

Expand Down
1 change: 1 addition & 0 deletions docs/sections/user_guide/cli/tools/fs/copy-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
config:
files:
foo: src/foo
licenses/gpl: https://www.gnu.org/licenses/gpl-3.0.txt
subdir/bar: src/bar
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[2024-08-26T23:03:40] INFO Validating config against internal schema: files-to-stage
[2024-08-26T23:03:40] INFO 0 UW schema-validation errors found in fs config
[2024-08-26T23:03:40] ERROR Relative path 'foo' requires the target directory to be specified
[2024-12-07T01:01:51] INFO Validating config against internal schema: files-to-stage
[2024-12-07T01:01:53] INFO 0 UW schema-validation errors found in fs config
[2024-12-07T01:01:53] ERROR Relative path 'foo' requires target directory to be specified
39 changes: 23 additions & 16 deletions docs/sections/user_guide/cli/tools/fs/copy-exec.out
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
[2024-08-26T23:03:41] INFO Validating config against internal schema: files-to-stage
[2024-08-26T23:03:41] INFO 0 UW schema-validation errors found in fs config
[2024-08-26T23:03:41] INFO File copies: Initial state: Not Ready
[2024-08-26T23:03:41] INFO File copies: Checking requirements
[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Initial state: Not Ready
[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Checking requirements
[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Requirement(s) ready
[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Executing
[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Final state: Ready
[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Initial state: Not Ready
[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Checking requirements
[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Requirement(s) ready
[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Executing
[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Final state: Ready
[2024-08-26T23:03:41] INFO File copies: Final state: Ready
[2024-12-07T01:01:56] INFO Validating config against internal schema: files-to-stage
[2024-12-07T01:01:56] INFO 0 UW schema-validation errors found in fs config
[2024-12-07T01:01:56] INFO File copies: Initial state: Not Ready
[2024-12-07T01:01:56] INFO File copies: Checking requirements
[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Initial state: Not Ready
[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Checking requirements
[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Requirement(s) ready
[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Executing
[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Final state: Ready
[2024-12-07T01:01:56] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Initial state: Not Ready
[2024-12-07T01:01:56] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Checking requirements
[2024-12-07T01:01:58] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Requirement(s) ready
[2024-12-07T01:01:58] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Executing
[2024-12-07T01:01:58] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Final state: Ready
[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Initial state: Not Ready
[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Checking requirements
[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Requirement(s) ready
[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Executing
[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Final state: Ready
[2024-12-07T01:01:58] INFO File copies: Final state: Ready

copy-dst
├── foo
├── licenses
│   └── gpl
└── subdir
└── bar

2 directories, 2 files
3 directories, 3 files
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[2024-08-26T23:03:41] INFO Validating config against internal schema: files-to-stage
[2024-08-26T23:03:41] INFO 0 UW schema-validation errors found in fs config
[2024-08-26T23:03:41] ERROR Relative path 'foo' requires the target directory to be specified
[2024-12-07T01:01:55] INFO Validating config against internal schema: files-to-stage
[2024-12-07T01:01:55] INFO 0 UW schema-validation errors found in fs config
[2024-12-07T01:01:55] ERROR Relative path 'foo' requires target directory to be specified
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[2024-08-26T23:03:44] INFO Validating config against internal schema: makedirs
[2024-08-26T23:03:45] INFO 0 UW schema-validation errors found in fs config
[2024-08-26T23:03:45] ERROR Relative path 'foo' requires the target directory to be specified
[2024-12-07T01:01:55] INFO Validating config against internal schema: makedirs
[2024-12-07T01:01:55] INFO 0 UW schema-validation errors found in fs config
[2024-12-07T01:01:55] ERROR Relative path 'foo' requires target directory to be specified
4 changes: 3 additions & 1 deletion recipe/meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"pytest-xdist =3.6.*",
"python >=3.9,<3.13",
"pyyaml =6.0.*",
"requests =2.32.*",
"setuptools"
],
"run": [
Expand All @@ -31,7 +32,8 @@
"jsonschema >=4.18,<4.24",
"lxml =5.3.*",
"python >=3.9,<3.13",
"pyyaml =6.0.*"
"pyyaml =6.0.*",
"requests =2.32.*"
]
},
"version": "2.5.0"
Expand Down
1 change: 1 addition & 0 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ requirements:
- lxml 5.3.*
- python >=3.9,<3.13
- pyyaml 6.0.*
- requests 2.32.*
test:
requires:
- black 24.8.*
Expand Down
3 changes: 2 additions & 1 deletion src/uwtools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ def main() -> None:
modes = {**tools, **drivers}
sys.exit(0 if modes[args[STR.mode]](args) else 1)
except UWError as e:
log.error(str(e))
for line in str(e).split("\n"):
log.error(line)
sys.exit(1)


Expand Down
60 changes: 48 additions & 12 deletions src/uwtools/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urlparse

from iotaa import dryrun, tasks

Expand Down Expand Up @@ -56,20 +57,44 @@ def __init__(
)
self._config, _ = walk_key_path(yaml_config.data, key_path or [])
self._validate()
self._check_paths()
self._check_target_dir()
self._check_destination_paths()

def _check_paths(self) -> None:
def _check_destination_paths(self) -> None:
"""
Check that all paths are absolute if no target directory is specified.
Check that destination paths are valid.
:parm paths: The paths to check.
:raises: UWConfigError if no target directory is specified and a relative path is.
:raises: UWConfigError when a bad path is detected.
"""
if not self._target_dir:
errmsg = "Relative path '%s' requires the target directory to be specified"
for dst in self._dst_paths:
if not Path(dst).is_absolute():
raise UWConfigError(errmsg % dst)
for dst in self._dst_paths:
scheme = urlparse(dst).scheme
absolute = scheme or Path(dst).is_absolute()
if scheme and scheme != STR.url_scheme_file:
msg = "Non-filesystem destination path '%s' not currently supported"
raise UWConfigError(msg % dst)
if self._target_dir and scheme:
msg = "Non-filesystem path '%s' invalid when target directory is specified"
raise UWConfigError(msg % dst)
if self._target_dir and absolute:
msg = "Path '%s' must be relative when target directory is specified"
raise UWConfigError(msg % dst)
if not self._target_dir and not absolute:
msg = "Relative path '%s' requires target directory to be specified"
raise UWConfigError(msg % dst)

def _check_target_dir(self) -> None:
    """
    Ensure that the target directory, when one is set, is a filesystem path.

    A target directory with no URL scheme, or with the file scheme, is accepted.

    :raises: UWConfigError when the target directory carries any other scheme.
    """
    # Nothing to validate when no target directory was given.
    if not self._target_dir:
        return
    scheme = urlparse(str(self._target_dir)).scheme
    if scheme and scheme != STR.url_scheme_file:
        msg = "Non-filesystem path '%s' invalid as target directory"
        raise UWConfigError(msg % self._target_dir)

@property
@abstractmethod
Expand Down Expand Up @@ -124,9 +149,20 @@ def go(self):
"""
Copy files.
"""
dst = lambda k: Path(self._target_dir / k if self._target_dir else k)
yield "File copies"
yield [filecopy(src=Path(v), dst=dst(k)) for k, v in self._config.items()]
yield [
filecopy(src=src, dst=self._simple(self._target_dir) / self._simple(dst))
for dst, src in self._config.items()
]

@staticmethod
def _simple(path: Union[Path, str]) -> Path:
    """
    Convert a path, potentially prefixed with scheme file://, into a simple filesystem path.

    Only a value with an explicit file scheme is parsed as a URL. Anything else is treated as a
    literal filesystem path, so characters that are special in URLs (e.g. '?' and '#') are kept
    as part of the path instead of being split off as a query or fragment.

    :param path: The path to convert.
    :return: The equivalent filesystem path.
    """
    text = str(path)
    parsed = urlparse(text)
    if parsed.scheme == "file":
        return Path(parsed.path)
    # No file:// prefix: do not let URL parsing drop or reinterpret any part of the path.
    return Path(text)


class Linker(FileStager):
Expand Down
1 change: 1 addition & 0 deletions src/uwtools/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ class STR:
updatefmt: str = "update_format"
updatevalues: str = "update_values"
upp: str = "upp"
url_scheme_file: str = "file"
validate: str = "validate"
valsfile: str = "values_file"
valsfmt: str = "values_format"
Expand Down
2 changes: 1 addition & 1 deletion src/uwtools/tests/api/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def kwargs(tmp_path):
f.touch()
config = {"a": {"b": {str(dstdir / "f1"): str(srcfile1), str(dstdir / "f2"): str(srcfile2)}}}
return {
"target_dir": dstdir,
"target_dir": None,
"config": config,
"cycle": dt.datetime.now(),
"leadtime": dt.timedelta(hours=6),
Expand Down
89 changes: 81 additions & 8 deletions src/uwtools/tests/test_fs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# pylint: disable=missing-class-docstring
# pylint: disable=missing-function-docstring
# pylint: disable=protected-access
# pylint: disable=redefined-outer-name

from pathlib import Path
from unittest.mock import Mock, patch

import iotaa
import yaml
from pytest import fixture, mark, raises
Expand Down Expand Up @@ -48,8 +52,19 @@ def _schema(self):
# Tests


@mark.parametrize("src_fn", [str, Path])
@mark.parametrize("dst_fn", [str, Path])
@mark.parametrize("td_fn", [str, Path])
def test_fs_Copier_go(src_fn, dst_fn, td_fn):
    # Source, destination and target directory may each be supplied as str or Path.
    source = src_fn("/src/file")
    target_dir = td_fn("/dst")
    destination = dst_fn("file")
    copier = Mock(
        _config={destination: source},
        _simple=fs.Copier._simple,
        _target_dir=target_dir,
    )
    with patch.object(fs, "filecopy") as mocked_filecopy:
        fs.Copier.go(copier)
    # The destination handed to filecopy is the target directory joined with the relative path.
    expected_dst = Path("/dst/file")
    mocked_filecopy.assert_called_once_with(src=source, dst=expected_dst)


@mark.parametrize("source", ("dict", "file"))
def test_Copier(assets, source):
def test_fs_Copier_go_live(assets, source):
dstdir, cfgdict, cfgfile = assets
config = cfgdict if source == "dict" else cfgfile
assert not (dstdir / "foo").exists()
Expand All @@ -59,7 +74,7 @@ def test_Copier(assets, source):
assert (dstdir / "subdir" / "bar").is_file()


def test_Copier_config_file_dry_run(assets):
def test_fs_Copier_go_live_config_file_dry_run(assets):
dstdir, cfgdict, _ = assets
assert not (dstdir / "foo").exists()
assert not (dstdir / "subdir" / "bar").exists()
Expand All @@ -69,7 +84,7 @@ def test_Copier_config_file_dry_run(assets):
iotaa.dryrun(False)


def test_Copier_no_targetdir_abspath_pass(assets):
def test_fs_Copier_go_live_no_targetdir_abspath_pass(assets):
dstdir, cfgdict, _ = assets
old = cfgdict["a"]["b"]
cfgdict = {str(dstdir / "foo"): old["foo"], str(dstdir / "bar"): old["subdir/bar"]}
Expand All @@ -81,19 +96,26 @@ def test_Copier_no_targetdir_relpath_fail(assets):
_, cfgdict, _ = assets
with raises(UWConfigError) as e:
fs.Copier(config=cfgdict, key_path=["a", "b"]).go()
errmsg = "Relative path '%s' requires the target directory to be specified"
errmsg = "Relative path '%s' requires target directory to be specified"
assert errmsg % "foo" in str(e.value)


def test_fs_Copier__simple():
    # Map each input to the simple path expected for it: relative, absolute, file:// and empty.
    expectations = {
        "relative/path": Path("relative/path"),
        "/absolute/path": Path("/absolute/path"),
        "file:///absolute/path": Path("/absolute/path"),
        "": Path(""),
    }
    for given, expected in expectations.items():
        assert fs.Copier._simple(given) == expected


@mark.parametrize("source", ("dict", "file"))
def test_FilerStager(assets, source):
def test_fs_FilerStager(assets, source):
dstdir, cfgdict, cfgfile = assets
config = cfgdict if source == "dict" else cfgfile
assert fs.FileStager(target_dir=dstdir, config=config, key_path=["a", "b"])


@mark.parametrize("source", ("dict", "file"))
def test_Linker(assets, source):
def test_fs_Linker(assets, source):
dstdir, cfgdict, cfgfile = assets
config = cfgdict if source == "dict" else cfgfile
assert not (dstdir / "foo").exists()
Expand All @@ -103,8 +125,59 @@ def test_Linker(assets, source):
assert (dstdir / "subdir" / "bar").is_symlink()


@mark.parametrize(
    "path,target_dir,msg,fail_expected",
    [
        (
            "/other/path",
            "/some/path",
            "Path '%s' must be relative when target directory is specified",
            True,
        ),
        (
            "foo://bucket/a/b",
            None,
            "Non-filesystem destination path '%s' not currently supported",
            True,
        ),
        (
            "relpath",
            None,
            "Relative path '%s' requires target directory to be specified",
            True,
        ),
        (
            "file://foo.com/a/b",
            "/some/path",
            "Non-filesystem path '%s' invalid when target directory is specified",
            True,
        ),
        ("other/path", "/some/path", None, False),
        ("other/path", "file:///some/path", None, False),
    ],
)
def test_fs_Stager__check_destination_paths_fail(path, target_dir, msg, fail_expected):
    """
    Check destination-path validation for both rejected and accepted combinations.

    Cases with fail_expected=True must raise UWConfigError with the exact message given. Cases
    with fail_expected=False must complete without raising.
    """
    obj = Mock(_dst_paths=[path], _target_dir=target_dir)
    if fail_expected:
        with raises(UWConfigError) as e:
            fs.Stager._check_destination_paths(obj)
        assert str(e.value) == msg % path
    else:
        # Exercise the valid combinations too; previously they were parametrized but never run.
        fs.Stager._check_destination_paths(obj)


@mark.parametrize(
    "path,fail_expected",
    [("foo://bucket/a/b", True), ("/some/path", False), ("file:///some/path", False)],
)
def test_fs_Stager__check_target_dir_fail_bad_scheme(path, fail_expected):
    """
    Check target-directory validation for a rejected scheme and for accepted path forms.

    The parametrized path is used as the target directory, so each case tests its own value.
    """
    # Use the parametrized value; a hard-coded bad path would leave the valid cases untested.
    obj = Mock(_target_dir=path)
    if fail_expected:
        with raises(UWConfigError) as e:
            fs.Stager._check_target_dir(obj)
        assert str(e.value) == "Non-filesystem path '%s' invalid as target directory" % path
    else:
        # Plain and file:// target directories must be accepted without raising.
        fs.Stager._check_target_dir(obj)


@mark.parametrize("source", ("dict", "file"))
def test_Stager__config_block_fail_bad_key_path(assets, source):
def test_fs_Stager__config_block_fail_bad_key_path(assets, source):
dstdir, cfgdict, cfgfile = assets
config = cfgdict if source == "dict" else cfgfile
with raises(UWConfigError) as e:
Expand All @@ -113,7 +186,7 @@ def test_Stager__config_block_fail_bad_key_path(assets, source):


@mark.parametrize("val", [None, True, False, "str", 42, 3.14, [], tuple()])
def test_Stager__config_block_fails_bad_type(assets, val):
def test_fs_Stager__config_block_fails_bad_type(assets, val):
dstdir, cfgdict, _ = assets
cfgdict["a"]["b"] = val
with raises(UWConfigError) as e:
Expand Down
Loading

0 comments on commit 2eacebf

Please sign in to comment.