From 2eacebf2047c756992883319c53d22e56555ab57 Mon Sep 17 00:00:00 2001 From: Paul Madden <136389411+maddenp-noaa@users.noreply.github.com> Date: Tue, 10 Dec 2024 08:17:55 -0700 Subject: [PATCH] Implement fs copy from HTTP (#669) --- docs/sections/user_guide/cli/tools/fs.rst | 6 +- .../user_guide/cli/tools/fs/copy-config.yaml | 1 + .../tools/fs/copy-exec-no-target-dir-err.out | 6 +- .../user_guide/cli/tools/fs/copy-exec.out | 39 +++-- .../tools/fs/link-exec-no-target-dir-err.out | 6 +- .../fs/makedirs-exec-no-target-dir-err.out | 6 +- recipe/meta.json | 4 +- recipe/meta.yaml | 1 + src/uwtools/cli.py | 3 +- src/uwtools/fs.py | 60 +++++-- src/uwtools/strings.py | 1 + src/uwtools/tests/api/test_fs.py | 2 +- src/uwtools/tests/test_fs.py | 89 ++++++++++- src/uwtools/tests/utils/test_tasks.py | 148 ++++++++++++++++-- src/uwtools/utils/tasks.py | 90 +++++++++-- 15 files changed, 383 insertions(+), 79 deletions(-) diff --git a/docs/sections/user_guide/cli/tools/fs.rst b/docs/sections/user_guide/cli/tools/fs.rst index cb61dd7bf..153404e98 100644 --- a/docs/sections/user_guide/cli/tools/fs.rst +++ b/docs/sections/user_guide/cli/tools/fs.rst @@ -15,6 +15,8 @@ The ``uw`` mode for handling filesystem items (files and directories). The ``copy`` action stages files in a target directory by copying files. Any ``KEY`` positional arguments are used to navigate, in the order given, from the top of the config to the :ref:`file block `. +Source paths prefixed with ``http://`` or ``https://`` will be copied from their upstream network locations to the local filesystem. + .. literalinclude:: fs/copy-help.cmd :emphasize-lines: 1 .. literalinclude:: fs/copy-help.out @@ -23,7 +25,7 @@ The ``copy`` action stages files in a target directory by copying files. Any ``K Examples ^^^^^^^^ -Given ``copy-config.yaml`` containing +Given ``copy-config.yaml`` containing a mapping from local-filesystem destination paths to source paths .. literalinclude:: fs/copy-config.yaml :language: yaml @@ -32,7 +34,7 @@ Given ``copy-config.yaml`` containing .. literalinclude:: fs/copy-exec.out :language: text -Here, ``foo`` and ``bar`` are copies of their respective source files. +Here, ``foo`` and ``bar`` are copies of their respective local-filesystem source files, and ``gpl`` is a copy of the upstream network source. The ``--cycle`` and ``--leadtime`` options can be used to make Python ``datetime`` and ``timedelta`` objects, respectively, available for use in Jinja2 expression in the config. For example: diff --git a/docs/sections/user_guide/cli/tools/fs/copy-config.yaml b/docs/sections/user_guide/cli/tools/fs/copy-config.yaml index 17c45a3e6..98d02f202 100644 --- a/docs/sections/user_guide/cli/tools/fs/copy-config.yaml +++ b/docs/sections/user_guide/cli/tools/fs/copy-config.yaml @@ -1,4 +1,5 @@ config: files: foo: src/foo + licenses/gpl: https://www.gnu.org/licenses/gpl-3.0.txt subdir/bar: src/bar diff --git a/docs/sections/user_guide/cli/tools/fs/copy-exec-no-target-dir-err.out b/docs/sections/user_guide/cli/tools/fs/copy-exec-no-target-dir-err.out index 71b4ca3a1..17a9f799f 100644 --- a/docs/sections/user_guide/cli/tools/fs/copy-exec-no-target-dir-err.out +++ b/docs/sections/user_guide/cli/tools/fs/copy-exec-no-target-dir-err.out @@ -1,3 +1,3 @@ -[2024-08-26T23:03:40] INFO Validating config against internal schema: files-to-stage -[2024-08-26T23:03:40] INFO 0 UW schema-validation errors found in fs config -[2024-08-26T23:03:40] ERROR Relative path 'foo' requires the target directory to be specified +[2024-12-07T01:01:51] INFO Validating config against internal schema: files-to-stage +[2024-12-07T01:01:53] INFO 0 UW schema-validation errors found in fs config +[2024-12-07T01:01:53] ERROR Relative path 'foo' requires target directory to be specified diff --git a/docs/sections/user_guide/cli/tools/fs/copy-exec.out b/docs/sections/user_guide/cli/tools/fs/copy-exec.out index 57221e756..46a06048e 100644 --- a/docs/sections/user_guide/cli/tools/fs/copy-exec.out +++ b/docs/sections/user_guide/cli/tools/fs/copy-exec.out @@ -1,22 +1,29 @@ -[2024-08-26T23:03:41] INFO Validating config against internal schema: files-to-stage -[2024-08-26T23:03:41] INFO 0 UW schema-validation errors found in fs config -[2024-08-26T23:03:41] INFO File copies: Initial state: Not Ready -[2024-08-26T23:03:41] INFO File copies: Checking requirements -[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Initial state: Not Ready -[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Checking requirements -[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Requirement(s) ready -[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Executing -[2024-08-26T23:03:41] INFO Copy src/foo -> copy-dst/foo: Final state: Ready -[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Initial state: Not Ready -[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Checking requirements -[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Requirement(s) ready -[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Executing -[2024-08-26T23:03:41] INFO Copy src/bar -> copy-dst/subdir/bar: Final state: Ready -[2024-08-26T23:03:41] INFO File copies: Final state: Ready +[2024-12-07T01:01:56] INFO Validating config against internal schema: files-to-stage +[2024-12-07T01:01:56] INFO 0 UW schema-validation errors found in fs config +[2024-12-07T01:01:56] INFO File copies: Initial state: Not Ready +[2024-12-07T01:01:56] INFO File copies: Checking requirements +[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Initial state: Not Ready +[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Checking requirements +[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Requirement(s) ready +[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Executing +[2024-12-07T01:01:56] INFO Copy src/foo -> copy-dst/foo: Final state: Ready +[2024-12-07T01:01:56] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Initial state: Not Ready +[2024-12-07T01:01:56] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Checking requirements +[2024-12-07T01:01:58] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Requirement(s) ready +[2024-12-07T01:01:58] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Executing +[2024-12-07T01:01:58] INFO Copy https://www.gnu.org/licenses/gpl-3.0.txt -> copy-dst/licenses/gpl: Final state: Ready +[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Initial state: Not Ready +[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Checking requirements +[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Requirement(s) ready +[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Executing +[2024-12-07T01:01:58] INFO Copy src/bar -> copy-dst/subdir/bar: Final state: Ready +[2024-12-07T01:01:58] INFO File copies: Final state: Ready copy-dst ├── foo +├── licenses +│   └── gpl └── subdir └── bar -2 directories, 2 files +3 directories, 3 files diff --git a/docs/sections/user_guide/cli/tools/fs/link-exec-no-target-dir-err.out b/docs/sections/user_guide/cli/tools/fs/link-exec-no-target-dir-err.out index dcb5593ed..7118df558 100644 --- a/docs/sections/user_guide/cli/tools/fs/link-exec-no-target-dir-err.out +++ b/docs/sections/user_guide/cli/tools/fs/link-exec-no-target-dir-err.out @@ -1,3 +1,3 @@ -[2024-08-26T23:03:41] INFO Validating config against internal schema: files-to-stage -[2024-08-26T23:03:41] INFO 0 UW schema-validation errors found in fs config -[2024-08-26T23:03:41] ERROR Relative path 'foo' requires the target directory to be specified +[2024-12-07T01:01:55] INFO Validating config against internal schema: files-to-stage +[2024-12-07T01:01:55] INFO 0 UW schema-validation errors found in fs config +[2024-12-07T01:01:55] ERROR Relative path 'foo' requires target directory to be specified diff --git a/docs/sections/user_guide/cli/tools/fs/makedirs-exec-no-target-dir-err.out b/docs/sections/user_guide/cli/tools/fs/makedirs-exec-no-target-dir-err.out index 84c7710bf..63c47798e 100644 --- a/docs/sections/user_guide/cli/tools/fs/makedirs-exec-no-target-dir-err.out +++ b/docs/sections/user_guide/cli/tools/fs/makedirs-exec-no-target-dir-err.out @@ -1,3 +1,3 @@ -[2024-08-26T23:03:44] INFO Validating config against internal schema: makedirs -[2024-08-26T23:03:45] INFO 0 UW schema-validation errors found in fs config -[2024-08-26T23:03:45] ERROR Relative path 'foo' requires the target directory to be specified +[2024-12-07T01:01:55] INFO Validating config against internal schema: makedirs +[2024-12-07T01:01:55] INFO 0 UW schema-validation errors found in fs config +[2024-12-07T01:01:55] ERROR Relative path 'foo' requires target directory to be specified diff --git a/recipe/meta.json b/recipe/meta.json index 8cfa7a58f..cdccd2d82 100644 --- a/recipe/meta.json +++ b/recipe/meta.json @@ -22,6 +22,7 @@ "pytest-xdist =3.6.*", "python >=3.9,<3.13", "pyyaml =6.0.*", + "requests =2.32.*", "setuptools" ], "run": [ @@ -31,7 +32,8 @@ "jsonschema >=4.18,<4.24", "lxml =5.3.*", "python >=3.9,<3.13", - "pyyaml =6.0.*" + "pyyaml =6.0.*", + "requests =2.32.*" ] }, "version": "2.5.0" diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 9a9dfd0d9..83e95675d 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -21,6 +21,7 @@ requirements: - lxml 5.3.* - python >=3.9,<3.13 - pyyaml 6.0.* + - requests 2.32.* test: requires: - black 24.8.* diff --git a/src/uwtools/cli.py b/src/uwtools/cli.py index 036b04cbf..84c162f1c 100644 --- a/src/uwtools/cli.py +++ b/src/uwtools/cli.py @@ -94,7 +94,8 @@ def main() -> None: modes = {**tools, **drivers} sys.exit(0 if modes[args[STR.mode]](args) else 1) except UWError as e: - log.error(str(e)) + for line in str(e).split("\n"): + log.error(line) sys.exit(1) diff --git a/src/uwtools/fs.py b/src/uwtools/fs.py index c492cee4d..f7d214a49 100644 --- a/src/uwtools/fs.py +++ b/src/uwtools/fs.py @@ -6,6 +6,7 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Optional, Union +from urllib.parse import urlparse from iotaa import dryrun, tasks @@ -56,20 +57,44 @@ def __init__( ) self._config, _ = walk_key_path(yaml_config.data, key_path or []) self._validate() - self._check_paths() + self._check_target_dir() + self._check_destination_paths() - def _check_paths(self) -> None: + def _check_destination_paths(self) -> None: """ - Check that all paths are absolute if no target directory is specified. + Check that destination paths are valid. - :parm paths: The paths to check. - :raises: UWConfigError if no target directory is specified and a relative path is. + :raises: UWConfigError when a bad path is detected. """ - if not self._target_dir: - errmsg = "Relative path '%s' requires the target directory to be specified" - for dst in self._dst_paths: - if not Path(dst).is_absolute(): - raise UWConfigError(errmsg % dst) + for dst in self._dst_paths: + scheme = urlparse(dst).scheme + absolute = scheme or Path(dst).is_absolute() + if scheme and scheme != STR.url_scheme_file: + msg = "Non-filesystem destination path '%s' not currently supported" + raise UWConfigError(msg % dst) + if self._target_dir and scheme: + msg = "Non-filesystem path '%s' invalid when target directory is specified" + raise UWConfigError(msg % dst) + if self._target_dir and absolute: + msg = "Path '%s' must be relative when target directory is specified" + raise UWConfigError(msg % dst) + if not self._target_dir and not absolute: + msg = "Relative path '%s' requires target directory to be specified" + raise UWConfigError(msg % dst) + + def _check_target_dir(self) -> None: + """ + Check that target directory is valid. + + :raises: UWConfigError when a bad path is detected. + """ + if ( + self._target_dir + and (scheme := urlparse(str(self._target_dir)).scheme) + and scheme != STR.url_scheme_file + ): + msg = "Non-filesystem path '%s' invalid as target directory" + raise UWConfigError(msg % self._target_dir) @property @abstractmethod @@ -124,9 +149,20 @@ def go(self): """ Copy files. """ - dst = lambda k: Path(self._target_dir / k if self._target_dir else k) yield "File copies" - yield [filecopy(src=Path(v), dst=dst(k)) for k, v in self._config.items()] + yield [ + filecopy(src=src, dst=self._simple(self._target_dir) / self._simple(dst)) + for dst, src in self._config.items() + ] + + @staticmethod + def _simple(path: Union[Path, str]) -> Path: + """ + Convert a path, potentially prefixed with scheme file://, into a simple filesystem path. + + :param path: The path to convert. + """ + return Path(urlparse(str(path)).path) class Linker(FileStager): diff --git a/src/uwtools/strings.py b/src/uwtools/strings.py index 6e281c457..fb941a547 100644 --- a/src/uwtools/strings.py +++ b/src/uwtools/strings.py @@ -142,6 +142,7 @@ class STR: updatefmt: str = "update_format" updatevalues: str = "update_values" upp: str = "upp" + url_scheme_file: str = "file" validate: str = "validate" valsfile: str = "values_file" valsfmt: str = "values_format" diff --git a/src/uwtools/tests/api/test_fs.py b/src/uwtools/tests/api/test_fs.py index 858b7b724..643d1ce4a 100644 --- a/src/uwtools/tests/api/test_fs.py +++ b/src/uwtools/tests/api/test_fs.py @@ -17,7 +17,7 @@ def kwargs(tmp_path): f.touch() config = {"a": {"b": {str(dstdir / "f1"): str(srcfile1), str(dstdir / "f2"): str(srcfile2)}}} return { - "target_dir": dstdir, + "target_dir": None, "config": config, "cycle": dt.datetime.now(), "leadtime": dt.timedelta(hours=6), diff --git a/src/uwtools/tests/test_fs.py b/src/uwtools/tests/test_fs.py index bb0caf4d7..227c52764 100644 --- a/src/uwtools/tests/test_fs.py +++ b/src/uwtools/tests/test_fs.py @@ -1,7 +1,11 @@ # pylint: disable=missing-class-docstring # pylint: disable=missing-function-docstring +# pylint: disable=protected-access # pylint: disable=redefined-outer-name +from pathlib import Path +from unittest.mock import Mock, patch + import iotaa import yaml from pytest import fixture, mark, raises @@ -48,8 +52,19 @@ def _schema(self): # Tests +@mark.parametrize("src_fn", [str, Path]) +@mark.parametrize("dst_fn", [str, Path]) +@mark.parametrize("td_fn", [str, Path]) +def test_fs_Copier_go(src_fn, dst_fn, td_fn): + src, td, dst = src_fn("/src/file"), td_fn("/dst"), dst_fn("file") + obj = Mock(_config={dst: src}, _simple=fs.Copier._simple, _target_dir=td) + with patch.object(fs, "filecopy") as filecopy: + fs.Copier.go(obj) + filecopy.assert_called_once_with(src=src, dst=Path("/dst/file")) + + @mark.parametrize("source", ("dict", "file")) -def test_Copier(assets, source): +def test_fs_Copier_go_live(assets, source): dstdir, cfgdict, cfgfile = assets config = cfgdict if source == "dict" else cfgfile assert not (dstdir / "foo").exists() @@ -59,7 +74,7 @@ def test_Copier(assets, source): assert (dstdir / "subdir" / "bar").is_file() -def test_Copier_config_file_dry_run(assets): +def test_fs_Copier_go_live_config_file_dry_run(assets): dstdir, cfgdict, _ = assets assert not (dstdir / "foo").exists() assert not (dstdir / "subdir" / "bar").exists() @@ -69,7 +84,7 @@ def test_Copier_config_file_dry_run(assets): iotaa.dryrun(False) -def test_Copier_no_targetdir_abspath_pass(assets): +def test_fs_Copier_go_live_no_targetdir_abspath_pass(assets): dstdir, cfgdict, _ = assets old = cfgdict["a"]["b"] cfgdict = {str(dstdir / "foo"): old["foo"], str(dstdir / "bar"): old["subdir/bar"]} @@ -81,19 +96,26 @@ def test_Copier_no_targetdir_relpath_fail(assets): _, cfgdict, _ = assets with raises(UWConfigError) as e: fs.Copier(config=cfgdict, key_path=["a", "b"]).go() - errmsg = "Relative path '%s' requires the target directory to be specified" + errmsg = "Relative path '%s' requires target directory to be specified" assert errmsg % "foo" in str(e.value) +def test_fs_Copier__simple(): + assert fs.Copier._simple("relative/path") == Path("relative/path") + assert fs.Copier._simple("/absolute/path") == Path("/absolute/path") + assert fs.Copier._simple("file:///absolute/path") == Path("/absolute/path") + assert fs.Copier._simple("") == Path("") + + @mark.parametrize("source", ("dict", "file")) -def test_FilerStager(assets, source): +def test_fs_FilerStager(assets, source): dstdir, cfgdict, cfgfile = assets config = cfgdict if source == "dict" else cfgfile assert fs.FileStager(target_dir=dstdir, config=config, key_path=["a", "b"]) @mark.parametrize("source", ("dict", "file")) -def test_Linker(assets, source): +def test_fs_Linker(assets, source): dstdir, cfgdict, cfgfile = assets config = cfgdict if source == "dict" else cfgfile assert not (dstdir / "foo").exists() @@ -103,8 +125,59 @@ def test_Linker(assets, source): assert (dstdir / "subdir" / "bar").is_symlink() +@mark.parametrize( + "path,target_dir,msg,fail_expected", + [ + ( + "/other/path", + "/some/path", + "Path '%s' must be relative when target directory is specified", + True, + ), + ( + "foo://bucket/a/b", + None, + "Non-filesystem destination path '%s' not currently supported", + True, + ), + ( + "relpath", + None, + "Relative path '%s' requires target directory to be specified", + True, + ), + ( + "file://foo.com/a/b", + "/some/path", + "Non-filesystem path '%s' invalid when target directory is specified", + True, + ), + ("other/path", "/some/path", None, False), + ("other/path", "file:///some/path", None, False), + ], +) +def test_fs_Stager__check_destination_paths_fail(path, target_dir, msg, fail_expected): + obj = Mock(_dst_paths=[path], _target_dir=target_dir) + if fail_expected: + with raises(UWConfigError) as e: + fs.Stager._check_destination_paths(obj) + assert str(e.value) == msg % path + + +@mark.parametrize( + "path,fail_expected", + [("foo://bucket/a/b", True), ("/some/path", False), ("file:///some/path", False)], +) +def test_fs_Stager__check_target_dir_fail_bad_scheme(path, fail_expected): + obj = Mock(_target_dir="foo://bucket/a/b") + if fail_expected: + with raises(UWConfigError) as e: + fs.Stager._check_target_dir(obj) + assert str(e.value) == "Non-filesystem path '%s' invalid as target directory" % path + + @mark.parametrize("source", ("dict", "file")) -def test_Stager__config_block_fail_bad_key_path(assets, source): +def test_fs_Stager__config_block_fail_bad_key_path(assets, source): dstdir, cfgdict, cfgfile = assets config = cfgdict if source == "dict" else cfgfile with raises(UWConfigError) as e: @@ -113,7 +186,7 @@ def test_Stager__config_block_fail_bad_key_path(assets, source): @mark.parametrize("val", [None, True, False, "str", 42, 3.14, [], tuple()]) -def test_Stager__config_block_fails_bad_type(assets, val): +def test_fs_Stager__config_block_fails_bad_type(assets, val): dstdir, cfgdict, _ = assets cfgdict["a"]["b"] = val with raises(UWConfigError) as e: diff --git a/src/uwtools/tests/utils/test_tasks.py b/src/uwtools/tests/utils/test_tasks.py index aa606ed09..1fe044bb4 100644 --- a/src/uwtools/tests/utils/test_tasks.py +++ b/src/uwtools/tests/utils/test_tasks.py @@ -1,14 +1,29 @@ -# pylint: disable=missing-function-docstring +# pylint: disable=missing-function-docstring,protected-access +import logging import os import stat -from unittest.mock import patch +from pathlib import Path +from typing import Union +from unittest.mock import Mock, patch +from iotaa import asset, external +from pytest import mark, raises + +from uwtools.exceptions import UWConfigError +from uwtools.logging import log +from uwtools.tests.support import logged from uwtools.utils import tasks # Helpers +@external +def exists(x): + yield x + yield asset(x, lambda: True) + + def ready(taskval): return taskval.ready() @@ -37,37 +52,75 @@ def test_tasks_executable(tmp_path): assert ready(tasks.executable(program=p)) -def test_tasks_existing_missing(tmp_path): - path = tmp_path / "x" +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_existing_local_missing(caplog, prefix, tmp_path): + log.setLevel(logging.INFO) + base = tmp_path / "x" + path = prefix + str(base) if prefix else base assert not ready(tasks.existing(path=path)) + assert logged(caplog, "Filesystem item %s: State: Not Ready (external asset)" % base) -def test_tasks_existing_present_directory(tmp_path): +def test_tasks_existing_local_present_directory(caplog, tmp_path): + log.setLevel(logging.INFO) path = tmp_path / "directory" path.mkdir() assert ready(tasks.existing(path=path)) + assert logged(caplog, "Filesystem item %s: State: Ready" % path) -def test_tasks_existing_present_file(tmp_path): - path = tmp_path / "file" - path.touch() +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_existing_local_present_file(caplog, prefix, tmp_path): + log.setLevel(logging.INFO) + base = tmp_path / "file" + base.touch() + path = prefix + str(base) if prefix else base assert ready(tasks.existing(path=path)) + assert logged(caplog, "Filesystem item %s: State: Ready" % base) -def test_tasks_existing_present_symlink(tmp_path): - path = tmp_path / "symlink" - path.symlink_to(os.devnull) +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_existing_local_present_symlink(caplog, prefix, tmp_path): + log.setLevel(logging.INFO) + base = tmp_path / "symlink" + base.symlink_to(os.devnull) + path = prefix + str(base) if prefix else base assert ready(tasks.existing(path=path)) + assert logged(caplog, "Filesystem item %s: State: Ready" % base) + + +@mark.parametrize("scheme", ["http", "https"]) +@mark.parametrize("code,expected", [(200, True), (404, False)]) +def test_tasks_existing_remote(caplog, code, expected, scheme): + log.setLevel(logging.INFO) + path = f"{scheme}://foo.com/obj" + with patch.object(tasks.requests, "head", return_value=Mock(status_code=code)) as head: + state = ready(tasks.existing(path=path)) + assert state is expected + head.assert_called_with(path, allow_redirects=True, timeout=3) + msg = "Remote object %s: State: %s" % (path, "Ready" if state else "Not Ready (external asset)") + assert logged(caplog, msg) + +def test_tasks_existing_bad_scheme(): + path = "foo://bucket/a/b" + with raises(UWConfigError) as e: + tasks.existing(path=path) + assert str(e.value) == f"Scheme 'foo' in '{path}' not supported" -def test_tasks_file_missing(tmp_path): + +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_file_missing(prefix, tmp_path): path = tmp_path / "file" + path = "%s%s" % (prefix, path) if prefix else path assert not ready(tasks.file(path=path)) -def test_tasks_file_present(tmp_path): +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_file_present(prefix, tmp_path): path = tmp_path / "file" path.touch() + path = "%s%s" % (prefix, path) if prefix else path assert ready(tasks.file(path=path)) @@ -89,19 +142,80 @@ def test_tasks_filecopy_directory_hierarchy(tmp_path): assert dst.is_file() -def test_tasks_symlink_simple(tmp_path): +@mark.parametrize("code,expected", [(200, True), (404, False)]) +@mark.parametrize("src", ["http://foo.com/obj", "https://foo.com/obj"]) +def test_tasks_filecopy_source_http(code, expected, src, tmp_path): + log.setLevel(logging.INFO) + dst = tmp_path / "a-file" + assert not dst.is_file() + with patch.object(tasks, "existing", exists): + with patch.object(tasks, "requests") as requests: + response = requests.get() + response.status_code = code + response.content = "data".encode("utf-8") + tasks.filecopy(src=src, dst=dst) + requests.get.assert_called_with(src, allow_redirects=True, timeout=3) + assert dst.is_file() is expected + + +@mark.parametrize( + "src,ok", + [("/src/file", True), ("file:///src/file", True), ("foo://bucket/a/b", False)], +) +def test_tasks_filecopy_source_local(src, ok): + dst = "/dst/file" + with patch.object(tasks.Path, "mkdir") as mkdir: + if ok: + with patch.object(tasks, "file", exists): + with patch.object(tasks, "copy") as copy: + tasks.filecopy(src=src, dst=dst) + mkdir.assert_called_once_with(parents=True, exist_ok=True) + copy.assert_called_once_with(Path("/src/file"), Path(dst)) + else: + with raises(UWConfigError) as e: + tasks.filecopy(src=src, dst=dst) + assert str(e.value) == f"Scheme 'foo' in '{src}' not supported" + + +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_symlink_simple(prefix, tmp_path): target = tmp_path / "target" link = tmp_path / "link" target.touch() assert not link.is_file() - tasks.symlink(target=target, linkname=link) + t2, l2 = ["%s%s" % (prefix, x) if prefix else x for x in (target, link)] + tasks.symlink(target=t2, linkname=l2) assert link.is_symlink() -def test_tasks_symlink_directory_hierarchy(tmp_path): +@mark.parametrize("prefix", ["", "file://"]) +def test_tasks_symlink_directory_hierarchy(prefix, tmp_path): target = tmp_path / "target" link = tmp_path / "foo" / "bar" / "link" target.touch() assert not link.is_file() - tasks.symlink(target=target, linkname=link) + t2, l2 = ["%s%s" % (prefix, x) if prefix else x for x in (target, link)] + tasks.symlink(target=t2, linkname=l2) assert link.is_symlink() + + +def test__bad_scheme(): + path = "foo://bucket/a/b" + with raises(UWConfigError) as e: + tasks.existing(path=path) + assert str(e.value) == f"Scheme 'foo' in '{path}' not supported" + + +def test__local_path_fail(): + path = "foo://bucket/a/b" + with patch.object(tasks, "_bad_scheme") as _bad_scheme: + tasks._local_path(path) + _bad_scheme.assert_called_once_with(path, "foo") + + +@mark.parametrize("prefix", ["", "file://"]) +@mark.parametrize("wrapper", [str, Path]) +def test__local_path_pass(prefix, wrapper): + path = "/some/file" + p2: Union[str, Path] = str(f"{prefix}{path}") if wrapper == str else Path(path) + assert tasks._local_path(p2) == Path(path) diff --git a/src/uwtools/utils/tasks.py b/src/uwtools/utils/tasks.py index 7659fe2be..de66e3c9b 100644 --- a/src/uwtools/utils/tasks.py +++ b/src/uwtools/utils/tasks.py @@ -5,10 +5,18 @@ import os from pathlib import Path from shutil import copy, which -from typing import Union +from types import SimpleNamespace as ns +from typing import NoReturn, Union +from urllib.parse import urlparse +import requests from iotaa import asset, external, task +from uwtools.exceptions import UWConfigError +from uwtools.logging import log + +SCHEMES = ns(http=("http", "https"), local=("", "file")) + @task def directory(path: Path): @@ -35,52 +43,83 @@ def executable(program: Union[Path, str]): @external -def existing(path: Path): +def existing(path: Union[Path, str]): """ - An existing filesystem item (file, directory, or symlink). + An existing file, directory, symlink, or remote object. :param path: Path to the item. + :raises: UWConfigError for unsupported URL schemes. """ - yield "Filesystem item %s" % path - yield asset(path, path.exists) + info = urlparse(str(path)) + scheme = info.scheme + if scheme in SCHEMES.local: + path = _local_path(path) + yield "Filesystem item %s" % path + yield asset(path, path.exists) + elif scheme in SCHEMES.http: + path = str(path) + ready = lambda: requests.head(path, allow_redirects=True, timeout=3).status_code == 200 + yield "Remote object %s" % path + yield asset(path, ready) + else: + _bad_scheme(path, scheme) @external -def file(path: Path, context: str = ""): +def file(path: Union[Path, str], context: str = ""): """ An existing file or symlink to an existing file. :param path: Path to the file. :param context: Optional additional context for the file. """ + path = _local_path(path) suffix = f" ({context})" if context else "" yield "File %s%s" % (path, suffix) yield asset(path, path.is_file) @task -def filecopy(src: Path, dst: Path): +def filecopy(src: Union[Path, str], dst: Union[Path, str]): """ A copy of an existing file. :param src: Path to the source file. :param dst: Path to the destination file to create. + :raises: UWConfigError for unsupported URL schemes. """ yield "Copy %s -> %s" % (src, dst) - yield asset(dst, dst.is_file) - yield file(src) - dst.parent.mkdir(parents=True, exist_ok=True) - copy(src, dst) + yield asset(Path(dst), Path(dst).is_file) + dst = _local_path(dst) # currently no support for remote destinations + src_scheme = urlparse(str(src)).scheme + if src_scheme in SCHEMES.local: + src = _local_path(src) + yield file(src) + dst.parent.mkdir(parents=True, exist_ok=True) + copy(src, dst) + elif src_scheme in SCHEMES.http: + src = str(src) + yield existing(src) + dst.parent.mkdir(parents=True, exist_ok=True) + response = requests.get(src, allow_redirects=True, timeout=3) + if (code := response.status_code) == 200: + with open(dst, "wb") as f: + f.write(response.content) + else: + log.error("Could not get '%s', HTTP status was: %s", src, code) + else: + _bad_scheme(src, src_scheme) @task -def symlink(target: Path, linkname: Path): +def symlink(target: Union[Path, str], linkname: Union[Path, str]): """ A symbolic link. :param target: The existing file or directory. :param linkname: The symlink to create. """ + target, linkname = map(_local_path, [target, linkname]) yield "Link %s -> %s" % (linkname, target) yield asset(linkname, linkname.exists) yield existing(target) @@ -89,3 +128,30 @@ def symlink(target: Path, linkname: Path): src=target if target.is_absolute() else os.path.relpath(target, linkname.parent), dst=linkname, ) + + +# Private helpers + + +def _bad_scheme(path: Union[Path, str], scheme: str) -> NoReturn: + """ + Fail on an unsupported URL scheme. + + :param path: The path with a bad scheme. + :param scheme: The scheme. + :raises: UWConfigError. + """ + raise UWConfigError(f"Scheme '{scheme}' in '{path}' not supported") + + +def _local_path(path: Union[Path, str]) -> Path: + """ + Ensure path is local and return simple version. + + :param path: The local path to check. + :raises: UWConfigError if a non-local scheme is specified. + """ + info = urlparse(str(path)) + if info.scheme and info.scheme not in SCHEMES.local: + _bad_scheme(path, info.scheme) + return Path(info.path)