diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a0de71f..0064445 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -42,7 +42,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python: [2.7, 3.5, 3.9]
+        python: [2.7, 3.6, 3.9]
     name: Test py${{ matrix.python }}
     steps:
     - uses: actions/checkout@v2
diff --git a/miutil/fdio.py b/miutil/fdio.py
index 6c37a65..ed6fef9 100644
--- a/miutil/fdio.py
+++ b/miutil/fdio.py
@@ -1,8 +1,9 @@
 import logging
 from contextlib import contextmanager
 from os import makedirs
-from shutil import rmtree
+from shutil import copyfileobj, rmtree
 from tempfile import mkdtemp
+from zipfile import ZipFile
 
 try:
     from collections.abc import Iterable
@@ -13,9 +14,12 @@ except ImportError:
     fspath = str
 
 try:
-    from pathlib2 import Path
-except ImportError:
     from pathlib import Path
+except ImportError:
+    from pathlib2 import Path
+
+from tqdm.auto import tqdm
+from tqdm.utils import CallbackIOWrapper
 
 log = logging.getLogger(__name__)
 
@@ -46,3 +50,37 @@ def tmpdir(*args, **kwargs):
     d = mkdtemp(*args, **kwargs)
     yield d
     rmtree(d)
+
+
+def extractall(fzip, dest, desc="Extracting"):
+    """
+    zipfile.ZipFile(fzip).extractall(dest) with progress
+
+    Args:
+      fzip (path-like or file): zip archive to read
+      dest (path-like): output directory
+      desc (str): progress bar label
+    Raises:
+      ValueError: if an archive member would be written outside `dest`
+    """
+    dest = Path(dest).expanduser()
+    with ZipFile(fzip) as zipf, tqdm(
+        desc=desc,
+        unit="B",
+        unit_scale=True,
+        unit_divisor=1024,
+        total=sum(getattr(i, "file_size", 0) for i in zipf.infolist()),
+    ) as pbar:
+        for i in zipf.infolist():
+            if not getattr(i, "file_size", 0):  # directory (or empty file)
+                zipf.extract(i, fspath(dest))
+                continue
+            member = Path(i.filename)
+            # unlike `ZipFile.extract`, a bare `open` does not sanitise names
+            if member.is_absolute() or ".." in member.parts:
+                raise ValueError("unsafe path in archive: %s" % i.filename)
+            fout = dest / member
+            # a file may be listed before (or without) its directory entry
+            fout.parent.mkdir(parents=True, exist_ok=True)
+            with zipf.open(i) as fi, open(fspath(fout), "wb") as fo:
+                copyfileobj(CallbackIOWrapper(pbar.update, fi), fo)
diff --git a/miutil/mlab/__init__.py b/miutil/mlab/__init__.py
index 4bc485d..e273a6b 100644
--- a/miutil/mlab/__init__.py
+++ b/miutil/mlab/__init__.py
@@ -13,9 +13,7 @@
 try:
     FileNotFoundError
 except NameError:
-
-    class FileNotFoundError(OSError):
-        pass
+    FileNotFoundError = OSError
 
 from ..fdio import tmpdir
 
diff --git a/miutil/web.py b/miutil/web.py
index 47c048d..d20826a 100644
--- a/miutil/web.py
+++ b/miutil/web.py
@@ -1,10 +1,13 @@
 import logging
 from os import W_OK, access, path, remove
+from shutil import copyfileobj
+from urllib import request
+from urllib.parse import urlparse
 
 import requests
 from tqdm.auto import tqdm
 
-from .fdio import create_dir, fspath
+from .fdio import Path, create_dir, fspath
 
 log = logging.getLogger(__name__)
 
@@ -63,3 +66,35 @@ def get_file(fname, origin, cache_dir=None, chunk_size=None):
             raise
 
     return fpath
+
+
+def urlopen_cached(url, outdir, fname=None, mode="rb"):
+    """
+    Download `url` to `outdir/fname`.
+    Cache based on `url` at `outdir/fname`.url
+
+    Args:
+      url (str): source
+      outdir (path-like): destination
+      fname (str): optional, auto-detected from `url` if not given
+      mode (str): for returned file object
+    Returns:
+      file
+    Raises:
+      ValueError: if `fname` is not given and cannot be detected from `url`
+    """
+    if fname is None:
+        fname = Path(urlparse(url).path).name
+        if not fname:
+            raise ValueError("cannot detect a file name from %s" % url)
+    outdir = Path(outdir).expanduser()
+    outdir.mkdir(parents=True, exist_ok=True)
+    fout = outdir / fname
+    cache = outdir / (fspath(fname) + ".url")
+    if not fout.is_file() or not cache.is_file() or cache.read_text().strip() != url:
+        # close the connection once the payload has been saved
+        with request.urlopen(url) as fi, fout.open("wb") as raw:
+            with tqdm.wrapattr(raw, "write", total=getattr(fi, "length", None)) as fo:
+                copyfileobj(fi, fo)
+        cache.write_text(url)
+    return fout.open(mode)
diff --git a/setup.cfg b/setup.cfg
index 2de4924..5fe1f46 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,7 +29,6 @@ classifiers=
     Programming Language :: Python :: 2
     Programming Language :: Python :: 2.7
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.5
     Programming Language :: Python :: 3.6
     Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
@@ -45,7 +44,8 @@ setup_requires=setuptools>=42; wheel; setuptools_scm[toml]>=3.4
 install_requires=
     pathlib2; python_version <= "2.7"
+    tqdm>=4.40.0
 packages=find:
-python_requires=>=2.7
+python_requires=>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*
 [options.extras_require]
 dev=
     pre-commit
diff --git a/tests/test_fdio.py b/tests/test_fdio.py
index 820295e..dbd9a7e 100644
--- a/tests/test_fdio.py
+++ b/tests/test_fdio.py
@@ -2,6 +2,8 @@
 from os import path
 from shutil import rmtree
 
+from pytest import importorskip
+
 from miutil import fdio
 
 
@@ -45,3 +47,18 @@ def test_tmpdir():
         assert path.exists(tmpdir)
         res = tmpdir
     assert not path.exists(res)
+
+
+def test_extractall(tmp_path):
+    web = importorskip("miutil.web")
+    tmpdir = tmp_path / "extractall"
+    assert not tmpdir.exists()
+    url = "https://github.com/AMYPAD/miutil/archive/v0.6.0.zip"
+    with web.urlopen_cached(url, tmpdir) as fd:
+        fdio.extractall(fd, tmpdir)
+
+    assert (tmpdir / "miutil-0.6.0" / "README.rst").is_file()
+    assert (
+        "Medical imaging utilities."
+        in (tmpdir / "miutil-0.6.0" / "README.rst").read_text()
+    )
diff --git a/tests/test_web.py b/tests/test_web.py
index e81dd03..c373fc6 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -12,3 +12,14 @@ def test_get_file(tmp_path):
         cache_dir=tmpdir,
     )
     assert (tmpdir / "README.rst").is_file()
+
+
+def test_urlopen_cached(tmp_path):
+    tmpdir = tmp_path / "urlopen_cached"
+    assert not tmpdir.exists()
+    url = "https://github.com/AMYPAD/miutil/raw/master/README.rst"
+    with web.urlopen_cached(url, tmpdir, mode="r") as fd:
+        assert "Medical imaging utilities" in fd.read()
+
+    assert (tmpdir / "README.rst").is_file()
+    assert (tmpdir / "README.rst.url").read_text() == url