diff --git a/HISTORY.md b/HISTORY.md index fdfad994..894234ed 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # cloudpathlib Changelog +## v0.12.1 (2023-01-04) + + - Fix glob logic for buckets; add regression test; add error on globbing all buckets ([Issue #311](https://github.com/drivendataorg/cloudpathlib/issues/311), [PR #312](https://github.com/drivendataorg/cloudpathlib/pull/312)) + ## v0.12.0 (2022-12-30) - API Change: `S3Client` supports an `extra_args` kwarg now to pass extra args down to `boto3` functions; this enables Requester Pays bucket access and bucket encryption. (Issues [#254](https://github.com/drivendataorg/cloudpathlib/issues/254), [#180](https://github.com/drivendataorg/cloudpathlib/issues/180); [PR #307](https://github.com/drivendataorg/cloudpathlib/pull/307)) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 74cbf4fa..a4e91db3 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -357,6 +357,11 @@ def _glob_checks(self, pattern: str) -> None: if pattern.startswith(self.cloud_prefix) or pattern.startswith("/"): raise CloudPathNotImplementedError("Non-relative patterns are unsupported") + if self.drive == "": + raise CloudPathNotImplementedError( + ".glob is only supported within a bucket or container; you can use `.iterdir` to list buckets; for example, CloudPath('s3://').iterdir()" + ) + def _glob( self: DerivedCloudPath, selector, recursive: bool ) -> Generator[DerivedCloudPath, None, None]: @@ -390,12 +395,13 @@ def _build_tree(trunk, branch, nodes, is_dir): root = _CloudPathSelectable( self.name, - [p.name for p in self.parents[:-1]], # all parents except bucket/container + [], # nothing above self will be returned, so initial parents is empty file_tree, ) for p in selector.select_from(root): - yield self.client.CloudPath(f"{self.cloud_prefix}{self.drive}/{p}") + # select_from returns self.name/... so strip before joining + yield (self / str(p)[len(self.name) + 1 :]) def glob(self: DerivedCloudPath, pattern: str) -> Generator[DerivedCloudPath, None, None]: self._glob_checks(pattern) diff --git a/setup.py b/setup.py index 1857150f..6b1b8040 100644 --- a/setup.py +++ b/setup.py @@ -61,5 +61,5 @@ def load_requirements(path: Path): "Source Code": "https://github.com/drivendataorg/cloudpathlib", }, url="https://github.com/drivendataorg/cloudpathlib", - version="0.12.0", + version="0.12.1", ) diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index 3592d49c..1ba8fd22 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -191,6 +191,25 @@ def _check_glob(pattern, glob_method): ) +def test_glob_buckets(rig): + # CloudPath("s3://").glob("*") results in error + drive_level = rig.path_class(rig.path_class.cloud_prefix) + + with pytest.raises(CloudPathNotImplementedError): + list(drive_level.glob("*")) + + # CloudPath("s3://bucket").glob("*") should work + # bucket level glob returns correct results + # regression test for #311 + bucket = rig.path_class(f"{rig.path_class.cloud_prefix}{rig.drive}") + + first_result = next(bucket.glob("*")) + + # assert all parts are unique + assert first_result.drive == rig.drive + assert len(first_result.parts) == len(set(first_result.parts)) + + def test_glob_many_open_files(rig): # test_glob_many_open_files # Adapted from: https://github.com/python/cpython/blob/7ffe7ba30fc051014977c6f393c51e57e71a6648/Lib/test/test_pathlib.py#L1697-L1712