From bd6c861d7931480ed433a682d6dad169ce777764 Mon Sep 17 00:00:00 2001 From: Nikolaos Episkopos <58558195+nepiskopos@users.noreply.github.com> Date: Sun, 21 Apr 2024 04:34:39 +0300 Subject: [PATCH 1/5] Add recursive sudirectories and files extraction. --- py7zr/py7zr.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/py7zr/py7zr.py b/py7zr/py7zr.py index 6503b9ad..e9ec72fd 100644 --- a/py7zr/py7zr.py +++ b/py7zr/py7zr.py @@ -531,6 +531,7 @@ def _extract( targets: Optional[Collection[str]] = None, return_dict: bool = False, callback: Optional[ExtractCallback] = None, + recursive: Optional[bool] = False, ) -> Optional[Dict[str, IO[Any]]]: if callback is None: pass @@ -560,9 +561,14 @@ def _extract( fnames: Dict[str, int] = {} # check duplicated filename in one archive? self.q.put(("pre", None, None)) for f in self.files: - if targets is not None and f.filename not in targets: - self.worker.register_filelike(f.id, None) - continue + if targets is not None and recursive == False: + if f.filename not in targets: + self.worker.register_filelike(f.id, None) + continue + elif targets is not None and recursive == True: + if f.filename not in targets and not any([target in f.filename for target in targets]): + self.worker.register_filelike(f.id, None) + continue # When archive has a multiple files which have same name # To guarantee order of archive, multi-thread decompression becomes off. 
@@ -997,10 +1003,10 @@ def read(self, targets: Optional[Collection[str]] = None) -> Optional[Dict[str, self._dict = {} return self._extract(path=None, targets=targets, return_dict=True) - def extract(self, path: Optional[Any] = None, targets: Optional[Collection[str]] = None) -> None: + def extract(self, path: Optional[Any] = None, targets: Optional[Collection[str]] = None, recursive: Optional[bool] = False) -> None: if not self._is_none_or_collection(targets): raise TypeError("Wrong argument type given.") - self._extract(path, targets, return_dict=False) + self._extract(path, targets, return_dict=False, recursive=recursive) def reporter(self, callback: ExtractCallback): while True: From 8e2d32964b687b9d8f5a10ce45c8f62d44dea315 Mon Sep 17 00:00:00 2001 From: Nikolaos Episkopos <58558195+nepiskopos@users.noreply.github.com> Date: Sun, 21 Apr 2024 16:10:47 +0300 Subject: [PATCH 2/5] Improve recursive sudirectories and files extraction. --- py7zr/py7zr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py7zr/py7zr.py b/py7zr/py7zr.py index e9ec72fd..425a5a25 100644 --- a/py7zr/py7zr.py +++ b/py7zr/py7zr.py @@ -566,7 +566,7 @@ def _extract( self.worker.register_filelike(f.id, None) continue elif targets is not None and recursive == True: - if f.filename not in targets and not any([target in f.filename for target in targets]): + if f.filename not in targets and not any([f.filename.startswith(target) for target in targets]): self.worker.register_filelike(f.id, None) continue From fddc3797e0c1c32d2d4bb9698609246d7d725533 Mon Sep 17 00:00:00 2001 From: Nikolaos Episkopos <58558195+nepiskopos@users.noreply.github.com> Date: Sun, 12 May 2024 01:24:17 +0300 Subject: [PATCH 3/5] Attempt to fix CI issues Attempt to fix error for Test Python 3.9 on ubuntu-22.04 --- py7zr/py7zr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/py7zr/py7zr.py b/py7zr/py7zr.py index 425a5a25..04d54035 100644 --- a/py7zr/py7zr.py +++ b/py7zr/py7zr.py @@ 
-561,11 +561,11 @@ def _extract( fnames: Dict[str, int] = {} # check duplicated filename in one archive? self.q.put(("pre", None, None)) for f in self.files: - if targets is not None and recursive == False: + if targets is not None and recursive is False: if f.filename not in targets: self.worker.register_filelike(f.id, None) continue - elif targets is not None and recursive == True: + elif targets is not None and recursive is True: if f.filename not in targets and not any([f.filename.startswith(target) for target in targets]): self.worker.register_filelike(f.id, None) continue @@ -1003,7 +1003,8 @@ def read(self, targets: Optional[Collection[str]] = None) -> Optional[Dict[str, self._dict = {} return self._extract(path=None, targets=targets, return_dict=True) - def extract(self, path: Optional[Any] = None, targets: Optional[Collection[str]] = None, recursive: Optional[bool] = False) -> None: + def extract(self, path: Optional[Any] = None, targets: Optional[Collection[str]] = None, + recursive: Optional[bool] = False) -> None: if not self._is_none_or_collection(targets): raise TypeError("Wrong argument type given.") self._extract(path, targets, return_dict=False, recursive=recursive) From ba1478c6ae0f5d3ab1cb8445ab217a8b6ce85533 Mon Sep 17 00:00:00 2001 From: Nikolaos Episkopos <58558195+nepiskopos@users.noreply.github.com> Date: Sun, 12 May 2024 01:50:45 +0300 Subject: [PATCH 4/5] Update documentation and contributors --- docs/api.rst | 18 +++++++++++++++++- docs/authors.rst | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index d3f37f9f..33db526e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -186,7 +186,7 @@ SevenZipFile Object py7zr looks for files and directories as same as specified in element of 'targets'. 
- When the method get a ``str`` object or another object other than collection + When the method gets a ``str`` object or another object other than collection such as LIST or SET, it will raise :exc:`TypeError`. Once extract() called, the ``SevenZipFile`` object become exhausted, @@ -199,6 +199,20 @@ SevenZipFile Object 'somedir/somefile' then pass a list: ['somedirectory', 'somedir/somefile'] as a target argument. + +.. py:method:: SevenZipFile.extract(path=None, targets=None, recursive=False) + + 'recursive' is a BOOLEAN which, if set to True, simplifies the extraction of + subcontents. + + Instead of specifying all files / directories under a parent + directory by passing a list of 'targets', specifying only the parent directory + and setting 'recursive' to True forces an automatic extraction of all + subdirectories and subcontents recursively. + + If 'recursive' is not set, it defaults to False, so the extraction proceeds as + if the parameter did not exist. + Please see 'tests/test_basic.py: test_py7zr_extract_and_getnames()' for example code. @@ -210,6 +224,8 @@ SevenZipFile Object targets = [f if filter_pattern.match(f) for f in allfiles] with SevenZipFile('archive.7z', 'r') as zip: zip.extract(targets=targets) + with SevenZipFile('archive.7z', 'r') as zip: + zip.extract(targets=targets, recursive=True) .. 
py:method:: SevenZipFile.readall() diff --git a/docs/authors.rst b/docs/authors.rst index 57a4fcff..9ccd7a3a 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -23,6 +23,7 @@ Contributors, listed alphabetically, are: * Kyle Altendorf -- Fix multithreading problem (#82) * Martin Larralde -- Fix writef method (#397) * Megan Leet -- Fix infinite loop when extraction (#354) +* Nikolaos Episkopos -- Add recursive subdirectories extraction (#585) * @padremayi -- Fix crash on wrong crationtime in archive (#275) * @royopa -- Fix typo (#108) * Sergei -- Update report_update() (#558) From c4f15c3c159d17c16f3ee26fde885c1831b0092f Mon Sep 17 00:00:00 2001 From: Nikolaos <58558195+nepiskopos@users.noreply.github.com> Date: Sat, 18 May 2024 23:26:23 +0300 Subject: [PATCH 5/5] Attempt to fix another CI build failure issue using black. --- py7zr/py7zr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/py7zr/py7zr.py b/py7zr/py7zr.py index 04d54035..681e0bf0 100644 --- a/py7zr/py7zr.py +++ b/py7zr/py7zr.py @@ -1003,8 +1003,9 @@ def read(self, targets: Optional[Collection[str]] = None) -> Optional[Dict[str, self._dict = {} return self._extract(path=None, targets=targets, return_dict=True) - def extract(self, path: Optional[Any] = None, targets: Optional[Collection[str]] = None, - recursive: Optional[bool] = False) -> None: + def extract( + self, path: Optional[Any] = None, targets: Optional[Collection[str]] = None, recursive: Optional[bool] = False + ) -> None: if not self._is_none_or_collection(targets): raise TypeError("Wrong argument type given.") self._extract(path, targets, return_dict=False, recursive=recursive)