From 73b0ea1617ddf1a94d6f25c48932bb6b094bbbd3 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 19:56:39 +0100 Subject: [PATCH 01/13] add util.dir_size() --- CHANGELOG.md | 10 ++++++++-- iblutil/__init__.py | 2 +- iblutil/util.py | 25 +++++++++++++++++++++++++ tests/test_util.py | 21 +++++++++++++++++++++ 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fee4b0..006250b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,19 @@ * Minor releases (X.1.X) are new features such as added functions or small changes that don't cause major compatibility issues. * Major releases (1.X.X) are major new features or changes that break backward compatibility in a big way. -## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.9.0] +## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.10.0] + +### Added + +- util.dir_size: method to determine size of directory in bytes + +## [1.9.0] ### Added - numerical.hash_uuids returns the hash of a collection of UUIDs -## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.8.0] +## [1.8.0] ### Modified diff --git a/iblutil/__init__.py b/iblutil/__init__.py index e5102d3..52af183 100644 --- a/iblutil/__init__.py +++ b/iblutil/__init__.py @@ -1 +1 @@ -__version__ = '1.9.0' +__version__ = '1.10.0' diff --git a/iblutil/util.py b/iblutil/util.py index 470152d..7457236 100644 --- a/iblutil/util.py +++ b/iblutil/util.py @@ -1,4 +1,5 @@ from itertools import takewhile +from os import scandir from pathlib import Path import collections import colorlog @@ -253,3 +254,27 @@ def rrmdir(folder: Path, levels: int = 0): to_remove = (folder, *[folder.parents[n] for n in range(levels)]) # filter list to those that are empty; if statement always true as rmdir returns None return [f for f in takewhile(lambda f: not any(f.iterdir()), to_remove) if not f.rmdir()] + + +def dir_size(directory: str | Path) -> int: + """ + Calculate the total size of a 
directory including all its subdirectories and files. + + Parameters + ---------- + directory : str + The path to the directory for which the size needs to be calculated. + + Returns + ------- + int + The total size of the directory in bytes. + """ + total_bytes = 0 + with scandir(directory) as it: + for entry in it: + if entry.is_dir(): + total_bytes += dir_size(entry.path) + else: + total_bytes += entry.stat().st_size + return total_bytes diff --git a/tests/test_util.py b/tests/test_util.py index 5a49e4e..6476575 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,6 @@ import unittest import types +from os import path, makedirs from pathlib import Path import tempfile import logging @@ -7,6 +8,7 @@ import numpy as np from iblutil import util +from iblutil.util import dir_size class TestBunch(unittest.TestCase): @@ -180,5 +182,24 @@ def test_rrmdir(self): self.assertTrue(file.exists()) +class TestDirSize(unittest.TestCase): + + def test_dir_size(self): + with tempfile.TemporaryDirectory() as test_dir: + sub_dir = path.join(test_dir, 'sub_dir') + makedirs(sub_dir) + file1 = path.join(test_dir, 'file1') + file2 = path.join(sub_dir, 'file2') + file3 = path.join(sub_dir, 'file3') + with open(file1, 'w') as f: + f.write('Old pond') + with open(file2, 'w') as f: + f.write('A frog jumps in') + with open(file3, 'w') as f: + f.write('The sound of water') + expected = path.getsize(file1) + path.getsize(file2) + path.getsize(file3) + self.assertEqual(dir_size(test_dir), expected) + + if __name__ == '__main__': unittest.main(exit=False) From 202cb8f214d8725176bb9743f3ca3dacd4c29dcf Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 19:57:56 +0100 Subject: [PATCH 02/13] Update test_util.py --- tests/test_util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 6476575..8061fb7 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -8,7 +8,6 @@ import numpy as np from iblutil 
import util -from iblutil.util import dir_size class TestBunch(unittest.TestCase): @@ -198,7 +197,7 @@ def test_dir_size(self): with open(file3, 'w') as f: f.write('The sound of water') expected = path.getsize(file1) + path.getsize(file2) + path.getsize(file3) - self.assertEqual(dir_size(test_dir), expected) + self.assertEqual(util.dir_size(test_dir), expected) if __name__ == '__main__': From 33b56073fbc58d785dab0ccd6eba030978ca3328 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:01:57 +0100 Subject: [PATCH 03/13] typing ... --- iblutil/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iblutil/util.py b/iblutil/util.py index 7457236..e05108b 100644 --- a/iblutil/util.py +++ b/iblutil/util.py @@ -256,13 +256,13 @@ def rrmdir(folder: Path, levels: int = 0): return [f for f in takewhile(lambda f: not any(f.iterdir()), to_remove) if not f.rmdir()] -def dir_size(directory: str | Path) -> int: +def dir_size(directory: Union[str, Path]) -> int: """ Calculate the total size of a directory including all its subdirectories and files. Parameters ---------- - directory : str + directory : str | Path The path to the directory for which the size needs to be calculated. Returns From bdfd024b232cbe647a0d076cfa0628471f1ad9f1 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:03:49 +0100 Subject: [PATCH 04/13] typing - again! 
--- iblutil/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/iblutil/util.py b/iblutil/util.py index e05108b..755f05e 100644 --- a/iblutil/util.py +++ b/iblutil/util.py @@ -6,6 +6,7 @@ import copy import logging import sys +from typing import Union import numpy as np From 9b1ea775301b70fc6390b11fc3f2d523e3e1b190 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:06:16 +0100 Subject: [PATCH 05/13] add pathlib.Path to the mix --- tests/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index 8061fb7..6238618 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -187,7 +187,7 @@ def test_dir_size(self): with tempfile.TemporaryDirectory() as test_dir: sub_dir = path.join(test_dir, 'sub_dir') makedirs(sub_dir) - file1 = path.join(test_dir, 'file1') + file1 = Path(path.join(test_dir, 'file1')) file2 = path.join(sub_dir, 'file2') file3 = path.join(sub_dir, 'file3') with open(file1, 'w') as f: From 50654099e0c6434ee7f98ba61bb315da8008ae05 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:07:34 +0100 Subject: [PATCH 06/13] Update test_util.py --- tests/test_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index 6238618..afd1a64 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -187,7 +187,7 @@ def test_dir_size(self): with tempfile.TemporaryDirectory() as test_dir: sub_dir = path.join(test_dir, 'sub_dir') makedirs(sub_dir) - file1 = Path(path.join(test_dir, 'file1')) + file1 = path.join(test_dir, 'file1') file2 = path.join(sub_dir, 'file2') file3 = path.join(sub_dir, 'file3') with open(file1, 'w') as f: @@ -198,6 +198,7 @@ def test_dir_size(self): f.write('The sound of water') expected = path.getsize(file1) + path.getsize(file2) + path.getsize(file3) self.assertEqual(util.dir_size(test_dir), expected) + self.assertEqual(util.dir_size(Path(test_dir)), expected) if __name__ == 
'__main__': From 9b50c8c67383f7b462b93674dfbdd7f1ea0af682 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:10:46 +0100 Subject: [PATCH 07/13] Update test_util.py --- tests/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index afd1a64..5b34188 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -197,7 +197,7 @@ def test_dir_size(self): with open(file3, 'w') as f: f.write('The sound of water') expected = path.getsize(file1) + path.getsize(file2) + path.getsize(file3) - self.assertEqual(util.dir_size(test_dir), expected) + self.assertEqual(util.dir_size(str(test_dir)), expected) self.assertEqual(util.dir_size(Path(test_dir)), expected) From aafa201ddba35ed1415c33fcc45e89a1c889fc53 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:23:14 +0100 Subject: [PATCH 08/13] make things simpler --- tests/test_util.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 5b34188..103cc67 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,6 +1,5 @@ import unittest import types -from os import path, makedirs from pathlib import Path import tempfile import logging @@ -184,21 +183,20 @@ def test_rrmdir(self): class TestDirSize(unittest.TestCase): def test_dir_size(self): - with tempfile.TemporaryDirectory() as test_dir: - sub_dir = path.join(test_dir, 'sub_dir') - makedirs(sub_dir) - file1 = path.join(test_dir, 'file1') - file2 = path.join(sub_dir, 'file2') - file3 = path.join(sub_dir, 'file3') - with open(file1, 'w') as f: - f.write('Old pond') - with open(file2, 'w') as f: - f.write('A frog jumps in') - with open(file3, 'w') as f: - f.write('The sound of water') - expected = path.getsize(file1) + path.getsize(file2) + path.getsize(file3) - self.assertEqual(util.dir_size(str(test_dir)), expected) - self.assertEqual(util.dir_size(Path(test_dir)), expected) + with 
tempfile.TemporaryDirectory() as temp_dir: + dir1 = Path(temp_dir) + dir2 = Path(dir1).joinpath('sub_dir') + dir2.mkdir() + file1 = dir1.joinpath('file1') + file2 = dir2.joinpath('file2') + file3 = dir2.joinpath('file3') + with open(file1, 'w') as f1, open(file2, 'w') as f2, open(file3, 'w') as f3: + f1.write('Old pond') + f2.write('A frog jumps in') + f3.write('The sound of water') + expected = file1.stat().st_size + file2.stat().st_size + file3.stat().st_size + self.assertEqual(util.dir_size(str(dir1)), expected) + self.assertEqual(util.dir_size(dir1), expected) if __name__ == '__main__': From c1cc09af86a885d232a3cf196b8b00296ef9d417 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 20:53:49 +0100 Subject: [PATCH 09/13] explicitly check for is_file() - to exclude symlinks --- iblutil/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iblutil/util.py b/iblutil/util.py index 755f05e..6b6bcff 100644 --- a/iblutil/util.py +++ b/iblutil/util.py @@ -276,6 +276,6 @@ def dir_size(directory: Union[str, Path]) -> int: for entry in it: if entry.is_dir(): total_bytes += dir_size(entry.path) - else: + elif entry.is_file(): total_bytes += entry.stat().st_size return total_bytes From d16d1f35180d67f7a505557bec71ad27197f2719 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 21:29:48 +0100 Subject: [PATCH 10/13] add option for following symlinks --- iblutil/util.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/iblutil/util.py b/iblutil/util.py index 6b6bcff..a068087 100644 --- a/iblutil/util.py +++ b/iblutil/util.py @@ -257,7 +257,7 @@ def rrmdir(folder: Path, levels: int = 0): return [f for f in takewhile(lambda f: not any(f.iterdir()), to_remove) if not f.rmdir()] -def dir_size(directory: Union[str, Path]) -> int: +def dir_size(directory: str | Path, follow_symlinks: bool = False) -> int: """ Calculate the total size of a directory including all its subdirectories and files. 
@@ -265,6 +265,8 @@ def dir_size(directory: Union[str, Path]) -> int: ---------- directory : str | Path The path to the directory for which the size needs to be calculated. + follow_symlinks : bool, optional + Whether to follow symbolic links when calculating the size. Default is False. Returns ------- @@ -274,8 +276,10 @@ def dir_size(directory: Union[str, Path]) -> int: total_bytes = 0 with scandir(directory) as it: for entry in it: - if entry.is_dir(): - total_bytes += dir_size(entry.path) + if entry.is_symlink() and not follow_symlinks: + continue + elif entry.is_dir(): + total_bytes += dir_size(entry.path, follow_symlinks) elif entry.is_file(): total_bytes += entry.stat().st_size return total_bytes From 50a4d6d85e3050ac0f0419a68a1f9f8df9edd34e Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 21:30:58 +0100 Subject: [PATCH 11/13] Update util.py --- iblutil/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iblutil/util.py b/iblutil/util.py index a068087..e84c030 100644 --- a/iblutil/util.py +++ b/iblutil/util.py @@ -257,7 +257,7 @@ def rrmdir(folder: Path, levels: int = 0): return [f for f in takewhile(lambda f: not any(f.iterdir()), to_remove) if not f.rmdir()] -def dir_size(directory: str | Path, follow_symlinks: bool = False) -> int: +def dir_size(directory: Union[str, Path], follow_symlinks: bool = False) -> int: """ Calculate the total size of a directory including all its subdirectories and files. 
From f0756268a36fe729efddd6415be97f128731174d Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 21:38:32 +0100 Subject: [PATCH 12/13] test symlink option --- tests/test_util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index 103cc67..b68f32c 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -194,10 +194,12 @@ def test_dir_size(self): f1.write('Old pond') f2.write('A frog jumps in') f3.write('The sound of water') + symlink = dir2.joinpath('symlink_file') + symlink.symlink_to(file1) expected = file1.stat().st_size + file2.stat().st_size + file3.stat().st_size self.assertEqual(util.dir_size(str(dir1)), expected) self.assertEqual(util.dir_size(dir1), expected) - + self.assertEqual(util.dir_size(dir1, True), expected + file1.stat().st_size) if __name__ == '__main__': unittest.main(exit=False) From 3fe32fc2acf0bb49de21a07b962640d79bc08004 Mon Sep 17 00:00:00 2001 From: Florian Rau Date: Mon, 17 Jun 2024 21:39:48 +0100 Subject: [PATCH 13/13] flake --- tests/test_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_util.py b/tests/test_util.py index b68f32c..b58bda6 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -201,5 +201,6 @@ def test_dir_size(self): self.assertEqual(util.dir_size(dir1), expected) self.assertEqual(util.dir_size(dir1, True), expected + file1.stat().st_size) + if __name__ == '__main__': unittest.main(exit=False)