diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1fee4b0..006250b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,13 +3,19 @@
 * Minor releases (X.1.X) are new features such as added functions or small changes that don't cause major compatibility issues.
 * Major releases (1.X.X) are major new features or changes that break backward compatibility in a big way.
 
-## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.9.0]
+## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.10.0]
+
+### Added
+
+- util.dir_size: method to determine size of directory in bytes
+
+## [1.9.0]
 
 ### Added
 
 - numerical.hash_uuids returns the hash of a collection of UUIDs
 
-## [Latest](https://github.com/int-brain-lab/iblutil/commits/main) [1.8.0]
+## [1.8.0]
 
 ### Modified
 
diff --git a/iblutil/__init__.py b/iblutil/__init__.py
index e5102d3..52af183 100644
--- a/iblutil/__init__.py
+++ b/iblutil/__init__.py
@@ -1 +1 @@
-__version__ = '1.9.0'
+__version__ = '1.10.0'
diff --git a/iblutil/util.py b/iblutil/util.py
index 470152d..e84c030 100644
--- a/iblutil/util.py
+++ b/iblutil/util.py
@@ -1,10 +1,12 @@
 from itertools import takewhile
+from os import scandir
 from pathlib import Path
 import collections
 import colorlog
 import copy
 import logging
 import sys
+from typing import Union
 
 import numpy as np
 
@@ -253,3 +255,31 @@ def rrmdir(folder: Path, levels: int = 0):
     to_remove = (folder, *[folder.parents[n] for n in range(levels)])
     # filter list to those that are empty; if statement always true as rmdir returns None
     return [f for f in takewhile(lambda f: not any(f.iterdir()), to_remove) if not f.rmdir()]
+
+
+def dir_size(directory: Union[str, Path], follow_symlinks: bool = False) -> int:
+    """
+    Calculate the total size of a directory including all its subdirectories and files.
+
+    Parameters
+    ----------
+    directory : str | Path
+        The path to the directory for which the size needs to be calculated.
+    follow_symlinks : bool, optional
+        Whether to follow symbolic links when calculating the size. Default is False.
+
+    Returns
+    -------
+    int
+        The total size of the directory in bytes.
+    """
+    total_bytes = 0
+    with scandir(directory) as it:
+        for entry in it:
+            if entry.is_symlink() and not follow_symlinks:
+                continue
+            elif entry.is_dir():
+                total_bytes += dir_size(entry.path, follow_symlinks)
+            elif entry.is_file():
+                total_bytes += entry.stat().st_size
+    return total_bytes
diff --git a/tests/test_util.py b/tests/test_util.py
index 5a49e4e..b58bda6 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -180,5 +180,27 @@ def test_rrmdir(self):
             self.assertTrue(file.exists())
 
 
+class TestDirSize(unittest.TestCase):
+
+    def test_dir_size(self):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            dir1 = Path(temp_dir)
+            dir2 = Path(dir1).joinpath('sub_dir')
+            dir2.mkdir()
+            file1 = dir1.joinpath('file1')
+            file2 = dir2.joinpath('file2')
+            file3 = dir2.joinpath('file3')
+            with open(file1, 'w') as f1, open(file2, 'w') as f2, open(file3, 'w') as f3:
+                f1.write('Old pond')
+                f2.write('A frog jumps in')
+                f3.write('The sound of water')
+            symlink = dir2.joinpath('symlink_file')
+            symlink.symlink_to(file1)
+            expected = file1.stat().st_size + file2.stat().st_size + file3.stat().st_size
+            self.assertEqual(util.dir_size(str(dir1)), expected)
+            self.assertEqual(util.dir_size(dir1), expected)
+            self.assertEqual(util.dir_size(dir1, True), expected + file1.stat().st_size)
+
+
 if __name__ == '__main__':
     unittest.main(exit=False)