From 1434b407b9f6c034320ed33910e5647f1978ebca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Fri, 18 Mar 2022 14:33:17 +0100 Subject: [PATCH] BUG: avoid smart decoding/encoding roundtrips for ints as it creates bugs downstream. Implement inifix.dumps and inifix.loads --- CHANGELOG.md | 5 +++++ README.md | 21 +++++++++++---------- inifix/__init__.py | 4 +++- inifix/enotation.py | 28 +++++++++++++++------------- inifix/io.py | 37 +++++++++++++++++++++++++++---------- setup.cfg | 2 +- tests/test_io.py | 27 +++++++++++++++++++++++++-- 7 files changed, 87 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6bc9c9..08f3e74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.2.0] - 2022-03-18 + +- ENH: add two functions to the public API to read from and write to strings (`inifix.loads` and `inifix.dumps`) +- BUG: use more conservative rules in int/float casting rules to better match Idefix's reading routines. + ## [1.1.0] - 2022-02-23 ENH: inifix-format now produces more compact files, with fewer empty lines. diff --git a/README.md b/README.md index c7c10b3..5ec412c 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ and maps to }, "Time Integrator": { "CFL": 0.001, - "tstop": 1000 + "tstop": 1000.0 } } ``` @@ -79,16 +79,15 @@ and maps to { "mode": "fargo", "CFL": 0.001, - "tstop": 1000 + "tstop": 1000.0 } ``` Note that strings using e-notation (e.g. `1e-3` or `1E3` here) are decoded as -numbers. They are cast to `int` if no precision loss ensues, and `float` -otherwise. Reversly, when writing files, numbers are re-encoded using e-notation -if it leads to a more compact representation. For instance, `100000` is encoded -as `1e5`, but `10` is left unchanged because `1e1` is longer. 
-In cases where both reprensations are equally compact (e.g. `100` VS `1e2`), -e-notation is prefered in encoding. +floats. Reversely, when writing files, floats are re-encoded using e-notation +if it leads to a more compact representation. For instance, `100000.0` is encoded +as `1e5`, but `189.0` is left unchanged because `1.89e2` takes one more character. +In cases where both representations are equally compact (e.g. `1.0` VS `1e0`), +decimal is preferred in encoding. While decoding, `e` can be lower or upper case, but they are always encoded as lower case. @@ -103,8 +102,10 @@ pip install inifix ## Usage The public API mimicks that of Python's standard library `json`, -and consists in two main functions: `inifix.load` and `inifix.dump`. - +and consists of four main functions: +- `inifix.load` and `inifix.dump` read from and write to files respectively +- `inifix.loads` reads from a `str` and returns a `dict`, while `inifix.dumps` + does the reverse operation. ### Reading data `inifix.load` reads from a file and returns a `dict` diff --git a/inifix/__init__.py b/inifix/__init__.py index 7eed3b5..7f32007 100644 --- a/inifix/__init__.py +++ b/inifix/__init__.py @@ -1,5 +1,7 @@ from .io import dump +from .io import dumps from .io import load +from .io import loads from .validation import validate_inifile_schema -__version__ = "1.1.0" +__version__ = "1.2.0" diff --git a/inifix/enotation.py b/inifix/enotation.py index 85bcc8d..549a61c 100644 --- a/inifix/enotation.py +++ b/inifix/enotation.py @@ -1,5 +1,4 @@ import re -from typing import Union ENOTATION_REGEXP = re.compile(r"\d+(\.\d*)?e[+-]?\d+?") @@ -43,7 +42,10 @@ def decode(s: str, /) -> int: Traceback (most recent call last): ... ValueError - + >>> ENotationIO.decode("notanumber") + Traceback (most recent call last): + ... 
+ ValueError """ s = s.lower() @@ -91,7 +93,7 @@ def simplify(s: str, /) -> str: return s.replace("+", "") @staticmethod - def encode(r: Union[float, int], /) -> str: + def encode(r: float, /) -> str: """ Convert a real number `r` to string, using scientific notation. @@ -106,7 +108,7 @@ def encode(r: Union[float, int], /) -> str: Returns ------- ret: str - A string representing a number in sci notation. + A string representing a number in sci notation Examples -------- @@ -125,24 +127,24 @@ def encode(r: Union[float, int], /) -> str: >>> ENotationIO.encode(1e-15) '1e-15' >>> ENotationIO.encode(0.0) - '0' + '0e0' >>> ENotationIO.encode(0) - '0' + '0e0' """ base = str(r) if "e" in base: return ENotationIO.simplify(base) if not base.strip(".0"): - return "0" + return "0e0" max_ndigit = len(base.replace(".", "")) - 1 fmt = f".{max_ndigit}e" s = "{:^{}}".format(r, fmt) return ENotationIO.simplify(s) @staticmethod - def encode_preferential(r: Union[float, int], /) -> str: + def encode_preferential(r: float, /) -> str: """ - Convert a real number `r` to string, using sci notation if + Convert a float `r` to string, using sci notation if and only if it saves space. 
Examples @@ -150,14 +152,14 @@ def encode_preferential(r: Union[float, int], /) -> str: >>> ENotationIO.encode_preferential(189_000_000) '1.89e8' >>> ENotationIO.encode_preferential(189) - '189' + '189.0' >>> ENotationIO.encode_preferential(900) - '900' + '9e2' >>> ENotationIO.encode_preferential(1) - '1' + '1.0' >>> ENotationIO.encode_preferential(0.7) '0.7' >>> ENotationIO.encode_preferential(0.00007) '7e-5' """ - return min(str(r), ENotationIO.encode(r), key=lambda x: len(x)) + return min(str(float(r)), ENotationIO.encode(r), key=lambda x: len(x)) diff --git a/inifix/io.py b/inifix/io.py index c897527..d323ce0 100644 --- a/inifix/io.py +++ b/inifix/io.py @@ -46,7 +46,6 @@ def str_caster(s: str) -> str: CASTERS: List[Callable] = [ int, - ENotationIO.decode, float, bool_caster, str_caster, @@ -107,11 +106,14 @@ def _normalize_data(data: str) -> List[str]: def _tokenize_line( - line: str, file: TextIO, line_number: int + line: str, line_number: int, file: Optional[TextIO] ) -> Tuple[str, List[Scalar]]: key, *raw_values = line.split() if not raw_values: - raise ValueError(f"Failed to parse {file}:{line_number}:\n{line}") + if file is None: + raise ValueError(f"Failed to parse line {line_number}: {line!r}") + else: + raise ValueError(f"Failed to parse {file}:{line_number}:\n{line}") values = [] for val in raw_values: @@ -130,15 +132,10 @@ def _tokenize_line( return key, values -def _from_file_descriptor(file: TextIO) -> InifixConfT: - data = file.read() - +def _from_string(data: str, file: Optional[TextIO] = None) -> InifixConfT: # see https://github.com/python/mypy/issues/6463 container: InifixConfT = {} # type: ignore[assignment] lines = _normalize_data(data) - if not "".join(lines): - raise ValueError(f"{file.name!r} appears to be empty.") - section = Section() # the default target is a nameless section for line_number, line in enumerate(lines, start=1): if not line: @@ -159,6 +156,14 @@ def _from_file_descriptor(file: TextIO) -> InifixConfT: return container +def 
_from_file_descriptor(file: TextIO) -> InifixConfT: + data = file.read() + lines = _normalize_data(data) + if not "".join(lines): + raise ValueError(f"{file.name!r} appears to be empty.") + return _from_string(data, file=file) + + def _from_path(file: PathLike) -> InifixConfT: file = os.fspath(file) with open(file) as fh: @@ -169,7 +174,7 @@ def _from_path(file: PathLike) -> InifixConfT: def _encode(v: Scalar) -> str: - if isinstance(v, (float, int)): + if isinstance(v, float): return ENotationIO.encode_preferential(v) return str(v) @@ -219,6 +224,10 @@ def load(source: Union[InifixConfT, PathLike, TextIO], /) -> InifixConfT: return source +def loads(source: str, /) -> InifixConfT: + return _from_string(source) + + def dump(data: InifixConfT, /, file: Union[PathLike, TextIOBase]) -> None: """ Write data to a file. @@ -238,3 +247,11 @@ def dump(data: InifixConfT, /, file: Union[PathLike, TextIOBase]) -> None: _write_to_buffer(data, file) # type: ignore except AttributeError: _write_to_file(data, file) + + +def dumps(data: InifixConfT, /) -> str: + from io import StringIO + + s = StringIO() + dump(data, file=s) + return s.getvalue() diff --git a/setup.cfg b/setup.cfg index c650714..ee9f49a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = inifix -version = 1.1.0 +version = 1.2.0 description = I/O facility for Idefix/Pluto configuration files long_description = file: README.md long_description_content_type = text/markdown diff --git a/tests/test_io.py b/tests/test_io.py index 3b33505..6078ad6 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -7,7 +7,9 @@ from inifix.io import _tokenize_line from inifix.io import dump +from inifix.io import dumps from inifix.io import load +from inifix.io import loads from inifix.io import Section @@ -158,13 +160,34 @@ def test_dump_to_file_path(inifile, tmp_path): assert f"[{key}]\n" in body2 -def test_load_empty_file(capsys, tmp_path): +def test_load_empty_file(tmp_path): target = tmp_path / "empty_file" 
target.touch() - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match=re.escape(f"{str(target)!r} appears to be empty.") + ): load(target) def test_load_from_descriptor(inifile): with open(inifile) as fh: load(fh) + + +def test_loads_empty_str(): + ret = loads("") + assert ret == {} + + +def test_loads_invalid_str(): + with pytest.raises(ValueError, match="Failed to parse line 1: 'invalid'"): + loads("invalid") + + +def test_loads_dumps_roundtrip(inifile): + with open(inifile) as fh: + data = fh.read() + d1 = loads(data) + s1 = dumps(d1) + d2 = loads(s1) + assert d1 == d2