Merge pull request #99 from neutrinoceros/hotfix_smart_typecasting

BUG: hotfix smart typecasting
neutrinoceros · Mar 18, 2022 · 63460ce · 63460ce
2 parents e54bdbe + 1434b40
commit 63460ce
Show file tree

Hide file tree

Showing 7 changed files with 87 additions and 37 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [1.2.0] - 2022-03-18
+
+- ENH: add two functions to the public API to read from and write to strings (`inifix.loads` and `inifix.dumps`)
+- BUG: use more conservative int/float casting rules to better match Idefix's reading routines.
+
 ## [1.1.0] - 2022-02-23
 
 ENH: inifix-format now produces more compact files, with fewer empty lines.

diff --git a/README.md b/README.md
@@ -59,7 +59,7 @@ and maps to
     },
     "Time Integrator": {
         "CFL": 0.001,
-        "tstop": 1000
+        "tstop": 1000.0
     }
 }
 ```
@@ -79,16 +79,15 @@ and maps to
 {
     "mode": "fargo",
     "CFL": 0.001,
-    "tstop": 1000
+    "tstop": 1000.0
 }
 ```
 Note that strings using e-notation (e.g. `1e-3` or `1E3` here) are decoded as
-numbers. They are cast to `int` if no precision loss ensues, and `float`
-otherwise. Reversly, when writing files, numbers are re-encoded using e-notation
-if it leads to a more compact representation. For instance, `100000` is encoded
-as `1e5`, but `10` is left unchanged because `1e1` is longer.
-In cases where both reprensations are equally compact (e.g. `100` VS `1e2`),
-e-notation is prefered in encoding.
+floats. Reversely, when writing files, floats are re-encoded using e-notation
+if it leads to a more compact representation. For instance, `100000.0` is encoded
+as `1e5`, but `189.0` is left unchanged because `1.89e2` takes one more character.
+In cases where both representations are equally compact (e.g. `1.0` VS `1e0`),
+decimal is preferred in encoding.
 
 While decoding, `e` can be lower or upper case, but they are always encoded as
 lower case.
@@ -103,8 +102,10 @@ pip install inifix
 ## Usage
 
 The public API mimicks that of Python's standard library `json`,
-and consists in two main functions: `inifix.load` and `inifix.dump`.
-
+and consists of four main functions:
+- `inifix.load` and `inifix.dump` read from and write to files respectively
+- `inifix.loads` reads from a `str` and returns a `dict`, while `inifix.dumps`
+  does the reverse operation.
 
 ### Reading data
 `inifix.load` reads from a file and returns a `dict`

diff --git a/inifix/__init__.py b/inifix/__init__.py
@@ -1,5 +1,7 @@
 from .io import dump
+from .io import dumps
 from .io import load
+from .io import loads
 from .validation import validate_inifile_schema
 
-__version__ = "1.1.0"
+__version__ = "1.2.0"
diff --git a/inifix/enotation.py b/inifix/enotation.py
@@ -1,5 +1,4 @@
 import re
-from typing import Union
 
 
 ENOTATION_REGEXP = re.compile(r"\d+(\.\d*)?e[+-]?\d+?")
@@ -43,7 +42,10 @@ def decode(s: str, /) -> int:
         Traceback (most recent call last):
         ...
         ValueError
-
+        >>> ENotationIO.decode("notanumber")
+        Traceback (most recent call last):
+        ...
+        ValueError
         """
         s = s.lower()
 
@@ -91,7 +93,7 @@ def simplify(s: str, /) -> str:
         return s.replace("+", "")
 
     @staticmethod
-    def encode(r: Union[float, int], /) -> str:
+    def encode(r: float, /) -> str:
         """
         Convert a real number `r` to string, using scientific notation.
 
@@ -106,7 +108,7 @@ def encode(r: Union[float, int], /) -> str:
         Returns
         -------
         ret: str
-            A string representing a number in sci notation.
+            A string representing a number in sci notation
 
         Examples
         --------
@@ -125,39 +127,39 @@ def encode(r: Union[float, int], /) -> str:
         >>> ENotationIO.encode(1e-15)
         '1e-15'
         >>> ENotationIO.encode(0.0)
-        '0'
+        '0e0'
         >>> ENotationIO.encode(0)
-        '0'
+        '0e0'
         """
         base = str(r)
         if "e" in base:
             return ENotationIO.simplify(base)
         if not base.strip(".0"):
-            return "0"
+            return "0e0"
         max_ndigit = len(base.replace(".", "")) - 1
         fmt = f".{max_ndigit}e"
         s = "{:^{}}".format(r, fmt)
         return ENotationIO.simplify(s)
 
     @staticmethod
-    def encode_preferential(r: Union[float, int], /) -> str:
+    def encode_preferential(r: float, /) -> str:
         """
-        Convert a real number `r` to string, using sci notation if
+        Convert a float `r` to string, using sci notation if
         and only if it saves space.
 
         Examples
         --------
         >>> ENotationIO.encode_preferential(189_000_000)
         '1.89e8'
         >>> ENotationIO.encode_preferential(189)
-        '189'
+        '189.0'
         >>> ENotationIO.encode_preferential(900)
-        '900'
+        '9e2'
         >>> ENotationIO.encode_preferential(1)
-        '1'
+        '1.0'
         >>> ENotationIO.encode_preferential(0.7)
         '0.7'
         >>> ENotationIO.encode_preferential(0.00007)
         '7e-5'
         """
-        return min(str(r), ENotationIO.encode(r), key=lambda x: len(x))
+        return min(str(float(r)), ENotationIO.encode(r), key=lambda x: len(x))
diff --git a/inifix/io.py b/inifix/io.py
@@ -46,7 +46,6 @@ def str_caster(s: str) -> str:
 
 CASTERS: List[Callable] = [
     int,
-    ENotationIO.decode,
     float,
     bool_caster,
     str_caster,
@@ -107,11 +106,14 @@ def _normalize_data(data: str) -> List[str]:
 
 
 def _tokenize_line(
-    line: str, file: TextIO, line_number: int
+    line: str, line_number: int, file: Optional[TextIO]
 ) -> Tuple[str, List[Scalar]]:
     key, *raw_values = line.split()
     if not raw_values:
-        raise ValueError(f"Failed to parse {file}:{line_number}:\n{line}")
+        if file is None:
+            raise ValueError(f"Failed to parse line {line_number}: {line!r}")
+        else:
+            raise ValueError(f"Failed to parse {file}:{line_number}:\n{line}")
 
     values = []
     for val in raw_values:
@@ -130,15 +132,10 @@ def _tokenize_line(
     return key, values
 
 
-def _from_file_descriptor(file: TextIO) -> InifixConfT:
-    data = file.read()
-
+def _from_string(data: str, file: Optional[TextIO] = None) -> InifixConfT:
     # see https://github.com/python/mypy/issues/6463
     container: InifixConfT = {}  # type: ignore[assignment]
     lines = _normalize_data(data)
-    if not "".join(lines):
-        raise ValueError(f"{file.name!r} appears to be empty.")
-
     section = Section()  # the default target is a nameless section
     for line_number, line in enumerate(lines, start=1):
         if not line:
@@ -159,6 +156,14 @@ def _from_file_descriptor(file: TextIO) -> InifixConfT:
     return container
 
 
+def _from_file_descriptor(file: TextIO) -> InifixConfT:
+    data = file.read()
+    lines = _normalize_data(data)
+    if not "".join(lines):
+        raise ValueError(f"{file.name!r} appears to be empty.")
+    return _from_string(data, file=file)
+
+
 def _from_path(file: PathLike) -> InifixConfT:
     file = os.fspath(file)
     with open(file) as fh:
@@ -169,7 +174,7 @@ def _from_path(file: PathLike) -> InifixConfT:
 
 
 def _encode(v: Scalar) -> str:
-    if isinstance(v, (float, int)):
+    if isinstance(v, float):
         return ENotationIO.encode_preferential(v)
     return str(v)
 
@@ -219,6 +224,10 @@ def load(source: Union[InifixConfT, PathLike, TextIO], /) -> InifixConfT:
     return source
 
 
+def loads(source: str, /) -> InifixConfT:
+    return _from_string(source)
+
+
 def dump(data: InifixConfT, /, file: Union[PathLike, TextIOBase]) -> None:
     """
     Write data to a file.
@@ -238,3 +247,11 @@ def dump(data: InifixConfT, /, file: Union[PathLike, TextIOBase]) -> None:
         _write_to_buffer(data, file)  # type: ignore
     except AttributeError:
         _write_to_file(data, file)
+
+
+def dumps(data: InifixConfT, /) -> str:
+    from io import StringIO
+
+    s = StringIO()
+    dump(data, file=s)
+    return s.getvalue()
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = inifix
-version = 1.1.0
+version = 1.2.0
 description = I/O facility for Idefix/Pluto configuration files
 long_description = file: README.md
 long_description_content_type = text/markdown

diff --git a/tests/test_io.py b/tests/test_io.py
@@ -7,7 +7,9 @@
 
 from inifix.io import _tokenize_line
 from inifix.io import dump
+from inifix.io import dumps
 from inifix.io import load
+from inifix.io import loads
 from inifix.io import Section
 
 
@@ -158,13 +160,34 @@ def test_dump_to_file_path(inifile, tmp_path):
             assert f"[{key}]\n" in body2
 
 
-def test_load_empty_file(capsys, tmp_path):
+def test_load_empty_file(tmp_path):
     target = tmp_path / "empty_file"
     target.touch()
-    with pytest.raises(ValueError):
+    with pytest.raises(
+        ValueError, match=re.escape(f"{str(target)!r} appears to be empty.")
+    ):
         load(target)
 
 
 def test_load_from_descriptor(inifile):
     with open(inifile) as fh:
         load(fh)
+
+
+def test_loads_empty_str():
+    ret = loads("")
+    assert ret == {}
+
+
+def test_loads_invalid_str():
+    with pytest.raises(ValueError, match="Failed to parse line 1: 'invalid'"):
+        loads("invalid")
+
+
+def test_loads_dumps_roundtrip(inifile):
+    with open(inifile) as fh:
+        data = fh.read()
+    d1 = loads(data)
+    s1 = dumps(d1)
+    d2 = loads(s1)
+    assert d1 == d2