From aa4e4e02209980b61d7d0999bdfd001118a0bbc8 Mon Sep 17 00:00:00 2001 From: Nicholas Smith Date: Tue, 18 Jun 2024 15:54:43 -0400 Subject: [PATCH] Update to pybind11 v2.12 and migrate to string infinity (#242) * Update to pybind11 v2.12 * Require string representation of infinity Closes #208 * MSVC doesn't like "or" * Fixup typing * Use MacOS 13 runner for python matrix, latest for 3.12 As the MacOS 14+ runners use the M1 chip and those don't support python versions older than 3.10 * Drop pypy * Add some more tests for string inf --- .github/workflows/ci.yml | 7 ++-- pybind11 | 2 +- src/correction.cc | 51 ++++++++++++++---------- src/correctionlib/schemav2.py | 75 ++++++++++++++++++----------------- tests/test_core.py | 2 +- tests/test_issue208.py | 41 +++++++++++++++++++ 6 files changed, 116 insertions(+), 62 deletions(-) create mode 100644 tests/test_issue208.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 950f832e..058b1ebc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,12 +31,11 @@ jobs: fail-fast: false matrix: python-version: ["3.7", "3.11", "3.12"] - runs-on: [ubuntu-latest, macos-latest, windows-latest] + runs-on: [ubuntu-latest, macos-13, windows-latest] include: - - python-version: pypy-3.7 - runs-on: ubuntu-latest - + - python-version: "3.12" + runs-on: macos-latest steps: - uses: actions/checkout@v4 with: diff --git a/pybind11 b/pybind11 index aa304c9c..3e9dfa28 160000 --- a/pybind11 +++ b/pybind11 @@ -1 +1 @@ -Subproject commit aa304c9c7d725ffb9d10af08a3b34cb372307020 +Subproject commit 3e9dfa2866941655c56877882565e7577de6fc7b diff --git a/src/correction.cc b/src/correction.cc index 4757f5e1..6bc36692 100644 --- a/src/correction.cc +++ b/src/correction.cc @@ -195,6 +195,30 @@ namespace { } throw std::runtime_error("Error: could not find variable " + std::string(name) + " in inputs"); } + + double parse_edge(const rapidjson::Value& edge) { + if ( edge.IsDouble() ) { + return edge.GetDouble(); + } else if ( edge.IsString() ) { + std::string_view str = edge.GetString(); + if ((str == "inf") || (str == "+inf")) return std::numeric_limits::infinity(); + else if (str == "-inf") return -std::numeric_limits::infinity(); + } + throw std::runtime_error("Invalid edge type"); + } + + std::vector parse_bin_edges(const rapidjson::Value::ConstArray& edges) { + std::vector result; + result.reserve(edges.Size()); + for (const auto& edge : edges) { + double val = parse_edge(edge); + if ( result.size() > 0 && result.back() >= val ) { + throw std::runtime_error("binning edges are not monotone increasing"); + } + result.push_back(val); + } + return result; + } } // end of anonymous namespace Variable::Variable(const JSONObject& json) : @@ -235,7 +259,7 @@ void Variable::validate(const Type& t) const { Variable Variable::from_string(const char * data) { rapidjson::Document json; - rapidjson::ParseResult ok = json.Parse(data); + rapidjson::ParseResult ok = json.Parse(data); if (!ok) { throw std::runtime_error( std::string("JSON parse error: ") + rapidjson::GetParseError_En(ok.Code()) @@ -283,7 +307,7 @@ Formula::Formula(const JSONObject& json, const std::vector& inputs, bo Formula::Ref Formula::from_string(const char * data, std::vector& inputs) { rapidjson::Document json; - rapidjson::ParseResult ok = json.Parse(data); + rapidjson::ParseResult ok = json.Parse(data); if (!ok) { throw std::runtime_error( std::string("JSON parse error: ") + rapidjson::GetParseError_En(ok.Code()) @@ -403,14 +427,7 @@ Binning::Binning(const JSONObject& json, const Correction& context) // set bins_ const auto &edgesObj = json.getRequiredValue("edges"); if ( edgesObj.IsArray() ) { // non-uniform binning - std::vector edges; - rapidjson::Value::ConstArray edgesArr = edgesObj.GetArray(); - for (const auto& edge : edgesArr) { - if ( ! edge.IsDouble() ) { throw std::runtime_error("Invalid edges array type"); } - double val = edge.GetDouble(); - if ( edges.size() > 0 && edges.back() >= val ) { throw std::runtime_error("binning edges are not monotone increasing"); } - edges.push_back(val); - } + std::vector edges = parse_bin_edges(edgesObj.GetArray()); if ( edges.size() != content.Size() + 1 ) { throw std::runtime_error("Inconsistency in Binning: number of content nodes does not match binning"); } @@ -470,13 +487,7 @@ MultiBinning::MultiBinning(const JSONObject& json, const Correction& context) for (const auto& dimension : edges) { const auto& input = inputs[idx]; if ( dimension.IsArray() ) { // non-uniform binning - std::vector dim_edges; - dim_edges.reserve(dimension.GetArray().Size()); - for (const auto& item : dimension.GetArray()) { - double val = item.GetDouble(); - if ( dim_edges.size() > 0 && dim_edges.back() >= val ) { throw std::runtime_error("binning edges are not monotone increasing"); } - dim_edges.push_back(val); - } + std::vector dim_edges = parse_bin_edges(dimension.GetArray()); if ( ! input.IsString() ) { throw std::runtime_error("invalid multibinning input type"); } axes_.push_back({input_index(input.GetString(), context.inputs()), 0, _NonUniformBins(std::move(dim_edges))}); } else if ( dimension.IsObject() ) { // UniformBinning @@ -776,14 +787,14 @@ std::unique_ptr CorrectionSet::from_file(const std::string& fn) { #ifdef WITH_ZLIB gzFile_s* fpz = gzopen(fn.c_str(), "r"); rapidjson::GzFileReadStream is(fpz, readBuffer, sizeof(readBuffer)); - ok = json.ParseStream(is); + ok = json.ParseStream(is); gzclose(fpz); #else throw std::runtime_error("Gzip-compressed JSON files are only supported if ZLIB is found when the package is built"); #endif } else { rapidjson::FileReadStream is(fp, readBuffer, sizeof(readBuffer)); - ok = json.ParseStream(is); + ok = json.ParseStream(is); fclose(fp); } if (!ok) { @@ -798,7 +809,7 @@ std::unique_ptr CorrectionSet::from_file(const std::string& fn) { std::unique_ptr CorrectionSet::from_string(const char * data) { rapidjson::Document json; - rapidjson::ParseResult ok = json.Parse(data); + rapidjson::ParseResult ok = json.Parse(data); if (!ok) { throw std::runtime_error( std::string("JSON parse error: ") + rapidjson::GetParseError_En(ok.Code()) diff --git a/src/correctionlib/schemav2.py b/src/correctionlib/schemav2.py index 060cc5d0..92588204 100644 --- a/src/correctionlib/schemav2.py +++ b/src/correctionlib/schemav2.py @@ -1,8 +1,10 @@ +import math import sys from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Union from pydantic import ( + AfterValidator, BaseModel, ConfigDict, Field, @@ -18,10 +20,14 @@ import correctionlib.highlevel -if sys.version_info >= (3, 8): +if sys.version_info >= (3, 9): + from typing import Annotated, Literal +elif sys.version_info >= (3, 8): from typing import Literal + + from typing_extensions import Annotated else: - from typing_extensions import Literal + from typing_extensions import Annotated, Literal VERSION = 2 @@ -184,6 +190,29 @@ def validate_edges(cls, high: float, info: ValidationInfo) -> float: return high +Infinity = Literal["inf", "+inf", "-inf"] +Edges = List[Union[float, Infinity]] + + +def validate_nonuniform_edges(edges: Edges) -> Edges: + for edge in edges: + if edge in ("inf", "+inf", "-inf"): + continue + if isinstance(edge, float): + if not math.isfinite(edge): + raise ValueError( + f"Edges array contains non-finite values: {edges}. Replace infinities with 'inf' or '-inf'. NaN is not allowed." + ) + floatedges = [float(x) for x in edges] + for lo, hi in zip(floatedges[:-1], floatedges[1:]): + if lo >= hi: + raise ValueError(f"Binning edges not monotonically increasing: {edges}") + return edges + + +NonUniformBinning = Annotated[Edges, AfterValidator(validate_nonuniform_edges)] + + class Binning(Model): """1-dimensional binning in an input variable""" @@ -191,7 +220,7 @@ class Binning(Model): input: str = Field( description="The name of the correction input variable this binning applies to" ) - edges: Union[List[float], UniformBinning] = Field( + edges: Union[NonUniformBinning, UniformBinning] = Field( description="Edges of the binning, either as a list of monotonically increasing floats or as an instance of UniformBinning. edges[i] <= x < edges[i+1] => f(x, ...) = content[i](...)" ) content: List[Content] @@ -199,20 +228,6 @@ class Binning(Model): description="Overflow behavior for out-of-bounds values" ) - @field_validator("edges") - @classmethod - def validate_edges( - cls, edges: Union[List[float], UniformBinning] - ) -> Union[List[float], UniformBinning]: - if isinstance(edges, list): - for lo, hi in zip(edges[:-1], edges[1:]): - if hi <= lo: - raise ValueError( - f"Binning edges not monotonically increasing: {edges}" - ) - - return edges - @field_validator("content") @classmethod def validate_content( @@ -234,8 +249,10 @@ def summarize( ) -> None: nodecount["Binning"] += 1 inputstats[self.input].overflow &= self.flow != "error" - low = self.edges[0] if isinstance(self.edges, list) else self.edges.low - high = self.edges[-1] if isinstance(self.edges, list) else self.edges.high + low = float(self.edges[0]) if isinstance(self.edges, list) else self.edges.low + high = ( + float(self.edges[-1]) if isinstance(self.edges, list) else self.edges.high + ) inputstats[self.input].min = min(inputstats[self.input].min, low) inputstats[self.input].max = max(inputstats[self.input].max, high) for item in self.content: @@ -253,7 +270,7 @@ class MultiBinning(Model): description="The names of the correction input variables this binning applies to", min_length=1, ) - edges: List[Union[List[float], UniformBinning]] = Field( + edges: List[Union[NonUniformBinning, UniformBinning]] = Field( description="Bin edges for each input" ) content: List[Content] = Field( @@ -266,20 +283,6 @@ class MultiBinning(Model): description="Overflow behavior for out-of-bounds values" ) - @field_validator("edges") - @classmethod - def validate_edges( - cls, edges: List[Union[List[float], UniformBinning]] - ) -> List[Union[List[float], UniformBinning]]: - for i, dim in enumerate(edges): - if isinstance(dim, list): - for lo, hi in zip(dim[:-1], dim[1:]): - if hi <= lo: - raise ValueError( - f"MultiBinning edges for axis {i} are not monotone increasing: {dim}" - ) - return edges - @field_validator("content") @classmethod def validate_content( @@ -303,8 +306,8 @@ def summarize( ) -> None: nodecount["MultiBinning"] += 1 for input, edges in zip(self.inputs, self.edges): - low = edges[0] if isinstance(edges, list) else edges.low - high = edges[-1] if isinstance(edges, list) else edges.high + low = float(edges[0]) if isinstance(edges, list) else edges.low + high = float(edges[-1]) if isinstance(edges, list) else edges.high inputstats[input].overflow &= self.flow != "error" inputstats[input].min = min(inputstats[input].min, low) inputstats[input].max = max(inputstats[input].max, high) diff --git a/tests/test_core.py b/tests/test_core.py index defede27..fdb288d6 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -67,7 +67,7 @@ def test_evaluator(): { "nodetype": "binning", "input": "pt", - "edges": [0, 20, 40, float("inf")], + "edges": [0, 20, 40, "inf"], "flow": "error", "content": [ schema.Category.model_validate( diff --git a/tests/test_issue208.py b/tests/test_issue208.py new file mode 100644 index 00000000..e9ea52e1 --- /dev/null +++ b/tests/test_issue208.py @@ -0,0 +1,41 @@ +import pytest + +from correctionlib import schemav2 as schema + + +def _make_binning(edges, content): + corr = schema.Correction( + name="test corr", + version=2, + inputs=[ + schema.Variable(name="x", type="real"), + ], + output=schema.Variable(name="a scale", type="real"), + data=schema.Binning.model_validate( + { + "nodetype": "binning", + "input": "x", + "edges": edges, + "flow": "error", + "content": content, + } + ), + ) + return corr.to_evaluator() + + +def test_string_infinity(): + corr = _make_binning([0, 20, 40, "inf"], [1.0, 1.1, 1.2]) + assert corr.evaluate(10.0) == 1.0 + assert corr.evaluate(100.0) == 1.2 + corr = _make_binning([0, 20, 40, "+inf"], [1.0, 1.1, 1.2]) + assert corr.evaluate(100.0) == 1.2 + corr = _make_binning(["-inf", 20, 40, "+inf"], [1.0, 1.1, 1.2]) + assert corr.evaluate(-100.0) == 1.0 + + with pytest.raises(ValueError): + _make_binning([0, 20, 40, "infinity"], [1.0, 1.1, 1.2]) + with pytest.raises(ValueError): + _make_binning([0, 20, 40, float("inf")], [1.0, 1.1, 1.2]) + with pytest.raises(ValueError): + _make_binning([0, "inf", 20, 40], [1.0, 1.1, 1.2])