Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to pybind11 v2.12 and migrate to string infinity #242

Merged
merged 7 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,11 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.7", "3.11", "3.12"]
runs-on: [ubuntu-latest, macos-latest, windows-latest]
runs-on: [ubuntu-latest, macos-13, windows-latest]

include:
- python-version: pypy-3.7
runs-on: ubuntu-latest

- python-version: "3.12"
runs-on: macos-latest
steps:
- uses: actions/checkout@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion pybind11
Submodule pybind11 updated 170 files
51 changes: 31 additions & 20 deletions src/correction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,30 @@ namespace {
}
throw std::runtime_error("Error: could not find variable " + std::string(name) + " in inputs");
}

// Convert a single JSON bin-edge value to a double.
//
// Accepted inputs:
//   - any JSON number (both `20` and `20.0`)
//   - the strings "inf" / "+inf" (positive infinity) and "-inf" (negative infinity)
//
// Throws std::runtime_error("Invalid edge type") for every other value,
// including strings other than the three spellings above.
double parse_edge(const rapidjson::Value& edge) {
  // IsNumber() rather than IsDouble(): rapidjson stores integer literals as
  // int/uint values for which IsDouble() is false, although GetDouble()
  // converts them without issue. Hand-written edges such as [0, 20, 40]
  // must therefore not be rejected here.
  if ( edge.IsNumber() ) {
    return edge.GetDouble();
  }
  if ( edge.IsString() ) {
    const std::string_view str = edge.GetString();
    if ( (str == "inf") || (str == "+inf") ) {
      return std::numeric_limits<double>::infinity();
    }
    if ( str == "-inf" ) {
      return -std::numeric_limits<double>::infinity();
    }
    // any other string falls through to the error below
  }
  throw std::runtime_error("Invalid edge type");
}

// Parse a JSON array of bin edges into a vector of doubles.
//
// Each element is converted with parse_edge(); the resulting sequence must be
// strictly increasing, otherwise std::runtime_error is thrown. Errors raised
// by parse_edge() for individual elements propagate unchanged.
std::vector<double> parse_bin_edges(const rapidjson::Value::ConstArray& edges) {
  std::vector<double> parsed;
  parsed.reserve(edges.Size());
  for (auto it = edges.Begin(); it != edges.End(); ++it) {
    const double current = parse_edge(*it);
    // Compare against the previously accepted edge (if any) before storing.
    if ( !parsed.empty() && parsed.back() >= current ) {
      throw std::runtime_error("binning edges are not monotone increasing");
    }
    parsed.push_back(current);
  }
  return parsed;
}
} // end of anonymous namespace

Variable::Variable(const JSONObject& json) :
Expand Down Expand Up @@ -235,7 +259,7 @@ void Variable::validate(const Type& t) const {

Variable Variable::from_string(const char * data) {
rapidjson::Document json;
rapidjson::ParseResult ok = json.Parse<rapidjson::kParseNanAndInfFlag>(data);
rapidjson::ParseResult ok = json.Parse(data);
if (!ok) {
throw std::runtime_error(
std::string("JSON parse error: ") + rapidjson::GetParseError_En(ok.Code())
Expand Down Expand Up @@ -283,7 +307,7 @@ Formula::Formula(const JSONObject& json, const std::vector<Variable>& inputs, bo

Formula::Ref Formula::from_string(const char * data, std::vector<Variable>& inputs) {
rapidjson::Document json;
rapidjson::ParseResult ok = json.Parse<rapidjson::kParseNanAndInfFlag>(data);
rapidjson::ParseResult ok = json.Parse(data);
if (!ok) {
throw std::runtime_error(
std::string("JSON parse error: ") + rapidjson::GetParseError_En(ok.Code())
Expand Down Expand Up @@ -403,14 +427,7 @@ Binning::Binning(const JSONObject& json, const Correction& context)
// set bins_
const auto &edgesObj = json.getRequiredValue("edges");
if ( edgesObj.IsArray() ) { // non-uniform binning
std::vector<double> edges;
rapidjson::Value::ConstArray edgesArr = edgesObj.GetArray();
for (const auto& edge : edgesArr) {
if ( ! edge.IsDouble() ) { throw std::runtime_error("Invalid edges array type"); }
double val = edge.GetDouble();
if ( edges.size() > 0 && edges.back() >= val ) { throw std::runtime_error("binning edges are not monotone increasing"); }
edges.push_back(val);
}
std::vector<double> edges = parse_bin_edges(edgesObj.GetArray());
if ( edges.size() != content.Size() + 1 ) {
throw std::runtime_error("Inconsistency in Binning: number of content nodes does not match binning");
}
Expand Down Expand Up @@ -470,13 +487,7 @@ MultiBinning::MultiBinning(const JSONObject& json, const Correction& context)
for (const auto& dimension : edges) {
const auto& input = inputs[idx];
if ( dimension.IsArray() ) { // non-uniform binning
std::vector<double> dim_edges;
dim_edges.reserve(dimension.GetArray().Size());
for (const auto& item : dimension.GetArray()) {
double val = item.GetDouble();
if ( dim_edges.size() > 0 && dim_edges.back() >= val ) { throw std::runtime_error("binning edges are not monotone increasing"); }
dim_edges.push_back(val);
}
std::vector<double> dim_edges = parse_bin_edges(dimension.GetArray());
if ( ! input.IsString() ) { throw std::runtime_error("invalid multibinning input type"); }
axes_.push_back({input_index(input.GetString(), context.inputs()), 0, _NonUniformBins(std::move(dim_edges))});
} else if ( dimension.IsObject() ) { // UniformBinning
Expand Down Expand Up @@ -776,14 +787,14 @@ std::unique_ptr<CorrectionSet> CorrectionSet::from_file(const std::string& fn) {
#ifdef WITH_ZLIB
gzFile_s* fpz = gzopen(fn.c_str(), "r");
rapidjson::GzFileReadStream is(fpz, readBuffer, sizeof(readBuffer));
ok = json.ParseStream<rapidjson::kParseNanAndInfFlag>(is);
ok = json.ParseStream(is);
gzclose(fpz);
#else
throw std::runtime_error("Gzip-compressed JSON files are only supported if ZLIB is found when the package is built");
#endif
} else {
rapidjson::FileReadStream is(fp, readBuffer, sizeof(readBuffer));
ok = json.ParseStream<rapidjson::kParseNanAndInfFlag>(is);
ok = json.ParseStream(is);
fclose(fp);
}
if (!ok) {
Expand All @@ -798,7 +809,7 @@ std::unique_ptr<CorrectionSet> CorrectionSet::from_file(const std::string& fn) {

std::unique_ptr<CorrectionSet> CorrectionSet::from_string(const char * data) {
rapidjson::Document json;
rapidjson::ParseResult ok = json.Parse<rapidjson::kParseNanAndInfFlag>(data);
rapidjson::ParseResult ok = json.Parse(data);
if (!ok) {
throw std::runtime_error(
std::string("JSON parse error: ") + rapidjson::GetParseError_En(ok.Code())
Expand Down
75 changes: 39 additions & 36 deletions src/correctionlib/schemav2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import math
import sys
from collections import defaultdict
from typing import Dict, List, Optional, Set, Tuple, Union

from pydantic import (
AfterValidator,
BaseModel,
ConfigDict,
Field,
Expand All @@ -18,10 +20,14 @@

import correctionlib.highlevel

if sys.version_info >= (3, 8):
if sys.version_info >= (3, 9):
from typing import Annotated, Literal
elif sys.version_info >= (3, 8):
from typing import Literal

from typing_extensions import Annotated
else:
from typing_extensions import Literal
from typing_extensions import Annotated, Literal


VERSION = 2
Expand Down Expand Up @@ -184,35 +190,44 @@ def validate_edges(cls, high: float, info: ValidationInfo) -> float:
return high


# String spellings accepted in place of a number for an infinite bin edge.
Infinity = Literal["inf", "+inf", "-inf"]
# A non-uniform edge list: every entry is either a float or one of the
# Infinity strings above.
Edges = List[Union[float, Infinity]]


def validate_nonuniform_edges(edges: Edges) -> Edges:
    """Check a non-uniform edge list and return it unchanged.

    Float entries must be finite; infinite edges have to be spelled as one of
    the strings "inf", "+inf" or "-inf". After converting every entry with
    ``float()``, the sequence must be strictly increasing.

    Raises:
        ValueError: if a float entry is NaN or infinite, or if the edges are
            not strictly increasing.
    """
    infinity_spellings = ("inf", "+inf", "-inf")
    has_nonfinite_float = any(
        isinstance(item, float) and not math.isfinite(item)
        for item in edges
        if item not in infinity_spellings
    )
    if has_nonfinite_float:
        raise ValueError(
            f"Edges array contains non-finite values: {edges}. Replace infinities with 'inf' or '-inf'. NaN is not allowed."
        )

    # float() understands the three infinity spellings, so the ordering check
    # can treat numeric and string edges uniformly.
    numeric = list(map(float, edges))
    for idx in range(1, len(numeric)):
        if numeric[idx - 1] >= numeric[idx]:
            raise ValueError(f"Binning edges not monotonically increasing: {edges}")
    return edges


# Edges list with validate_nonuniform_edges attached as an AfterValidator, so
# the same check applies wherever this type is used as a field annotation.
NonUniformBinning = Annotated[Edges, AfterValidator(validate_nonuniform_edges)]


class Binning(Model):
"""1-dimensional binning in an input variable"""

nodetype: Literal["binning"]
input: str = Field(
description="The name of the correction input variable this binning applies to"
)
edges: Union[List[float], UniformBinning] = Field(
edges: Union[NonUniformBinning, UniformBinning] = Field(
description="Edges of the binning, either as a list of monotonically increasing floats or as an instance of UniformBinning. edges[i] <= x < edges[i+1] => f(x, ...) = content[i](...)"
)
content: List[Content]
flow: Union[Content, Literal["clamp", "error"]] = Field(
description="Overflow behavior for out-of-bounds values"
)

@field_validator("edges")
@classmethod
def validate_edges(
cls, edges: Union[List[float], UniformBinning]
) -> Union[List[float], UniformBinning]:
if isinstance(edges, list):
for lo, hi in zip(edges[:-1], edges[1:]):
if hi <= lo:
raise ValueError(
f"Binning edges not monotonically increasing: {edges}"
)

return edges

@field_validator("content")
@classmethod
def validate_content(
Expand All @@ -234,8 +249,10 @@ def summarize(
) -> None:
nodecount["Binning"] += 1
inputstats[self.input].overflow &= self.flow != "error"
low = self.edges[0] if isinstance(self.edges, list) else self.edges.low
high = self.edges[-1] if isinstance(self.edges, list) else self.edges.high
low = float(self.edges[0]) if isinstance(self.edges, list) else self.edges.low
high = (
float(self.edges[-1]) if isinstance(self.edges, list) else self.edges.high
)
inputstats[self.input].min = min(inputstats[self.input].min, low)
inputstats[self.input].max = max(inputstats[self.input].max, high)
for item in self.content:
Expand All @@ -253,7 +270,7 @@ class MultiBinning(Model):
description="The names of the correction input variables this binning applies to",
min_length=1,
)
edges: List[Union[List[float], UniformBinning]] = Field(
edges: List[Union[NonUniformBinning, UniformBinning]] = Field(
description="Bin edges for each input"
)
content: List[Content] = Field(
Expand All @@ -266,20 +283,6 @@ class MultiBinning(Model):
description="Overflow behavior for out-of-bounds values"
)

@field_validator("edges")
@classmethod
def validate_edges(
cls, edges: List[Union[List[float], UniformBinning]]
) -> List[Union[List[float], UniformBinning]]:
for i, dim in enumerate(edges):
if isinstance(dim, list):
for lo, hi in zip(dim[:-1], dim[1:]):
if hi <= lo:
raise ValueError(
f"MultiBinning edges for axis {i} are not monotone increasing: {dim}"
)
return edges

@field_validator("content")
@classmethod
def validate_content(
Expand All @@ -303,8 +306,8 @@ def summarize(
) -> None:
nodecount["MultiBinning"] += 1
for input, edges in zip(self.inputs, self.edges):
low = edges[0] if isinstance(edges, list) else edges.low
high = edges[-1] if isinstance(edges, list) else edges.high
low = float(edges[0]) if isinstance(edges, list) else edges.low
high = float(edges[-1]) if isinstance(edges, list) else edges.high
inputstats[input].overflow &= self.flow != "error"
inputstats[input].min = min(inputstats[input].min, low)
inputstats[input].max = max(inputstats[input].max, high)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_evaluator():
{
"nodetype": "binning",
"input": "pt",
"edges": [0, 20, 40, float("inf")],
"edges": [0, 20, 40, "inf"],
"flow": "error",
"content": [
schema.Category.model_validate(
Expand Down
41 changes: 41 additions & 0 deletions tests/test_issue208.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import pytest

from correctionlib import schemav2 as schema


def _make_binning(edges, content):
    """Build a Correction with one real input "x" whose data is a Binning
    node over the given edges/content (flow="error"), and return its evaluator.
    """
    x_input = schema.Variable(name="x", type="real")
    scale_output = schema.Variable(name="a scale", type="real")
    node = {
        "nodetype": "binning",
        "input": "x",
        "edges": edges,
        "flow": "error",
        "content": content,
    }
    binning = schema.Binning.model_validate(node)
    correction = schema.Correction(
        name="test corr",
        version=2,
        inputs=[x_input],
        output=scale_output,
        data=binning,
    )
    return correction.to_evaluator()


def test_string_infinity():
    """String infinities are accepted as outer edges; other spellings, float
    infinities and misplaced infinities are rejected with ValueError."""
    content = [1.0, 1.1, 1.2]

    # (edges, [(x, expected value), ...]) in the order they are checked
    accepted = [
        ([0, 20, 40, "inf"], [(10.0, 1.0), (100.0, 1.2)]),
        ([0, 20, 40, "+inf"], [(100.0, 1.2)]),
        (["-inf", 20, 40, "+inf"], [(-100.0, 1.0)]),
    ]
    for edges, probes in accepted:
        corr = _make_binning(edges, list(content))
        for x, expected in probes:
            assert corr.evaluate(x) == expected

    rejected = [
        [0, 20, 40, "infinity"],
        [0, 20, 40, float("inf")],
        [0, "inf", 20, 40],
    ]
    for edges in rejected:
        with pytest.raises(ValueError):
            _make_binning(edges, list(content))
Loading