Skip to content

Commit

Permalink
fix(library/vm/vm_json): avoid overflow and maximize precision in jso…
Browse files Browse the repository at this point in the history
…n serialization (#743)

The original code for reading integers from the json would truncate large integers. This now ensures that no precision is lost converting between the `json` C++ API and the lean API.
This now also uses `uint64_t` or `int64_t` to write Lean integers into json if possible, choosing whichever the lean integer fits in.

Unfortunately our json library does not support big integers, so overflow into inexact floats is unavoidable for larger integers. This probably is made worse by the fact that we don't have a `native.double`, so the precision loss can be quite significant.

This also adds a `json.decidable_eq` instance, since it's useful in the tests and I needed it in downstream tests too.

Alternative to #740.

Co-authored-by: Eric Wieser <wieser.eric@gmail.com>
  • Loading branch information
EdAyers and eric-wieser committed Jul 13, 2022
1 parent 84516f9 commit a17a192
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 29 deletions.
14 changes: 14 additions & 0 deletions library/init/meta/json.lean
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,18 @@ meta instance : has_to_format json := ⟨to_format⟩
meta instance : has_to_string json := ⟨json.unparse⟩
meta instance : has_repr json := ⟨json.unparse⟩

meta instance : decidable_eq json :=
λ j₁ j₂, begin
cases j₁; cases j₂; simp; try {apply decidable.false},
-- do this explicitly casewise to be extra sure we don't recurse unintentionally, as meta code
-- doesn't protect against this.
case json.of_string { exact string.has_decidable_eq _ _ },
case json.of_float { exact native.float.dec_eq _ _ },
case json.of_int { exact int.decidable_eq _ _ },
case json.of_bool { exact bool.decidable_eq _ _ },
case json.null { exact decidable.true },
case json.array { letI := decidable_eq, exact list.decidable_eq _ _ },
case json.object { letI := decidable_eq, exact list.decidable_eq _ _ },
end

end json
32 changes: 22 additions & 10 deletions src/library/vm/vm_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,20 @@ json to_json(vm_obj const & o) {
std::string s = to_string(cfield(o, 0));
return json(s);
} case json_idx::vint: {
int i = to_int(cfield(o, 0));
return json(i);
auto oval = cfield(o, 0);
if (is_simple(oval)) {
return json(to_int(oval));
}
auto m = to_mpz(oval);
if (m.is<uint64_t>()) {
return json(static_cast<uint64_t>(m));
} else if (m.is<int64_t>()) {
return json(static_cast<int64_t>(m));
} else {
// lose precision and store as a float. We do not store as a double, as this leads to
// asymmetric behavior between the parser and serializer.
return json(static_cast<float>(m.get_double()));
}
} case json_idx::vfloat: {
float f = to_float(cfield(o, 0));
return json(f);
Expand Down Expand Up @@ -71,16 +83,16 @@ vm_obj to_obj(json const & j) {
if (j.is_null()) {
return mk_vm_simple(json_idx::vnull);
} else if (j.is_boolean()) {
return mk_vm_constructor(json_idx::vbool, mk_vm_bool(j));
return mk_vm_constructor(json_idx::vbool, mk_vm_bool(j.get<bool>()));
} else if (j.is_number_float()) {
float f = j;
return mk_vm_constructor(json_idx::vfloat, to_obj(f));
} else if (j.is_number()) {
int i = j;
return mk_vm_constructor(json_idx::vint, mk_vm_nat(i));
// note that this throws away a lot of precision, as the vm floats are not doubles
return mk_vm_constructor(json_idx::vfloat, to_obj(j.get<double>()));
} else if (j.is_number_unsigned()) {
return mk_vm_constructor(json_idx::vint, mk_vm_int(j.get<std::uint64_t>()));
} else if (j.is_number_integer()) {
return mk_vm_constructor(json_idx::vint, mk_vm_int(j.get<std::int64_t>()));
} else if (j.is_string()) {
std::string s = j;
return mk_vm_constructor(json_idx::vstring, to_obj(s));
return mk_vm_constructor(json_idx::vstring, to_obj(j.get<std::string>()));
} else if (j.is_array()) {
vm_obj o = mk_vm_nil();
for (auto i = j.crbegin(); i != j.crend(); i++) {
Expand Down
56 changes: 37 additions & 19 deletions tests/lean/json.lean
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,6 @@
#eval to_string $ json.unparse (json.object [])
run_cmd tactic.success_if_fail $ json.parse "spurgles"

meta def ball : list bool → bool :=
λ xs, xs.foldl band tt

meta instance : decidable_eq native.float := by apply_instance

meta def json.compare : Π (x y : json), bool
| (json.of_string s) (json.of_string s') := s = s'
| (json.of_int k) (json.of_int k') := k = k'
| (json.of_float x) (json.of_float x') := x = x'
| (json.of_bool b) (json.of_bool b') := b = b'
| (json.null) (json.null) := tt
| (json.object kvs) (json.object kvs') := (list.zip kvs kvs').foldr
(λ ⟨⟨k₁, v₁⟩, ⟨k₂, v₂⟩⟩ acc,
json.compare k₁ k₂ && json.compare v₁ v₂ && acc) tt
| (json.array args) (json.array args') := (list.zip args args').foldr
(λ ⟨j₁, j₂⟩ acc, acc && json.compare j₁ j₂) tt
| _ _ := ff

meta def test_parse_unparse : tactic unit := do {
f ← native.float.of_string "0.4",
let obj : json := json.object
Expand All @@ -34,16 +16,52 @@ meta def test_parse_unparse : tactic unit := do {
, json.of_int 1
, json.of_int 2
, json.of_int 3
, json.of_int (-1)
, json.of_int (-1000)
-- minimum int64_t and maximum uint64_t. Larger integers overflow to floats
, json.of_int (-0x8000000000000000)
, json.of_int (0xFFFFFFFFFFFFFFFF)
, "this is a \"string with an annoying quote in it"
]
)
],
let obj_msg := json.unparse obj,
obj' ← json.parse obj_msg,
guard (obj.compare obj') <|> tactic.trace format!"FAILED:\n{obj}\n{obj'}",
guard (obj = obj') <|> tactic.trace format!"FAILED:\n{obj}\n{obj'}",

let obj_msg' := json.unparse obj',
guard (obj_msg = obj_msg') <|> tactic.trace format!"FAILED:\n{obj_msg}\n{obj_msg'}"
}

#eval test_parse_unparse

run_cmd do
-- this is the maximum value of a float32
let obj_i : json := json.of_int 0xFFFFFF00000000000000000000000000,
let msg := json.unparse obj_i,
obj_f ← json.parse msg,
-- the large int is turned into a float
guard (obj_f = json.of_float (0xFFFFFF00000000000000000000000000 : native.float))

run_cmd do
-- this is the maximum integer that after truncation fits a float32
let obj_i : json := json.of_int 0xFFFFFF7FFFFFFFFFFFFFFFFFFFFFFFFF,
let msg := json.unparse obj_i,
obj_f ← json.parse msg,
-- the large int is truncated
guard (obj_f = json.of_float (0xFFFFFF00000000000000000000000000 : native.float))

run_cmd do
-- this is the smallest integer that does not fit in a float32
let obj_i : json := json.of_int 0xFFFFFF80000000000000000000000000,
let msg := json.unparse obj_i,
obj_f ← json.parse msg,
-- the large int overflows to infinity, which cannot be stored in json
guard (obj_f = json.null)

run_cmd do
-- this is the smallest integer that does not fit in a float32
let msg : string := to_string 0xFFFFFF80000000000000000000000000,
obj_f ← json.parse msg,
-- the large int overflows to (https://github.com/nlohmann/json does not support big integers)
guard (obj_f = json.of_float native.float.infinity)

0 comments on commit a17a192

Please sign in to comment.