fix(library/vm/vm_json): avoid overflow and maximize precision in jso…

…n serialization (#743) The original code for reading integers from the json would truncate large integers. This now ensures that no precision is lost converting between the `json` C++ API and the lean API. This now also uses `uint64_t` or `int64_t` to write Lean integers into json if possible, choosing whichever the lean integer fits in. Unfortunately our json library does not support big integers, so overflow into inexact floats is unavoidable for larger integers. This probably is made worse by the fact that we don't have a `native.double`, so the precision loss can be quite significant. This also adds a `json.decidable_eq` instance, since it's useful in the tests and I needed it in downstream tests too. Alternative to #740. Co-authored-by: Eric Wieser <wieser.eric@gmail.com>
leanprover-community · Jul 13, 2022 · a17a192 · a17a192
1 parent 84516f9
commit a17a192
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 29 deletions.
diff --git a/library/init/meta/json.lean b/library/init/meta/json.lean
@@ -43,4 +43,18 @@ meta instance : has_to_format json := ⟨to_format⟩
 meta instance : has_to_string json := ⟨json.unparse⟩
 meta instance : has_repr json := ⟨json.unparse⟩
 
+meta instance : decidable_eq json :=
+λ j₁ j₂, begin
+  cases j₁; cases j₂; simp; try {apply decidable.false},
+  -- do this explicitly casewise to be extra sure we don't recurse unintentionally, as meta code
+  -- doesn't protect against this.
+  case json.of_string { exact string.has_decidable_eq _ _ },
+  case json.of_float { exact native.float.dec_eq _ _ },
+  case json.of_int { exact int.decidable_eq _ _ },
+  case json.of_bool { exact bool.decidable_eq _ _ },
+  case json.null { exact decidable.true },
+  case json.array { letI := decidable_eq, exact list.decidable_eq _ _ },
+  case json.object { letI := decidable_eq, exact list.decidable_eq _ _ },
+end
+
 end json
diff --git a/src/library/vm/vm_json.cpp b/src/library/vm/vm_json.cpp
@@ -32,8 +32,20 @@ json to_json(vm_obj const & o) {
         std::string s = to_string(cfield(o, 0));
         return json(s);
     } case json_idx::vint: {
-        int i = to_int(cfield(o, 0));
-        return json(i);
+        auto oval = cfield(o, 0);
+        if (is_simple(oval)) {
+            return json(to_int(oval));
+        }
+        auto m = to_mpz(oval);
+        if (m.is<uint64_t>()) {
+            return json(static_cast<uint64_t>(m));
+        } else if (m.is<int64_t>()) {
+            return json(static_cast<int64_t>(m));
+        } else {
+            // lose precision and store as a float. We do not store as a double, as this leads to
+            // asymmetric behavior between the parser and serializer.
+            return json(static_cast<float>(m.get_double()));
+        }
     } case json_idx::vfloat: {
         float f = to_float(cfield(o, 0));
         return json(f);
@@ -71,16 +83,16 @@ vm_obj to_obj(json const & j) {
     if (j.is_null()) {
         return mk_vm_simple(json_idx::vnull);
     } else if (j.is_boolean()) {
-        return mk_vm_constructor(json_idx::vbool, mk_vm_bool(j));
+        return mk_vm_constructor(json_idx::vbool, mk_vm_bool(j.get<bool>()));
     } else if (j.is_number_float()) {
-        float f = j;
-        return mk_vm_constructor(json_idx::vfloat, to_obj(f));
-    } else if (j.is_number()) {
-        int i = j;
-        return mk_vm_constructor(json_idx::vint, mk_vm_nat(i));
+        // note that this throws away a lot of precision, as the vm floats are not doubles
+        return mk_vm_constructor(json_idx::vfloat, to_obj(j.get<double>()));
+    } else if (j.is_number_unsigned()) {
+        return mk_vm_constructor(json_idx::vint, mk_vm_int(j.get<std::uint64_t>()));
+    } else if (j.is_number_integer()) {
+        return mk_vm_constructor(json_idx::vint, mk_vm_int(j.get<std::int64_t>()));
     } else if (j.is_string()) {
-        std::string s = j;
-        return mk_vm_constructor(json_idx::vstring, to_obj(s));
+        return mk_vm_constructor(json_idx::vstring, to_obj(j.get<std::string>()));
     } else if (j.is_array()) {
         vm_obj o = mk_vm_nil();
         for (auto i = j.crbegin(); i != j.crend(); i++) {

diff --git a/tests/lean/json.lean b/tests/lean/json.lean
@@ -3,24 +3,6 @@
 #eval to_string $ json.unparse (json.object [])
 run_cmd tactic.success_if_fail $ json.parse "spurgles"
 
-meta def ball : list bool → bool :=
-λ xs, xs.foldl band tt
-
-meta instance : decidable_eq native.float := by apply_instance
-
-meta def json.compare : Π (x y : json), bool
-| (json.of_string s) (json.of_string s') := s = s'
-| (json.of_int k) (json.of_int k') := k = k'
-| (json.of_float x) (json.of_float x') := x = x'
-| (json.of_bool b) (json.of_bool b') := b = b'
-| (json.null) (json.null) := tt
-| (json.object kvs) (json.object kvs') := (list.zip kvs kvs').foldr
-  (λ ⟨⟨k₁, v₁⟩, ⟨k₂, v₂⟩⟩ acc,
-  json.compare k₁ k₂ && json.compare v₁ v₂ && acc) tt
-| (json.array args) (json.array args') := (list.zip args args').foldr
-  (λ ⟨j₁, j₂⟩ acc, acc && json.compare j₁ j₂) tt
-| _ _ := ff
-
 meta def test_parse_unparse : tactic unit := do {
   f ← native.float.of_string "0.4",
   let obj : json := json.object
@@ -34,16 +16,52 @@ meta def test_parse_unparse : tactic unit := do {
             , json.of_int 1
             , json.of_int 2
             , json.of_int 3
+            , json.of_int (-1)
+            , json.of_int (-1000)
+            -- minimum int64_t and maximum uint64_t. Larger integers overflow to floats
+            , json.of_int (-0x8000000000000000)
+            , json.of_int (0xFFFFFFFFFFFFFFFF)
             , "this is a \"string with an annoying quote in it"
           ]
         )
     ],
   let obj_msg := json.unparse obj,
   obj' ← json.parse obj_msg,
-  guard (obj.compare obj') <|> tactic.trace format!"FAILED:\n{obj}\n{obj'}",
+  guard (obj = obj') <|> tactic.trace format!"FAILED:\n{obj}\n{obj'}",
 
   let obj_msg' := json.unparse obj',
   guard (obj_msg = obj_msg') <|> tactic.trace format!"FAILED:\n{obj_msg}\n{obj_msg'}"
 }
 
 #eval test_parse_unparse
+
+run_cmd do
+  -- this is the maximum value of a float32
+  let obj_i : json := json.of_int 0xFFFFFF00000000000000000000000000,
+  let msg := json.unparse obj_i,
+  obj_f ← json.parse msg,
+  -- the large int is turned into a float
+  guard (obj_f = json.of_float (0xFFFFFF00000000000000000000000000 : native.float))
+
+run_cmd do
+  -- this is the maximum integer that after truncation fits a float32
+  let obj_i : json := json.of_int 0xFFFFFF7FFFFFFFFFFFFFFFFFFFFFFFFF,
+  let msg := json.unparse obj_i,
+  obj_f ← json.parse msg,
+  -- the large int is truncated
+  guard (obj_f = json.of_float (0xFFFFFF00000000000000000000000000 : native.float))
+
+run_cmd do
+  -- this is the smallest integer that does not fit in a float32
+  let obj_i : json := json.of_int 0xFFFFFF80000000000000000000000000,
+  let msg := json.unparse obj_i,
+  obj_f ← json.parse msg,
+  -- the large int overflows to infinity, which cannot be stored in json
+  guard (obj_f = json.null)
+
+run_cmd do
+  -- this is the smallest integer that does not fit in a float32
+  let msg : string := to_string 0xFFFFFF80000000000000000000000000,
+  obj_f ← json.parse msg,
+  -- the large int overflows to (https://github.com/nlohmann/json does not support big integers)
+  guard (obj_f = json.of_float native.float.infinity)