diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index c6dcadbf..413fd619 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -330,6 +330,55 @@ (comment (get public-types-spec 96)) +;;;; Type history +;; To help support release targeting, we keep track of when new type ids are added + +(comment + (set! *print-length* nil) + (vec (sort (keys type-ids))) + + (let [id-history ; { #{type-ids}} + {340 ; v3.4.0 (2024-04-30), added 2 + ;; New: map-entry meta-protocol-key + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 + 105 106 110 111 112 113 114 115} + + 330 ; v3.3.0 (2023-10-11), added 11 + ;; New: long-pos-sm long-pos-md long-pos-lg long-neg-sm long-neg-md long-neg-lg + ;; str-sm* vec-sm* set-sm* map-sm* sql-date + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 105 106 + 110 111 112 113 114 115} + + 320 ; v3.2.0 (2022-07-18), added none + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115} + + 313 ; v3.1.3 (2022-06-23) + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115} + + 300 ; v3.0.0 (2020-09-20), added 5 + ;; New: time-instant time-duration time-period kw-md sym-md + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 80 + 81 82 90 91 100 101 102 105 106 110 111 112 113 114 115}}] + + (defn diff [new-release old-release] + (vec (sort (clojure.set/difference (id-history new-release) (id-history old-release)))))) + + (diff 340 330)) + ;;;; Ns imports (for convenience of lib consumers) (enc/defaliases @@ -427,6 +476,47 @@ (thaw (freeze [1 1 0 1 1])))) +(let [;; Initially target compatibility with v3.2.0 (2020-07-18) + ;; Next bump will be to v3.4.0 (2024-04-30) + target-release + (enc/get-env {:as :edn, :default 320} + :taoensso.nippy.target-release) + + target>= + (fn [min-release] + (if target-release + (>= (long target-release) (long min-release)) + true))] + + (defmacro ^:private target-release< [min-release] (not (target>= min-release))) + (defmacro ^:private target-release>= + "Returns true iff `target-release` is nil or >= given `min-release`. + Used to help ease data migration for changes to core data types. + + When support is added for a new type in Nippy version X, it necessarily means + that data containing that new type and frozen with Nippy version X is unthawable + with Nippy versions < X. + + Earlier versions of Nippy will throw an exception on thawing affected data: + \"Unrecognized type id (). Data frozen with newer Nippy version?\" + + This can present a challenge when updating to new versions of Nippy, e.g.: + + - Rolling updates could lead to old and new versions of Nippy temporarily co-existing. + - Data written with new types could limit your ability to revert a Nippy update. + + There's no easy solution to this in general, but we CAN at least help reduce the + burden related to CHANGES in core data types by rolling out changed types in 2 phases: + + 1. Nippy vX reads new (changed) type, writes old type + 2. Nippy vX+1 writes new (changed) type + + When relevant, we can then warn users in the CHANGELOG to not leapfrog necessary version + updates (e.g. Nippy vX -> Nippy vX+2)." + [min-release] (target>= min-release))) + +(comment (macroexpand '(target-release>= 340))) + ;;;; Java Serializable config ;; Unfortunately quite a bit of complexity to do this safely @@ -662,9 +752,10 @@ (let [ba (.getBytes s StandardCharsets/UTF_8) len (alength ba)] (enc/cond - (sm-count?* len) (do (write-id out id-str-sm*) (write-sm-count* out len)) - (md-count? len) (do (write-id out id-str-md) (write-md-count out len)) - :else (do (write-id out id-str-lg) (write-lg-count out len))) + (and (target-release>= 330) (sm-count?* len)) (do (write-id out id-str-sm*) (write-sm-count* out len)) + (and (target-release< 330) (sm-count? len)) (do (write-id out id-str-sm_) (write-sm-count out len)) + (md-count? len) (do (write-id out id-str-md) (write-md-count out len)) + :else (do (write-id out id-str-lg) (write-lg-count out len))) (.write out ba 0 len)))) @@ -692,8 +783,26 @@ (.write out ba 0 len))) +(defn- write-long-legacy [^DataOutput out ^long n] + (enc/cond + (zero? n) (write-id out id-long-0) + (pos? n) + (enc/cond + (<= n Byte/MAX_VALUE) (do (write-id out id-long-sm_) (.writeByte out n)) + (<= n Short/MAX_VALUE) (do (write-id out id-long-md_) (.writeShort out n)) + (<= n Integer/MAX_VALUE) (do (write-id out id-long-lg_) (.writeInt out n)) + :else (do (write-id out id-long-xl) (.writeLong out n))) + + :else + (enc/cond + (>= n Byte/MIN_VALUE) (do (write-id out id-long-sm_) (.writeByte out n)) + (>= n Short/MIN_VALUE) (do (write-id out id-long-md_) (.writeShort out n)) + (>= n Integer/MIN_VALUE) (do (write-id out id-long-lg_) (.writeInt out n)) + :else (do (write-id out id-long-xl) (.writeLong out n))))) + (defn- write-long [^DataOutput out ^long n] (enc/cond + (target-release< 330) (write-long-legacy out n) (zero? n) (write-id out id-long-0) (pos? n) (enc/cond @@ -719,14 +828,10 @@ (write-id out id-vec-0) (do (enc/cond - (sm-count?* cnt) - (enc/cond - (== cnt 2) (write-id out id-vec-2) - (== cnt 3) (write-id out id-vec-3) - :else (do (write-id out id-vec-sm*) (write-sm-count* out cnt))) - - (md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt)) - :else (do (write-id out id-vec-lg) (write-lg-count out cnt))) + (and (target-release>= 330) (sm-count?* cnt)) (do (write-id out id-vec-sm*) (write-sm-count* out cnt)) + (and (target-release< 330) (sm-count? cnt)) (do (write-id out id-vec-sm_) (write-sm-count out cnt)) + (md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt)) + :else (do (write-id out id-vec-lg) (write-lg-count out cnt))) (-run! (fn [in] (-freeze-with-meta! in out)) v))))) @@ -817,6 +922,8 @@ (write-counted-coll out id-empty id-sm id-md id-lg coll) (write-uncounted-coll out id-empty id-sm id-md id-lg coll)))) +(def ^:private ^:const meta-protocol-key ::meta-protocol-key) + ;; Micro-optimization: ;; As (write-kvs out id-map-0 id-map-sm id-map-md id-map-lg x) (defn- write-map [^DataOutput out m is-metadata?] @@ -825,9 +932,10 @@ (write-id out id-map-0) (do (enc/cond - (sm-count?* cnt) (do (write-id out id-map-sm*) (write-sm-count* out cnt)) - (md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt)) - :else (do (write-id out id-map-lg) (write-lg-count out cnt))) + (and (target-release>= 330) (sm-count?* cnt)) (do (write-id out id-map-sm*) (write-sm-count* out cnt)) + (and (target-release< 330) (sm-count? cnt)) (do (write-id out id-map-sm_) (write-sm-count out cnt)) + (md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt)) + :else (do (write-id out id-map-lg) (write-lg-count out cnt))) (-run-kv! (fn [k v] @@ -835,7 +943,9 @@ (do ;; Strip Clojure v1.10+ metadata protocol extensions ;; (used by defprotocol `:extend-via-metadata`) - (write-id out id-meta-protocol-key) + (if (target-release>= 340) + (write-id out id-meta-protocol-key) + (-freeze-without-meta! meta-protocol-key out)) (write-id out id-nil)) (do (-freeze-with-meta! k out) @@ -852,9 +962,10 @@ (write-id out id-set-0) (do (enc/cond - (sm-count?* cnt) (do (write-id out id-set-sm*) (write-sm-count* out cnt)) - (md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt)) - :else (do (write-id out id-set-lg) (write-lg-count out cnt))) + (and (target-release>= 330) (sm-count?* cnt)) (do (write-id out id-set-sm*) (write-sm-count* out cnt)) + (and (target-release< 330) (sm-count? cnt)) (do (write-id out id-set-sm_) (write-sm-count out cnt)) + (md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt)) + :else (do (write-id out id-set-lg) (write-lg-count out cnt))) (-run! (fn [in] (-freeze-with-meta! in out)) s))))) @@ -1533,11 +1644,11 @@ id-false false id-char (.readChar in) - id-meta-protocol-key ::meta-protocol-key + id-meta-protocol-key meta-protocol-key id-meta (let [m (thaw-from-in! in) ; Always consume from stream x (thaw-from-in! in)] - (if-let [m (when *incl-metadata?* (not-empty (dissoc m ::meta-protocol-key)))] + (if-let [m (when *incl-metadata?* (not-empty (dissoc m meta-protocol-key)))] (with-meta x m) (do x))) diff --git a/test/taoensso/nippy_tests.clj b/test/taoensso/nippy_tests.clj index 12e12232..b13489bd 100644 --- a/test/taoensso/nippy_tests.clj +++ b/test/taoensso/nippy_tests.clj @@ -192,6 +192,19 @@ (defn ba-hash [^bytes ba] (hash (seq ba))) +(defn gen-hashes [] (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data)) +(defn cmp-hashes [new old] (vec (sort (reduce-kv (fn [s k v] (if (= (get old k) v) s (conj s k))) #{} new)))) + +(def ref-hashes-v341 + {:deftype -148586793, :lazy-seq-empty 1277437598, :true -1809580601, :long 598276629, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta -858252893, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 2005004017, :many-strings 1738215727, :nested -1350538572, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 813550992, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -921330463, :subvec 709331681, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1207486853, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord -553848560, :sorted-map -1160380145, :sql-date 80018667, :map-entry 1219306839, :false-boxed 1506926383, :uri 870148616, :period -2043530540, :many-longs -1109794519, :uuid -338331115, :set 1649942133, :kw-ns 1050084331, :map 1989337680, :many-doubles -827569787, :char 858269588}) + +(def ref-hashes-v340 + {:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588}) + +(comment + (cmp-hashes ref-hashes-v341 ref-hashes-v340) + [:defrecord :deftype :list :long :many-longs :many-strings :map :map-entry :meta :nested :set :sorted-map :sorted-set :str-short :subvec :uri :vector]) + (deftest _stable-serialized-output (testing "Stable serialized output" @@ -204,12 +217,9 @@ (is (ba= (freeze (sorted-map :a 1 :b 1)) (freeze (sorted-map :b 1 :a 1))) "Sorted structures are generally safe") - ;; Track serialized output of stress data so that we can at least be aware of - ;; (and warn about) unintended changes for common/elementary types, etc. Note that - ;; reference hashes will need to be recalculated on changes to stress data. - (let [reference-hashes ; (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data) - {:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588} - + ;; Track serialized output of stress data so that we can warn about changes to + ;; core types. Hashes will need to be recalculated on changes to stress data. + (let [reference-hashes ref-hashes-v341 failures ; #{{:keys [k v]}} (reduce-kv (fn [failures k v]