From ce68109e8265d39f5725310ce23fdd7abc66b2ef Mon Sep 17 00:00:00 2001 From: Brendan Maginnis Date: Wed, 30 Oct 2024 16:40:13 +0000 Subject: [PATCH 1/5] Fix orjson serialization of dict keys Apply `_simple_default` serialization on dict keys to resolve this exception ```python Traceback (most recent call last): File "/Users/brendan/.local/share/virtualenvs/core-bRkH1-KB/lib/python3.11/site-packages/langsmith/client.py", line 281, in _dumps_json_single return orjson.dumps( ^^^^^^^^^^^^^ TypeError: Dict key must a type serializable with OPT_NON_STR_KEYS During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/Users/brendan/.local/share/virtualenvs/core-bRkH1-KB/lib/python3.11/site-packages/langsmith/client.py", line 5802, in _tracing_thread_handle_batch client.multipart_ingest_runs(create=create, update=update, pre_sampled=True) File "/Users/brendan/.local/share/virtualenvs/core-bRkH1-KB/lib/python3.11/site-packages/langsmith/client.py", line 1675, in multipart_ingest_runs valb = _dumps_json(value) ^^^^^^^^^^^^^^^^^^ File "/Users/brendan/.local/share/virtualenvs/core-bRkH1-KB/lib/python3.11/site-packages/langsmith/client.py", line 321, in _dumps_json return _dumps_json_single(obj, _serialize_json) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/Users/brendan/.local/share/virtualenvs/core-bRkH1-KB/lib/python3.11/site-packages/langsmith/client.py", line 292, in _dumps_json_single result = json.dumps( ^^^^^^^^^^^ File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/json/__init__.py", line 238, in dumps **kw).encode(obj) ^^^^^^^^^^^ File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/json/encoder.py", line 200, in encode chunks = self.iterencode(o, _one_shot=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/json/encoder.py", line 258, in iterencode return _iterencode(o, 0) ^^^^^^^^^^^^^^^^^ TypeError: keys must be str, int, float, bool or None, not PosixPath ``` --- python/langsmith/_internal/_serde.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py index 69940bce0..dc0cdd2fd 100644 --- a/python/langsmith/_internal/_serde.py +++ b/python/langsmith/_internal/_serde.py @@ -37,6 +37,8 @@ def _simple_default(obj): return obj.isoformat() if isinstance(obj, uuid.UUID): return str(obj) + if isinstance(obj, dict): + return {_simple_default(key): value for key, value in obj.items()} if hasattr(obj, "model_dump") and callable(obj.model_dump): return obj.model_dump() elif hasattr(obj, "dict") and callable(obj.dict): From 5e37fe016b4207a021501f04261054a7141b6388 Mon Sep 17 00:00:00 2001 From: Brendan Maginnis Date: Wed, 30 Oct 2024 17:09:22 +0000 Subject: [PATCH 2/5] Make it work for pydantic classes too --- python/langsmith/_internal/_serde.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py index dc0cdd2fd..fae8f8488 100644 --- a/python/langsmith/_internal/_serde.py +++ b/python/langsmith/_internal/_serde.py @@ -40,7 +40,7 @@ def _simple_default(obj): if isinstance(obj, dict): return {_simple_default(key): value for key, value in obj.items()} if hasattr(obj, "model_dump") and callable(obj.model_dump): - return obj.model_dump() + return obj.model_dump(mode="json") elif hasattr(obj, "dict") and callable(obj.dict): return obj.dict() elif hasattr(obj, "_asdict") and callable(obj._asdict): From 97b098dc819daeb04be452672bfa445a96f4c7ae Mon Sep 17 00:00:00 2001 From: Brendan Maginnis Date: Thu, 31 Oct 2024 08:28:22 +0000 Subject: [PATCH 3/5] add test and clean up _serde --- python/langsmith/_internal/_serde.py | 29 +++++++++++--------------- python/tests/unit_tests/test_client.py | 21 ++++++++++++++++--- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py index fae8f8488..845fd934f 100644 --- a/python/langsmith/_internal/_serde.py +++ b/python/langsmith/_internal/_serde.py @@ -10,11 +10,7 @@ import pathlib import re import uuid -from typing import ( - Any, - Callable, - Optional, -) +from typing import Any import orjson @@ -90,11 +86,14 @@ def _serialize_json(obj: Any) -> Any: return list(obj) serialization_methods = [ - ("model_dump", True), # Pydantic V2 with non-serializable fields - ("dict", False), # Pydantic V1 with non-serializable field - ("to_dict", False), # dataclasses-json + ( + "model_dump", + {"exclude_none": True, "mode": "json"}, + ), # Pydantic V2 with non-serializable fields + ("to_dict", {}), # dataclasses-json + ("dict", {}), # Pydantic V1 with non-serializable field ] - for attr, exclude_none in serialization_methods: + for attr, kwargs in serialization_methods: if ( hasattr(obj, attr) and callable(getattr(obj, attr)) @@ -102,9 +101,7 @@ def _serialize_json(obj: Any) -> Any: ): try: method = getattr(obj, attr) - response = ( - method(exclude_none=exclude_none) if exclude_none else method() - ) + response = method(**kwargs) if not isinstance(response, dict): return str(response) return response @@ -126,13 +123,11 @@ def _elide_surrogates(s: bytes) -> bytes: return result -def _dumps_json_single( - obj: Any, default: Optional[Callable[[Any], Any]] = None -) -> bytes: +def _dumps_json_single(obj: Any) -> bytes: try: return orjson.dumps( obj, - default=default or _simple_default, + default=_serialize_json, option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS | orjson.OPT_SERIALIZE_UUID @@ -170,4 +165,4 @@ def dumps_json(obj: Any, depth: int = 0) -> bytes: str The JSON formatted string. """ - return _dumps_json_single(obj, _serialize_json) + return _dumps_json_single(obj) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index dd212373e..7dda213ca 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -7,6 +7,7 @@ import json import logging import math +import pathlib import sys import time import uuid @@ -723,6 +724,7 @@ def test_pydantic_serialize() -> None: class ChildPydantic(BaseModel): uid: uuid.UUID + child_path_keys: Dict[pathlib.Path, pathlib.Path] class MyPydantic(BaseModel): foo: str @@ -730,9 +732,16 @@ class MyPydantic(BaseModel): tim: datetime ex: Optional[str] = None child: Optional[ChildPydantic] = None + path_keys: Dict[pathlib.Path, pathlib.Path] obj = MyPydantic( - foo="bar", uid=test_uuid, tim=test_time, child=ChildPydantic(uid=test_uuid) + foo="bar", + uid=test_uuid, + tim=test_time, + child=ChildPydantic( + uid=test_uuid, child_path_keys={pathlib.Path("foo"): pathlib.Path("bar")} + ), + path_keys={pathlib.Path("foo"): pathlib.Path("bar")}, ) res = json.loads(json.dumps(obj, default=_serialize_json)) expected = { @@ -741,7 +750,9 @@ class MyPydantic(BaseModel): "tim": test_time.isoformat(), "child": { "uid": str(test_uuid), + "child_path_keys": {"foo": "bar"}, }, + "path_keys": {"foo": "bar"}, } assert res == expected @@ -781,6 +792,7 @@ def __repr__(self): class MyPydantic(BaseModel): foo: str bar: int + path_keys: Dict[pathlib.Path, "MyPydantic"] @dataclasses.dataclass class MyDataclass: @@ -820,7 +832,11 @@ class MyNamedTuple(NamedTuple): "class_with_tee": ClassWithTee(), "my_dataclass": MyDataclass("foo", 1), "my_enum": MyEnum.FOO, - "my_pydantic": MyPydantic(foo="foo", bar=1), + "my_pydantic": MyPydantic( + foo="foo", + bar=1, + path_keys={pathlib.Path("foo"): MyPydantic(foo="foo", bar=1, path_keys={})}, + ), "my_pydantic_class": MyPydantic, "person": Person(name="foo_person"), "a_bool": True, @@ -847,7 +863,6 @@ class MyNamedTuple(NamedTuple): "my_dataclass": {"foo": "foo", "bar": 1}, "my_enum": "foo", "my_pydantic": {"foo": "foo", "bar": 1}, - "my_pydantic_class": lambda x: "MyPydantic" in x, "person": {"name": "foo_person"}, "a_bool": True, "a_none": None, From 586eb4fa64816d67b30057ceff2a30b7642d314f Mon Sep 17 00:00:00 2001 From: Brendan Maginnis Date: Thu, 31 Oct 2024 08:33:03 +0000 Subject: [PATCH 4/5] remove dead code --- python/langsmith/_internal/_serde.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py index 845fd934f..4d2976140 100644 --- a/python/langsmith/_internal/_serde.py +++ b/python/langsmith/_internal/_serde.py @@ -31,16 +31,8 @@ def _simple_default(obj): # https://github.com/ijl/orjson#serialize if isinstance(obj, datetime.datetime): return obj.isoformat() - if isinstance(obj, uuid.UUID): + elif isinstance(obj, uuid.UUID): return str(obj) - if isinstance(obj, dict): - return {_simple_default(key): value for key, value in obj.items()} - if hasattr(obj, "model_dump") and callable(obj.model_dump): - return obj.model_dump(mode="json") - elif hasattr(obj, "dict") and callable(obj.dict): - return obj.dict() - elif hasattr(obj, "_asdict") and callable(obj._asdict): - return obj._asdict() elif isinstance(obj, BaseException): return {"error": type(obj).__name__, "message": str(obj)} elif isinstance(obj, (set, frozenset, collections.deque)): @@ -90,8 +82,8 @@ def _serialize_json(obj: Any) -> Any: "model_dump", {"exclude_none": True, "mode": "json"}, ), # Pydantic V2 with non-serializable fields - ("to_dict", {}), # dataclasses-json ("dict", {}), # Pydantic V1 with non-serializable field + ("to_dict", {}), # dataclasses-json ] for attr, kwargs in serialization_methods: if ( From 8432823af6dfb52e311a0120b406f71542c98cef Mon Sep 17 00:00:00 2001 From: Brendan Maginnis Date: Tue, 5 Nov 2024 10:37:29 +0000 Subject: [PATCH 5/5] update tests --- python/tests/unit_tests/test_client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 4580ba896..1940e27a9 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -858,7 +858,12 @@ class MyNamedTuple(NamedTuple): "class_with_tee": "tee_a, tee_b", "my_dataclass": {"foo": "foo", "bar": 1}, "my_enum": "foo", - "my_pydantic": {"foo": "foo", "bar": 1}, + "my_pydantic": { + "foo": "foo", + "bar": 1, + "path_keys": {"foo": {"foo": "foo", "bar": 1, "path_keys": {}}}, + }, + "my_pydantic_class": lambda x: "MyPydantic" in x, "person": {"name": "foo_person"}, "a_bool": True, "a_none": None,