diff --git a/langfuse/_utils/serializer.py b/langfuse/_utils/serializer.py index 27294bf80..6dcf2a6ce 100644 --- a/langfuse/_utils/serializer.py +++ b/langfuse/_utils/serializer.py @@ -1,6 +1,7 @@ """@private""" import datetime as dt +import decimal import enum import math from asyncio import Queue @@ -72,6 +73,14 @@ def _default_inner(self, obj: Any) -> Any: if np is not None and isinstance(obj, np.ndarray): return obj.tolist() + # Serialize Decimal as its exact string form rather than via float(): + # float() would silently round high-precision values (and overflow on + # very large ones), and JSON numbers are parsed as doubles downstream + # anyway. str() is exact; quiet NaN/Infinity render as "NaN"/"Infinity"/ + # "-Infinity", matching the float handling below (sNaN stays "sNaN"). + if isinstance(obj, decimal.Decimal): + return str(obj) + if isinstance(obj, float) and math.isnan(obj): return "NaN" @@ -140,7 +149,19 @@ def _default_inner(self, obj: Any) -> Any: return list(obj) if isinstance(obj, dict): - return {self.default(k): self.default(v) for k, v in obj.items()} + result = {} + for k, v in obj.items(): + serialized_key = self.default(k) + # JSON object keys must be scalars. If a key serializes to a + # container/object (e.g. tuple, frozenset, or custom object), fall + # back to its string form so a single non-primitive key does not + # discard the whole dict (keys with equal str() overwrite each other). 
+ if not isinstance( + serialized_key, (str, int, float, bool, type(None)) + ): + serialized_key = str(k) + result[serialized_key] = self.default(v) + return result if isinstance(obj, list): return [self.default(item) for item in obj] diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py index f4c8dde86..baf3a529a 100644 --- a/tests/unit/test_serializer.py +++ b/tests/unit/test_serializer.py @@ -2,6 +2,7 @@ import threading from dataclasses import dataclass from datetime import date, datetime, timezone +from decimal import Decimal from enum import Enum from pathlib import Path from uuid import UUID @@ -304,3 +305,43 @@ def test_dict_with_non_string_keys_is_serialized(input_obj, expected): result = json.loads(EventSerializer().encode(input_obj)) assert result == expected + + +def test_decimal_is_preserved_exactly(): + # Serialized to its exact string form (never the fallback string) + assert json.loads(EventSerializer().encode(Decimal("19.99"))) == "19.99" + assert json.loads(EventSerializer().encode({"price": Decimal("19.99")})) == { + "price": "19.99" + } + # High-precision values are preserved exactly (a float() conversion would + # silently round these). + assert ( + json.loads(EventSerializer().encode(Decimal("1.0000000000000001"))) + == "1.0000000000000001" + ) + assert ( + json.loads(EventSerializer().encode(Decimal("123456789012345678"))) + == "123456789012345678" + ) + + +def test_decimal_special_values(): + assert EventSerializer().encode(Decimal("NaN")) == '"NaN"' + assert EventSerializer().encode(Decimal("Infinity")) == '"Infinity"' + assert EventSerializer().encode(Decimal("-Infinity")) == '"-Infinity"' + + +def test_dict_with_non_primitive_keys_preserves_values(): + # A tuple key must not discard the entire dict (previously the whole dict + # serialized to a single fallback string). 
+ result = json.loads(EventSerializer().encode({(1, 2): "v", "other": "data"})) + assert result == {"(1, 2)": "v", "other": "data"} + + +def test_dict_with_custom_object_key_uses_str(): + class _Key: + def __str__(self) -> str: + return "custom-key" + + result = json.loads(EventSerializer().encode({_Key(): "val", "k2": "x"})) + assert result == {"custom-key": "val", "k2": "x"}