From 538ec1dc9140856deb566a93b8efdbeb1592dde1 Mon Sep 17 00:00:00 2001
From: Asad Majeed <80450984+ASAD-BE18@users.noreply.github.com>
Date: Wed, 10 Jun 2026 14:00:01 +0500
Subject: [PATCH 1/2] fix(langchain): handle Anthropic cache_creation
 nested-dict in _parse_usage_model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Anthropic's extended prompt-caching API returns cache_creation as a nested
dict keyed by cache tier (e.g. {"ephemeral_1h_input_tokens": 500,
"ephemeral_5m_input_tokens": 0}) inside the LLM output usage object.

Previously this value was passed through to UpdateGenerationBody.usageDetails
unchanged.  On v2.x (final filter: not None/not str) the nested dict
survived, causing a Pydantic ValidationError ("value is not a valid integer")
that was silently swallowed by the ingestion queue — dropping the entire
generation end() event and leaving every trace with endTime=null and
input/output=0.  On v4.x (final filter: isinstance(v, int)) the nested dict
was silently discarded, losing all cache-creation token data.

Fix: before the final int-filter, pop cache_creation (when it is a dict),
flatten each integer tier value into a cache_creation_{tier_key} entry
(non-integer tier values are skipped), and, when the tier sum is greater
than zero, set the aggregated cache_creation_input_tokens total via
setdefault so the legacy field is not overwritten if already present.

Closes #1697
---
 langfuse/langchain/CallbackHandler.py | 16 ++++++
 tests/unit/test_parse_usage_model.py  | 72 +++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/langfuse/langchain/CallbackHandler.py b/langfuse/langchain/CallbackHandler.py
index 1349f6ae0..1be2043c8 100644
--- a/langfuse/langchain/CallbackHandler.py
+++ b/langfuse/langchain/CallbackHandler.py
@@ -1647,6 +1647,22 @@ def _parse_usage_model(usage: Union[pydantic.BaseModel, dict]) -> Any:
                             0, usage_model[f"input_modality_{item['modality']}"] - value
                         )
 
+        # Anthropic extended prompt caching: cache_creation is a dict keyed by cache tier.
+        # Example: {"ephemeral_1h_input_tokens": 500, "ephemeral_5m_input_tokens": 0}
+        # Flatten int tiers into cache_creation_<tier> keys; if their sum is > 0 and the
+        # legacy cache_creation_input_tokens field is absent, set it to that sum.
+        if "cache_creation" in usage_model and isinstance(
+            usage_model["cache_creation"], dict
+        ):
+            cache_creation = usage_model.pop("cache_creation")
+            total = 0
+            for tier_key, tier_val in cache_creation.items():
+                if isinstance(tier_val, int):
+                    usage_model[f"cache_creation_{tier_key}"] = tier_val
+                    total += tier_val
+            if total > 0:
+                usage_model.setdefault("cache_creation_input_tokens", total)
+
     usage_model = {k: v for k, v in usage_model.items() if isinstance(v, int)}
 
     return usage_model if usage_model else None
diff --git a/tests/unit/test_parse_usage_model.py b/tests/unit/test_parse_usage_model.py
index df441523c..0001cfbb2 100644
--- a/tests/unit/test_parse_usage_model.py
+++ b/tests/unit/test_parse_usage_model.py
@@ -1,6 +1,78 @@
 from langfuse.langchain.CallbackHandler import _parse_usage_model
 
 
+def test_anthropic_cache_creation_dict_flattened():
+    """Anthropic extended caching: cache_creation dict is flattened into per-tier keys
+    and an aggregated cache_creation_input_tokens total is added."""
+    usage = {
+        "input_tokens": 9454,
+        "output_tokens": 380,
+        "cache_read_input_tokens": 0,
+        "cache_creation": {
+            "ephemeral_1h_input_tokens": 500,
+            "ephemeral_5m_input_tokens": 200,
+        },
+    }
+    result = _parse_usage_model(usage)
+
+    # Core fields survive
+    assert result["input"] == 9454
+    assert result["output"] == 380
+    assert result["cache_read_input_tokens"] == 0
+
+    # Per-tier keys are present and individually correct
+    assert result["cache_creation_ephemeral_1h_input_tokens"] == 500
+    assert result["cache_creation_ephemeral_5m_input_tokens"] == 200
+
+    # Aggregated total equals sum of all tiers
+    assert result["cache_creation_input_tokens"] == 700
+
+    # The original nested dict must not be present
+    assert "cache_creation" not in result
+
+
+def test_anthropic_cache_creation_all_zeros_no_aggregate():
+    """When all cache_creation tier values are zero no aggregate key is added
+    (avoids noise in traces where caching did not fire)."""
+    usage = {
+        "input_tokens": 100,
+        "output_tokens": 50,
+        "cache_creation": {
+            "ephemeral_1h_input_tokens": 0,
+            "ephemeral_5m_input_tokens": 0,
+        },
+    }
+    result = _parse_usage_model(usage)
+
+    assert result["input"] == 100
+    assert result["output"] == 50
+    # Per-tier zero keys are still stored
+    assert result["cache_creation_ephemeral_1h_input_tokens"] == 0
+    assert result["cache_creation_ephemeral_5m_input_tokens"] == 0
+    # No aggregate added when total is zero
+    assert "cache_creation_input_tokens" not in result
+    assert "cache_creation" not in result
+
+
+def test_anthropic_cache_creation_legacy_field_not_overwritten():
+    """If both the legacy cache_creation_input_tokens (int) and the new cache_creation
+    (dict) are present, the legacy value is preserved and the dict total is not added."""
+    usage = {
+        "input_tokens": 100,
+        "output_tokens": 50,
+        "cache_creation_input_tokens": 300,  # legacy field already present
+        "cache_creation": {
+            "ephemeral_1h_input_tokens": 200,
+            "ephemeral_5m_input_tokens": 100,
+        },
+    }
+    result = _parse_usage_model(usage)
+
+    # setdefault must not overwrite the existing legacy value
+    assert result["cache_creation_input_tokens"] == 300
+    assert "cache_creation" not in result
+
+
 def test_standard_tier_input_token_details():
     """Standard tier: audio and cache_read are subtracted from input."""
     usage = {

From fe3af4d0d319ad23b0212a2f8fff8153ef99f92c Mon Sep 17 00:00:00 2001
From: Asad Majeed <80450984+ASAD-BE18@users.noreply.github.com>
Date: Fri, 12 Jun 2026 15:12:32 +0500
Subject: [PATCH 2/2] test(langchain): use distinct legacy value in setdefault
 test

The legacy value (300) equalled the tier sum (200+100=300), so the
assertion would pass even if setdefault were replaced with a plain
assignment. Use 999 to make the test discriminating.
---
 tests/unit/test_parse_usage_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_parse_usage_model.py b/tests/unit/test_parse_usage_model.py
index 0001cfbb2..131631a6c 100644
--- a/tests/unit/test_parse_usage_model.py
+++ b/tests/unit/test_parse_usage_model.py
@@ -60,7 +60,7 @@ def test_anthropic_cache_creation_legacy_field_not_overwritten():
     usage = {
         "input_tokens": 100,
         "output_tokens": 50,
-        "cache_creation_input_tokens": 300,  # legacy field already present
+        "cache_creation_input_tokens": 999,  # legacy field already present; intentionally != tier sum (300)
         "cache_creation": {
             "ephemeral_1h_input_tokens": 200,
             "ephemeral_5m_input_tokens": 100,
@@ -69,7 +69,7 @@ def test_anthropic_cache_creation_legacy_field_not_overwritten():
     result = _parse_usage_model(usage)
 
     # setdefault must not overwrite the existing legacy value
-    assert result["cache_creation_input_tokens"] == 300
+    assert result["cache_creation_input_tokens"] == 999
     assert "cache_creation" not in result