review comments

akihikokuroda · akihikokuroda · commit 3f17c6039426 · 2026-04-02T13:00:04.000-04:00
Signed-off-by: Akihiko Kuroda <akihikokuroda2020@gmail.com>
diff --git a/docs/examples/citation_requirement_example.py b/docs/examples/citation_requirement_example.py
@@ -13,7 +13,7 @@
 from mellea.backends.huggingface import LocalHFBackend
 from mellea.stdlib.components import Document, Message
 from mellea.stdlib.context import ChatContext
-from mellea.stdlib.requirements.rag import CitationRequirement, citation_check
+from mellea.stdlib.requirements.rag import CitationRequirement
 
 
 async def main():
@@ -51,8 +51,8 @@ async def main():
     ctx = ChatContext().add(Message("user", "What colors are the sky and grass?"))
     ctx = ctx.add(Message("assistant", response, documents=docs))
 
-    # Example 1: Using CitationRequirement directly
-    print("\n--- Example 1: CitationRequirement with 70% coverage ---")
+    # Example 1: Documents in constructor
+    print("\n--- Example 1: CitationRequirement with documents in constructor ---")
     req = CitationRequirement(min_citation_coverage=0.7, documents=docs)
     result = await req.validate(backend, ctx)
 
@@ -64,9 +64,9 @@ async def main():
         )
         print(f"Reason: {reason_preview}")
 
-    # Example 2: Using citation_check factory
-    print("\n--- Example 2: Using citation_check factory ---")
-    req2 = citation_check(docs, min_citation_coverage=0.8)
+    # Example 2: Higher coverage threshold
+    print("\n--- Example 2: Higher coverage threshold (80%) ---")
+    req2 = CitationRequirement(min_citation_coverage=0.8, documents=docs)
     result2 = await req2.validate(backend, ctx)
 
     print(f"Validation passed: {result2.as_bool()}")
diff --git a/mellea/stdlib/requirements/__init__.py b/mellea/stdlib/requirements/__init__.py
@@ -4,7 +4,7 @@
 from ...core import Requirement, ValidationResult, default_output_to_bool
 from .md import as_markdown_list, is_markdown_list, is_markdown_table
 from .python_reqs import PythonExecutionReq
-from .rag import CitationRequirement, citation_check
+from .rag import CitationRequirement
 from .requirement import (
     ALoraRequirement,
     LLMaJRequirement,
@@ -25,7 +25,6 @@
     "ValidationResult",
     "as_markdown_list",
     "check",
-    "citation_check",
     "default_output_to_bool",
     "is_markdown_list",
     "is_markdown_table",
diff --git a/mellea/stdlib/requirements/rag.py b/mellea/stdlib/requirements/rag.py
@@ -159,34 +159,19 @@ async def validate(
                 reason=f"Backend {backend.__class__.__name__} does not support adapters required for citation detection",
             )
 
-        # More specific check for HuggingFace backend
-        try:
-            from ...backends.huggingface import LocalHFBackend
-
-            if not isinstance(backend, LocalHFBackend):
-                return ValidationResult(
-                    False,
-                    reason=f"Citation detection requires LocalHFBackend (HuggingFace), "
-                    f"but got {backend.__class__.__name__}. The find_citations intrinsic "
-                    f"only works with HuggingFace models.",
-                )
-        except ImportError:
-            return ValidationResult(
-                False,
-                reason="HuggingFace backend not available. Please install mellea[hf] to use citation detection.",
-            )
-
         # Create context before the response by getting all but the last message
         all_messages = ctx.as_list()
         if len(all_messages) > 1:
             # Rebuild context without last message
+            # Import here to avoid circular dependency
             from ..context import ChatContext
 
             context_before_response = ChatContext()
             for msg in all_messages[:-1]:
                 context_before_response = context_before_response.add(msg)
         else:
             # If only one message, use empty context
+            # Import here to avoid circular dependency
             from ..context import ChatContext
 
             context_before_response = ChatContext()
@@ -195,7 +180,9 @@ async def validate(
         total_chars = len(response)
         if total_chars == 0:
             return ValidationResult(
-                True, reason="Empty response has 100% citation coverage", score=1.0
+                True,
+                reason="Empty response is considered to have adequate citation coverage",
+                score=1.0,
             )
 
         # Call find_citations intrinsic
@@ -283,57 +270,3 @@ def _build_reason(
             )
 
         return reason
-
-
-def citation_check(
-    documents: Iterable[Document] | Iterable[str],
-    min_citation_coverage: float = 0.8,
-    description: str | None = None,
-) -> CitationRequirement:
-    """Create a citation coverage requirement with pre-attached documents.
-
-    This is a convenience factory function that creates a CitationRequirement
-    with documents already attached. This is useful when you have a fixed set of
-    documents to validate against and want a cleaner API.
-
-    **Important**: This requirement requires a HuggingFace backend (LocalHFBackend).
-
-    Args:
-        documents: Documents to check for citations. Can be Document objects
-            or strings (will be converted to Documents).
-        min_citation_coverage: Minimum ratio of cited content (0.0-1.0),
-            defaults to 0.8 (80% coverage).
-        description: Custom description for the requirement. If None,
-            generates a description based on coverage threshold.
-
-    Returns:
-        A CitationRequirement with documents attached
-
-    Example:
-        ```python
-        from mellea.backends.huggingface import LocalHFBackend
-        from mellea.stdlib.requirements.rag import citation_check
-        from mellea.stdlib.components import Document
-
-        backend = LocalHFBackend(model_id="meta-llama/Llama-3.2-1B-Instruct")
-        docs = [
-            Document(doc_id="1", text="The sky is blue."),
-            Document(doc_id="2", text="Grass is green.")
-        ]
-        req = citation_check(docs, min_citation_coverage=0.8)
-
-        # Use with instruct() - no need to attach documents to messages
-        result = m.instruct(
-            "Answer: {{query}}",
-            grounding_context={"query": "What color is the sky?"},
-            requirements=[req],
-            backend=backend,
-            strategy=RejectionSamplingStrategy()
-        )
-        ```
-    """
-    return CitationRequirement(
-        min_citation_coverage=min_citation_coverage,
-        documents=documents,
-        description=description,
-    )
diff --git a/test/stdlib/requirements/test_rag_requirements.py b/test/stdlib/requirements/test_rag_requirements.py
@@ -6,7 +6,7 @@
 from mellea.backends.huggingface import LocalHFBackend
 from mellea.stdlib.components import Document, Message
 from mellea.stdlib.context import ChatContext
-from mellea.stdlib.requirements.rag import CitationRequirement, citation_check
+from mellea.stdlib.requirements.rag import CitationRequirement
 
 
 @pytest.mark.huggingface
@@ -72,34 +72,6 @@ async def test_citation_requirement_with_constructor_documents():
     assert result.reason is not None
 
 
-@pytest.mark.huggingface
-@pytest.mark.llm
-@pytest.mark.requires_heavy_ram
-async def test_citation_check_factory():
-    """Test citation_check factory function."""
-    backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
-
-    # Create documents
-    docs = [Document(doc_id="doc1", text="The sky is blue during the day.")]
-
-    # Create a response
-    response = "The sky is blue."
-
-    # Create context
-    ctx = ChatContext().add(Message("user", "What color is the sky?"))
-    ctx = ctx.add(Message("assistant", response))
-
-    # Use factory function
-    req = citation_check(docs, min_citation_coverage=0.5)
-
-    # Validate
-    result = await req.validate(backend, ctx)
-
-    # Should work the same as CitationRequirement
-    assert isinstance(result.score, float)
-    assert result.reason is not None
-
-
 async def test_citation_requirement_empty_context():
     """Test citation requirement with empty context."""
     # Create a mock backend - we don't need a real one for this test
@@ -169,13 +141,12 @@ async def test_citation_requirement_no_documents():
 
 
 async def test_citation_requirement_wrong_backend():
-    """Test citation requirement with non-HuggingFace backend."""
-    try:
-        from mellea.backends.ollama import OllamaBackend  # type: ignore
-    except ImportError:
-        pytest.skip("Ollama backend not available")
+    """Test citation requirement with non-adapter backend."""
+    from unittest.mock import Mock
 
-    backend = OllamaBackend(model_id="llama3.2")  # type: ignore
+    # Create a mock backend that doesn't support adapters
+    backend = Mock()
+    backend.__class__.__name__ = "MockBackend"
 
     # Create documents
     docs = [Document(doc_id="doc1", text="The sky is blue.")]
@@ -190,10 +161,10 @@ async def test_citation_requirement_wrong_backend():
     # Validate
     result = await req.validate(backend, ctx)
 
-    # Should fail with clear error about backend requirement
+    # Should fail with clear error about adapter requirement
     assert not result.as_bool()
     assert result.reason is not None
-    assert "LocalHFBackend" in result.reason or "HuggingFace" in result.reason
+    assert "adapter" in result.reason.lower()
 
 
 def test_citation_requirement_invalid_coverage():
@@ -256,40 +227,60 @@ async def test_citation_requirement_empty_response():
     # Validate
     result = await req.validate(backend, ctx)
 
-    # Empty response should pass (100% coverage of nothing)
+    # Empty response should pass (considered to have adequate coverage)
     assert result.as_bool()
     assert result.score == 1.0
+    assert result.reason is not None
+    assert "adequate citation coverage" in result.reason.lower()
 
 
-@pytest.mark.huggingface
-@pytest.mark.llm
-@pytest.mark.requires_heavy_ram
 async def test_citation_requirement_threshold_boundary():
-    """Test citation requirement at exact threshold boundary."""
-    backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
+    """Test citation requirement at exact threshold boundary.
+
+    This test mocks the find_citations intrinsic to return a controlled
+    result that produces exactly the threshold coverage (80%).
+    """
+    from unittest.mock import Mock, patch
+
+    backend = Mock(spec=LocalHFBackend)
 
     # Create documents
     docs = [Document(doc_id="doc1", text="The sky is blue during the day.")]
 
-    # Create a response
-    response = "The sky is blue."
+    # Create a response with 10 characters
+    response = "1234567890"
 
     # Create context
     ctx = ChatContext().add(Message("user", "What color is the sky?"))
     ctx = ctx.add(Message("assistant", response, documents=docs))
 
-    # Create requirement with specific threshold
-    req = CitationRequirement(min_citation_coverage=0.8)
-
-    # Validate
-    result = await req.validate(backend, ctx)
+    # Mock find_citations to return exactly 8 characters cited (80% of 10)
+    mock_citations = [
+        {
+            "response_begin": 0,
+            "response_end": 8,  # 8 characters cited
+            "response_text": "12345678",
+            "citation_doc_id": "doc1",
+            "citation_text": "The sky is blue",
+        }
+    ]
 
-    # Check that score is calculated
-    assert isinstance(result.score, float)
-    assert 0.0 <= result.score <= 1.0
+    with patch(
+        "mellea.stdlib.components.intrinsic.rag.find_citations",
+        return_value=mock_citations,
+    ):
+        # Test at exact threshold (0.8)
+        req = CitationRequirement(min_citation_coverage=0.8)
+        result = await req.validate(backend, ctx)
 
-    # Result should match threshold comparison
-    if result.score >= 0.8:
+        # At exact threshold, should pass (>= comparison)
         assert result.as_bool()
-    else:
-        assert not result.as_bool()
+        assert result.score == 0.8
+
+        # Test just below threshold (0.81)
+        req_above = CitationRequirement(min_citation_coverage=0.81)
+        result_above = await req_above.validate(backend, ctx)
+
+        # Just below threshold, should fail
+        assert not result_above.as_bool()
+        assert result_above.score == 0.8