66from mellea .backends .huggingface import LocalHFBackend
77from mellea .stdlib .components import Document , Message
88from mellea .stdlib .context import ChatContext
9- from mellea .stdlib .requirements .rag import CitationRequirement , citation_check
9+ from mellea .stdlib .requirements .rag import CitationRequirement
1010
1111
1212@pytest .mark .huggingface
@@ -72,34 +72,6 @@ async def test_citation_requirement_with_constructor_documents():
7272 assert result .reason is not None
7373
7474
75- @pytest .mark .huggingface
76- @pytest .mark .llm
77- @pytest .mark .requires_heavy_ram
78- async def test_citation_check_factory ():
79- """Test citation_check factory function."""
80- backend = LocalHFBackend (model_id = "ibm-granite/granite-4.0-micro" )
81-
82- # Create documents
83- docs = [Document (doc_id = "doc1" , text = "The sky is blue during the day." )]
84-
85- # Create a response
86- response = "The sky is blue."
87-
88- # Create context
89- ctx = ChatContext ().add (Message ("user" , "What color is the sky?" ))
90- ctx = ctx .add (Message ("assistant" , response ))
91-
92- # Use factory function
93- req = citation_check (docs , min_citation_coverage = 0.5 )
94-
95- # Validate
96- result = await req .validate (backend , ctx )
97-
98- # Should work the same as CitationRequirement
99- assert isinstance (result .score , float )
100- assert result .reason is not None
101-
102-
10375async def test_citation_requirement_empty_context ():
10476 """Test citation requirement with empty context."""
10577 # Create a mock backend - we don't need a real one for this test
@@ -169,13 +141,12 @@ async def test_citation_requirement_no_documents():
169141
170142
171143async def test_citation_requirement_wrong_backend ():
172- """Test citation requirement with non-HuggingFace backend."""
173- try :
174- from mellea .backends .ollama import OllamaBackend # type: ignore
175- except ImportError :
176- pytest .skip ("Ollama backend not available" )
144+ """Test citation requirement with non-adapter backend."""
145+ from unittest .mock import Mock
177146
178- backend = OllamaBackend (model_id = "llama3.2" ) # type: ignore
147+ # Create a mock backend that doesn't support adapters
148+ backend = Mock ()
149+ backend .__class__ .__name__ = "MockBackend"
179150
180151 # Create documents
181152 docs = [Document (doc_id = "doc1" , text = "The sky is blue." )]
@@ -190,10 +161,10 @@ async def test_citation_requirement_wrong_backend():
190161 # Validate
191162 result = await req .validate (backend , ctx )
192163
193- # Should fail with clear error about backend requirement
164+ # Should fail with clear error about adapter requirement
194165 assert not result .as_bool ()
195166 assert result .reason is not None
196- assert "LocalHFBackend " in result .reason or "HuggingFace" in result . reason
167+ assert "adapter " in result .reason . lower ()
197168
198169
199170def test_citation_requirement_invalid_coverage ():
@@ -256,40 +227,60 @@ async def test_citation_requirement_empty_response():
256227 # Validate
257228 result = await req .validate (backend , ctx )
258229
259- # Empty response should pass (100% coverage of nothing )
230+ # Empty response should pass (considered to have adequate coverage )
260231 assert result .as_bool ()
261232 assert result .score == 1.0
233+ assert result .reason is not None
234+ assert "adequate citation coverage" in result .reason .lower ()
262235
263236
264- @pytest .mark .huggingface
265- @pytest .mark .llm
266- @pytest .mark .requires_heavy_ram
267237async def test_citation_requirement_threshold_boundary ():
268- """Test citation requirement at exact threshold boundary."""
269- backend = LocalHFBackend (model_id = "ibm-granite/granite-4.0-micro" )
238+ """Test citation requirement at exact threshold boundary.
239+
240+ This test mocks the find_citations intrinsic to return a controlled
241+ result that produces exactly the threshold coverage (80%).
242+ """
243+ from unittest .mock import Mock , patch
244+
245+ backend = Mock (spec = LocalHFBackend )
270246
271247 # Create documents
272248 docs = [Document (doc_id = "doc1" , text = "The sky is blue during the day." )]
273249
274- # Create a response
275- response = "The sky is blue. "
250+ # Create a response with 10 characters
251+ response = "1234567890 "
276252
277253 # Create context
278254 ctx = ChatContext ().add (Message ("user" , "What color is the sky?" ))
279255 ctx = ctx .add (Message ("assistant" , response , documents = docs ))
280256
281- # Create requirement with specific threshold
282- req = CitationRequirement (min_citation_coverage = 0.8 )
283-
284- # Validate
285- result = await req .validate (backend , ctx )
257+ # Mock find_citations to return exactly 8 characters cited (80% of 10)
258+ mock_citations = [
259+ {
260+ "response_begin" : 0 ,
261+ "response_end" : 8 , # 8 characters cited
262+ "response_text" : "12345678" ,
263+ "citation_doc_id" : "doc1" ,
264+ "citation_text" : "The sky is blue" ,
265+ }
266+ ]
286267
287- # Check that score is calculated
288- assert isinstance (result .score , float )
289- assert 0.0 <= result .score <= 1.0
268+ with patch (
269+ "mellea.stdlib.components.intrinsic.rag.find_citations" ,
270+ return_value = mock_citations ,
271+ ):
272+ # Test at exact threshold (0.8)
273+ req = CitationRequirement (min_citation_coverage = 0.8 )
274+ result = await req .validate (backend , ctx )
290275
291- # Result should match threshold comparison
292- if result .score >= 0.8 :
276+ # At exact threshold, should pass (>= comparison)
293277 assert result .as_bool ()
294- else :
295- assert not result .as_bool ()
278+ assert result .score == 0.8
279+
280+ # Test with the threshold raised just above the score (0.81 > 0.8)
281+ req_above = CitationRequirement (min_citation_coverage = 0.81 )
282+ result_above = await req_above .validate (backend , ctx )
283+
284+ # Just below threshold, should fail
285+ assert not result_above .as_bool ()
286+ assert result_above .score == 0.8
0 commit comments