|
18 | 18 | ) |
19 | 19 | from mellea.formatters.granite.granite3.granite33.output import ( |
20 | 20 | Granite33OutputProcessor, |
| 21 | + _add_citation_response_spans, |
21 | 22 | _get_docs_from_citations, |
22 | 23 | _parse_citations_text, |
23 | 24 | _remove_citations_from_response_text, |
@@ -205,6 +206,94 @@ def test_special_token_lines_ignored(self): |
205 | 206 | assert len(found_real) == 1 |
206 | 207 |
|
207 | 208 |
|
| 209 | +# --------------------------------------------------------------------------- |
| 210 | +# _add_citation_response_spans |
| 211 | +# --------------------------------------------------------------------------- |
| 212 | + |
| 213 | + |
class TestAddCitationResponseSpans:
    """Tests for _add_citation_response_spans (issue #843 regression guard).

    Each test passes the same response twice -- once with inline citation
    tags and once with the tags removed -- and then checks the
    ``response_begin`` / ``response_end`` / ``response_text`` fields of every
    returned citation against the tag-free response.
    """

    def _make_citation(self, doc_id: str = "1") -> dict:
        """Return a minimal citation dict to feed to the function under test."""
        return {"doc_id": doc_id, "context_text": "some context"}

    @staticmethod
    def _assert_span_is_consistent(citation: dict, clean_response: str) -> None:
        """Assert that a citation's span fields agree with ``clean_response``.

        Args:
            citation: One element of the list returned by
                ``_add_citation_response_spans``; its ``response_begin``,
                ``response_end`` and ``response_text`` keys are read.
            clean_response: The response text without citation tags, i.e. the
                string the offsets are expected to index into.
        """
        begin = citation["response_begin"]
        end = citation["response_end"]
        text = citation["response_text"]

        # Python slicing silently accepts negative offsets, so without this
        # bound a degenerate span such as (-1, -1) with empty text would
        # satisfy every check below.
        assert 0 <= begin <= end <= len(clean_response)
        # The span must exactly bracket the cited text in the clean response.
        assert clean_response[begin:end] == text
        # Span length must equal the cited text length, not the response length.
        assert end - begin == len(text)

    def test_response_end_uses_sentence_length_not_full_response(self):
        """Regression: response_end must be index + len(sentence).

        Before the fix, _add_citation_response_spans used
        len(response_text_without_citations) -- the full response length --
        instead of len(response_text) -- the cited sentence length. This caused
        response_end to overshoot for any sentence that is not the last one.
        """
        sent1 = "Short sentence."
        sent2 = "This is the second sentence, which is longer."
        cite_tag = f'{CITE_START}{{"document_id": "1"}}{CITE_END}'
        response_with_citations = f"{sent1} {cite_tag} {sent2}"
        response_without_citations = f"{sent1} {sent2}"

        result = _add_citation_response_spans(
            [self._make_citation("1")],
            response_with_citations,
            response_without_citations,
        )

        assert len(result) == 1
        self._assert_span_is_consistent(result[0], response_without_citations)

    def test_multiple_citations_each_span_correct(self):
        """Each citation span must cover only its own sentence."""
        sent1 = "First sentence."
        sent2 = "Second sentence."
        cite1 = f'{CITE_START}{{"document_id": "1"}}{CITE_END}'
        cite2 = f'{CITE_START}{{"document_id": "2"}}{CITE_END}'
        response_with = f"{sent1} {cite1} {sent2} {cite2}"
        response_without = f"{sent1} {sent2}"

        result = _add_citation_response_spans(
            [self._make_citation("1"), self._make_citation("2")],
            response_with,
            response_without,
        )

        assert len(result) == 2
        for citation in result:
            self._assert_span_is_consistent(citation, response_without)

        # The two spans must not overlap
        spans = sorted((c["response_begin"], c["response_end"]) for c in result)
        assert spans[0][1] <= spans[1][0]

    def test_single_sentence_response(self):
        """Single-sentence response: the span must match the clean response.

        NOTE(review): this only checks that the span is consistent with the
        reported ``response_text``. It presumably should also cover the whole
        clean response -- confirm against the implementation before asserting
        ``(begin, end) == (0, len(response_without))``.
        """
        sent = "The only sentence."
        cite_tag = f'{CITE_START}{{"document_id": "1"}}{CITE_END}'
        response_with = f"{sent} {cite_tag}"
        response_without = sent

        result = _add_citation_response_spans(
            [self._make_citation("1")], response_with, response_without
        )

        assert len(result) == 1
        self._assert_span_is_consistent(result[0], response_without)
| 295 | + |
| 296 | + |
208 | 297 | # --------------------------------------------------------------------------- |
209 | 298 | # Granite33OutputProcessor.transform |
210 | 299 | # --------------------------------------------------------------------------- |
|
0 commit comments