Skip to content

Commit 3a491db

Browse files
unamedkr and claude committed
fix(rlv): second-pass audit — 5 additional hardening fixes
Critical:
- locator.py: bounds-check chunk_id in _llm_locate() before gist.chunks access
- locator.py: guard empty rrf_ranked (IndexError when all chunks excluded)
- locator.py: BM25 division-by-zero guard (max(denom, 1e-9))
- locator.py: deterministic RRF sort by (-score, chunk_id) for tie-breaking

Robustness:
- _llm.py: LLMResult.is_error field + broader exception handling (ConnectionResetError, TimeoutError, OSError)
- lookup.py: early return on LLM error (both select and direct paths)
- gist.py: empty/whitespace document guard in build_gist()

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 68b2506 commit 3a491db

4 files changed

Lines changed: 36 additions & 4 deletions

File tree

bench/rlv/stages/_llm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class LLMResult:
5353
raw: str # the full CLI stdout+stderr
5454
n_tokens: int # generated token count
5555
elapsed: float # wall seconds
56+
is_error: bool = False # True if the call failed (text contains error message)
5657

5758

5859
def estimate_tokens(text: str) -> int:
@@ -252,9 +253,11 @@ def llm_call(
252253
try:
253254
with urllib.request.urlopen(req, timeout=600) as resp:
254255
payload = json.loads(resp.read().decode("utf-8"))
255-
except (urllib.error.URLError, urllib.error.HTTPError) as e:
256+
except (urllib.error.URLError, urllib.error.HTTPError, ConnectionResetError,
257+
TimeoutError, OSError) as e:
256258
elapsed = time.time() - t0
257-
return LLMResult(text=f"[ERROR: {e}]", raw=str(e), n_tokens=0, elapsed=elapsed)
259+
return LLMResult(text=f"[ERROR: {e}]", raw=str(e), n_tokens=0,
260+
elapsed=elapsed, is_error=True)
258261
elapsed = time.time() - t0
259262

260263
text = ""

bench/rlv/stages/gist.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,12 @@ def build_gist(
211211
a richer index for cases where the chunk head text isn't
212212
representative of the section.
213213
"""
214+
# Guard: empty or whitespace-only documents produce no chunks
215+
if not doc_text or not doc_text.strip():
216+
if verbose:
217+
print(f"[gist] doc_id={doc_id} — empty document, returning empty gist")
218+
return Gist(doc_id=doc_id, n_chars=len(doc_text or ""), chunks=[])
219+
214220
chunks_raw = chunk_document(doc_text, chunk_chars=chunk_chars)
215221
if verbose:
216222
print(f"[gist] doc_id={doc_id} len={len(doc_text)} chars, {len(chunks_raw)} chunks "

bench/rlv/stages/locator.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ def _bm25_score_chunks(question: str, gist: Gist, excluded: List[int],
286286
w[:min(4, len(w))] == term[:min(4, len(term))]))
287287
n = df.get(term, 0)
288288
idf = math.log((N - n + 0.5) / (n + 0.5) + 1.0) if n < N else 0.0
289-
tf_norm = (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * dl / max(avg_dl, 1)))
289+
denom = tf + k1 * (1 - b + b * dl / max(avg_dl, 1))
290+
tf_norm = (tf * (k1 + 1)) / max(denom, 1e-9)
290291
score += idf * tf_norm
291292
scores.append((chunk.chunk_id, score))
292293

@@ -330,6 +331,8 @@ def _llm_locate(
330331

331332
lines = []
332333
for choice_num, cid in enumerate(available, start=1):
334+
if cid >= len(gist.chunks):
335+
continue # skip invalid chunk_id
333336
chunk = gist.chunks[cid]
334337
text = (chunk.full_text or chunk.head_text).replace("\n", " ").strip()
335338
# Show first 2 sentences (more context than just head)
@@ -405,11 +408,21 @@ def locate(
405408
rrf[cid] = rrf.get(cid, 0) + 1.0 / (rrf_k + rank)
406409
for rank, (cid, _) in enumerate(bm25_scores):
407410
rrf[cid] = rrf.get(cid, 0) + 1.0 / (rrf_k + rank)
408-
rrf_ranked = sorted(rrf.items(), key=lambda x: x[1], reverse=True)
411+
# Sort by (score DESC, chunk_id ASC) for deterministic tie-breaking
412+
rrf_ranked = sorted(rrf.items(), key=lambda x: (-x[1], x[0]))
409413

410414
if verbose:
411415
print(f"[locator] rrf top3: {rrf_ranked[:3]}")
412416

417+
# Guard: if no chunks survived scoring, return first available
418+
if not rrf_ranked:
419+
chunk = available[0]
420+
return RegionPointer(
421+
chunk_id=chunk.chunk_id, confidence="low", method="fallback",
422+
candidates=[], char_start=chunk.char_start, char_end=chunk.char_end,
423+
score=0.0,
424+
)
425+
413426
# --- Step 4: LLM classification on top candidates ---
414427
# Always run LLM on the top 5 RRF candidates (not just when ambiguous)
415428
top_candidates = [cid for cid, _ in rrf_ranked[:5]]

bench/rlv/stages/lookup.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ def lookup(
133133
print(f"[lookup] chunk {region.chunk_id} ({len(region_text)} chars), "
134134
f"{len(sentences)} sentences -> {mode}")
135135
result = _llm.llm_call(prompt, max_tokens=64)
136+
if result.is_error:
137+
return LookupResult(
138+
answer=result.text, region_text=region_text,
139+
chunk_id=region.chunk_id, raw_llm_output=result.text, method="error",
140+
)
136141
return LookupResult(
137142
answer=result.text.strip(),
138143
region_text=region_text,
@@ -156,6 +161,11 @@ def lookup(
156161

157162
# Only need a single digit — minimize tokens for slow CPU models
158163
result = _llm.llm_call(prompt, max_tokens=8)
164+
if result.is_error:
165+
return LookupResult(
166+
answer=result.text, region_text=region_text,
167+
chunk_id=region.chunk_id, raw_llm_output=result.text, method="error",
168+
)
159169
idx = _parse_sentence_index(result.text, len(sentences))
160170

161171
if idx < 1:

0 commit comments

Comments (0)