Skip to content

Commit 68b2506

Browse files
unamedkr and claude committed
fix(rlv): pre-experiment stability audit — 4 critical + 3 moderate bugs
Critical:
- lookup.py: NameError on verbose mode (undefined `selected` variable)
- locator.py: IndexError on empty gist.chunks (empty document crash)
- verifier.py: ANSWER_NOISE_TOKENS used substring match — filtered valid names like "Context City" via "text" substring. Now exact match.
- lookup.py: sentence splitter broke on abbreviations (Dr., Mr., J.K.). Now merges fragments after common abbreviations and single-letter initials.

Moderate:
- _llm.py: unified server detection used loose "unified" substring match; now checks Path.name.startswith("quant-server-unified")
- _llm.py: added Phi-3.5-Q4_K_M to CLIFF_BUDGET table
- locator.py: added comments clarifying 1-indexed choice parser logic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3d06909 commit 68b2506

4 files changed

Lines changed: 39 additions & 9 deletions

File tree

bench/rlv/stages/_llm.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
"models/Llama-3.2-3B-Instruct-Q8_0.gguf": 1024,
4444
"models/Llama-3.2-1B-Instruct-Q8_0.gguf": 512,
4545
"models/Phi-3.5-mini-instruct-Q8_0.gguf": 1024,
46+
"models/Phi-3.5-mini-instruct-Q4_K_M.gguf": 1024,
4647
}
4748

4849

@@ -124,7 +125,7 @@ def start_server(
124125
port += 1
125126

126127
# Build command — unified server only supports -p and -j (no -k/-v/-H)
127-
is_unified = "unified" in str(binary)
128+
is_unified = str(Path(binary).name).startswith("quant-server-unified")
128129
if is_unified:
129130
cmd = [str(binary), str(model), "-p", str(port), "-j", str(threads)]
130131
else:

bench/rlv/stages/locator.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,11 @@ def _llm_locate(
343343
result = _llm.llm_call(prompt, max_tokens=8)
344344
if verbose:
345345
print(f"[locator-llm] response: {result.text!r}")
346+
# Parser accepts [0, n_max). Choices are 1-indexed, so n_max = N+1.
347+
# Post-filter: reject 0 (not a valid choice) and > N (out of bounds).
346348
choice = _parse_locator_response(result.text, len(available) + 1)
347349
if choice < 1 or choice > len(available):
348-
return -1
350+
return -1 # parse failure or out-of-range → caller falls back to keyword winner
349351
return available[choice - 1]
350352

351353

@@ -368,6 +370,12 @@ def locate(
368370

369371
available = [c for c in gist.chunks if c.chunk_id not in excluded]
370372
if not available:
373+
if not gist.chunks:
374+
# Empty document — return a dummy pointer
375+
return RegionPointer(
376+
chunk_id=0, confidence="low", method="fallback",
377+
char_start=0, char_end=0, score=0.0,
378+
)
371379
chunk = gist.chunks[0]
372380
return RegionPointer(
373381
chunk_id=0, confidence="low", method="fallback",

bench/rlv/stages/lookup.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,34 @@ class LookupResult:
5454
method: str = "" # "select" | "quote" | "select-fallback"
5555

5656

57-
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
57+
# Common abbreviations that end with a period but aren't sentence endings.
58+
_ABBREVIATIONS = {"dr", "mr", "mrs", "ms", "jr", "sr", "st", "vs", "etc",
59+
"prof", "rev", "gen", "corp", "inc", "ltd", "vol", "no",
60+
"approx", "dept", "est", "govt"}
5861

5962

6063
def _split_into_sentences(text: str) -> List[str]:
61-
"""Split text into sentences. Conservative: snap on period/!? followed
62-
by whitespace. Filters out tiny fragments that aren't real sentences."""
63-
parts = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
64-
return [p for p in parts if len(p) >= 8]
64+
"""Split text into sentences. Snaps on period/!?/whitespace but avoids
65+
splitting on common abbreviations (Dr., Mr., etc.) and single-letter
66+
initials (J. K. Rowling).
67+
Filters out tiny fragments (< 8 chars) that aren't real sentences."""
68+
# Strategy: split on `. ` / `! ` / `? `, then re-join fragments that
69+
# ended with an abbreviation or single letter.
70+
raw = re.split(r"(?<=[.!?])\s+", text)
71+
merged: List[str] = []
72+
for frag in raw:
73+
frag = frag.strip()
74+
if not frag:
75+
continue
76+
if merged:
77+
prev = merged[-1]
78+
# Check if prev ended with an abbreviation or single initial
79+
last_word = prev.rsplit(None, 1)[-1].rstrip(".").lower() if prev else ""
80+
if last_word in _ABBREVIATIONS or (len(last_word) == 1 and last_word.isalpha()):
81+
merged[-1] = prev + " " + frag
82+
continue
83+
merged.append(frag)
84+
return [s for s in merged if len(s) >= 8]
6585

6686

6787
def _parse_sentence_index(text: str, n_sentences: int) -> int:
@@ -170,7 +190,7 @@ def lookup(
170190
window.append(sentences[i])
171191
answer = " ".join(window)
172192
if verbose:
173-
print(f"[lookup] selected sentence {idx}/{len(sentences)}: {selected[:80]!r}")
193+
print(f"[lookup] selected sentence {idx}/{len(sentences)}: {sentences[idx-1][:80]!r}")
174194
return LookupResult(
175195
answer=answer,
176196
region_text=region_text,

bench/rlv/stages/verifier.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ def _extract_answer_key_terms(answer: str) -> tuple[list[str], list[str]]:
7878
key = term.lower()
7979
if key in seen:
8080
continue
81-
if any(noise in key for noise in ANSWER_NOISE_TOKENS):
81+
# Exact word match (not substring) — "text" must not filter "context"
82+
if key in ANSWER_NOISE_TOKENS:
8283
continue
8384
seen.add(key)
8485
word_terms.append(term)

0 commit comments

Comments
 (0)