fix(rlv): pre-experiment stability audit — 4 critical + 3 moderate bugs

unamedkr · claude · unamedkr · commit 68b25069d568 · 2026-04-12T20:55:42.000+09:00
Critical:
- lookup.py: NameError on verbose mode (undefined `selected` variable)
- locator.py: IndexError on empty gist.chunks (empty document crash)
- verifier.py: ANSWER_NOISE_TOKENS used substring match — filtered
  valid names like "Context City" via "text" substring. Now exact match.
- lookup.py: sentence splitter broke on abbreviations (Dr., Mr.) and on
  space-separated initials (J. K.). Now merges fragments after common
  abbreviations and single-letter initials.

Moderate:
- _llm.py: unified server detection used a loose "unified" substring match
  on the full binary path; now checks
  Path(binary).name.startswith("quant-server-unified")
- _llm.py: added Phi-3.5-mini-instruct-Q4_K_M to CLIFF_BUDGET table
- locator.py: added comments clarifying 1-indexed choice parser logic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/bench/rlv/stages/_llm.py b/bench/rlv/stages/_llm.py
@@ -43,6 +43,7 @@
     "models/Llama-3.2-3B-Instruct-Q8_0.gguf": 1024,
     "models/Llama-3.2-1B-Instruct-Q8_0.gguf": 512,
     "models/Phi-3.5-mini-instruct-Q8_0.gguf": 1024,
+    "models/Phi-3.5-mini-instruct-Q4_K_M.gguf": 1024,
 }
 
 
@@ -124,7 +125,7 @@ def start_server(
         port += 1
 
     # Build command — unified server only supports -p and -j (no -k/-v/-H)
-    is_unified = "unified" in str(binary)
+    is_unified = str(Path(binary).name).startswith("quant-server-unified")
     if is_unified:
         cmd = [str(binary), str(model), "-p", str(port), "-j", str(threads)]
     else:
diff --git a/bench/rlv/stages/locator.py b/bench/rlv/stages/locator.py
@@ -343,9 +343,11 @@ def _llm_locate(
     result = _llm.llm_call(prompt, max_tokens=8)
     if verbose:
         print(f"[locator-llm] response: {result.text!r}")
+    # Parser accepts [0, n_max). Choices are 1-indexed, so n_max = N+1.
+    # Post-filter: reject 0 (not a valid choice) and > N (out of bounds).
     choice = _parse_locator_response(result.text, len(available) + 1)
     if choice < 1 or choice > len(available):
-        return -1
+        return -1  # parse failure or out-of-range → caller falls back to keyword winner
     return available[choice - 1]
 
 
@@ -368,6 +370,12 @@ def locate(
 
     available = [c for c in gist.chunks if c.chunk_id not in excluded]
     if not available:
+        if not gist.chunks:
+            # Empty document — return a dummy pointer
+            return RegionPointer(
+                chunk_id=0, confidence="low", method="fallback",
+                char_start=0, char_end=0, score=0.0,
+            )
         chunk = gist.chunks[0]
         return RegionPointer(
             chunk_id=0, confidence="low", method="fallback",
diff --git a/bench/rlv/stages/lookup.py b/bench/rlv/stages/lookup.py
@@ -54,14 +54,34 @@ class LookupResult:
     method: str = ""  # "select" | "quote" | "select-fallback"
 
 
-_SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
+# Common abbreviations that end with a period but aren't sentence endings.
+_ABBREVIATIONS = {"dr", "mr", "mrs", "ms", "jr", "sr", "st", "vs", "etc",
+                  "prof", "rev", "gen", "corp", "inc", "ltd", "vol", "no",
+                  "approx", "dept", "est", "govt"}
 
 
 def _split_into_sentences(text: str) -> List[str]:
-    """Split text into sentences. Conservative: snap on period/!? followed
-    by whitespace. Filters out tiny fragments that aren't real sentences."""
-    parts = [s.strip() for s in _SENTENCE_SPLIT_RE.split(text) if s.strip()]
-    return [p for p in parts if len(p) >= 8]
+    """Split text into sentences. Snaps on period/!?/whitespace but avoids
+    splitting on common abbreviations (Dr., Mr., etc.) and single-letter
+    initials (J. K. Rowling).
+    Filters out tiny fragments (< 8 chars) that aren't real sentences."""
+    # Strategy: split on `. ` / `! ` / `? `, then re-join fragments that
+    # ended with an abbreviation or single letter.
+    raw = re.split(r"(?<=[.!?])\s+", text)
+    merged: List[str] = []
+    for frag in raw:
+        frag = frag.strip()
+        if not frag:
+            continue
+        if merged:
+            prev = merged[-1]
+            # Check if prev ended with an abbreviation or single initial
+            last_word = prev.rsplit(None, 1)[-1].rstrip(".").lower() if prev else ""
+            if last_word in _ABBREVIATIONS or (len(last_word) == 1 and last_word.isalpha()):
+                merged[-1] = prev + " " + frag
+                continue
+        merged.append(frag)
+    return [s for s in merged if len(s) >= 8]
 
 
 def _parse_sentence_index(text: str, n_sentences: int) -> int:
@@ -170,7 +190,7 @@ def lookup(
             window.append(sentences[i])
     answer = " ".join(window)
     if verbose:
-        print(f"[lookup] selected sentence {idx}/{len(sentences)}: {selected[:80]!r}")
+        print(f"[lookup] selected sentence {idx}/{len(sentences)}: {sentences[idx-1][:80]!r}")
     return LookupResult(
         answer=answer,
         region_text=region_text,
diff --git a/bench/rlv/stages/verifier.py b/bench/rlv/stages/verifier.py
@@ -78,7 +78,8 @@ def _extract_answer_key_terms(answer: str) -> tuple[list[str], list[str]]:
         key = term.lower()
         if key in seen:
             continue
-        if any(noise in key for noise in ANSWER_NOISE_TOKENS):
+        # Exact word match (not substring) — "text" must not filter "context"
+        if key in ANSWER_NOISE_TOKENS:
             continue
         seen.add(key)
         word_terms.append(term)