@@ -165,36 +165,8 @@ def _literal_verify(
165165 if len (answer ) < 200 and any (p in answer_head for p in refusal_phrases ):
166166 return "UNSURE" , f"answer is a refusal ('{ answer [:60 ]} ...')"
167167
168- # Phase A-2: Answer-Question alignment check.
169- # The answer must actually ADDRESS the question type. An answer that
170- # contains region-grounded facts but doesn't answer the specific
171- # question is "related but wrong" — the hardest hallucination to catch.
172- # This is RLV's core differentiator: detecting WRONG answers, not just
173- # fabricated ones.
174- q_lower = question .lower ()
175- answer_norm = answer .lower ()
176-
177- # "When/what year/what date" → answer must contain a year or date
178- if re .search (r'\b(what year|in what year|when did|what date|on what date)\b' , q_lower ):
179- has_year = bool (re .search (r'\b(1[0-9]{3}|20[0-9]{2})\b' , answer ))
180- has_month = bool (re .search (r'\b(january|february|march|april|may|june|july|august|september|october|november|december)\b' , answer .lower ()))
181- if not has_year and not has_month :
182- return "UNSURE" , f"temporal question but answer has no year/date"
183-
184- # "After/before which battle/event" → answer must name a specific event
185- # AND the answer must contain an event-type word (battle, war, etc.)
186- # "They were modernized in 1934" doesn't answer "after which battle?"
187- if re .search (r'\b(which battle|after which battle|what battle|which war|after which war)\b' , q_lower ):
188- event_words = ["battle" , "war" , "rebellion" , "siege" , "campaign" , "invasion" , "attack" , "offensive" ]
189- has_event_word = any (w in answer .lower () for w in event_words )
190- if not has_event_word :
191- return "UNSURE" , f"battle/war question but answer names no battle/war"
192-
193- # "What does X mean" → answer should contain a definition signal
194- if re .search (r'\b(what does|what is the meaning|what does the (?:name|word|term))\b' , q_lower ):
195- has_def = any (w in answer .lower () for w in ["means" , "meaning" , "refers to" , "derived from" , "to cut" , "headed" ])
196- if not has_def and len (answer ) < 150 :
197- return "UNSURE" , f"definition question but answer lacks definition"
168+ # Phase A-2: removed hardcoded type-specific alignment checks.
169+ # The universal LLM coherence check in verify() handles this properly.
198170
199171 word_terms , number_terms = _extract_answer_key_terms (answer )
200172 if not word_terms and not number_terms :
@@ -268,6 +240,54 @@ def verify(
268240 question , answer , region_text ,
269241 chunk_id = chunk_id , gist = gist ,
270242 )
243+ if verdict == "CONFIDENT" :
244+ # Phase A-2: LLM coherence check — the CORE of RLV's value.
245+ # ONE universal prompt, no type-specific branching.
246+ #
247+ # Speed optimization: skip coherence for high-confidence answers.
248+ # - "[NONE]" answers: instant UNSURE (model already said it can't answer)
249+ # - Short specific answers (names, numbers): fast-accept
250+ # - Vague/generic answers: require coherence check
251+ answer_stripped = answer .strip ()
252+
253+ # Self-check: model said NONE → instant UNSURE
254+ if answer_stripped .startswith ("[NONE]" ):
255+ if verbose :
256+ print (f"[verifier] model self-check: NONE → UNSURE" )
257+ return VerifyResult (verdict = "UNSURE" , reason = "model self-check: NONE" , method = "self-check" )
258+
259+ # Fast-accept: short answer with specific entity (name/number/date)
260+ # These are almost always correct when literal check passes
261+ is_specific = (
262+ len (answer_stripped ) < 80 and
263+ (bool (re .search (r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b' , answer_stripped )) or # proper noun
264+ bool (re .search (r'\b\d{3,4}\b' , answer_stripped ))) # year/number
265+ )
266+ if is_specific :
267+ if verbose :
268+ print (f"[verifier] fast-accept: specific answer ({ answer_stripped [:40 ]} )" )
269+ return VerifyResult (verdict = "CONFIDENT" , reason = reason , method = "literal+fast-accept" )
270+
271+ # Generic/vague answer → LLM coherence check required
272+ coherence_prompt = (
273+ f"A user asked: \" { question } \" \n "
274+ f"The system answered: \" { answer [:200 ]} \" \n \n "
275+ f"Is the user's specific question answered? "
276+ f"Not just related information, but the EXACT thing asked. "
277+ f"YES or NO."
278+ )
279+ coherence_result = _llm .llm_call (coherence_prompt , max_tokens = 4 )
280+ coherence_text = coherence_result .text .strip ().lower ()[:10 ]
281+ if verbose :
282+ print (f"[verifier] coherence check: { coherence_text !r} " )
283+ if "no" in coherence_text and "yes" not in coherence_text :
284+ return VerifyResult (
285+ verdict = "UNSURE" ,
286+ reason = f"literal:CONFIDENT but coherence:NO ({ reason } )" ,
287+ method = "literal+coherence" ,
288+ )
289+ return VerifyResult (verdict = "CONFIDENT" , reason = reason , method = "literal+coherence" )
290+
271291 if verdict != "UNSURE" or not use_llm_fallback :
272292 if verbose :
273293 print (f"[verifier] literal -> { verdict } ({ reason } )" )