Skip to content

Commit e4a4913

Browse files
hydropix and claude committed
fix: add adaptive context retry loop to refinement phase (#122)
The refinement phase was failing silently on context overflow instead of retrying with a larger context like the translation phase does. Now refinement mirrors translation behavior: it retries on ContextOverflowError, RepetitionLoopError, and truncated responses by progressively increasing the context window. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 58a2155 commit e4a4913

1 file changed

Lines changed: 115 additions & 68 deletions

File tree

src/core/translator.py

Lines changed: 115 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -775,84 +775,131 @@ async def _make_refinement_request(
775775
Returns:
776776
Tuple of (refined_text or None, LLMResponse)
777777
"""
778-
try:
779-
# Extract refinement instructions from prompt_options
780-
refinement_instructions = prompt_options.get('refinement_instructions', '') if prompt_options else ''
781-
782-
# Generate refinement prompts
783-
prompt_pair = generate_refinement_prompt(
784-
draft_translation=draft_translation,
785-
context_before=context_before,
786-
context_after=context_after,
787-
previous_refined_context=previous_refined_context,
788-
target_language=target_language,
789-
has_placeholders=False,
790-
prompt_options=prompt_options,
791-
additional_instructions=refinement_instructions
792-
)
778+
# Extract refinement instructions from prompt_options
779+
refinement_instructions = prompt_options.get('refinement_instructions', '') if prompt_options else ''
793780

794-
# Log the request
795-
if log_callback:
796-
log_callback("refinement_request", "Sending refinement request to LLM", data={
797-
'type': 'refinement_request',
798-
'system_prompt': prompt_pair.system,
799-
'user_prompt': prompt_pair.user,
800-
'model': model
801-
})
802-
803-
start_time = time.time()
804-
client = llm_client or default_client
805-
806-
# Set context from manager if available
807-
if context_manager and hasattr(client, 'context_window'):
808-
new_ctx = context_manager.get_context_size()
809-
if client.context_window != new_ctx:
810-
client.context_window = new_ctx
781+
# Generate refinement prompts
782+
prompt_pair = generate_refinement_prompt(
783+
draft_translation=draft_translation,
784+
context_before=context_before,
785+
context_after=context_after,
786+
previous_refined_context=previous_refined_context,
787+
target_language=target_language,
788+
has_placeholders=False,
789+
prompt_options=prompt_options,
790+
additional_instructions=refinement_instructions
791+
)
811792

812-
llm_response = await client.make_request(
813-
prompt_pair.user, model, system_prompt=prompt_pair.system
814-
)
815-
execution_time = time.time() - start_time
793+
client = llm_client or default_client
794+
last_response: Optional[LLMResponse] = None
816795

817-
if not llm_response:
818-
return None, None
796+
# Retry loop with adaptive context (mirrors translation logic)
797+
while True:
798+
try:
799+
# Log the request
800+
if log_callback:
801+
log_callback("refinement_request", "Sending refinement request to LLM", data={
802+
'type': 'refinement_request',
803+
'system_prompt': prompt_pair.system,
804+
'user_prompt': prompt_pair.user,
805+
'model': model
806+
})
819807

820-
full_raw_response = llm_response.content
808+
start_time = time.time()
821809

822-
# Log the response
823-
if log_callback:
824-
log_callback("refinement_response", "Refinement response received", data={
825-
'type': 'refinement_response',
826-
'response': full_raw_response,
827-
'execution_time': execution_time,
828-
'model': model,
829-
'tokens': {
830-
'prompt': llm_response.prompt_tokens,
831-
'completion': llm_response.completion_tokens,
832-
'total': llm_response.context_used,
833-
'limit': llm_response.context_limit
834-
}
835-
})
810+
# Set context from manager if available
811+
if context_manager and hasattr(client, 'context_window'):
812+
new_ctx = context_manager.get_context_size()
813+
if client.context_window != new_ctx:
814+
if log_callback:
815+
log_callback("context_update",
816+
f"📐 Refinement context window: {client.context_window} → {new_ctx}")
817+
client.context_window = new_ctx
836818

837-
# Extract refined text
838-
refined_text = client.extract_translation(full_raw_response)
819+
llm_response = await client.make_request(
820+
prompt_pair.user, model, system_prompt=prompt_pair.system
821+
)
822+
execution_time = time.time() - start_time
839823

840-
if refined_text:
841-
return refined_text, llm_response
842-
else:
843-
# Fallback to raw response if no tags found
844-
if draft_translation not in full_raw_response:
845-
return full_raw_response.strip(), llm_response
824+
if not llm_response:
825+
return None, None
826+
827+
last_response = llm_response
828+
829+
# Check if we should retry with larger context (adaptive strategy)
830+
if context_manager and llm_response.was_truncated:
831+
if context_manager.should_retry_with_larger_context(
832+
llm_response.was_truncated, llm_response.context_used
833+
):
834+
context_manager.increase_context()
835+
continue # Retry with larger context
836+
837+
full_raw_response = llm_response.content
838+
839+
# Log the response
840+
if log_callback:
841+
log_callback("refinement_response", "Refinement response received", data={
842+
'type': 'refinement_response',
843+
'response': full_raw_response,
844+
'execution_time': execution_time,
845+
'model': model,
846+
'tokens': {
847+
'prompt': llm_response.prompt_tokens,
848+
'completion': llm_response.completion_tokens,
849+
'total': llm_response.context_used,
850+
'limit': llm_response.context_limit
851+
}
852+
})
853+
854+
# Extract refined text
855+
refined_text = client.extract_translation(full_raw_response)
856+
857+
if refined_text:
858+
return refined_text, llm_response
846859
else:
860+
# Fallback to raw response if no tags found
861+
if draft_translation not in full_raw_response:
862+
return full_raw_response.strip(), llm_response
863+
else:
864+
if log_callback:
865+
log_callback("refinement_warning",
866+
"WARNING: Refinement response contains input. Using original.")
867+
return None, llm_response
868+
869+
except RepetitionLoopError as e:
870+
# Repetition loop detected - try increasing context (double increase)
871+
if context_manager:
872+
old_context = context_manager.get_context_size()
873+
context_manager.increase_context()
874+
context_manager.increase_context() # Double increase for repetition loops
875+
new_context = context_manager.get_context_size()
876+
877+
if new_context > old_context:
878+
if log_callback:
879+
log_callback("refinement_repetition_retry",
880+
f"🔄 Refinement repetition loop! Increasing context from {old_context} to {new_context} tokens")
881+
continue # Retry with larger context
882+
883+
# No context manager or can't increase further
884+
if log_callback:
885+
log_callback("refinement_error",
886+
f"⚠️ Refinement repetition loop, cannot recover: {e}")
887+
return None, last_response
888+
889+
except ContextOverflowError as e:
890+
# Context overflow - try increasing context
891+
if context_manager and context_manager.should_retry_with_larger_context(True, 0):
892+
context_manager.increase_context()
847893
if log_callback:
848-
log_callback("refinement_warning",
849-
"WARNING: Refinement response contains input. Using original.")
850-
return None, llm_response
894+
log_callback("refinement_overflow_retry",
895+
f"⚠️ Refinement context overflow! Retrying with context {context_manager.get_context_size()}")
896+
continue # Retry with larger context
851897

852-
except (ContextOverflowError, RepetitionLoopError) as e:
853-
if log_callback:
854-
log_callback("refinement_error", f"Refinement error: {e}")
855-
return None, None
898+
# Can't increase further
899+
if log_callback:
900+
log_callback("refinement_error",
901+
f"⚠️ Refinement context overflow, cannot recover: {e}")
902+
return None, last_response
856903

857904

858905
async def refine_chunks(

0 commit comments

Comments (0)