@@ -775,84 +775,131 @@ async def _make_refinement_request(
     Returns:
         Tuple of (refined_text or None, LLMResponse)
     """
-    try:
-        # Extract refinement instructions from prompt_options
-        refinement_instructions = prompt_options.get('refinement_instructions', '') if prompt_options else ''
-
-        # Generate refinement prompts
-        prompt_pair = generate_refinement_prompt(
-            draft_translation=draft_translation,
-            context_before=context_before,
-            context_after=context_after,
-            previous_refined_context=previous_refined_context,
-            target_language=target_language,
-            has_placeholders=False,
-            prompt_options=prompt_options,
-            additional_instructions=refinement_instructions
-        )
+    # Extract refinement instructions from prompt_options
+    refinement_instructions = prompt_options.get('refinement_instructions', '') if prompt_options else ''
 
-        # Log the request
-        if log_callback:
-            log_callback("refinement_request", "Sending refinement request to LLM", data={
-                'type': 'refinement_request',
-                'system_prompt': prompt_pair.system,
-                'user_prompt': prompt_pair.user,
-                'model': model
-            })
-
-        start_time = time.time()
-        client = llm_client or default_client
-
-        # Set context from manager if available
-        if context_manager and hasattr(client, 'context_window'):
-            new_ctx = context_manager.get_context_size()
-            if client.context_window != new_ctx:
-                client.context_window = new_ctx
+    # Generate refinement prompts
+    prompt_pair = generate_refinement_prompt(
+        draft_translation=draft_translation,
+        context_before=context_before,
+        context_after=context_after,
+        previous_refined_context=previous_refined_context,
+        target_language=target_language,
+        has_placeholders=False,
+        prompt_options=prompt_options,
+        additional_instructions=refinement_instructions
+    )
 
-        llm_response = await client.make_request(
-            prompt_pair.user, model, system_prompt=prompt_pair.system
-        )
-        execution_time = time.time() - start_time
+    client = llm_client or default_client
+    last_response: Optional[LLMResponse] = None
 
-        if not llm_response:
-            return None, None
+    # Retry loop with adaptive context (mirrors translation logic)
+    while True:
+        try:
+            # Log the request
+            if log_callback:
+                log_callback("refinement_request", "Sending refinement request to LLM", data={
+                    'type': 'refinement_request',
+                    'system_prompt': prompt_pair.system,
+                    'user_prompt': prompt_pair.user,
+                    'model': model
+                })
 
-        full_raw_response = llm_response.content
+            start_time = time.time()
 
-        # Log the response
-        if log_callback:
-            log_callback("refinement_response", "Refinement response received", data={
-                'type': 'refinement_response',
-                'response': full_raw_response,
-                'execution_time': execution_time,
-                'model': model,
-                'tokens': {
-                    'prompt': llm_response.prompt_tokens,
-                    'completion': llm_response.completion_tokens,
-                    'total': llm_response.context_used,
-                    'limit': llm_response.context_limit
-                }
-            })
+            # Set context from manager if available
+            if context_manager and hasattr(client, 'context_window'):
+                new_ctx = context_manager.get_context_size()
+                if client.context_window != new_ctx:
+                    if log_callback:
+                        log_callback("context_update",
+                                     f"📐 Refinement context window: {client.context_window} → {new_ctx}")
+                    client.context_window = new_ctx
 
-        # Extract refined text
-        refined_text = client.extract_translation(full_raw_response)
+            llm_response = await client.make_request(
+                prompt_pair.user, model, system_prompt=prompt_pair.system
+            )
+            execution_time = time.time() - start_time
 
-        if refined_text:
-            return refined_text, llm_response
-        else:
-            # Fallback to raw response if no tags found
-            if draft_translation not in full_raw_response:
-                return full_raw_response.strip(), llm_response
+            if not llm_response:
+                return None, None
+
+            last_response = llm_response
+
+            # Check if we should retry with larger context (adaptive strategy)
+            if context_manager and llm_response.was_truncated:
+                if context_manager.should_retry_with_larger_context(
+                    llm_response.was_truncated, llm_response.context_used
+                ):
+                    context_manager.increase_context()
+                    continue  # Retry with larger context
+
+            full_raw_response = llm_response.content
+
+            # Log the response
+            if log_callback:
+                log_callback("refinement_response", "Refinement response received", data={
+                    'type': 'refinement_response',
+                    'response': full_raw_response,
+                    'execution_time': execution_time,
+                    'model': model,
+                    'tokens': {
+                        'prompt': llm_response.prompt_tokens,
+                        'completion': llm_response.completion_tokens,
+                        'total': llm_response.context_used,
+                        'limit': llm_response.context_limit
+                    }
+                })
+
+            # Extract refined text
+            refined_text = client.extract_translation(full_raw_response)
+
+            if refined_text:
+                return refined_text, llm_response
             else:
+                # Fallback to raw response if no tags found
+                if draft_translation not in full_raw_response:
+                    return full_raw_response.strip(), llm_response
+                else:
+                    if log_callback:
+                        log_callback("refinement_warning",
+                                     "WARNING: Refinement response contains input. Using original.")
+                    return None, llm_response
+
+        except RepetitionLoopError as e:
+            # Repetition loop detected - try increasing context (double increase)
+            if context_manager:
+                old_context = context_manager.get_context_size()
+                context_manager.increase_context()
+                context_manager.increase_context()  # Double increase for repetition loops
+                new_context = context_manager.get_context_size()
+
+                if new_context > old_context:
+                    if log_callback:
+                        log_callback("refinement_repetition_retry",
+                                     f"🔄 Refinement repetition loop! Increasing context from {old_context} to {new_context} tokens")
+                    continue  # Retry with larger context
+
+            # No context manager or can't increase further
+            if log_callback:
+                log_callback("refinement_error",
+                             f"⚠️ Refinement repetition loop, cannot recover: {e}")
+            return None, last_response
+
+        except ContextOverflowError as e:
+            # Context overflow - try increasing context
+            if context_manager and context_manager.should_retry_with_larger_context(True, 0):
+                context_manager.increase_context()
                 if log_callback:
-                    log_callback("refinement_warning",
-                                 "WARNING: Refinement response contains input. Using original.")
-                return None, llm_response
+                    log_callback("refinement_overflow_retry",
+                                 f"⚠️ Refinement context overflow! Retrying with context {context_manager.get_context_size()}")
+                continue  # Retry with larger context
 
-    except (ContextOverflowError, RepetitionLoopError) as e:
-        if log_callback:
-            log_callback("refinement_error", f"Refinement error: {e}")
-        return None, None
+            # Can't increase further
+            if log_callback:
+                log_callback("refinement_error",
+                             f"⚠️ Refinement context overflow, cannot recover: {e}")
+            return None, last_response
 
 
 async def refine_chunks(
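
Note on the adaptive-context retry pattern: the new refinement loop relies on three context-manager calls that also appear in the translation path (`get_context_size`, `increase_context`, `should_retry_with_larger_context`). The sketch below is not the project's actual ContextManager; the starting size, ceiling, and doubling strategy are illustrative assumptions meant only to show how the `continue` and fall-through branches above behave once the window can no longer grow.

```python
# Hypothetical stand-in for the context manager used by the retry loop above.
# Starting size, ceiling, and doubling factor are assumptions, not project values.
from dataclasses import dataclass


@dataclass
class AdaptiveContextManager:
    current: int = 4096       # assumed starting context window, in tokens
    maximum: int = 32768      # assumed hard ceiling
    growth_factor: int = 2    # assumed: double the window on each increase

    def get_context_size(self) -> int:
        return self.current

    def increase_context(self) -> None:
        # Grow toward the ceiling; becomes a no-op once the cap is reached.
        self.current = min(self.current * self.growth_factor, self.maximum)

    def should_retry_with_larger_context(self, was_truncated: bool, context_used: int) -> bool:
        # Retry only when the response was cut off and there is still room to grow.
        return was_truncated and self.current < self.maximum
```

Under a contract like this, each `continue` re-issues the same refinement prompt after `increase_context()`; once `current` reaches `maximum`, `should_retry_with_larger_context` returns False and the handlers fall through to the `return None, last_response` branches.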