@@ -464,6 +464,31 @@ def _record_completion_metrics(self, task, current_time):
464464 main_process_metrics .request_inference_time .observe (current_time - task .inference_start_time )
465465 main_process_metrics .request_generation_tokens .observe (self .tokens_counter [task .request_id ])
466466
def clear_data(self):
    """Emit a finished result for every slot whose stop flag is unset, then recycle it.

    When ``envs.ENABLE_V1_KVCACHE_SCHEDULER`` is truthy, the resource
    manager's own ``clear_data()`` is invoked first. Afterwards every slot
    index in ``range(self.cfg.max_num_seqs)`` is inspected: slots whose
    entry in ``resource_manager.stop_flags`` is truthy are skipped; for the
    remaining ones a ``RequestOutput`` marked ``finished=True`` is built
    (carrying the task's ``eos_token_ids`` as its token ids) and passed to
    ``self._recycle_resources``. A warning is logged per recycled task.
    """
    if envs.ENABLE_V1_KVCACHE_SCHEDULER:
        self.resource_manager.clear_data()

    for slot in range(self.cfg.max_num_seqs):
        manager = self.resource_manager
        if manager.stop_flags[slot]:
            # Stop flag already set for this slot: nothing to clear here.
            continue

        task = manager.tasks_list[slot]
        request_id = task.request_id

        # Terminal output for the request: the task's EOS token ids are sent
        # as the final tokens, with no draft tokens attached.
        completion = CompletionOutput(
            index=slot,
            send_idx=self.tokens_counter[request_id],
            token_ids=task.eos_token_ids,
            draft_token_ids=[],
        )
        # NOTE(review): arrival_time is stamped with "now" while the task's
        # own arrival time goes into request_start_time - kept as written.
        request_metrics = RequestMetrics(
            arrival_time=time.time(),
            request_start_time=task.arrival_time,
        )
        result = RequestOutput(
            request_id=request_id,
            outputs=completion,
            finished=True,
            metrics=request_metrics,
        )

        # A task counts as prefill only when it carries disaggregation info
        # whose "role" entry equals "prefill".
        disaggregate_info = task.disaggregate_info
        if disaggregate_info is None:
            is_prefill = False
        else:
            is_prefill = disaggregate_info["role"] == "prefill"

        self._recycle_resources(request_id, slot, task, result, is_prefill)
        llm_logger.warning(f"clear data for task {task.request_id}")
491+
467492 def _record_speculative_decoding_mertics (self , accept_num ):
468493 """Record metrics of speculative decoding"""
469494 if not hasattr (main_process_metrics , "spec_decode_draft_acceptance_rate" ):
0 commit comments