@@ -69,7 +69,6 @@ def __init__(self, args):
6969 self .init_inputs ()
7070
7171 # whether use speculate decoding
72- logger .info (f'speculate_method: { self .config .speculate_method } ' )
7372 if self .config .speculate_method is not None :
7473 if self .config .speculate_method == "inference_with_reference" :
7574 self .proposer = InferenceWithReferenceProposer (
@@ -279,7 +278,6 @@ def init_inputs(self):
279278 self .share_inputs ["ori_seq_lens_encoder" ] = paddle .full (
280279 shape = [self .args .max_batch_size , 1 ], fill_value = 0 , dtype = "int32" )
281280 # speculate decoding input
282- logger .info (f'Speculative method: { self .config .speculate_method } ' )
283281 if self .config .speculate_method is not None :
284282 self .share_inputs ["accept_tokens" ] = paddle .full (
285283 shape = [self .args .max_batch_size , self .model_cfg ["speculate_max_draft_token_num" ] + 1 ], fill_value = 0 , dtype = "int64"
@@ -512,34 +510,16 @@ def run(self):
512510 continue
513511
514512 if self .proposer is not None :
515- logger .info ("start run proposer" )
516- logger .info (f'before draft_tokens: { self .share_inputs ["draft_tokens" ]} ' )
517-
518513 self .proposer .run (
519514 self .share_inputs ,
520515 real_batch_size = self .args .max_batch_size ,
521516 seq_lens_this_time = self .share_inputs ["seq_lens_this_time" ],
522517 )
523- logger .info (f'after draft_tokens: { self .share_inputs ["draft_tokens" ]} ' )
524- logger .info ("finish run proposer" )
525- # logger.info(f'input_ids: {self.share_inputs["input_ids"]}')
526- # logger.info(f'input_ids_cpu: {self.share_inputs["input_ids_cpu"]}')
527- # logger.info(f'seq_lens_this_time: {self.share_inputs["seq_lens_this_time"]}')
528- # logger.info(f'seq_lens_encoder: {self.share_inputs["seq_lens_encoder"]}')
529- # logger.info(f'seq_lens_decoder: {self.share_inputs["seq_lens_decoder"]}')
530- # logger.info(f'step_idx: {self.share_inputs["step_idx"]}')
531- # logger.info(f'next_tokens: {self.share_inputs["next_tokens"]}')
532- # logger.info(f'before block_tables: {self.share_inputs["block_tables"]}')
533518
534519 self .infer_engine .predictor .run ()
535- logger .info (f'after accept_tokens: { self .share_inputs ["accept_tokens" ]} ' )
536- logger .info (f'after accept_num: { self .share_inputs ["accept_num" ]} ' )
537- # logger.info(f'after block_tables: {self.share_inputs["block_tables"]}')
538-
539520 self .share_inputs ['infer_seed' ].add_ (infer_seed_increment )
540521 self .share_inputs ['infer_seed' ][:] %= self .MAX_INFER_SEED
541522 if self .free_list_len > 0 :
542- logger .info (f'free_list_len > 0' )
543523 self .step_cuda (seq_lens_this_time )
544524
545525
0 commit comments