Skip to content

Commit d5b6499

Browse files
remove debug log
1 parent e52155f commit d5b6499

1 file changed: 0 additions and 20 deletions

File tree

llm/server/server/engine/infer.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ def __init__(self, args):
6969
self.init_inputs()
7070

7171
# whether use speculate decoding
72-
logger.info(f'speculate_method: {self.config.speculate_method}')
7372
if self.config.speculate_method is not None:
7473
if self.config.speculate_method == "inference_with_reference":
7574
self.proposer = InferenceWithReferenceProposer(
@@ -279,7 +278,6 @@ def init_inputs(self):
279278
self.share_inputs["ori_seq_lens_encoder"] = paddle.full(
280279
shape=[self.args.max_batch_size, 1], fill_value=0, dtype="int32")
281280
# speculate decoding input
282-
logger.info(f'Speculative method: {self.config.speculate_method}')
283281
if self.config.speculate_method is not None:
284282
self.share_inputs["accept_tokens"] = paddle.full(
285283
shape=[self.args.max_batch_size, self.model_cfg["speculate_max_draft_token_num"] + 1], fill_value=0, dtype="int64"
@@ -512,34 +510,16 @@ def run(self):
512510
continue
513511

514512
if self.proposer is not None:
515-
logger.info("start run proposer")
516-
logger.info(f'before draft_tokens: {self.share_inputs["draft_tokens"]}')
517-
518513
self.proposer.run(
519514
self.share_inputs,
520515
real_batch_size=self.args.max_batch_size,
521516
seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
522517
)
523-
logger.info(f'after draft_tokens: {self.share_inputs["draft_tokens"]}')
524-
logger.info("finish run proposer")
525-
# logger.info(f'input_ids: {self.share_inputs["input_ids"]}')
526-
# logger.info(f'input_ids_cpu: {self.share_inputs["input_ids_cpu"]}')
527-
# logger.info(f'seq_lens_this_time: {self.share_inputs["seq_lens_this_time"]}')
528-
# logger.info(f'seq_lens_encoder: {self.share_inputs["seq_lens_encoder"]}')
529-
# logger.info(f'seq_lens_decoder: {self.share_inputs["seq_lens_decoder"]}')
530-
# logger.info(f'step_idx: {self.share_inputs["step_idx"]}')
531-
# logger.info(f'next_tokens: {self.share_inputs["next_tokens"]}')
532-
# logger.info(f'before block_tables: {self.share_inputs["block_tables"]}')
533518

534519
self.infer_engine.predictor.run()
535-
logger.info(f'after accept_tokens: {self.share_inputs["accept_tokens"]}')
536-
logger.info(f'after accept_num: {self.share_inputs["accept_num"]}')
537-
# logger.info(f'after block_tables: {self.share_inputs["block_tables"]}')
538-
539520
self.share_inputs['infer_seed'].add_(infer_seed_increment)
540521
self.share_inputs['infer_seed'][:] %= self.MAX_INFER_SEED
541522
if self.free_list_len > 0:
542-
logger.info(f'free_list_len > 0')
543523
self.step_cuda(seq_lens_this_time)
544524

545525

0 commit comments

Comments (0)