Skip to content

Commit bdaabf0

Browse files
authored
[Cherry-Pick][Speculative Decoding] Return accepted tokens per head in response (#5947) (#5952)
* adjust log level
* add accepted tokens per head
1 parent f12b7a7 commit bdaabf0

2 files changed

Lines changed: 11 additions & 4 deletions

File tree

fastdeploy/output/token_processor.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -549,15 +549,17 @@ def _compute_speculative_status(self, result: RequestOutput):
549549
rejected_tokens=req_rejected_tokens,
550550
accept_ratio=req_accept_ratio,
551551
average_accept_length=req_avg_accept_length,
552+
accepted_tokens_per_head=accept_num_list[: self.cfg.speculative_config.num_speculative_tokens + 1],
552553
accept_ratio_per_head=accept_ratio_per_head[: self.cfg.speculative_config.num_speculative_tokens],
553554
)
554555

555556
# Log
556-
spec_logger.debug(
557+
spec_logger.info(
557558
f"req_id: {result.request_id}, total_step: {req_total_step}, "
558-
f"accept_ratio: {accept_ratio}, average_accept_lenght: {req_avg_accept_length},"
559-
f"accepted_tokens: {req_accepted_tokens}, rejected_tokens: {req_rejected_tokens}"
560-
f"accept_ratio_per_head: {accept_ratio_per_head}"
559+
f"accept_ratio: {accept_ratio}, average_accept_length: {req_avg_accept_length}, "
560+
f"accepted_tokens: {req_accepted_tokens}, rejected_tokens: {req_rejected_tokens}, "
561+
f"accepted_tokens_per_head: {accept_num_list[: self.cfg.speculative_config.num_speculative_tokens + 1]}, "
562+
f"accept_ratio_per_head: {accept_ratio_per_head[: self.cfg.speculative_config.num_speculative_tokens]}"
561563
)
562564

563565
# Clear request record

fastdeploy/worker/output.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,11 @@ class SpeculateMetrics:
154154
"""
155155
average_accept_length: float
156156

157+
"""
158+
The number of accepted tokens of each head in the current request
159+
"""
160+
accepted_tokens_per_head: list[int]
161+
157162
"""
158163
Average acceptance rate of each head in the current request
159164
"""

0 commit comments

Comments
 (0)