
Commit 0a6981f

[BugFix] Fix inference_start_time (#4922) (#4930)
* fix inference_start_time
* fix inference_start_time

1 parent: 2926bf6

1 file changed

fastdeploy/entrypoints/openai/serving_chat.py (3 additions & 2 deletions)
```diff
@@ -192,6 +192,7 @@ async def chat_completion_stream_generator(
         num_cached_tokens = 0
         num_image_tokens = [0] * num_choices
         tool_called = [False] * num_choices
+        inference_start_time = [0] * num_choices
         max_streaming_response_tokens = (
             request.max_streaming_response_tokens
             if request.max_streaming_response_tokens is not None
@@ -268,9 +269,9 @@ async def chat_completion_stream_generator(

                 if res["metrics"]["first_token_time"] is not None:
                     arrival_time = res["metrics"]["first_token_time"]
-                    inference_start_time = res["metrics"]["inference_start_time"]
+                    inference_start_time[idx] = res["metrics"]["inference_start_time"]
                 else:
-                    arrival_time = res["metrics"]["arrival_time"] - inference_start_time
+                    arrival_time = res["metrics"]["arrival_time"] - inference_start_time[idx]
                 if first_iteration:
                     num_prompt_tokens = len(prompt_token_ids)
                     num_cached_tokens = res.get("num_cached_tokens", 0)
```
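
The bug: `inference_start_time` was a single scalar shared by every choice in the stream. With more than one choice, each choice's first token overwrote the shared value, so later `arrival_time` offsets for the other choices were measured against the wrong start. The commit replaces the scalar with a per-choice list indexed by `idx`. The sketch below reproduces the failure mode outside FastDeploy; the helper names, event tuples, and timestamps are invented for illustration, not taken from the real streaming loop.

```python
def arrival_times_scalar(events):
    """Buggy variant: one scalar start time shared across all choices."""
    inference_start_time = 0
    out = []
    for idx, metrics in events:
        if metrics["first_token_time"] is not None:
            arrival_time = metrics["first_token_time"]
            # Overwritten by every choice, not just this one.
            inference_start_time = metrics["inference_start_time"]
        else:
            arrival_time = metrics["arrival_time"] - inference_start_time
        out.append((idx, arrival_time))
    return out


def arrival_times_per_choice(events, num_choices):
    """Fixed variant: one start time per choice, as in this commit."""
    inference_start_time = [0] * num_choices
    out = []
    for idx, metrics in events:
        if metrics["first_token_time"] is not None:
            arrival_time = metrics["first_token_time"]
            inference_start_time[idx] = metrics["inference_start_time"]
        else:
            arrival_time = metrics["arrival_time"] - inference_start_time[idx]
        out.append((idx, arrival_time))
    return out


# Hypothetical timeline: choice 0 starts at t=10.0, choice 1 at t=12.0;
# later tokens carry only an arrival_time.
events = [
    (0, {"first_token_time": 0.5, "inference_start_time": 10.0, "arrival_time": 10.5}),
    (1, {"first_token_time": 0.7, "inference_start_time": 12.0, "arrival_time": 12.7}),
    (0, {"first_token_time": None, "arrival_time": 11.0}),
]

# Scalar version subtracts choice 1's start from choice 0's token:
print(arrival_times_scalar(events))          # [(0, 0.5), (1, 0.7), (0, -1.0)] <- negative latency
print(arrival_times_per_choice(events, 2))   # [(0, 0.5), (1, 0.7), (0, 1.0)]
```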
