File: fastdeploy/entrypoints/openai
Columns: original file line number | diff line number | diff line change
@@ -192,6 +192,7 @@ async def chat_completion_stream_generator(
192 192      num_cached_tokens = 0
193 193      num_image_tokens = [0] * num_choices
194 194      tool_called = [False] * num_choices
    195 +    inference_start_time = [0] * num_choices
195 196      max_streaming_response_tokens = (
196 197          request.max_streaming_response_tokens
197 198          if request.max_streaming_response_tokens is not None
@@ -268,9 +269,9 @@ async def chat_completion_stream_generator(
268 269
269 270      if res["metrics"]["first_token_time"] is not None:
270 271          arrival_time = res["metrics"]["first_token_time"]
271     -        inference_start_time = res["metrics"]["inference_start_time"]
    272 +        inference_start_time[idx] = res["metrics"]["inference_start_time"]
272 273      else:
273     -        arrival_time = res["metrics"]["arrival_time"] - inference_start_time
    274 +        arrival_time = res["metrics"]["arrival_time"] - inference_start_time[idx]
274 275      if first_iteration:
275 276          num_prompt_tokens = len(prompt_token_ids)
276 277          num_cached_tokens = res.get("num_cached_tokens", 0)
You can’t perform that action at this time.
0 commit comments