Commit 417261b
fix: make system_fingerprint consistent in m serve

Added streaming support for setting system_fingerprint, so the field is now handled the same way on the streaming and non-streaming paths. It is currently always set to None, but the plumbing is consistent for future use.

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>

1 parent ca5205e · commit 417261b

3 files changed: 74 additions & 5 deletions

cli/serve/app.py

Lines changed: 6 additions & 5 deletions
```diff
@@ -161,6 +161,11 @@ async def endpoint(request: ChatCompletionRequest):
         model_options=model_options,
     )
 
+    # system_fingerprint represents backend config hash, not model name
+    # The model name is already in response.model (line 73)
+    # Leave as None since we don't track backend config fingerprints yet
+    system_fingerprint = None
+
     # Handle streaming response
     if request.stream:
         return StreamingResponse(
@@ -170,15 +175,11 @@ async def endpoint(request: ChatCompletionRequest):
                 model=request.model,
                 created=created_timestamp,
                 stream_options=request.stream_options,
+                system_fingerprint=system_fingerprint,
             ),
             media_type="text/event-stream",
         )
 
-    # system_fingerprint represents backend config hash, not model name
-    # The model name is already in response.model (line 73)
-    # Leave as None since we don't track backend config fingerprints yet
-    system_fingerprint = None
-
     return ChatCompletion(
         id=completion_id,
         model=request.model,
```
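The comment notes that system_fingerprint is reserved for a hash of backend configuration. As a rough sketch of what could eventually replace the None (a hypothetical helper, not part of this commit; the config keys are invented):

```python
import hashlib
import json


def backend_config_fingerprint(config: dict) -> str:
    """Hypothetical helper: derive a stable fingerprint from backend config.

    Not part of this commit; it only illustrates what might replace the
    None value if backend configuration were tracked some day. The "fp_"
    prefix mirrors the style OpenAI uses for its published fingerprints.
    """
    canonical = json.dumps(config, sort_keys=True).encode("utf-8")
    return "fp_" + hashlib.sha256(canonical).hexdigest()[:12]


# Two servers running identical backend config would report the same value,
# which is what makes the field useful for detecting config drift.
print(backend_config_fingerprint({"backend": "ollama", "model": "granite"}))
```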

mellea/helpers/openai_compatible_helpers.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -252,6 +252,7 @@ async def stream_chat_completion_chunks(
     model: str,
     created: int,
     stream_options: dict[str, Any] | None = None,
+    system_fingerprint: str | None = None,
 ) -> AsyncGenerator[str, None]:
     """Generate OpenAI-compatible SSE chat completion chunks from a model output.
 
@@ -263,6 +264,8 @@ async def stream_chat_completion_chunks(
         stream_options: OpenAI-compatible streaming options. Currently supports
             ``include_usage`` (bool) to control whether usage stats are included
             in the final chunk. Defaults to including usage when available.
+        system_fingerprint: Backend configuration fingerprint to include in chunks.
+            Defaults to ``None``.
 
     Yields:
         Server-sent event payload strings representing OpenAI-compatible chat
@@ -289,6 +292,7 @@ async def stream_chat_completion_chunks(
             )
         ],
         object="chat.completion.chunk",
+        system_fingerprint=system_fingerprint,
     )
     yield f"data: {initial_chunk.model_dump_json()}\n\n"
 
@@ -310,6 +314,7 @@ async def stream_chat_completion_chunks(
             )
         ],
         object="chat.completion.chunk",
+        system_fingerprint=system_fingerprint,
     )
     yield f"data: {chunk.model_dump_json()}\n\n"
 
@@ -333,6 +338,7 @@ async def stream_chat_completion_chunks(
             )
         ],
         object="chat.completion.chunk",
+        system_fingerprint=system_fingerprint,
     )
     yield f"data: {final_chunk.model_dump_json()}\n\n"
```

test/cli/test_serve_streaming.py

Lines changed: 62 additions & 0 deletions
```diff
@@ -628,6 +628,68 @@ async def mock_astream():
         assert last_chunk["usage"] is not None
         assert last_chunk["usage"]["total_tokens"] == 8
 
+    @pytest.mark.asyncio
+    async def test_streaming_system_fingerprint_always_none(
+        self, mock_module, streaming_request
+    ):
+        """Test that system_fingerprint is None in all streaming chunks.
+
+        Per OpenAI spec, system_fingerprint represents a hash of backend config,
+        not the model name. The model name is in chunk.model.
+        We don't track backend config fingerprints yet, so it should be None.
+        """
+        mock_output = ModelOutputThunk(None)
+        mock_output._computed = False
+        mock_output._generate_type = mock_output._generate_type.ASYNC
+
+        chunks = ["Hello", " world"]
+
+        async def mock_astream():
+            if chunks:
+                chunk = chunks.pop(0)
+                if not chunks:
+                    mock_output._computed = True
+                return chunk
+            mock_output._computed = True
+            return ""
+
+        mock_output.astream = mock_astream
+        mock_output.is_computed = lambda: mock_output._computed
+        mock_output.usage = {
+            "prompt_tokens": 5,
+            "completion_tokens": 2,
+            "total_tokens": 7,
+        }
+        mock_module.serve.return_value = mock_output
+
+        # Create test app
+        app = FastAPI()
+        app.add_api_route(
+            "/v1/chat/completions", make_chat_endpoint(mock_module), methods=["POST"]
+        )
+        client = TestClient(app)
+
+        # Make streaming request
+        response = client.post(
+            "/v1/chat/completions", json=streaming_request.model_dump(mode="json")
+        )
+
+        assert response.status_code == 200
+
+        # Parse all chunks
+        events = []
+        for line in response.text.strip().split("\n\n"):
+            if line.startswith("data: "):
+                data = line[6:]
+                if data != "[DONE]":
+                    events.append(json.loads(data))
+
+        # All chunks should have system_fingerprint as None
+        for chunk in events:
+            assert chunk["system_fingerprint"] is None
+            # Model name should be in the model field
+            assert chunk["model"] == "test-model"
+
     @pytest.mark.asyncio
     async def test_stream_options_ignored_for_non_streaming(self, mock_module):
         """Test that stream_options is ignored when stream=False (usage always included)."""
```
