Skip to content

Commit 823b7d9

Browse files
committed
feat: add tool calling support to m serve
Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
1 parent fdddf8c commit 823b7d9

6 files changed

Lines changed: 750 additions & 10 deletions

File tree

cli/serve/app.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
import asyncio
44
import importlib.util
55
import inspect
6+
import json
67
import os
78
import sys
89
import time
910
import uuid
11+
from typing import Literal
1012

1113
import typer
1214
import uvicorn
@@ -19,11 +21,13 @@
1921
from .models import (
2022
ChatCompletion,
2123
ChatCompletionMessage,
24+
ChatCompletionMessageToolCall,
2225
ChatCompletionRequest,
2326
Choice,
2427
CompletionUsage,
2528
OpenAIError,
2629
OpenAIErrorResponse,
30+
ToolCallFunction,
2731
)
2832

2933
app = FastAPI(
@@ -104,13 +108,13 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
104108
"response_format", # Response format (json_object) - not yet implemented
105109
"functions", # Legacy function calling - not yet implemented
106110
"function_call", # Legacy function calling - not yet implemented
107-
"tools", # Tool calling - not yet implemented
108-
"tool_choice", # Tool choice - not yet implemented
111+
# Tool choice is passed through as-is (not a ModelOption sentinel)
109112
}
110113
openai_to_model_option = {
111114
"temperature": ModelOption.TEMPERATURE,
112115
"max_tokens": ModelOption.MAX_NEW_TOKENS,
113116
"seed": ModelOption.SEED,
117+
"tools": ModelOption.TOOLS,
114118
}
115119

116120
filtered_options = {
@@ -172,6 +176,35 @@ async def endpoint(request: ChatCompletionRequest):
172176
total_tokens=total_tokens,
173177
)
174178

179+
# Extract tool calls from the ModelOutputThunk if available
180+
tool_calls = None
181+
finish_reason: Literal[
182+
"stop", "length", "content_filter", "tool_calls", "function_call"
183+
] = "stop"
184+
if (
185+
hasattr(output, "tool_calls")
186+
and output.tool_calls is not None
187+
and isinstance(output.tool_calls, dict)
188+
):
189+
tool_calls = []
190+
for tool_name, model_tool_call in output.tool_calls.items():
191+
# Generate a unique ID for this tool call
192+
tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
193+
194+
# Serialize the arguments to JSON string
195+
args_json = json.dumps(model_tool_call.args)
196+
197+
tool_calls.append(
198+
ChatCompletionMessageToolCall(
199+
id=tool_call_id,
200+
type="function",
201+
function=ToolCallFunction(
202+
name=model_tool_call.name, arguments=args_json
203+
),
204+
)
205+
)
206+
finish_reason = "tool_calls"
207+
175208
# system_fingerprint represents backend config hash, not model name
176209
# The model name is already in response.model (line 73)
177210
# Leave as None since we don't track backend config fingerprints yet
@@ -185,9 +218,11 @@ async def endpoint(request: ChatCompletionRequest):
185218
Choice(
186219
index=0,
187220
message=ChatCompletionMessage(
188-
content=output.value, role="assistant"
221+
content=output.value,
222+
role="assistant",
223+
tool_calls=tool_calls,
189224
),
190-
finish_reason="stop",
225+
finish_reason=finish_reason,
191226
)
192227
],
193228
object="chat.completion", # type: ignore

cli/serve/models.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,29 @@ class ChatCompletionRequest(BaseModel):
6262
extra: dict[str, Any] = Field(default_factory=dict)
6363

6464

class ToolCallFunction(BaseModel):
    """Function details for a single tool call.

    Attributes:
        name: The name of the function to call.
        arguments: The arguments to call the function with, encoded as a
            JSON string (per the OpenAI chat-completions wire format).
    """

    name: str

    arguments: str
class ChatCompletionMessageToolCall(BaseModel):
    """A single tool call generated by the model.

    Attributes:
        id: The unique ID of the tool call (echoed back by clients in
            subsequent "tool" role messages).
        type: The tool type; only "function" is currently supported.
        function: The function the model chose to call, with its arguments.
    """

    id: str

    type: Literal["function"]

    function: ToolCallFunction
6588
# Taking this from OpenAI types https://github.com/openai/openai-python/blob/main/src/openai/types/chat/chat_completion.py,
6689
class ChatCompletionMessage(BaseModel):
6790
content: str | None = None
@@ -73,6 +96,9 @@ class ChatCompletionMessage(BaseModel):
7396
role: Literal["assistant"]
7497
"""The role of the author of this message."""
7598

99+
tool_calls: list[ChatCompletionMessageToolCall] | None = None
100+
"""The tool calls generated by the model, such as function calls."""
101+
76102

77103
class Choice(BaseModel):
78104
index: int
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
"""Client example for testing tool calling with m serve.
2+
3+
This script demonstrates how to interact with an m serve server
4+
that supports tool calling using the OpenAI-compatible API.
5+
6+
Usage:
7+
1. Start the server:
8+
uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py
9+
10+
2. Run this client:
11+
uv run python docs/examples/m_serve/client_tool_calling.py
12+
"""
13+
14+
import json
15+
16+
import requests
17+
18+
# Server configuration
19+
BASE_URL = "http://localhost:8080"
20+
ENDPOINT = f"{BASE_URL}/v1/chat/completions"
21+
22+
# Define tools in OpenAI format
23+
tools = [
24+
{
25+
"type": "function",
26+
"function": {
27+
"name": "get_weather",
28+
"description": "Get the current weather in a given location",
29+
"parameters": {
30+
"type": "object",
31+
"properties": {
32+
"location": {
33+
"type": "string",
34+
"description": "The city name, e.g. San Francisco",
35+
},
36+
"units": {
37+
"type": "string",
38+
"enum": ["celsius", "fahrenheit"],
39+
"description": "Temperature units",
40+
},
41+
},
42+
"required": ["location"],
43+
},
44+
},
45+
},
46+
{
47+
"type": "function",
48+
"function": {
49+
"name": "calculator",
50+
"description": "Evaluate a mathematical expression",
51+
"parameters": {
52+
"type": "object",
53+
"properties": {
54+
"expression": {
55+
"type": "string",
56+
"description": "The mathematical expression to evaluate",
57+
}
58+
},
59+
"required": ["expression"],
60+
},
61+
},
62+
},
63+
]
64+
65+
def make_request(messages: list[dict], tools: list[dict] | None = None) -> dict:
    """POST a chat-completion request to the m serve API.

    Args:
        messages: Conversation messages in OpenAI chat format.
        tools: Optional tool definitions; when provided, ``tool_choice`` is
            set to "auto" so the server decides whether to call a tool.

    Returns:
        The parsed JSON response body from the API.
    """
    body = {
        "model": "gpt-3.5-turbo",  # Model name (not used by m serve)
        "messages": messages,
        "temperature": 0.7,
    }

    if tools:
        body["tools"] = tools
        body["tool_choice"] = "auto"

    resp = requests.post(ENDPOINT, json=body, timeout=30)
    resp.raise_for_status()
    return resp.json()
90+
def _report_choice(choice: dict) -> None:
    """Print a choice's finish reason, then its tool calls or text content."""
    print(f"\nFinish Reason: {choice['finish_reason']}")
    if choice.get("message", {}).get("tool_calls"):
        print("\nTool Calls:")
        for tool_call in choice["message"]["tool_calls"]:
            func = tool_call["function"]
            args = json.loads(func["arguments"])
            print(f"  - {func['name']}({json.dumps(args)})")
    else:
        print(f"Assistant: {choice['message']['content']}")


def _simulate_tool(name: str, args: dict) -> str:
    """Return a canned result standing in for real tool execution."""
    if name == "get_weather":
        return f"The weather in {args['location']} is sunny and 22°C"
    return "Tool result"


def main():
    """Run example tool calling interactions."""
    print("=" * 60)
    print("Tool Calling Example with m serve")
    print("=" * 60)

    # Example 1: Request that should trigger weather tool
    print("\n1. Weather Query")
    print("-" * 60)
    messages = [{"role": "user", "content": "What's the weather like in Tokyo?"}]
    print(f"User: {messages[0]['content']}")
    _report_choice(make_request(messages, tools=tools)["choices"][0])

    # Example 2: Request that should trigger calculator tool
    print("\n\n2. Math Query")
    print("-" * 60)
    messages = [{"role": "user", "content": "What is 15 * 23 + 7?"}]
    print(f"User: {messages[0]['content']}")
    _report_choice(make_request(messages, tools=tools)["choices"][0])

    # Example 3: Request without tools (normal chat)
    print("\n\n3. Normal Chat (No Tools)")
    print("-" * 60)
    messages = [{"role": "user", "content": "Hello! How are you?"}]
    print(f"User: {messages[0]['content']}")
    choice = make_request(messages, tools=None)["choices"][0]
    print(f"\nFinish Reason: {choice['finish_reason']}")
    print(f"Assistant: {choice['message']['content']}")

    # Example 4: Multi-turn conversation with tool use
    print("\n\n4. Multi-turn Conversation")
    print("-" * 60)
    messages = [{"role": "user", "content": "What's the weather in Paris?"}]
    print(f"User: {messages[0]['content']}")
    response = make_request(messages, tools=tools)
    assistant_message = response["choices"][0]["message"]

    if assistant_message.get("tool_calls"):
        print("\nAssistant requested tool calls:")
        # Record the assistant turn once; previously this was entangled with
        # the per-call loop, so with multiple tool calls only the last loop
        # iteration's leaked variables would have been used.
        messages.append(
            {
                "role": "assistant",
                "content": assistant_message.get("content"),
                "tool_calls": assistant_message["tool_calls"],
            }
        )
        # Answer every requested tool call with a simulated result.
        for tool_call in assistant_message["tool_calls"]:
            func = tool_call["function"]
            args = json.loads(func["arguments"])
            print(f"  - {func['name']}({json.dumps(args)})")
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call["id"],
                    "content": _simulate_tool(func["name"], args),
                }
            )

        # Get final response after tool execution
        print("\nGetting final response after tool execution...")
        choice = make_request(messages, tools=tools)["choices"][0]
        print(f"Assistant: {choice['message']['content']}")

    print("\n" + "=" * 60)
    print("Examples completed!")
    print("=" * 60)
199+
if __name__ == "__main__":
    try:
        main()
    except requests.exceptions.ConnectionError:
        # The most common failure mode: the server was never started.
        print("Error: Could not connect to server.")
        print("Make sure the server is running:")
        print("  uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py")
    except Exception as e:
        # Demo script: surface any other failure briefly instead of a traceback.
        print(f"Error: {e}")

0 commit comments

Comments
 (0)