Skip to content

Commit 823b7d9

Browse files
committed
feat: add tool calling support to m serve
Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
1 parent fdddf8c commit 823b7d9

6 files changed

Lines changed: 750 additions & 10 deletions

File tree

cli/serve/app.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
import asyncio
44
import importlib.util
55
import inspect
6+
import json
67
import os
78
import sys
89
import time
910
import uuid
11+
from typing import Literal
1012

1113
import typer
1214
import uvicorn
@@ -19,11 +21,13 @@
1921
from .models import (
2022
ChatCompletion,
2123
ChatCompletionMessage,
24+
ChatCompletionMessageToolCall,
2225
ChatCompletionRequest,
2326
Choice,
2427
CompletionUsage,
2528
OpenAIError,
2629
OpenAIErrorResponse,
30+
ToolCallFunction,
2731
)
2832

2933
app = FastAPI(
@@ -104,13 +108,13 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
104108
"response_format", # Response format (json_object) - not yet implemented
105109
"functions", # Legacy function calling - not yet implemented
106110
"function_call", # Legacy function calling - not yet implemented
107-
"tools", # Tool calling - not yet implemented
108-
"tool_choice", # Tool choice - not yet implemented
111+
# Tool choice is passed through as-is (not a ModelOption sentinel)
109112
}
110113
openai_to_model_option = {
111114
"temperature": ModelOption.TEMPERATURE,
112115
"max_tokens": ModelOption.MAX_NEW_TOKENS,
113116
"seed": ModelOption.SEED,
117+
"tools": ModelOption.TOOLS,
114118
}
115119

116120
filtered_options = {
@@ -172,6 +176,35 @@ async def endpoint(request: ChatCompletionRequest):
172176
total_tokens=total_tokens,
173177
)
174178

179+
# Extract tool calls from the ModelOutputThunk if available
180+
tool_calls = None
181+
finish_reason: Literal[
182+
"stop", "length", "content_filter", "tool_calls", "function_call"
183+
] = "stop"
184+
if (
185+
hasattr(output, "tool_calls")
186+
and output.tool_calls is not None
187+
and isinstance(output.tool_calls, dict)
188+
):
189+
tool_calls = []
190+
for tool_name, model_tool_call in output.tool_calls.items():
191+
# Generate a unique ID for this tool call
192+
tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
193+
194+
# Serialize the arguments to JSON string
195+
args_json = json.dumps(model_tool_call.args)
196+
197+
tool_calls.append(
198+
ChatCompletionMessageToolCall(
199+
id=tool_call_id,
200+
type="function",
201+
function=ToolCallFunction(
202+
name=model_tool_call.name, arguments=args_json
203+
),
204+
)
205+
)
206+
finish_reason = "tool_calls"
207+
175208
# system_fingerprint represents backend config hash, not model name
176209
# The model name is already in response.model (line 73)
177210
# Leave as None since we don't track backend config fingerprints yet
@@ -185,9 +218,11 @@ async def endpoint(request: ChatCompletionRequest):
185218
Choice(
186219
index=0,
187220
message=ChatCompletionMessage(
188-
content=output.value, role="assistant"
221+
content=output.value,
222+
role="assistant",
223+
tool_calls=tool_calls,
189224
),
190-
finish_reason="stop",
225+
finish_reason=finish_reason,
191226
)
192227
],
193228
object="chat.completion", # type: ignore

cli/serve/models.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,29 @@ class ChatCompletionRequest(BaseModel):
6262
extra: dict[str, Any] = Field(default_factory=dict)
6363

6464

class ToolCallFunction(BaseModel):
    """Function details for a single tool call.

    Attributes:
        name: The name of the function to call.
        arguments: The arguments to call the function with, encoded as a
            JSON string (per the OpenAI chat-completions wire format).
    """

    name: str

    arguments: str
class ChatCompletionMessageToolCall(BaseModel):
    """A single tool call generated by the model.

    Attributes:
        id: The unique ID of the tool call (echoed back by clients in
            subsequent "tool" role messages).
        type: The tool type; only "function" is currently supported.
        function: The function the model chose to call, with its arguments.
    """

    id: str

    type: Literal["function"]

    function: ToolCallFunction
6588
# Taking this from OpenAI types https://github.com/openai/openai-python/blob/main/src/openai/types/chat/chat_completion.py,
6689
class ChatCompletionMessage(BaseModel):
6790
content: str | None = None
@@ -73,6 +96,9 @@ class ChatCompletionMessage(BaseModel):
7396
role: Literal["assistant"]
7497
"""The role of the author of this message."""
7598

99+
tool_calls: list[ChatCompletionMessageToolCall] | None = None
100+
"""The tool calls generated by the model, such as function calls."""
101+
76102

77103
class Choice(BaseModel):
78104
index: int
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
"""Client example for testing tool calling with m serve.
2+
3+
This script demonstrates how to interact with an m serve server
4+
that supports tool calling using the OpenAI-compatible API.
5+
6+
Usage:
7+
1. Start the server:
8+
uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py
9+
10+
2. Run this client:
11+
uv run python docs/examples/m_serve/client_tool_calling.py
12+
"""
13+
14+
import json
15+
16+
import requests
17+
18+
# Server configuration
19+
BASE_URL = "http://localhost:8080"
20+
ENDPOINT = f"{BASE_URL}/v1/chat/completions"
21+
22+
# Define tools in OpenAI format
23+
tools = [
24+
{
25+
"type": "function",
26+
"function": {
27+
"name": "get_weather",
28+
"description": "Get the current weather in a given location",
29+
"parameters": {
30+
"type": "object",
31+
"properties": {
32+
"location": {
33+
"type": "string",
34+
"description": "The city name, e.g. San Francisco",
35+
},
36+
"units": {
37+
"type": "string",
38+
"enum": ["celsius", "fahrenheit"],
39+
"description": "Temperature units",
40+
},
41+
},
42+
"required": ["location"],
43+
},
44+
},
45+
},
46+
{
47+
"type": "function",
48+
"function": {
49+
"name": "calculator",
50+
"description": "Evaluate a mathematical expression",
51+
"parameters": {
52+
"type": "object",
53+
"properties": {
54+
"expression": {
55+
"type": "string",
56+
"description": "The mathematical expression to evaluate",
57+
}
58+
},
59+
"required": ["expression"],
60+
},
61+
},
62+
},
63+
]
64+
65+
def make_request(messages: list[dict], tools: list[dict] | None = None) -> dict:
    """POST a chat-completion request to the m serve API.

    Args:
        messages: Conversation messages in OpenAI chat format.
        tools: Optional tool definitions; when provided, ``tool_choice`` is
            set to "auto" so the server decides whether to call a tool.

    Returns:
        The parsed JSON response body from the API.
    """
    body = {
        "model": "gpt-3.5-turbo",  # Model name (not used by m serve)
        "messages": messages,
        "temperature": 0.7,
    }

    if tools:
        body["tools"] = tools
        body["tool_choice"] = "auto"

    resp = requests.post(ENDPOINT, json=body, timeout=30)
    resp.raise_for_status()
    return resp.json()
90+
def _report_choice(choice: dict) -> None:
    """Print a choice's finish reason, then its tool calls or text content."""
    print(f"\nFinish Reason: {choice['finish_reason']}")
    if choice.get("message", {}).get("tool_calls"):
        print("\nTool Calls:")
        for tool_call in choice["message"]["tool_calls"]:
            func = tool_call["function"]
            args = json.loads(func["arguments"])
            print(f"  - {func['name']}({json.dumps(args)})")
    else:
        print(f"Assistant: {choice['message']['content']}")


def _simulate_tool(name: str, args: dict) -> str:
    """Return a canned result standing in for real tool execution."""
    if name == "get_weather":
        return f"The weather in {args['location']} is sunny and 22°C"
    return "Tool result"


def main():
    """Run example tool calling interactions."""
    print("=" * 60)
    print("Tool Calling Example with m serve")
    print("=" * 60)

    # Example 1: Request that should trigger weather tool
    print("\n1. Weather Query")
    print("-" * 60)
    messages = [{"role": "user", "content": "What's the weather like in Tokyo?"}]
    print(f"User: {messages[0]['content']}")
    _report_choice(make_request(messages, tools=tools)["choices"][0])

    # Example 2: Request that should trigger calculator tool
    print("\n\n2. Math Query")
    print("-" * 60)
    messages = [{"role": "user", "content": "What is 15 * 23 + 7?"}]
    print(f"User: {messages[0]['content']}")
    _report_choice(make_request(messages, tools=tools)["choices"][0])

    # Example 3: Request without tools (normal chat)
    print("\n\n3. Normal Chat (No Tools)")
    print("-" * 60)
    messages = [{"role": "user", "content": "Hello! How are you?"}]
    print(f"User: {messages[0]['content']}")
    choice = make_request(messages, tools=None)["choices"][0]
    print(f"\nFinish Reason: {choice['finish_reason']}")
    print(f"Assistant: {choice['message']['content']}")

    # Example 4: Multi-turn conversation with tool use
    print("\n\n4. Multi-turn Conversation")
    print("-" * 60)
    messages = [{"role": "user", "content": "What's the weather in Paris?"}]
    print(f"User: {messages[0]['content']}")
    response = make_request(messages, tools=tools)
    assistant_message = response["choices"][0]["message"]

    if assistant_message.get("tool_calls"):
        print("\nAssistant requested tool calls:")
        # Record the assistant turn once; previously this was entangled with
        # the per-call loop, so with multiple tool calls only the last loop
        # iteration's leaked variables would have been used.
        messages.append(
            {
                "role": "assistant",
                "content": assistant_message.get("content"),
                "tool_calls": assistant_message["tool_calls"],
            }
        )
        # Answer every requested tool call with a simulated result.
        for tool_call in assistant_message["tool_calls"]:
            func = tool_call["function"]
            args = json.loads(func["arguments"])
            print(f"  - {func['name']}({json.dumps(args)})")
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call["id"],
                    "content": _simulate_tool(func["name"], args),
                }
            )

        # Get final response after tool execution
        print("\nGetting final response after tool execution...")
        choice = make_request(messages, tools=tools)["choices"][0]
        print(f"Assistant: {choice['message']['content']}")

    print("\n" + "=" * 60)
    print("Examples completed!")
    print("=" * 60)
199+
if __name__ == "__main__":
    try:
        main()
    except requests.exceptions.ConnectionError:
        # The most common failure mode: the server was never started.
        print("Error: Could not connect to server.")
        print("Make sure the server is running:")
        print("  uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py")
    except Exception as e:
        # Demo script: surface any other failure briefly instead of a traceback.
        print(f"Error: {e}")

0 commit comments

Comments
 (0)