Skip to content

Commit a203e46

Browse files
committed
support new tasks interface
1 parent f10615f commit a203e46

5 files changed

Lines changed: 18 additions & 61 deletions

File tree

experiments/run_miniwob.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from agentlab.agents.react_toolcall_agent import AgentConfig, LLMArgs, ReactToolCallAgentArgs
1212
from agentlab.agents.tapeagent.agent import TapeAgentArgs, load_config
1313
from agentlab.backends.browser.mcp_playwright import MCPPlaywright
14-
from agentlab.backends.browser.playwright import AsyncPlaywright
14+
from agentlab.backends.browser.playwright import SyncPlaywright
1515
from agentlab.benchmarks.miniwob import MiniWobBenchmark
1616
from agentlab.experiments.study import make_study
1717
from agentlab.llm.chat_api import BaseModelArgs
@@ -54,7 +54,7 @@ def parse_args():
5454
if args.backend == "bgym":
5555
benchmark = DEFAULT_BENCHMARKS["miniwob"](n_repeats=1)
5656
elif args.backend == "playwright":
57-
benchmark = MiniWobBenchmark(backend_cls=AsyncPlaywright)
57+
benchmark = MiniWobBenchmark(backend_cls=SyncPlaywright)
5858
elif args.backend == "mcp":
5959
benchmark = MiniWobBenchmark(backend_cls=MCPPlaywright)
6060
else:

experiments/test_mcp.py

Lines changed: 0 additions & 42 deletions
This file was deleted.

src/agentlab/backends/browser/mcp.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
from mcp import ClientSession, StdioServerParameters, stdio_client
1010
from mcp import Tool as MCPTool
11-
from mcp.types import CallToolResult, ImageContent, TextContent
11+
from mcp.types import CallToolResult, ContentBlock, TextContent
1212

1313
from agentlab.actions import FunctionSpec, ToolCall, ToolSpec
1414
from agentlab.backends.browser.base import BrowserBackend
@@ -24,7 +24,7 @@ def __init__(self, config_path: str, read_timeout_seconds: int = 10) -> None:
2424
self.tool_to_server: dict[str, str] = {}
2525
self.read_timeout_seconds = read_timeout_seconds
2626
self.exit_stack = AsyncExitStack()
27-
self.loop = None
27+
self.loop: asyncio.AbstractEventLoop
2828

2929
def initialize(self):
3030
try:
@@ -125,15 +125,15 @@ def check_tool_exists(self, tool_name):
125125
raise Exception(f"Tool {tool_name} not found in any of the MCP servers")
126126
return server_name
127127

128-
def actions(self) -> tuple[ToolSpec]:
129-
return (
128+
def actions(self) -> list[ToolSpec]:
129+
return [
130130
ToolSpec(
131131
function=FunctionSpec(
132132
name=tool.name, description=tool.description or "", parameters=tool.inputSchema
133133
)
134134
)
135135
for tool in self.tools.values()
136-
)
136+
]
137137

138138
async def aclose(self) -> None:
139139
await self.exit_stack.aclose()
@@ -144,28 +144,28 @@ def close(self) -> None:
144144

145145
class MCPBrowserBackend(BrowserBackend):
146146
config_path: str
147-
_mcp = None
147+
_mcp: MCPClient
148148

149149
def initialize(self) -> None:
150150
self._mcp = MCPClient(config_path=self.config_path)
151151
self._mcp.initialize()
152152

153153
def step(self, action: ToolCall) -> dict:
154154
contents = self.call_tool(action.name, action.arguments)
155-
text = "\n".join([c.text for c in contents if c.type == "text"])
155+
action_result = "\n".join([c.text for c in contents if c.type == "text"])
156156
images = [c for c in contents if c.type == "image"]
157157
return {
158-
"text": text,
158+
"action_result": action_result,
159159
"screenshot": images[-1] if images else None,
160160
}
161161

162-
def call_tool(self, tool_name: str, arguments: dict) -> list[TextContent | ImageContent]:
162+
def call_tool(self, tool_name: str, arguments: dict) -> list[ContentBlock]:
163163
tool_result = self._mcp.call_tool(tool_name, arguments)
164164
if tool_result.isError:
165-
return [TextContent(text=f"Error calling tool {tool_name}")] + tool_result.content
165+
return [TextContent(type="text", text=f"Error calling tool {tool_name}")] + tool_result.content
166166
return tool_result.content
167167

168-
def actions(self) -> tuple[ToolSpec]:
168+
def actions(self) -> list[ToolSpec]:
169169
return list(self._mcp.actions())
170170

171171
def close(self) -> None:

src/agentlab/backends/browser/mcp_playwright.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
class MCPPlaywright(MCPBrowserBackend):
1616
config_path: str = DEFAULT_CONFIG_PATH
1717

18-
def run_js(self, js: str):
18+
def evaluate_js(self, js: str):
1919
contents = self.call_tool("browser_evaluate", {"function": js})
2020
raw_response = "\n".join([c.text for c in contents if c.type == "text"])
2121
try:
@@ -30,14 +30,14 @@ def run_js(self, js: str):
3030
def step(self, action: ToolCall) -> dict:
3131
contents = self.call_tool(action.name, action.arguments)
3232
logger.info(f"Step result has {len(contents)} contents")
33-
tool_result = "\n".join(
33+
action_result = "\n".join(
3434
[c.text for c in contents if c.type == "text" and "# Ran Playwright code" not in c.text]
3535
)
3636
html = self.page_html()
3737
screenshot = self.page_screenshot()
3838
axtree = self.page_axtree()
3939
return {
40-
"tool_result": tool_result,
40+
"action_result": action_result,
4141
"pruned_html": html,
4242
"axtree_txt": axtree,
4343
"screenshot": screenshot,
@@ -60,7 +60,7 @@ def page_axtree(self) -> str:
6060
contents = self.call_tool("browser_snapshot", {})
6161
return "\n".join([c.text for c in contents if c.type == "text"])
6262

63-
def page_screenshot(self) -> Image:
63+
def page_screenshot(self) -> Image.Image:
6464
contents = self.call_tool("browser_take_screenshot", {})
6565
content = [c for c in contents if c.type == "image"][0]
6666
image_base64 = content.data

src/agentlab/benchmarks/miniwob/benchmark.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ def model_post_init(self, __context: Any) -> None:
2828
if self.dataset is None:
2929
self.dataset = get_miniwob_tasks()
3030
for task in self.dataset:
31-
name = f"miniwob.{task.task_id}"
32-
env_args = BrowserEnvArgs(task_name=name, task=task, backend_cls=self.backend_cls)
31+
env_args = BrowserEnvArgs(task=task, backend_cls=self.backend_cls)
3332
self.env_args_list.append(env_args)
3433
logger.info(f"Loaded {len(self.env_args_list)} miniwob tasks")

0 commit comments

Comments
 (0)