Skip to content

Commit 362de79

Browse files
committed
async playwright backend
1 parent a203e46 commit 362de79

1 file changed

Lines changed: 160 additions & 58 deletions

File tree

src/agentlab/backends/browser/playwright.py

Lines changed: 160 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,29 @@
1-
import asyncio
21
import logging
32
from io import BytesIO
43
from typing import Any, Callable
54

65
from PIL import Image
7-
from playwright.async_api import Browser, Page, async_playwright
6+
from playwright.async_api import Page as AsyncPage
7+
from playwright.async_api import async_playwright
8+
from playwright.sync_api import Page as SyncPage
9+
from playwright.sync_api import sync_playwright
810

911
from agentlab.actions import ToolCall, ToolSpec
10-
from agentlab.backends.browser.base import BrowserBackend
12+
from agentlab.backends.browser.base import AsyncBrowserBackend, BrowserBackend
1113

1214
logger = logging.getLogger(__name__)
1315

1416

15-
class AsyncPlaywright(BrowserBackend):
17+
_pw = None # Global Playwright instance for SyncPlaywright
18+
_browser = None # Global Browser instance for SyncPlaywright
19+
20+
21+
class SyncPlaywright(BrowserBackend):
22+
"""Fully synchronous Playwright backend using playwright.sync_api."""
23+
24+
has_pw_page: bool = True
1625
_actions: dict[str, Callable]
17-
_loop: asyncio.AbstractEventLoop
18-
_browser: Browser
19-
_page: Page
26+
_page: SyncPage
2027

2128
def model_post_init(self, __context: Any):
2229
self._actions = {
@@ -29,37 +36,139 @@ def model_post_init(self, __context: Any):
2936
"browser_mouse_click_xy": self.browser_mouse_click_xy,
3037
}
3138

32-
def initialize(self, loop: asyncio.AbstractEventLoop | None = None):
33-
self._loop = loop or asyncio.get_event_loop()
34-
self._loop.run_until_complete(self.ainitialize())
39+
def initialize(self):
    """Open a fresh page for this backend, starting Playwright and Chromium on first use.

    The Playwright driver and the browser are kept in the module-level
    ``_pw`` and ``_browser`` globals and are only created while those are
    still ``None``; the page is created on every call.
    """
    global _pw, _browser
    # Reuse the shared driver/browser when an earlier call already created them.
    _pw = sync_playwright().start() if _pw is None else _pw
    _browser = (
        _pw.chromium.launch(headless=True, chromium_sandbox=True)
        if _browser is None
        else _browser
    )
    self._page = _browser.new_page()
46+
47+
@property
def page(self) -> SyncPage:
    """The Playwright page stored on this backend by ``initialize``."""
    current_page: SyncPage = self._page
    return current_page
50+
51+
def browser_press_key(self, key: str):
    """Press a key on the keyboard."""
    # The key goes through the page-level keyboard, not to a specific selector.
    keyboard = self._page.keyboard
    keyboard.press(key)
54+
55+
def browser_type(self, selector: str, text: str):
    """Type text into the element matching the selector.

    Args:
        selector: Selector of the element that receives the text.
        text: Text to type into that element.
    """
    # The target is chosen by `selector`, not by whichever element currently
    # has focus, so the description above must say so.
    self._page.type(selector, text)
58+
59+
def browser_click(self, selector: str):
    """Click on a selector."""
    # strict=True: the selector has to resolve to a single element.
    page = self._page
    page.click(selector, timeout=3000, strict=True)
62+
63+
def browser_drag(self, from_selector: str, to_selector: str):
    """Drag and drop from one selector to another."""
    page = self._page

    # Press the mouse button while hovering over the source element ...
    source = page.locator(from_selector)
    source.hover(timeout=500)
    page.mouse.down()

    # ... then move over the target element and release it there.
    target = page.locator(to_selector)
    target.hover(timeout=500)
    page.mouse.up()
72+
73+
def browser_hover(self, selector: str):
    """Hover over a given element."""
    # strict=True: the selector has to resolve to a single element.
    page = self._page
    page.hover(selector, timeout=3000, strict=True)
76+
77+
def browser_select_option(self, selector: str, value: str):
    """Select an option from a given element."""
    # `selector` picks the element, `value` the option to choose in it.
    page = self._page
    page.select_option(selector, value)
80+
81+
def browser_mouse_click_xy(self, x: int, y: int):
    """Click at a given x, y coordinate using the mouse."""
    # Coordinate-based click: no selector is involved.
    mouse = self._page.mouse
    mouse.click(x, y, delay=100)
84+
85+
def evaluate_js(self, js: str):
    """Evaluate ``js`` in the page, log the result at INFO level and return it."""
    outcome = self._page.evaluate(js)
    logger.info(f"JS result: {outcome}")
    return outcome
89+
90+
def goto(self, url: str):
    """Navigate this backend's page to ``url``."""
    page = self._page
    page.goto(url)
92+
93+
def page_html(self) -> str:
    """Return the page content as reported by Playwright's ``Page.content()``."""
    markup = self._page.content()
    return markup
95+
96+
def page_screenshot(self) -> Image.Image:
    """Take a screenshot of the page and return it as a PIL image."""
    # Playwright hands back encoded image bytes; wrap them so PIL can read them.
    buffer = BytesIO(self._page.screenshot())
    return Image.open(buffer)
99+
100+
def page_axtree(self) -> str:
    """Return the page's accessibility snapshot rendered by ``flatten_axtree``."""
    snapshot = self._page.accessibility.snapshot()
    flattened = flatten_axtree(snapshot)
    return flattened
103+
104+
def step(self, action: ToolCall) -> dict:
    """Execute one tool call and return the page observation that follows it.

    Failures are reported as text instead of being raised: both an action
    name that is not registered in ``self._actions`` and an exception raised
    while the action runs end up as an error message under
    ``"action_result"``. The observation is collected either way.

    Args:
        action: Tool call whose ``name`` selects the action and whose
            ``arguments`` are passed to it as keyword arguments.

    Returns:
        A dict with the keys ``"action_result"`` (the action's return value
        or an error message), ``"pruned_html"``, ``"axtree_txt"`` and
        ``"screenshot"``.
    """
    handler = self._actions.get(action.name)
    if handler is None:
        # An unregistered name used to escape as a bare KeyError; report it
        # through the same channel as any other failed action instead.
        action_result = f"Error executing action {action.name}: unknown action"
        logger.error(action_result)
    else:
        try:
            action_result = handler(**action.arguments)
        except Exception as e:
            # Best effort by design: the caller gets the error text plus a
            # fresh observation rather than an exception.
            action_result = f"Error executing action {action.name}: {e}"
            logger.error(action_result)

    html = self.page_html()
    screenshot = self.page_screenshot()
    axtree = self.page_axtree()
    return {
        "action_result": action_result,
        "pruned_html": html,
        "axtree_txt": axtree,
        "screenshot": screenshot,
    }
120+
121+
def actions(self) -> list[ToolSpec]:
    """Build one ToolSpec per registered action, in registration order."""
    specs = []
    for handler in self._actions.values():
        specs.append(ToolSpec.from_function(handler))
    return specs
123+
124+
def close(self):
    """Close this backend's page."""
    # Only the page is closed here; the module-level browser is not touched.
    page = self._page
    page.close()
126+
127+
128+
_apw = None # Global Playwright instance for AsyncPlaywright
129+
_abrowser = None # Global Browser instance for AsyncPlaywright
130+
35131

36-
async def ainitialize(self):
37-
pw = await async_playwright().start()
38-
self._browser = await pw.chromium.launch(headless=True, chromium_sandbox=True)
39-
self._page = await self._browser.new_page()
132+
class AsyncPlaywright(AsyncBrowserBackend):
133+
"""Fully asynchronous Playwright backend using playwright.async_api."""
134+
135+
has_pw_page: bool = False
136+
_actions: dict[str, Callable]
137+
_page: AsyncPage
138+
139+
def model_post_init(self, __context: Any):
    """Build the action-name to bound-method table stored in ``self._actions``."""
    # Listed in the order the actions are registered.
    action_names = (
        "browser_press_key",
        "browser_type",
        "browser_click",
        "browser_drag",
        "browser_hover",
        "browser_select_option",
        "browser_mouse_click_xy",
    )
    self._actions = {name: getattr(self, name) for name in action_names}
149+
150+
async def initialize(self):
    """Open a fresh page for this backend, starting Playwright and Chromium on first use.

    The Playwright driver and the browser are kept in the module-level
    ``_apw`` and ``_abrowser`` globals and are only created while those are
    still ``None``; the page is created on every call.
    """
    global _apw, _abrowser
    # Reuse the shared driver/browser when an earlier call already created them.
    _apw = (await async_playwright().start()) if _apw is None else _apw
    _abrowser = (
        (await _apw.chromium.launch(headless=True, chromium_sandbox=True))
        if _abrowser is None
        else _abrowser
    )
    self._page = await _abrowser.new_page()
40157

41158
async def browser_press_key(self, key: str):
    """Press a key on the keyboard."""
    # The key goes through the page-level keyboard, not to a specific selector.
    keyboard = self._page.keyboard
    await keyboard.press(key)
46161

47162
async def browser_type(self, selector: str, text: str):
    """Type text into the element matching the selector.

    Args:
        selector: Selector of the element that receives the text.
        text: Text to type into that element.
    """
    # The target is chosen by `selector`, not by whichever element currently
    # has focus, so the description above must say so.
    await self._page.type(selector, text)
52165

53166
async def browser_click(self, selector: str):
    """Click on a selector."""
    # strict=True: the selector has to resolve to a single element.
    page = self._page
    await page.click(selector, timeout=3000, strict=True)
58169

59170
async def browser_drag(self, from_selector: str, to_selector: str):
60-
"""
61-
Drag and drop from one selector to another.
62-
"""
171+
"""Drag and drop from one selector to another."""
63172
from_elem = self._page.locator(from_selector)
64173
await from_elem.hover(timeout=500)
65174
await self._page.mouse.down()
@@ -69,66 +178,59 @@ async def browser_drag(self, from_selector: str, to_selector: str):
69178
await self._page.mouse.up()
70179

71180
async def browser_hover(self, selector: str):
    """Hover over a given element."""
    # strict=True: the selector has to resolve to a single element.
    page = self._page
    await page.hover(selector, timeout=3000, strict=True)
76183

77184
async def browser_select_option(self, selector: str, value: str):
    """Select an option from a given element."""
    # `selector` picks the element, `value` the option to choose in it.
    page = self._page
    await page.select_option(selector, value)
82187

83188
async def browser_mouse_click_xy(self, x: int, y: int):
    """Click at a given x, y coordinate using the mouse."""
    # Coordinate-based click: no selector is involved.
    mouse = self._page.mouse
    await mouse.click(x, y, delay=100)
88191

89-
def run_js(self, js: str):
90-
js_result = self._loop.run_until_complete(self._page.evaluate(js))
192+
async def evaluate_js(self, js: str):
    """Evaluate ``js`` in the page, log the result at INFO level and return it."""
    outcome = await self._page.evaluate(js)
    logger.info(f"JS result: {outcome}")
    return outcome
93196

94-
def goto(self, url: str):
95-
self._loop.run_until_complete(self._page.goto(url))
197+
async def goto(self, url: str):
    """Navigate this backend's page to ``url``."""
    page = self._page
    await page.goto(url)
96199

97-
def page_html(self):
98-
return self._loop.run_until_complete(self._page.content())
200+
async def page_html(self) -> str:
    """Return the page content as reported by Playwright's ``Page.content()``."""
    markup = await self._page.content()
    return markup
99202

100-
def page_screenshot(self):
101-
scr_bytes = self._loop.run_until_complete(self._page.screenshot())
203+
async def page_screenshot(self) -> Image.Image:
    """Take a screenshot of the page and return it as a PIL image."""
    # Playwright hands back encoded image bytes; wrap them so PIL can read them.
    raw = await self._page.screenshot()
    buffer = BytesIO(raw)
    return Image.open(buffer)
103206

104-
def page_axtree(self):
105-
axtree = self._loop.run_until_complete(self._page.accessibility.snapshot())
106-
flat_axtree = flatten_axtree(axtree)
107-
return flat_axtree
207+
async def page_axtree(self) -> str:
    """Return the page's accessibility snapshot rendered by ``flatten_axtree``."""
    snapshot = await self._page.accessibility.snapshot()
    flattened = flatten_axtree(snapshot)
    return flattened
108210

109-
def step(self, action: ToolCall):
211+
async def step(self, action: ToolCall) -> dict:
    """Execute one tool call and return the page observation that follows it.

    Failures are reported as text instead of being raised: both an action
    name that is not registered in ``self._actions`` and an exception raised
    while the action runs end up as an error message under
    ``"action_result"``. The observation is collected either way.

    Args:
        action: Tool call whose ``name`` selects the action and whose
            ``arguments`` are passed to it as keyword arguments.

    Returns:
        A dict with the keys ``"action_result"`` (the action's return value
        or an error message), ``"pruned_html"``, ``"axtree_txt"`` and
        ``"screenshot"``.
    """
    handler = self._actions.get(action.name)
    if handler is None:
        # An unregistered name used to escape as a bare KeyError; report it
        # through the same channel as any other failed action instead.
        action_result = f"Error executing action {action.name}: unknown action"
        logger.error(action_result)
    else:
        try:
            action_result = await handler(**action.arguments)
        except Exception as e:
            # Best effort by design: the caller gets the error text plus a
            # fresh observation rather than an exception.
            action_result = f"Error executing action {action.name}: {e}"
            logger.error(action_result)

    html = await self.page_html()
    screenshot = await self.page_screenshot()
    axtree = await self.page_axtree()
    return {
        "action_result": action_result,
        "pruned_html": html,
        "axtree_txt": axtree,
        "screenshot": screenshot,
    }
125227

126-
def actions(self) -> tuple[ToolSpec]:
127-
specs = [ToolSpec.from_function(fn) for fn in self._actions.values()]
128-
return tuple(specs)
228+
def actions(self) -> list[ToolSpec]:
    """Build one ToolSpec per registered action, in registration order."""
    specs = []
    for handler in self._actions.values():
        specs.append(ToolSpec.from_function(handler))
    return specs
129230

130-
def close(self):
131-
self._loop.run_until_complete(self._browser.close())
231+
async def close(self):
    """Close this backend's page.

    ``initialize`` keeps the Playwright driver and the browser in the
    module-level ``_apw`` / ``_abrowser`` globals and stores only the page on
    the instance, so there is no ``self._browser`` or ``self._pw`` to shut
    down here. Only the page is closed; the module-level browser and driver
    are left as they are, which matches ``SyncPlaywright.close``.
    """
    await self._page.close()
132234

133235

134236
def flatten_axtree(axtree_dict: dict | None) -> str:

0 commit comments

Comments
 (0)