1- import asyncio
21import logging
32from io import BytesIO
43from typing import Any , Callable
54
65from PIL import Image
7- from playwright .async_api import Browser , Page , async_playwright
6+ from playwright .async_api import Page as AsyncPage
7+ from playwright .async_api import async_playwright
8+ from playwright .sync_api import Page as SyncPage
9+ from playwright .sync_api import sync_playwright
810
911from agentlab .actions import ToolCall , ToolSpec
10- from agentlab .backends .browser .base import BrowserBackend
12+ from agentlab .backends .browser .base import AsyncBrowserBackend , BrowserBackend
1113
1214logger = logging .getLogger (__name__ )
1315
1416
15- class AsyncPlaywright (BrowserBackend ):
17+ _pw = None # Global Playwright instance for SyncPlaywright
18+ _browser = None # Global Browser instance for SyncPlaywright
19+
20+
21+ class SyncPlaywright (BrowserBackend ):
22+ """Fully synchronous Playwright backend using playwright.sync_api."""
23+
24+ has_pw_page : bool = True
1625 _actions : dict [str , Callable ]
17- _loop : asyncio .AbstractEventLoop
18- _browser : Browser
19- _page : Page
26+ _page : SyncPage
2027
2128 def model_post_init (self , __context : Any ):
2229 self ._actions = {
@@ -29,37 +36,139 @@ def model_post_init(self, __context: Any):
2936 "browser_mouse_click_xy" : self .browser_mouse_click_xy ,
3037 }
3138
32- def initialize (self , loop : asyncio .AbstractEventLoop | None = None ):
33- self ._loop = loop or asyncio .get_event_loop ()
34- self ._loop .run_until_complete (self .ainitialize ())
def initialize(self):
    """Open a new page for this backend.

    The Playwright driver and the Chromium browser are kept in the
    module-level ``_pw`` / ``_browser`` globals and are only created when
    they are still ``None``.
    """
    global _pw, _browser
    if _pw is None:
        driver = sync_playwright()
        _pw = driver.start()
    if _browser is None:
        launch_options = {"headless": True, "chromium_sandbox": True}
        _browser = _pw.chromium.launch(**launch_options)
    self._page = _browser.new_page()
46+
@property
def page(self) -> SyncPage:
    """The Playwright page stored on this backend by ``initialize``."""
    current_page = self._page
    return current_page
50+
def browser_press_key(self, key: str):
    """Press a key on the keyboard."""
    # Forward the key name to the keyboard of the current page.
    keyboard = self._page.keyboard
    keyboard.press(key)
54+
def browser_type(self, selector: str, text: str):
    """Type text into the element matching the selector."""
    # The target is chosen by `selector`, not by whatever currently has focus.
    self._page.type(selector, text)
58+
def browser_click(self, selector: str):
    """Click on a selector."""
    # Strict selector matching with a 3000 ms timeout.
    click_options = {"timeout": 3000, "strict": True}
    self._page.click(selector, **click_options)
62+
def browser_drag(self, from_selector: str, to_selector: str):
    """Drag and drop from one selector to another."""
    page = self._page

    # Hover the source element, then hold the mouse button down.
    page.locator(from_selector).hover(timeout=500)
    page.mouse.down()

    # Hover the destination element, then release the button.
    page.locator(to_selector).hover(timeout=500)
    page.mouse.up()
72+
def browser_hover(self, selector: str):
    """Hover over a given element."""
    # Strict selector matching with a 3000 ms timeout.
    hover_options = {"timeout": 3000, "strict": True}
    self._page.hover(selector, **hover_options)
76+
def browser_select_option(self, selector: str, value: str):
    """Select an option from a given element."""
    page = self._page
    page.select_option(selector, value)
80+
def browser_mouse_click_xy(self, x: int, y: int):
    """Click at a given x, y coordinate using the mouse."""
    mouse = self._page.mouse
    # Passes delay=100 to the mouse click.
    mouse.click(x, y, delay=100)
84+
def evaluate_js(self, js: str):
    """Evaluate ``js`` in the current page, log the result at INFO level, and return it."""
    outcome = self._page.evaluate(js)
    logger.info(f"JS result: {outcome}")
    return outcome
89+
def goto(self, url: str):
    """Navigate the current page to ``url``; nothing is returned."""
    page = self._page
    page.goto(url)
92+
def page_html(self) -> str:
    """Return whatever the current page's ``content()`` call yields."""
    markup = self._page.content()
    return markup
95+
def page_screenshot(self) -> Image.Image:
    """Take a screenshot of the current page and open it as a PIL image."""
    raw_stream = BytesIO(self._page.screenshot())
    return Image.open(raw_stream)
99+
def page_axtree(self) -> str:
    """Snapshot the page's accessibility tree and return it flattened by ``flatten_axtree``."""
    snapshot = self._page.accessibility.snapshot()
    return flatten_axtree(snapshot)
103+
def step(self, action: ToolCall) -> dict:
    """Run one tool call and return its result together with a page observation.

    The callable is looked up in ``self._actions`` by ``action.name`` before
    the ``try`` block, so an unregistered name raises ``KeyError``. Any
    exception raised by the callable itself is turned into an error string,
    logged, and returned as the action result.

    Returns a dict with the keys ``action_result``, ``pruned_html``,
    ``axtree_txt`` and ``screenshot``.
    """
    handler = self._actions[action.name]
    try:
        outcome = handler(**action.arguments)
    except Exception as e:
        outcome = f"Error executing action {action.name}: {e}"
        logger.error(outcome)

    # Observation is captured after the action, in this order.
    page_markup = self.page_html()
    page_image = self.page_screenshot()
    page_tree = self.page_axtree()
    return dict(
        action_result=outcome,
        pruned_html=page_markup,
        axtree_txt=page_tree,
        screenshot=page_image,
    )
120+
def actions(self) -> list[ToolSpec]:
    """Return one ``ToolSpec`` per registered action callable, in registration order."""
    return list(map(ToolSpec.from_function, self._actions.values()))
123+
def close(self):
    """Close this backend's page; only ``self._page`` is touched."""
    page = self._page
    page.close()
126+
127+
128+ _apw = None # Global Playwright instance for AsyncPlaywright
129+ _abrowser = None # Global Browser instance for AsyncPlaywright
130+
35131
36- async def ainitialize (self ):
37- pw = await async_playwright ().start ()
38- self ._browser = await pw .chromium .launch (headless = True , chromium_sandbox = True )
39- self ._page = await self ._browser .new_page ()
132+ class AsyncPlaywright (AsyncBrowserBackend ):
133+ """Fully asynchronous Playwright backend using playwright.async_api."""
134+
135+ has_pw_page : bool = False
136+ _actions : dict [str , Callable ]
137+ _page : AsyncPage
138+
def model_post_init(self, __context: Any):
    """Build the name -> bound-method table of actions this backend exposes."""
    action_names = (
        "browser_press_key",
        "browser_type",
        "browser_click",
        "browser_drag",
        "browser_hover",
        "browser_select_option",
        "browser_mouse_click_xy",
    )
    # Each action is registered under the name of the method implementing it.
    self._actions = {name: getattr(self, name) for name in action_names}
149+
async def initialize(self):
    """Open a new page for this backend.

    The Playwright driver and the Chromium browser are kept in the
    module-level ``_apw`` / ``_abrowser`` globals and are only created when
    they are still ``None``.
    """
    global _apw, _abrowser
    if _apw is None:
        driver = async_playwright()
        _apw = await driver.start()
    if _abrowser is None:
        launch_options = {"headless": True, "chromium_sandbox": True}
        _abrowser = await _apw.chromium.launch(**launch_options)
    self._page = await _abrowser.new_page()
40157
async def browser_press_key(self, key: str):
    """Press a key on the keyboard."""
    # Forward the key name to the keyboard of the current page.
    keyboard = self._page.keyboard
    await keyboard.press(key)
46161
async def browser_type(self, selector: str, text: str):
    """Type text into the element matching the selector."""
    # The target is chosen by `selector`, not by whatever currently has focus.
    await self._page.type(selector, text)
52165
async def browser_click(self, selector: str):
    """Click on a selector."""
    # Strict selector matching with a 3000 ms timeout.
    click_options = {"timeout": 3000, "strict": True}
    await self._page.click(selector, **click_options)
58169
59170 async def browser_drag (self , from_selector : str , to_selector : str ):
60- """
61- Drag and drop from one selector to another.
62- """
171+ """Drag and drop from one selector to another."""
63172 from_elem = self ._page .locator (from_selector )
64173 await from_elem .hover (timeout = 500 )
65174 await self ._page .mouse .down ()
@@ -69,66 +178,59 @@ async def browser_drag(self, from_selector: str, to_selector: str):
69178 await self ._page .mouse .up ()
70179
async def browser_hover(self, selector: str):
    """Hover over a given element."""
    # Strict selector matching with a 3000 ms timeout.
    hover_options = {"timeout": 3000, "strict": True}
    await self._page.hover(selector, **hover_options)
76183
async def browser_select_option(self, selector: str, value: str):
    """Select an option from a given element."""
    page = self._page
    await page.select_option(selector, value)
82187
async def browser_mouse_click_xy(self, x: int, y: int):
    """Click at a given x, y coordinate using the mouse."""
    mouse = self._page.mouse
    # Passes delay=100 to the mouse click.
    await mouse.click(x, y, delay=100)
88191
89- def run_js (self , js : str ):
90- js_result = self ._loop . run_until_complete ( self . _page .evaluate (js ) )
async def evaluate_js(self, js: str):
    """Evaluate ``js`` in the current page, log the result at INFO level, and return it."""
    outcome = await self._page.evaluate(js)
    logger.info(f"JS result: {outcome}")
    return outcome
93196
94- def goto (self , url : str ):
95- self ._loop . run_until_complete ( self . _page .goto (url ) )
async def goto(self, url: str):
    """Navigate the current page to ``url``; nothing is returned."""
    page = self._page
    await page.goto(url)
96199
97- def page_html (self ):
98- return self ._loop . run_until_complete ( self . _page .content () )
async def page_html(self) -> str:
    """Return whatever the current page's ``content()`` call yields."""
    markup = await self._page.content()
    return markup
99202
100- def page_screenshot (self ):
101- scr_bytes = self ._loop . run_until_complete ( self . _page .screenshot () )
async def page_screenshot(self) -> Image.Image:
    """Take a screenshot of the current page and open it as a PIL image."""
    raw_stream = BytesIO(await self._page.screenshot())
    return Image.open(raw_stream)
103206
104- def page_axtree (self ):
105- axtree = self ._loop .run_until_complete (self ._page .accessibility .snapshot ())
106- flat_axtree = flatten_axtree (axtree )
107- return flat_axtree
async def page_axtree(self) -> str:
    """Snapshot the page's accessibility tree and return it flattened by ``flatten_axtree``."""
    snapshot = await self._page.accessibility.snapshot()
    return flatten_axtree(snapshot)
108210
109- def step (self , action : ToolCall ):
async def step(self, action: ToolCall) -> dict:
    """Run one tool call and return its result together with a page observation.

    The coroutine function is looked up in ``self._actions`` by
    ``action.name`` before the ``try`` block, so an unregistered name raises
    ``KeyError``. Any exception raised while awaiting the action is turned
    into an error string, logged, and returned as the action result.

    Returns a dict with the keys ``action_result``, ``pruned_html``,
    ``axtree_txt`` and ``screenshot``.
    """
    handler = self._actions[action.name]
    try:
        outcome = await handler(**action.arguments)
    except Exception as e:
        outcome = f"Error executing action {action.name}: {e}"
        logger.error(outcome)

    # Observation is captured after the action, in this order.
    page_markup = await self.page_html()
    page_image = await self.page_screenshot()
    page_tree = await self.page_axtree()
    return dict(
        action_result=outcome,
        pruned_html=page_markup,
        axtree_txt=page_tree,
        screenshot=page_image,
    )
125227
126- def actions (self ) -> tuple [ToolSpec ]:
127- specs = [ToolSpec .from_function (fn ) for fn in self ._actions .values ()]
128- return tuple (specs )
def actions(self) -> list[ToolSpec]:
    """Return one ``ToolSpec`` per registered action callable, in registration order."""
    return list(map(ToolSpec.from_function, self._actions.values()))
129230
130- def close (self ):
131- self ._loop .run_until_complete (self ._browser .close ())
async def close(self):
    """Close this backend's page.

    ``initialize`` stores only ``self._page`` on the instance; the browser
    and the Playwright driver live in the module-level ``_abrowser`` /
    ``_apw`` globals, so there is no ``self._browser`` or ``self._pw`` to
    close here. Those module-level objects are left running, which matches
    ``SyncPlaywright.close``.
    """
    await self._page.close()
132234
133235
134236def flatten_axtree (axtree_dict : dict | None ) -> str :
0 commit comments