Skip to content

Commit 2e87d58

Browse files
pull latest version
2 parents 6325d64 + 6c13c7e commit 2e87d58

21 files changed

Lines changed: 2096 additions & 252 deletions

.github/workflows/pypi.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
1+
name: Build and Publish
22

33
on: [push, workflow_dispatch]
44

@@ -48,10 +48,11 @@ jobs:
4848
uses: pypa/gh-action-pypi-publish@release/v1
4949

5050
github-release:
51-
name: Sign with Sigstore and upload them to GitHub Release
51+
name: Sign packages with Sigstore and upload them to GitHub Release
5252
needs:
5353
- publish-to-pypi
5454
runs-on: ubuntu-latest
55+
5556
permissions:
5657
contents: write # IMPORTANT: mandatory for making GitHub Releases
5758
id-token: write # IMPORTANT: mandatory for sigstore
@@ -64,7 +65,7 @@ jobs:
6465
path: dist/
6566

6667
- name: Sign the dists with Sigstore
67-
uses: sigstore/gh-action-sigstore-python@v1.2.3
68+
uses: sigstore/gh-action-sigstore-python@v2.1.1
6869
with:
6970
inputs: >-
7071
./dist/*.tar.gz

.github/workflows/unit_tests.yml

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
name: Unit tests
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
11+
code-format:
12+
runs-on: ubuntu-latest
13+
defaults:
14+
run:
15+
shell: bash -l {0}
16+
steps:
17+
18+
- name: Checkout Repository
19+
uses: actions/checkout@v4
20+
21+
- name: Set up Python
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.10'
25+
cache: 'pip' # caching pip dependencies
26+
27+
- name: Pip install
28+
run: pip install black[jupyter]==24.2.0 blacken-docs
29+
30+
- name: Pip list
31+
run: pip list
32+
33+
- name: Code Formatting
34+
run: black . --check
35+
36+
browsergym-workarena-fast:
37+
runs-on: ubuntu-latest
38+
39+
defaults:
40+
run:
41+
shell: bash -l {0}
42+
43+
steps:
44+
45+
- name: Checkout Repository
46+
uses: actions/checkout@v4
47+
48+
- name: Set up Python
49+
uses: actions/setup-python@v5
50+
with:
51+
python-version: '3.10'
52+
cache: 'pip' # caching pip dependencies
53+
54+
- name: Pip install
55+
working-directory: ./dev
56+
run: pip install -r requirements.txt
57+
58+
- name: Pip list
59+
run: pip list
60+
61+
- name: Install Playwright
62+
run: playwright install --with-deps
63+
64+
- name: Run non-slow browsergym-workarena Unit Tests
65+
env:
66+
SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
67+
SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
68+
SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
69+
run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
70+
71+
browsergym-workarena-slow:
72+
runs-on: ubuntu-latest
73+
74+
defaults:
75+
run:
76+
shell: bash -l {0}
77+
78+
steps:
79+
80+
- name: Checkout Repository
81+
uses: actions/checkout@v4
82+
83+
- name: Set up Python
84+
uses: actions/setup-python@v5
85+
with:
86+
python-version: '3.10'
87+
cache: 'pip' # caching pip dependencies
88+
89+
- name: Pip install
90+
working-directory: ./dev
91+
run: pip install -r requirements.txt
92+
93+
- name: Pip list
94+
run: pip list
95+
96+
- name: Install Playwright
97+
run: playwright install --with-deps
98+
99+
- name: Run slow browsergym-workarena Unit Tests
100+
env:
101+
SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
102+
SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
103+
SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
104+
run: pytest -n 5 --durations=10 -m 'slow and not pricy' --slowmo 1000 -v tests

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ Your installation is now complete! 🎉
4141

4242
Run this code to see WorkArena in action.
4343

44+
Note: the following example executes WorkArena's oracle (cheat) function to solve each task. To evaluate an agent, calls to `env.step()` must be used instead.
45+
4446
```python
4547
import random
4648

@@ -59,8 +61,7 @@ for (task, seed) in zip(AGENT_L2_SAMPLED_TASKS, AGENT_L2_SEEDS):
5961

6062
# Instantiate a new environment
6163
env = BrowserEnv(task_entrypoint=task,
62-
headless=False,
63-
slow_mo=1000)
64+
headless=False)
6465
env.reset()
6566

6667
# Cheat functions use Playwright to automatically solve the task
@@ -75,7 +76,7 @@ for (task, seed) in zip(AGENT_L2_SAMPLED_TASKS, AGENT_L2_SEEDS):
7576
if reward == 1:
7677
env.chat.add_message(role="user", msg="Yes, that works. Thanks!")
7778
else:
78-
env.chat.add_message(role="user", msg=f"No, that doesn't work. {message.get('message', '')}")
79+
env.chat.add_message(role="user", msg=f"No, that doesn't work. {info.get('message', '')}")
7980

8081
sleep(3)
8182
env.close()

dev/environment.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
name: workarena-dev
2+
3+
channels:
4+
- huggingface
5+
- conda-forge
6+
- defaults
7+
8+
dependencies:
9+
- python>=3.10
10+
- pip
11+
12+
- pip:
13+
- -r requirements.txt

dev/requirements.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
black[jupyter]==24.2.0
2+
blacken-docs
3+
pre-commit
4+
pytest==7.3.2
5+
pytest-xdist
6+
pytest-playwright
7+
tenacity
8+
browsergym-core
9+
-e .. # local package

pyproject.toml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ authors = [
1111
{name = "Maxime Gasse"},
1212
{name = "Alex Lacoste"},
1313
{name = "Manuel Del Verme"},
14+
{name = "Megh Thakkar"},
1415
]
1516
readme = "README.md"
1617
requires-python = ">3.7"
@@ -40,3 +41,30 @@ files = ["requirements.txt"]
4041

4142
[tool.hatch.build.targets.wheel]
4243
packages = ["src/browsergym"]
44+
45+
[tool.black]
46+
line-length = 100
47+
include = '\.pyi?$'
48+
exclude = '''
49+
/(
50+
\.eggs
51+
| \.git
52+
| \.hg
53+
| \.mypy_cache
54+
| \.nox
55+
| \.tox
56+
| \.venv
57+
| _build
58+
| buck-out
59+
| build
60+
| dist
61+
)/
62+
'''
63+
64+
[tool.pytest.ini_options]
filterwarnings = [
    'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
]
# Register every custom marker that the test commands select on with `-m`
# (the CI workflow filters on both `slow` and `pricy`), so pytest does not
# treat any of them as an unknown mark.
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "pricy: marks tests as pricy (deselect with '-m \"not pricy\"')",
]
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""
2+
A demonstration of how observation/action traces can be extracted
3+
for WorkArena tasks without modifying the task code.
4+
5+
Author: Alexandre Drouin (alexandre.drouin@servicenow.com)
6+
7+
Notes:
8+
- This approach relies on monkey patching the playwright actions to log the actions and observations.
9+
It has not been tested for parallel execution. It might work with multiprocessing, but it will for
10+
sure not work with multithreading.
11+
12+
"""
13+
14+
import importlib
15+
import logging
16+
import os
17+
import pickle
18+
import playwright.sync_api as playwright_sync
19+
20+
from browsergym.core.env import BrowserEnv
21+
from browsergym.workarena import ALL_WORKARENA_TASKS
22+
from collections import defaultdict
23+
from tenacity import retry, stop_after_attempt, wait_fixed
24+
from time import time
25+
26+
27+
N_PER_TASK = 10
28+
29+
30+
def monkey_patch_playwright(observation_callback, trace_storage):
    """
    Override the default playwright actions to log the actions and observations.

    Every patched action first appends an entry to `trace_storage` (observation,
    action name, positional/keyword arguments, BID of the target element and a
    timestamp) and then runs the original playwright action.

    Calling this function again replaces a patch installed by a previous call
    instead of wrapping it a second time, so only the most recent
    `observation_callback` / `trace_storage` pair is used.

    Parameters:
    ------------
    observation_callback: callable
        A function that returns the observation of the environment.
        It is called without arguments right before each patched action runs.
    trace_storage: list
        A list to store the trace of the actions and observations.
        These will be appended in-place.

    """
    # Name of the attribute that each wrapper uses to remember the unpatched action.
    original_attr = "_trace_original_action"

    def wrapper(func, interface):
        def wrapped(*args, **kwargs):
            # Get the observation (before the action is executed)
            obs = observation_callback()

            # Get the BID of the element on which we are acting.
            # args[0] is the receiver (`self`) of the patched playwright method.
            if interface.__name__ == "Locator":
                # Resolve the locator to an element handle to read its attribute
                locator = args[0]
                bid = locator.element_handle().evaluate('(el) => el.getAttribute("bid")')
            elif interface.__name__ == "Keyboard":
                # Keyboard actions are not bound to an element
                bid = "keyboard"
            else:
                # NOTE(review): this branch is also taken for Page and Frame receivers,
                # where evaluate() is not given an element -- confirm it behaves as
                # intended for those interfaces.
                bid = args[0].evaluate('(el) => el.getAttribute("bid")')

            logging.info(f"Action: {func.__name__} BID: {bid} -- Args: {args[1:]} {kwargs}")
            trace_storage.append(
                {
                    "obs": obs,
                    "action": func.__name__,
                    "args": args[1:],
                    "kwargs": kwargs,
                    "bid": bid,
                    "time": time(),
                }
            )

            # Resume action
            return func(*args, **kwargs)

        # Remember the unpatched action so that a later call can re-wrap it directly.
        setattr(wrapped, original_attr, func)
        return wrapped

    # Interfaces and actions we want to monkey patch
    importlib.reload(playwright_sync)
    from playwright.sync_api import Page, Frame, Locator, Keyboard, ElementHandle

    # TODO: Make sure the list of interfaces and actions is exhaustive
    #       It covers all that is used in WorkArena cheats as of April 11, 2024
    interfaces = [Page, Frame, Locator, Keyboard, ElementHandle]
    actions = ["click", "select_option", "set_checked", "fill", "press", "type", "down", "up"]

    for interface in interfaces:
        for action in actions:
            if hasattr(interface, action):
                current = getattr(interface, action)
                # The reload above re-executes only `playwright.sync_api` itself, so the
                # classes may be the same objects that an earlier call already patched.
                # Unwrap in that case; otherwise wrappers would stack and keep calling
                # the previous observation callback and trace storage.
                original = getattr(current, original_attr, current)
                setattr(interface, action, wrapper(original, interface))
                print(f"Monkey patched {interface.__name__}.{action}")
93+
94+
95+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def extract_trace(task_cls, headless=True):
    """
    Extracts the trace of actions and observations for a given task.

    The task is solved by its own `cheat` function while the playwright actions
    are monkey patched to record each step. The whole extraction is retried
    (up to 3 attempts, waiting 2 seconds between attempts) if it raises.

    Parameters:
    ------------
    task_cls: class
        The class of the task to extract the trace from.
    headless: bool
        Passed through to `BrowserEnv` (default: True).

    Returns:
    --------
    trace: list
        One dict per recorded playwright action, in execution order.

    """
    # Instantiate a new environment
    env = BrowserEnv(task_entrypoint=task_cls, headless=headless, slow_mo=1000)

    # Setup customized tracing (a fresh list per attempt, so a failed attempt
    # never leaks partial entries into the returned trace)
    trace = []
    monkey_patch_playwright(observation_callback=env._get_obs, trace_storage=trace)

    try:
        env.reset()
        env.task.cheat(env.page, env.chat.messages)
    finally:
        # Always release the environment, even when reset/cheat fails and the
        # call is about to be retried; otherwise each failed attempt leaks one.
        env.close()

    return trace
118+
119+
120+
if __name__ == "__main__":
    # Collect N_PER_TASK traces for every WorkArena task and store them in one pickle.
    os.makedirs("trace_profiling", exist_ok=True)

    # Maps each task class to the list of traces extracted for it
    task_traces = defaultdict(list)
    for task in ALL_WORKARENA_TASKS:
        print("Task:", task)
        for i in range(N_PER_TASK):
            print(f"Extracting trace {i+1}/{N_PER_TASK}")
            trace = extract_trace(task, headless=True)
            task_traces[task].append(trace)

    # Use a context manager so the file is flushed and closed deterministically
    with open("trace_profiling/task_traces.pkl", "wb") as trace_file:
        pickle.dump(task_traces, trace_file)

0 commit comments

Comments
 (0)