ServiceNow
diff --git a/‎.github/workflows/pypi.yml‎
Lines changed: 4 additions & 3 deletions b/‎.github/workflows/pypi.yml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎.github/workflows/unit_tests.yml‎
Lines changed: 104 additions & 0 deletions b/‎.github/workflows/unit_tests.yml‎
Lines changed: 104 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 3 deletions b/‎README.md‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎dev/environment.yaml‎
Lines changed: 13 additions & 0 deletions b/‎dev/environment.yaml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎dev/requirements.txt‎
Lines changed: 9 additions & 0 deletions b/‎dev/requirements.txt‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 28 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎scripts/extract_finetuning_traces.py‎
Lines changed: 131 additions & 0 deletions b/‎scripts/extract_finetuning_traces.py‎
Lines changed: 131 additions & 0 deletions
@@ -1,4 +1,4 @@
-name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
+name: Build and Publish
 
 on: [push, workflow_dispatch]
 
@@ -48,10 +48,11 @@ jobs:
           uses: pypa/gh-action-pypi-publish@release/v1
 
     github-release:
-      name: Sign with Sigstore and upload them to GitHub Release
+      name: Sign packages with Sigstore and upload them to GitHub Release
       needs:
       - publish-to-pypi
       runs-on: ubuntu-latest
+
       permissions:
         contents: write  # IMPORTANT: mandatory for making GitHub Releases
         id-token: write  # IMPORTANT: mandatory for sigstore
@@ -64,7 +65,7 @@ jobs:
           path: dist/
 
       - name: Sign the dists with Sigstore
-        uses: sigstore/gh-action-sigstore-python@v1.2.3
+        uses: sigstore/gh-action-sigstore-python@v2.1.1
         with:
           inputs: >-
             ./dist/*.tar.gz
 
@@ -0,0 +1,104 @@
+name: Unit tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+
+  # Fails the build when any file in the repository is not formatted according to black.
+  code-format:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        # Run every step in a bash login shell.
+        shell: bash -l {0}
+    steps:
+
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'pip' # caching pip dependencies
+
+      - name: Pip install
+        # The requirement is quoted so the shell never treats "[jupyter]" as a glob pattern.
+        run: pip install 'black[jupyter]==24.2.0' blacken-docs
+
+      - name: Pip list
+        run: pip list
+
+      - name: Code Formatting
+        # --check only reports files that would be reformatted; nothing is rewritten.
+        run: black . --check
+
+  # Runs the tests that are marked neither "slow" nor "pricy".
+  browsergym-workarena-fast:
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        # Run every step in a bash login shell.
+        shell: bash -l {0}
+
+    steps:
+
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'pip' # caching pip dependencies
+
+      - name: Pip install
+        # Installs the development requirements listed in dev/requirements.txt.
+        working-directory: ./dev
+        run: pip install -r requirements.txt
+
+      - name: Pip list
+        run: pip list
+
+      - name: Install Playwright
+        # Downloads the Playwright browsers together with their system dependencies.
+        run: playwright install --with-deps
+
+      - name: Run non-slow browsergym-workarena Unit Tests
+        env:
+          # Instance URL and credentials are read from the repository secrets.
+          SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
+          SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
+          SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
+        # -n 5: five parallel workers; --durations=10: report the ten slowest tests;
+        # --slowmo 1000: slow Playwright operations down by 1000 ms.
+        run: pytest -n 5 --durations=10 -m 'not slow and not pricy' --slowmo 1000 -v tests
+
+  # Runs the tests that are marked "slow" but not "pricy".
+  browsergym-workarena-slow:
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        # Run every step in a bash login shell.
+        shell: bash -l {0}
+
+    steps:
+
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'pip' # caching pip dependencies
+
+      - name: Pip install
+        # Installs the development requirements listed in dev/requirements.txt.
+        working-directory: ./dev
+        run: pip install -r requirements.txt
+
+      - name: Pip list
+        run: pip list
+
+      - name: Install Playwright
+        # Downloads the Playwright browsers together with their system dependencies.
+        run: playwright install --with-deps
+
+      - name: Run slow browsergym-workarena Unit Tests
+        env:
+          # Instance URL and credentials are read from the repository secrets.
+          SNOW_INSTANCE_URL: ${{ secrets.SNOW_INSTANCE_URL }}
+          SNOW_INSTANCE_UNAME: ${{ secrets.SNOW_INSTANCE_UNAME }}
+          SNOW_INSTANCE_PWD: ${{ secrets.SNOW_INSTANCE_PWD }}
+        # -n 5: five parallel workers; --durations=10: report the ten slowest tests;
+        # --slowmo 1000: slow Playwright operations down by 1000 ms.
+        run: pytest -n 5 --durations=10 -m 'slow and not pricy' --slowmo 1000 -v tests
@@ -41,6 +41,8 @@ Your installation is now complete! 🎉
 
 Run this code to see WorkArena in action.
 
+Note: the following example executes WorkArena's oracle (cheat) function to solve each task. To evaluate an agent, calls to `env.step()` must be used instead.
+
 ```python
 import random
 
@@ -59,8 +61,7 @@ for (task, seed) in zip(AGENT_L2_SAMPLED_TASKS, AGENT_L2_SEEDS):
 
     # Instantiate a new environment
     env = BrowserEnv(task_entrypoint=task,
-                    headless=False, 
-                    slow_mo=1000)
+                    headless=False)
     env.reset()
 
     # Cheat functions use Playwright to automatically solve the task
@@ -75,7 +76,7 @@ for (task, seed) in zip(AGENT_L2_SAMPLED_TASKS, AGENT_L2_SEEDS):
     if reward == 1:
         env.chat.add_message(role="user", msg="Yes, that works. Thanks!")
     else:
-        env.chat.add_message(role="user", msg=f"No, that doesn't work. {message.get('message', '')}")
+        env.chat.add_message(role="user", msg=f"No, that doesn't work. {info.get('message', '')}")
 
     sleep(3)
     env.close()
 
@@ -0,0 +1,13 @@
+# Conda environment definition for WorkArena development.
+name: workarena-dev
+
+# Channels are listed in priority order.
+channels:
+  - huggingface
+  - conda-forge
+  - defaults
+
+dependencies:
+  - python>=3.10
+  - pip
+
+  # Python packages are installed by pip from the requirements file.
+  - pip:
+      - -r requirements.txt
@@ -0,0 +1,9 @@
+black[jupyter]==24.2.0
+blacken-docs
+pre-commit
+pytest==7.3.2
+pytest-xdist
+pytest-playwright
+tenacity
+browsergym-core
+-e .. # local package
@@ -11,6 +11,7 @@ authors = [
     {name = "Maxime Gasse"},
     {name = "Alex Lacoste"},
     {name = "Manuel Del Verme"},
+    {name = "Megh Thakkar"},
 ]
 readme = "README.md"
 requires-python = ">3.7"
@@ -40,3 +41,30 @@ files = ["requirements.txt"]
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/browsergym"]
+
+# Black formatter settings: 100-character lines, applied to .py and .pyi files only.
+[tool.black]
+line-length = 100
+include = '\.pyi?$'
+# Regular expression listing the directories that black skips.
+exclude = '''
+/(
+    \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.nox
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+)/
+'''
+
+# Pytest configuration: warning filters and registration of the custom markers.
+[tool.pytest.ini_options]
+filterwarnings = [
+    'ignore::UserWarning:gymnasium.*:',  # too many "The obs is not within the observation space." warnings.
+]
+# Every marker used in a "-m" selection expression by the CI workflows is registered here,
+# so pytest does not report it as an unknown mark.
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "pricy: marks tests as pricy (deselect with '-m \"not pricy\"')",
+]
@@ -0,0 +1,131 @@
+"""
+A demonstration of how observation/action traces can be extracted
+for WorkArena tasks without modifying the task code.
+
+Author: Alexandre Drouin (alexandre.drouin@servicenow.com)
+
+Notes:
+- This approach relies on monkey patching the playwright actions to log the actions and observations.
+  It has not been tested for parallel execution. It might work with multiprocessing, but it will
+  definitely not work with multithreading, because the patches are applied to the shared classes.
+
+"""
+
+import importlib
+import logging
+import os
+import pickle
+import playwright.sync_api as playwright_sync
+
+from browsergym.core.env import BrowserEnv
+from browsergym.workarena import ALL_WORKARENA_TASKS
+from collections import defaultdict
+from tenacity import retry, stop_after_attempt, wait_fixed
+from time import time
+
+
+N_PER_TASK = 10
+
+
+def monkey_patch_playwright(observation_callback, trace_storage):
+    """
+    A function that overrides the default playwright actions to log the actions and observations.
+
+    Parameters:
+    ------------
+    observation_callback: callable
+        A function that returns the observation of the environment.
+    trace_storage: list
+        A list to store the trace of the actions and observations.
+        These will be appended in-place.
+
+    """
+
+    def wrapper(func, interface):
+        def wrapped(*args, **kwargs):
+            # Get the observation
+            obs = observation_callback()
+
+            # Get the BID of the element on which we are acting.
+            if interface.__name__ == "Locator":
+                # Get the locator
+                locator = args[0]
+                # Get the BID
+                bid = locator.element_handle().evaluate('(el) => el.getAttribute("bid")')
+            elif interface.__name__ == "Keyboard":
+                # Get the BID of the element
+                bid = "keyboard"
+            else:
+                # Get the BID of the element
+                bid = args[0].evaluate('(el) => el.getAttribute("bid")')
+
+            logging.info(f"Action: {func.__name__} BID: {bid}  --   Args: {args[1:]} {kwargs}")
+            trace_storage.append(
+                {
+                    "obs": obs,
+                    "action": func.__name__,
+                    "args": args[1:],
+                    "kwargs": kwargs,
+                    "bid": bid,
+                    "time": time(),
+                }
+            )
+
+            # Resume action
+            return func(*args, **kwargs)
+
+        return wrapped
+
+    # Interfaces and actions we want to monkey patch
+    importlib.reload(playwright_sync)
+    from playwright.sync_api import Page, Frame, Locator, Keyboard, ElementHandle
+
+    # TODO: Make sure the list of interfaces and actions is exhaustive
+    #       It covers all that is used in WorkArena cheats as of April 11, 2024
+    interfaces = [Page, Frame, Locator, Keyboard, ElementHandle]
+    actions = ["click", "select_option", "set_checked", "fill", "press", "type", "down", "up"]
+
+    for interface in interfaces:
+        for action in actions:
+            if hasattr(interface, action):
+                setattr(interface, action, wrapper(getattr(interface, action), interface))
+                print(f"Monkey patched {interface.__name__}.{action}")
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+def extract_trace(task_cls, headless=True):
+    """
+    Extracts the trace of actions and observations for a given task.
+
+    Parameters:
+    ------------
+    task_cls: class
+        The class of the task to extract the trace from.
+
+    """
+    # Instantiate a new environment
+    env = BrowserEnv(task_entrypoint=task_cls, headless=headless, slow_mo=1000)
+
+    # Setup customized tracing
+    trace = []
+    monkey_patch_playwright(observation_callback=env._get_obs, trace_storage=trace)
+
+    env.reset()
+    env.task.cheat(env.page, env.chat.messages)
+    env.close()
+
+    return trace
+
+
+if __name__ == "__main__":
+    os.makedirs("trace_profiling", exist_ok=True)
+
+    task_traces = defaultdict(list)
+    for task in ALL_WORKARENA_TASKS:
+        print("Task:", task)
+        for i in range(N_PER_TASK):
+            print(f"Extracting trace {i+1}/{N_PER_TASK}")
+            trace = extract_trace(task, headless=True)
+            task_traces[task].append(trace)
+
+    pickle.dump(task_traces, open("trace_profiling/task_traces.pkl", "wb"))