GrayboxTech · guillaume-byte · Jun 18, 2026 · May 21, 2026 · May 27, 2026 · May 28, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -2,19 +2,52 @@ name: Code CI
 
 on:
   push:
-    branches: [ main, dev ]
+    # Fire on every branch so the `gate` job below can decide whether to run.
+    # GitHub cannot filter on commit message at the event level, so we trigger
+    # broadly and gate execution: main/dev always run, other branches run only
+    # when the head commit message contains "[force ci]".
+    branches: [ '**' ]
     tags: [ 'v*' ]
   pull_request:
     types: [ opened, reopened, ready_for_review ]
   workflow_dispatch:
 
 jobs:
-  # Required gate: a lint violation here FAILS the CI run. This job has no
-  # `needs` and nothing `needs` it, so it runs in parallel with install/test/
-  # build — its failure turns the run red but does NOT stop the other jobs from
-  # running (GitHub Actions does not cancel sibling jobs on failure).
+  # Single source of truth for "should the core CI jobs run?". Reproduces the
+  # previous trigger rules (PR / manual dispatch / main / dev run; tags skip)
+  # and adds a manual escape hatch for custom branches via "[force ci]".
+  gate:
+    runs-on: ubuntu-latest
+    outputs:
+      run_ci: ${{ steps.decide.outputs.run_ci }}
+    steps:
+      - name: Decide whether to run CI
+        id: decide
+        env:
+          # Context values reach bash only through env, never by inline expression
+          # expansion: ref names and commit messages are pusher-controlled text.
+          EVENT_NAME: ${{ github.event_name }}
+          GIT_REF: ${{ github.ref }}
+          FORCE_CI: ${{ contains(github.event.head_commit.message, '[force ci]') }}
+        run: |
+          if [ "${EVENT_NAME}" != "push" ]; then
+            run_ci=true            # PRs and manual dispatch always run
+          elif [[ "${GIT_REF}" == refs/tags/* ]]; then
+            run_ci=false           # tag pushes are handled by the release workflow
+          elif [[ "${GIT_REF}" == refs/heads/main || "${GIT_REF}" == refs/heads/dev || "${FORCE_CI}" == true ]]; then
+            run_ci=true            # main/dev always run; other branches opt in via [force ci]
+          else
+            run_ci=false           # custom branch without [force ci] → skip
+          fi
+          echo "run_ci=${run_ci}" >> "$GITHUB_OUTPUT"
+          echo "Decision: run_ci=${run_ci} (event=${EVENT_NAME}, ref=${GIT_REF}, force_ci=${FORCE_CI})"
+  # Required check: a lint violation here FAILS the CI run. Nothing `needs` this
+  # job, so it runs in parallel with install/test/build — its failure turns the
+  # run red but does NOT stop the other jobs from running (GitHub Actions does
+  # not cancel sibling jobs on failure).
   code-quality:
-    if: ${{ github.event_name != 'push' || !startsWith(github.ref, 'refs/tags/') }}
+    needs: gate
+    if: ${{ needs.gate.outputs.run_ci == 'true' }}
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
@@ -99,7 +132,8 @@ jobs:
   # Single-version install on 3.11 — always runs so `test` has a gate to
   # depend on regardless of branch.
   install:
-    if: ${{ github.event_name != 'push' || !startsWith(github.ref, 'refs/tags/') }}
+    needs: gate
+    if: ${{ needs.gate.outputs.run_ci == 'true' }}
     runs-on: ubuntu-latest
     name: install (Python 3.11)
     steps:
@@ -160,10 +194,11 @@ jobs:
           python -c "import weightslab; print(f'weightslab imported successfully on Python ${{ matrix.python-version }}')"
 
   test:
-    if: ${{ github.event_name != 'push' || !startsWith(github.ref, 'refs/tags/') }}
+    if: ${{ needs.gate.outputs.run_ci == 'true' }}
     runs-on: ubuntu-latest
-    # Depends on the fast 3.11 gate only — the matrix runs independently in parallel.
-    needs: install
+    # Depends on the fast 3.11 install gate; `gate` is also a direct need so this
+    # job can read run_ci (the matrix and main-only jobs run independently).
+    needs: [ gate, install ]
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

diff --git a/AGENTS.md b/AGENTS.md
diff --git a/weightslab/backend/ledgers.py b/weightslab/backend/ledgers.py
@@ -29,6 +29,15 @@
 DEFAULT_NAME = "main"
 
 
+def _debug_logging_enabled() -> bool:
+    """True only when env var WEIGHTSLAB_LOG_LEVEL is 'debug' (any case/padding).
+
+    Read from ``os.environ`` on every call; an unset variable counts as not
+    debug. Gates verbose diagnostics (traceback dumps) so normal runs stay quiet.
+    """
+    return os.environ.get("WEIGHTSLAB_LOG_LEVEL", "").strip().lower() == "debug"
+
+
 def _rebuild_proxy(obj: Any) -> "Proxy":
     proxy = Proxy()
     proxy._obj = obj
@@ -39,6 +48,36 @@ def _rebuild_value_proxy(parent: "Proxy", key: Any, default: Any = None) -> "Pro
     return Proxy._ValueProxy(parent, key, default)
 
 
+# Sentinel: the resolved value should stay a live proxy (no plain-value coercion).
+_KEEP_AS_PROXY = object()
+
+
+def _is_torch_device(value: Any) -> bool:
+    """True if ``value`` is a ``torch.device``, matched by type name and module.
+
+    Uses ``type(value)``'s ``__name__``/``__module__`` rather than ``isinstance``
+    so this backend module never imports torch (must work without torch installed).
+    """
+    t = type(value)
+    return t.__name__ == "device" and t.__module__ == "torch"
+
+
+def _plain_get_value(value: Any) -> Any:
+    """Return the plain value a ``get`` hands back when ``value`` must NOT stay a
+    live proxy; otherwise return the ``_KEEP_AS_PROXY`` sentinel.
+
+    - ``str``: returned unchanged; as a live proxy it breaks os.PathLike consumers
+      (os.stat / os.path.isdir see ``__index__`` and treat it as a file descriptor).
+    - ``torch.device``: torch's C-level ``torch.device()`` / ``Tensor.to()`` reject
+      a proxy, so we return its string form (which torch accepts everywhere).
+    """
+    if isinstance(value, str):
+        return value
+    if _is_torch_device(value):
+        return str(value)
+    return _KEEP_AS_PROXY
+
+
 class Proxy:
     """A small forwarding proxy that holds a mutable reference to an object.
 
@@ -115,7 +154,9 @@ def get(self, *args, **kwargs) -> Any:
             """
             v = self._resolve()
             if not args and not kwargs:
-                return v
+                # Hand back the plain form for str / torch.device; raw value otherwise.
+                plain = _plain_get_value(v)
+                return v if plain is _KEEP_AS_PROXY else plain
             if hasattr(v, 'get'):
                 return v.get(*args, **kwargs)
             # Fallback for the proxy's own resolve-with-default logic
@@ -357,17 +398,50 @@ def __len__(self) -> int:
                 return 0
             return len(v)
 
+        def __iter__(self):
+            """Iterate over the resolved value (e.g. a list/tuple hyperparameter).
+
+            Without this, ``for x in proxy`` / ``tuple(proxy)`` / unpacking raise
+            ``'_ValueProxy' object is not iterable`` even though the wrapped value
+            is iterable. Resolved once per ``iter()`` call; None iterates as empty.
+            """
+            v = self._resolve()
+            if v is None:
+                return iter(())
+            return iter(v)
+
+        def __getitem__(self, item: Any) -> Any:
+            """Index, slice, or key into the resolved value.
+
+            Works for lists (``proxy[0]``, ``proxy[1:3]``), dicts (``proxy[key]``)
+            and strings. A ``dict`` result is re-wrapped in a live proxy to mirror
+            __getattr__ (chained edits keep tracking); TypeError if target is None.
+            """
+            v = self._resolve()
+            if v is None:
+                raise TypeError("ValueProxy target not set (resolved to None)")
+            value = v[item]
+            if isinstance(value, dict):
+                return Proxy._ValueProxy(Proxy(v), item)
+            return value
+
     def get(self, ref=None, default=None, proxy: bool = True) -> Any:
         """Get wrapped object or a key from the wrapped mapping.
 
         Args:
             ref: Mapping key when provided.
             default: Fallback value when key/target is missing.
-            proxy: When True and ref is provided, return a live key proxy.
+            proxy: When True and ref is provided, return a live key proxy —
+                except ``str`` / ``torch.device`` values: plain ``str`` instead.
         """
         if ref is not None:
             if proxy:
-                return Proxy._ValueProxy(self, ref, default)
+                vp = Proxy._ValueProxy(self, ref, default)
+                # str and torch.device are handed back as plain values (see
+                # _plain_get_value); other types (dict/list/int/float/...) stay
+                # live proxies so studio edits keep tracking.
+                plain = _plain_get_value(vp._resolve())
+                return vp if plain is _KEEP_AS_PROXY else plain
             return self._obj.get(ref, default)
         return self._obj if self._obj is not None else default
 
@@ -457,11 +531,14 @@ def __next__(self):
                     self._it.is_a_loop = False  # Loop ends here
                     raise
                 except KeyError:
-                    traceback.print_exc()
-                    logger.error(
-                        "KeyError during Proxy iteration. This may indicate the underlying object was modified during iteration. Returning StopIteration to end iteration gracefully." +
-                        "\nOtherwise there is a missmatch between data metadata returned, e.g., some metadata has augmentation parameters and other not. Please initialize all metadata with the same keys and types to avoid this error."
-                    )
+                    # Quiet by default; only surface this diagnostic when the user
+                    # opted into debug logging (WEIGHTSLAB_LOG_LEVEL=debug).
+                    if _debug_logging_enabled():
+                        traceback.print_exc()
+                        logger.error(
+                            "KeyError during Proxy iteration. This may indicate the underlying object was modified during iteration. Returning StopIteration to end iteration gracefully." +
+                            "\nOtherwise there is a missmatch between data metadata returned, e.g., some metadata has augmentation parameters and other not. Please initialize all metadata with the same keys and types to avoid this error."
+                        )
                     raise StopIteration
         return _ProxyIterator(underlying_iter)
 
@@ -590,13 +667,15 @@ def __next__(self):
             # Let StopIteration propagate naturally to signal end of iteration
             raise
         except Exception:
-            traceback.print_exc()
+            if _debug_logging_enabled():
+                traceback.print_exc()
             # clear cached iterator so future next(proxy) restarts
             try:
                 if '_iterator' in self.__dict__:
                     object.__delattr__(self, '_iterator')
             except Exception:
-                traceback.print_exc()
+                if _debug_logging_enabled():
+                    traceback.print_exc()
             raise StopIteration
 
     # Context manager support so `with proxy as x:` works when the proxy

diff --git a/weightslab/components/experiment_hash.py b/weightslab/components/experiment_hash.py
@@ -237,6 +237,8 @@ def _hash_config(self, config: Dict[str, Any]) -> str:
         config_cp.pop('is_training', None)
         config_cp.pop('pause_at_step', None)
         # config_cp.pop('auditor_mode', None)  # Audit should be another state
+        if 'auditor_mode' not in config_cp:
+            config_cp['auditor_mode'] = False
 
         try:
             # Sort keys for deterministic hashing

diff --git a/weightslab/data/dataframe_manager.py b/weightslab/data/dataframe_manager.py
@@ -4,6 +4,7 @@
 import threading
 import logging
 import traceback
+import warnings
 import numpy as np
 import pandas as pd
 import torch
@@ -60,6 +61,16 @@ def _safe_update(target: pd.DataFrame, source: pd.DataFrame) -> None:
         mask = src.notna()
         if not mask.any():
             continue
+        # Widen the target to object before assigning object (arrays/masks) or
+        # bool values into a numeric (e.g. NaN -> float64) column. Otherwise
+        # pandas >= 2.1 emits an incompatible-dtype FutureWarning (a future hard
+        # error). Compatible numeric assignments keep their dtype (fast path).
+        try:
+            if src.dtype.kind in ("O", "b") and target[col].dtype != object \
+                    and target[col].dtype.kind != src.dtype.kind:
+                target[col] = target[col].astype(object)
+        except Exception:
+            pass
         try:
             target.loc[common_idx[mask.values], col] = src[common_idx].values
         except Exception:
@@ -765,7 +776,7 @@ def _is_bbox_array(value: Any) -> bool:
             arr = np.asanyarray(value)
         except Exception:
             return False
-        return arr.ndim == 2 and arr.shape[0] >= 0 and arr.shape[-1] in (4, 5, 6)
+        return arr.ndim == 2 and arr.shape[0] >= 0 and arr.shape[-1] in range(3, 9+1)
 
     @staticmethod
     def _is_empty(value: Any) -> bool:
@@ -1624,17 +1635,31 @@ def _apply_updates_frame_locked(self, df_updates: pd.DataFrame, broadcast: bool)
         if self._df.index.has_duplicates:
             self._df = self._df[~self._df.index.duplicated(keep='last')]
 
-        # Widen categorical target columns so a new value doesn't raise
-        # "Cannot setitem on a Categorical with a new category".
+        # Categorical target columns must be widened up front: assigning a value
+        # outside the category list raises an UNCATCHABLE AssertionError, so it
+        # cannot be handled by the try/except below.
         for col in df_updates.columns:
             if col in self._df.columns and isinstance(self._df[col].dtype, pd.CategoricalDtype):
                 self._df[col] = self._df[col].astype(object)
 
         # Masked update: only overwrite where df_updates is non-NaN.
         mask = df_updates.notna()
-        self._df.loc[df_updates.index, df_updates.columns] = (
-            self._df.loc[df_updates.index, df_updates.columns].where(~mask, df_updates)
-        )
+        combined = self._df.loc[df_updates.index, df_updates.columns].where(~mask, df_updates)
+        try:
+            with warnings.catch_warnings():
+                # pandas >= 2.1 only *warns* when a partial assignment changes a
+                # column's dtype (object arrays/masks or bool into a numeric column),
+                # but will raise in a future version. Promote it so we widen the
+                # affected columns to object and retry. Compatible assignments take
+                # this fast path unchanged.
+                warnings.filterwarnings(
+                    "error", message=".*incompatible dtype.*", category=FutureWarning)
+                self._df.loc[df_updates.index, df_updates.columns] = combined
+        except Exception:
+            for col in df_updates.columns:
+                if col in self._df.columns and self._df[col].dtype != object:
+                    self._df[col] = self._df[col].astype(object)
+            self._df.loc[df_updates.index, df_updates.columns] = combined
         return df_updates.index
 
     def _merge_buffer_frames_into(self, df: pd.DataFrame, sample_df: pd.DataFrame, instance_df: pd.DataFrame) -> pd.DataFrame:
@@ -1748,7 +1773,8 @@ def _apply_buffer_records_nonblocking(self, records: List[Dict[str, Any]]):
             applied_index = written_s.append(written_i) if len(written_i) else written_s
             update_cols = sample_df.columns.union(instance_df.columns)
             # Keep newly-added signal columns float32 and empty object cells as None.
-            self._df = self._optimize_dataframe_memory(self._df)
+            _df = self._optimize_dataframe_memory(self._df)
+            self._df = _df
         finally:
             self._lock.release()
 

diff --git a/weightslab/data/h5_dataframe_store.py b/weightslab/data/h5_dataframe_store.py
@@ -29,6 +29,27 @@
     pass
 
 
+def _align_col_dtype_for_assign(existing: pd.DataFrame, source: pd.DataFrame, col: str) -> None:
+    """Upcast ``existing[col]`` to object before a partial ``.loc`` assignment when
+    the source column holds object data (numpy arrays, stringified masks) or bool
+    flags that don't fit the target's dtype. Mutates ``existing`` in place.
+
+    pandas >= 2.1 emits a FutureWarning (a future hard error) when a partial
+    ``.loc`` assignment would change a column's dtype — e.g. assigning object or
+    bool values into a column that was just initialized as float64 via ``np.nan``.
+    Upcasting the target column to object first makes the assignment dtype-stable.
+    Compatible numeric assignments (e.g. int into float) are left untouched.
+    Best-effort: any exception is swallowed so alignment never breaks a merge.
+    """
+    try:
+        src_kind = source[col].dtype.kind  # 'O' object, 'b' bool, 'i'/'u'/'f' numeric
+        tgt_dtype = existing[col].dtype
+        if src_kind in ("O", "b") and tgt_dtype != object and tgt_dtype.kind != src_kind:
+            existing[col] = existing[col].astype(object)
+    except Exception:
+        pass
+
+
 class _InterProcessFileLock:
     """Lightweight cross-platform file lock.
 
@@ -694,13 +715,15 @@ def upsert(self, origin: str, df: pd.DataFrame) -> int:
                                             if col not in existing.columns:
                                                 is_categorical = col.startswith("tag") or col.startswith("TAG") or col == "discarded"
                                                 existing[col] = False if is_categorical else np.nan
+                                            _align_col_dtype_for_assign(existing, df_norm, col)
                                             existing.loc[matching_rows.index, col] = df_norm.loc[idx, col]
                                     elif isinstance(matching_rows, pd.Series):
                                         # Single row matched
                                         for col in df_norm.columns:
                                             if col not in existing.columns:
                                                 is_categorical = col.startswith("tag") or col.startswith("TAG") or col == "discarded"
                                                 existing[col] = False if is_categorical else np.nan
+                                            _align_col_dtype_for_assign(existing, df_norm, col)
                                             existing.loc[matching_rows.name, col] = df_norm.loc[idx, col]
                             else:
                                 # Normal case: same index structure
@@ -717,6 +740,7 @@ def upsert(self, origin: str, df: pd.DataFrame) -> int:
                                             is_categorical = col.startswith("tag") or col.startswith("TAG") or col == "discarded"
                                             existing[col] = False if is_categorical else np.nan
                                         # Update values for common rows
+                                        _align_col_dtype_for_assign(existing, df_norm, col)
                                         existing.loc[common_idx, col] = df_norm.loc[common_idx, col]
 
                                 # 2. Append strictly new rows