diff --git a/2.0/README.md b/2.0/README.md
index 7d2eb1a3..b414435d 100644
--- a/2.0/README.md
+++ b/2.0/README.md
@@ -39,6 +39,14 @@ This variant keeps the same SIFT1M-scale service contract and recall target as
 offline indexing strategies are more viable. Its problem ID is
 `vector_db_ann_relaxed`.
 
+## Generals.io Bot Arena
+
+This game-playing problem asks agents to submit a patch that improves a skeleton
+bot for a local Generals.io-style simulator (problem ID `generals_io_bot`). The
+judge applies the patch to a clean skeleton, runs a hidden arena against several
+baseline bot families, and scores a weighted blend of mean baseline win rate and
+a faster-win speed credit. The online generals.io service is not used.
+
 ## BBOPlace ISPD2005
 
 This VLSI placement problem asks agents to generate macro placement candidates
diff --git a/2.0/problems/generals_io_bot/config.yaml b/2.0/problems/generals_io_bot/config.yaml
new file mode 100644
index 00000000..acb60a55
--- /dev/null
+++ b/2.0/problems/generals_io_bot/config.yaml
@@ -0,0 +1,47 @@
+# Task definition for the Generals.io bot arena problem.
+tag: games
+runtime:
+  language: patch
+  timeout_seconds: 10800
+  environment: "Generals.io bot patch; local generals-bots simulator arena"
+  apt_packages:
+    - bash
+    - ca-certificates
+    - git
+    - python3
+    - python3-pip
+  judge_apt_packages:
+    - bash
+    - ca-certificates
+    - git
+    - python3
+    - python3-pip
+  docker:
+    # Tags end in the short form of evaluation.generals_bots_commit below.
+    image: frontiercs/generals-io-bot-agent:experimental-c2b77bf
+    judge_image: frontiercs/generals-io-bot-judge:experimental-c2b77bf
+environment:
+  cpus: 4
+  memory_mb: 8192
+  storage_mb: 8192
+  build_timeout_seconds: 1800
+# NOTE(review): evaluator.py looks these keys up in the "evaluation" mapping of
+# /judge/task_config.json; presumably that file is generated from this section
+# -- confirm against the harness.
+evaluation:
+  generals_bots_commit: "c2b77bf72812ec91fb2024d80d90112b961dfa7e"
+  arena_seed: 20260608
+  games_per_matchup: 1
+  async_start_method: spawn
+  # Wall-clock budget for the arena; evaluator.py defaults to 240 when unset.
+  max_eval_seconds: 240
+  # Passed to GeneralsEnv as `truncation`; also the denominator of the speed credit.
+  truncation: 180
+  pool_size: 2
+  # Clamped to [0, 1] by the evaluator:
+  # score = 100 * ((1 - speed_weight) * mean win rate + speed_weight * speed credit).
+  speed_weight: 0.25
+  grid_sizes:
+    - 10
+  # Each name must be one that evaluator.py's _make_hidden_baseline recognises.
+  baselines:
+    - random_low_split
+    - expander
+    - strongest_frontier
+    - hunter
+    - fast_pathing
+    - flobot_fast
+submission:
+  kind: file
+  path: /app/solution.patch
+  # The evaluator accepts an empty patch and scores the unmodified skeleton.
+  allow_empty: true
diff --git a/2.0/problems/generals_io_bot/docker/README.md b/2.0/problems/generals_io_bot/docker/README.md
new file mode 100644
index 00000000..460991b3
--- /dev/null
+++ b/2.0/problems/generals_io_bot/docker/README.md
@@ -0,0 +1,14 @@
+# Generals.io Bot Images
+
+Build the task-specific Harbor images before running a local Harbor trial:
+
+```bash
+bash 2.0/problems/generals_io_bot/docker/build_images.sh
+```
+
+The images install `strakam/generals-bots` from pinned commit
+`c2b77bf72812ec91fb2024d80d90112b961dfa7e` plus explicit CPU `jax/jaxlib`
+dependencies used by the simulator.
+
+- Agent image: exposes `/app/generals_agent` as a git checkout for the agent.
+- Judge image: keeps a clean copy at `/opt/generals-agent-clean` for patch application.
diff --git a/2.0/problems/generals_io_bot/docker/agent/Dockerfile b/2.0/problems/generals_io_bot/docker/agent/Dockerfile
new file mode 100644
index 00000000..1054a7c8
--- /dev/null
+++ b/2.0/problems/generals_io_bot/docker/agent/Dockerfile
@@ -0,0 +1,27 @@
+# syntax=docker/dockerfile:1.7
+# Agent image: Python 3.11 with the pinned generals-bots simulator installed and
+# the bot skeleton committed as a git repository at /app/generals_agent.
+FROM python:3.11-slim
+
+# Simulator commit to install; build_images.sh overrides this via --build-arg.
+ARG GENERALS_BOTS_COMMIT=c2b77bf72812ec91fb2024d80d90112b961dfa7e
+
+# Request single-threaded numeric backends (XLA, OpenMP, OpenBLAS, MKL, NumExpr).
+ENV XLA_FLAGS="--xla_cpu_multi_thread_eigen=false intra_op_parallelism_threads=1" \
+    OMP_NUM_THREADS=1 \
+    OPENBLAS_NUM_THREADS=1 \
+    MKL_NUM_THREADS=1 \
+    NUMEXPR_NUM_THREADS=1
+
+# git is needed both for the pip VCS install below and for the skeleton checkout.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends bash ca-certificates git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Simulator at the pinned commit plus CPU-only JAX.
+RUN pip install --no-cache-dir \
+    "git+https://github.com/strakam/generals-bots.git@${GENERALS_BOTS_COMMIT}" \
+    "jax[cpu]>=0.4.30" \
+    "jaxlib>=0.4.30"
+
+WORKDIR /app
+# The COPY source is relative to the build context (the task directory).
+COPY harbor/app/generals_agent /app/generals_agent
+# Commit the pristine skeleton as "base"; presumably so the agent can produce
+# its solution patch as a diff against this commit -- confirm with task docs.
+RUN git -C /app/generals_agent init -q && \
+    git -C /app/generals_agent config user.email frontier-cs@example.invalid && \
+    git -C /app/generals_agent config user.name "Frontier-CS" && \
+    git -C /app/generals_agent add . && \
+    git -C /app/generals_agent commit -q -m base
diff --git a/2.0/problems/generals_io_bot/docker/build_images.sh b/2.0/problems/generals_io_bot/docker/build_images.sh
new file mode 100755
index 00000000..8eb1ebae
--- /dev/null
+++ b/2.0/problems/generals_io_bot/docker/build_images.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
+TASK_DIR="$ROOT/2.0/problems/generals_io_bot"
+COMMIT="${GENERALS_BOTS_COMMIT:-c2b77bf72812ec91fb2024d80d90112b961dfa7e}"
+SHORT="${COMMIT:0:7}"
+AGENT_TAG="${AGENT_TAG:-frontiercs/generals-io-bot-agent:experimental-${SHORT}}"
+JUDGE_TAG="${JUDGE_TAG:-frontiercs/generals-io-bot-judge:experimental-${SHORT}}"
+
+docker build \
+  --build-arg GENERALS_BOTS_COMMIT="$COMMIT" \
+  -f "$TASK_DIR/docker/agent/Dockerfile" \
+  -t "$AGENT_TAG" \
+  "$TASK_DIR"
+
+docker build \
+  --build-arg GENERALS_BOTS_COMMIT="$COMMIT" \
+  -f "$TASK_DIR/docker/judge/Dockerfile" \
+  -t "$JUDGE_TAG" \
+  "$TASK_DIR"
diff --git a/2.0/problems/generals_io_bot/docker/judge/Dockerfile b/2.0/problems/generals_io_bot/docker/judge/Dockerfile
new file mode 100644
index 00000000..547024dc
--- /dev/null
+++ b/2.0/problems/generals_io_bot/docker/judge/Dockerfile
@@ -0,0 +1,28 @@
+# syntax=docker/dockerfile:1.7
+# Judge image: Python 3.11 with the pinned generals-bots simulator installed and
+# a pristine copy of the bot skeleton at /opt/generals-agent-clean.
+FROM python:3.11-slim
+
+# Simulator commit to install; build_images.sh overrides this via --build-arg.
+ARG GENERALS_BOTS_COMMIT=c2b77bf72812ec91fb2024d80d90112b961dfa7e
+
+# Request single-threaded numeric backends (XLA, OpenMP, OpenBLAS, MKL, NumExpr).
+ENV XLA_FLAGS="--xla_cpu_multi_thread_eigen=false intra_op_parallelism_threads=1" \
+    OMP_NUM_THREADS=1 \
+    OPENBLAS_NUM_THREADS=1 \
+    MKL_NUM_THREADS=1 \
+    NUMEXPR_NUM_THREADS=1
+
+# git is needed for the pip VCS install below; the evaluator also shells out to it.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends bash ca-certificates git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Simulator at the pinned commit plus CPU-only JAX.
+RUN pip install --no-cache-dir \
+    "git+https://github.com/strakam/generals-bots.git@${GENERALS_BOTS_COMMIT}" \
+    "jax[cpu]>=0.4.30" \
+    "jaxlib>=0.4.30"
+
+# evaluator.py copies this directory (excluding .git) before applying a patch.
+COPY harbor/app/generals_agent /opt/generals-agent-clean
+RUN git -C /opt/generals-agent-clean init -q && \
+    git -C /opt/generals-agent-clean config user.email frontier-cs@example.invalid && \
+    git -C /opt/generals-agent-clean config user.name "Frontier-CS Judge" && \
+    git -C /opt/generals-agent-clean add . && \
+    git -C /opt/generals-agent-clean commit -q -m base
+
+WORKDIR /judge
diff --git a/2.0/problems/generals_io_bot/evaluate.sh b/2.0/problems/generals_io_bot/evaluate.sh
new file mode 100755
index 00000000..9ae52639
--- /dev/null
+++ b/2.0/problems/generals_io_bot/evaluate.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Thin wrapper: runs the evaluator.py that sits next to this script on the
+# solution patch given as the first argument.
+# Usage: evaluate.sh SOLUTION_PATCH
+set -euo pipefail
+# ${1:?...} turns a missing argument into a readable usage error instead of the
+# bare "unbound variable" message that `set -u` would otherwise produce.
+python3 "$(dirname "$0")/evaluator.py" "${1:?usage: evaluate.sh SOLUTION_PATCH}"
diff --git a/2.0/problems/generals_io_bot/evaluator.py b/2.0/problems/generals_io_bot/evaluator.py
new file mode 100644
index 00000000..11135760
--- /dev/null
+++ b/2.0/problems/generals_io_bot/evaluator.py
@@ -0,0 +1,802 @@
+"""Evaluator for the Frontier-CS 2.0 Generals.io bot arena task."""
+
+from __future__ import annotations
+
+import importlib.util
+import ast
+import json
+import os
+import re
+import shutil
+import signal
+import subprocess
+import sys
+import tempfile
+from functools import partial
+from pathlib import Path
+from typing import Any
+
+import jax
+import jax.numpy as jnp
+import jax.random as jrandom
+from generals import GeneralsEnv, get_observation
+from generals.agents import Agent, ExpanderAgent, HunterAgent, RandomAgent
+from generals.core.action import compute_valid_move_mask_obs
+
+# (row, col) offsets of the four neighbours: row-1, row+1, col-1, col+1.
+_DIRECTIONS = jnp.array([[-1, 0], [1, 0], [0, -1], [0, 1]], dtype=jnp.int32)
+
+# Limits enforced on a submitted patch by _validate_patch.
+MAX_PATCH_BYTES = 500_000
+MAX_CHANGED_FILES = 20
+# JSON task configuration; when it cannot be read the evaluator uses defaults.
+TASK_CONFIG_PATH = Path("/judge/task_config.json")
+# Clean skeleton to patch: the judge-image location, with an in-repo fallback
+# used when that directory does not exist (see _clean_source_dir).
+DEFAULT_CLEAN_SOURCE = Path("/opt/generals-agent-clean")
+LOCAL_CLEAN_SOURCE = Path(__file__).parent / "harbor" / "app" / "generals_agent"
+
+# The only paths a patch may touch (_validate_patch); these are also the files
+# scanned by _validate_candidate_source after the patch has been applied.
+ALLOWED_FILES = {
+    "bot.py",
+    "strategy.py",
+    "utils.py",
+}
+# Substrings rejected anywhere in the raw patch text. Matching is
+# case-insensitive and covers the whole patch (headers and context lines
+# included), and the first matching entry, in this order, is the one reported.
+DENIED_TOKENS = (
+    "import os",
+    "from os",
+    "import sys",
+    "from sys",
+    "subprocess",
+    "import socket",
+    "from socket",
+    "socket",
+    "import requests",
+    "from requests",
+    "requests",
+    "import urllib",
+    "from urllib",
+    "urllib",
+    "urllib3",
+    "httpx",
+    "aiohttp",
+    "websocket",
+    "socketio",
+    "generals.remote",
+    "http.client",
+    "ftplib",
+    "open(",
+    "io.",
+    "import pathlib",
+    "from pathlib",
+    "pathlib",
+    "Path(",
+    "read_text",
+    "read_bytes",
+    "write_text",
+    "write_bytes",
+    "os.environ",
+    "os.getenv",
+    "__import__",
+    "importlib",
+    "eval(",
+    "exec(",
+    "compile(",
+)
+# Top-level module names that the patched sources may not import.
+DENIED_IMPORT_ROOTS = {
+    "builtins",
+    "ftplib",
+    "http",
+    "httpx",
+    "importlib",
+    "io",
+    "os",
+    "pathlib",
+    "requests",
+    "socket",
+    "subprocess",
+    "sys",
+    "urllib",
+    "urllib3",
+    "websocket",
+}
+# Dotted module paths rejected when imported exactly or as a parent package.
+DENIED_IMPORT_PREFIXES = {
+    "generals.remote",
+}
+# Bare names that may not be called directly, e.g. ``eval(...)``.
+DENIED_CALL_NAMES = {
+    "__import__",
+    "breakpoint",
+    "compile",
+    "delattr",
+    "dir",
+    "eval",
+    "exec",
+    "getattr",
+    "globals",
+    "help",
+    "input",
+    "locals",
+    "open",
+    "setattr",
+    "vars",
+}
+# Attribute names rejected on any attribute access, whatever the receiver is.
+DENIED_ATTR_NAMES = {
+    "environ",
+    "getenv",
+    "popen",
+    "read_bytes",
+    "read_text",
+    "remove",
+    "rename",
+    "replace",
+    "rmdir",
+    "system",
+    "unlink",
+    "write_bytes",
+    "write_text",
+}
+# Introspection dunder attributes rejected on any attribute access.
+DENIED_DUNDER_ATTRS = {
+    "__bases__",
+    "__class__",
+    "__code__",
+    "__dict__",
+    "__getattribute__",
+    "__globals__",
+    "__mro__",
+    "__subclasses__",
+}
+
+# Baseline opponent names; each is one that _make_hidden_baseline can build.
+# NOTE(review): presumably the fallback when the task config lists no
+# baselines -- the use site is outside this view, confirm.
+DEFAULT_BASELINES = (
+    "random_low_split",
+    "expander",
+    "strongest_frontier",
+    "hunter",
+    "fast_pathing",
+    "flobot_fast",
+)
+
+
+class _EvaluationTimeout(Exception):
+    pass
+
+
+def _load_task_config() -> dict[str, Any]:
+    try:
+        payload = json.loads(TASK_CONFIG_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        return {}
+    return payload if isinstance(payload, dict) else {}
+
+
+# Loaded once at import time. EVALUATION_CONFIG is the "evaluation" mapping of
+# the task config, or {} when that key is missing or is not a dict.
+TASK_CONFIG = _load_task_config()
+EVALUATION_CONFIG = (
+    TASK_CONFIG.get("evaluation", {})
+    if isinstance(TASK_CONFIG.get("evaluation"), dict)
+    else {}
+)
+
+
+def _config_int(name: str, default: int) -> int:
+    try:
+        return int(EVALUATION_CONFIG.get(name, default))
+    except Exception:
+        return default
+
+
+def _config_float(name: str, default: float) -> float:
+    try:
+        return float(EVALUATION_CONFIG.get(name, default))
+    except Exception:
+        return default
+
+
+def _config_tuple_int(name: str, default: tuple[int, ...]) -> tuple[int, ...]:
+    raw = EVALUATION_CONFIG.get(name, default)
+    if isinstance(raw, list):
+        return tuple(int(x) for x in raw)
+    if isinstance(raw, tuple):
+        return tuple(int(x) for x in raw)
+    return default
+
+
+def _config_tuple_str(name: str, default: tuple[str, ...]) -> tuple[str, ...]:
+    raw = EVALUATION_CONFIG.get(name, default)
+    if isinstance(raw, list):
+        return tuple(str(x) for x in raw)
+    if isinstance(raw, tuple):
+        return tuple(str(x) for x in raw)
+    return default
+
+
+def _clean_source_dir() -> Path:
+    if DEFAULT_CLEAN_SOURCE.exists():
+        return DEFAULT_CLEAN_SOURCE
+    return LOCAL_CLEAN_SOURCE
+
+
+def _run(cmd: list[str], *, cwd: Path) -> subprocess.CompletedProcess[str]:
+    return subprocess.run(
+        cmd,
+        cwd=cwd,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        timeout=60,
+        check=False,
+    )
+
+
+def _changed_files(patch_path: Path) -> list[str]:
+    result = _run(["git", "diff", "--name-only", "--no-index", "--", "/dev/null", str(patch_path)], cwd=Path("/tmp"))
+    del result
+    text = patch_path.read_text(encoding="utf-8", errors="replace")
+    paths: list[str] = []
+    for line in text.splitlines():
+        if line.startswith("+++ b/"):
+            path = line.removeprefix("+++ b/").strip()
+            if path != "/dev/null" and path not in paths:
+                paths.append(path)
+    return paths
+
+
+def _validate_patch(patch_path: Path) -> tuple[bool, str, list[str]]:
+    try:
+        data = patch_path.read_bytes()
+    except Exception as exc:
+        return False, f"could not read patch: {exc}", []
+    if not data.strip():
+        return True, "baseline skeleton", []
+    if len(data) > MAX_PATCH_BYTES:
+        return False, f"patch too large: {len(data)} bytes > {MAX_PATCH_BYTES}", []
+    if b"\x00" in data:
+        return False, "binary patches are not allowed", []
+
+    text = data.decode("utf-8", errors="replace")
+    if re.search(r"^GIT binary patch$", text, flags=re.MULTILINE):
+        return False, "binary patches are not allowed", []
+
+    paths = _changed_files(patch_path)
+    if not paths:
+        return False, "patch does not modify any tracked file", []
+    if len(paths) > MAX_CHANGED_FILES:
+        return False, f"too many changed files: {len(paths)} > {MAX_CHANGED_FILES}", paths
+    for path in paths:
+        normalized = Path(path)
+        if normalized.is_absolute() or ".." in normalized.parts:
+            return False, f"unsafe patch path: {path}", paths
+        if path not in ALLOWED_FILES:
+            return False, f"patch may only modify {sorted(ALLOWED_FILES)}; got {path}", paths
+
+    lowered = text.lower()
+    for token in DENIED_TOKENS:
+        if token.lower() in lowered:
+            return False, f"patch contains denied token: {token}", paths
+    return True, "ok", paths
+
+
+def _validate_candidate_source(work: Path) -> tuple[bool, str]:
+    for filename in sorted(ALLOWED_FILES):
+        path = work / filename
+        if not path.exists():
+            continue
+        try:
+            tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
+        except SyntaxError:
+            return False, f"{filename} has invalid Python syntax"
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                for alias in node.names:
+                    root = alias.name.split(".", 1)[0]
+                    if root in DENIED_IMPORT_ROOTS or any(
+                        alias.name == prefix or alias.name.startswith(prefix + ".")
+                        for prefix in DENIED_IMPORT_PREFIXES
+                    ):
+                        return False, f"{filename} imports denied module: {alias.name}"
+            elif isinstance(node, ast.ImportFrom):
+                module = node.module or ""
+                root = module.split(".", 1)[0]
+                if root in DENIED_IMPORT_ROOTS or any(
+                    module == prefix or module.startswith(prefix + ".")
+                    for prefix in DENIED_IMPORT_PREFIXES
+                ):
+                    return False, f"{filename} imports denied module: {module}"
+            elif isinstance(node, ast.Name):
+                if node.id == "__builtins__" or (node.id.startswith("__") and node.id.endswith("__")):
+                    return False, f"{filename} uses denied dynamic name: {node.id}"
+            elif isinstance(node, ast.Attribute):
+                if node.attr in DENIED_ATTR_NAMES or node.attr in DENIED_DUNDER_ATTRS:
+                    return False, f"{filename} uses denied attribute: {node.attr}"
+            elif isinstance(node, ast.Call):
+                func = node.func
+                if isinstance(func, ast.Name) and func.id in DENIED_CALL_NAMES:
+                    return False, f"{filename} calls denied function: {func.id}"
+                if isinstance(func, ast.Attribute) and func.attr in DENIED_ATTR_NAMES:
+                    return False, f"{filename} calls denied method: {func.attr}"
+    return True, "ok"
+
+
+def _prepare_candidate(patch_path: Path) -> tuple[Path | None, str]:
+    ok, message, _ = _validate_patch(patch_path)
+    if not ok:
+        return None, message
+
+    clean_source = _clean_source_dir()
+    if not clean_source.exists():
+        return None, f"clean source not found: {clean_source}"
+
+    tmp = Path(tempfile.mkdtemp(prefix="frontier-generals-"))
+    work = tmp / "generals_agent"
+    shutil.copytree(clean_source, work, ignore=shutil.ignore_patterns(".git", "__pycache__"))
+    _run(["git", "init", "-q"], cwd=work)
+    _run(["git", "config", "user.email", "frontier-cs@example.invalid"], cwd=work)
+    _run(["git", "config", "user.name", "Frontier-CS Judge"], cwd=work)
+    _run(["git", "add", "."], cwd=work)
+    _run(["git", "commit", "-q", "-m", "base"], cwd=work)
+
+    if patch_path.read_text(encoding="utf-8", errors="replace").strip():
+        check = _run(["git", "apply", "--check", str(patch_path)], cwd=work)
+        if check.returncode != 0:
+            return None, "patch failed to apply"
+        apply = _run(["git", "apply", str(patch_path)], cwd=work)
+        if apply.returncode != 0:
+            return None, "patch failed to apply"
+    ok, message = _validate_candidate_source(work)
+    if not ok:
+        return None, message
+    return work, "ok"
+
+
+def _load_candidate_factory(work: Path):
+    sys.path.insert(0, str(work))
+    try:
+        spec = importlib.util.spec_from_file_location("frontier_candidate_bot", work / "bot.py")
+        if spec is None or spec.loader is None:
+            raise RuntimeError("could not load bot.py")
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+        cls = getattr(module, "FrontierAgent")
+        return cls
+    finally:
+        try:
+            sys.path.remove(str(work))
+        except ValueError:
+            pass
+
+
+def _shift_grid(values, fill_value, step: int, axis: int):
+    shifted = jnp.roll(values, step, axis)
+    edge = 0 if step == 1 else -1
+    if axis == 0:
+        return shifted.at[edge, :].set(fill_value)
+    return shifted.at[:, edge].set(fill_value)
+
+
+def _bfs_distance(passable, sources):
+    h, w = passable.shape
+    inf = jnp.int32(h * w + 7)
+
+    def relax(_, dist):
+        neighbors = jnp.minimum(
+            jnp.minimum(_shift_grid(dist, inf, 1, 0), _shift_grid(dist, inf, -1, 0)),
+            jnp.minimum(_shift_grid(dist, inf, 1, 1), _shift_grid(dist, inf, -1, 1)),
+        )
+        return jnp.where(
+            sources,
+            0,
+            jnp.where(passable, jnp.minimum(dist, neighbors + 1), inf),
+        )
+
+    return jax.lax.fori_loop(0, h * w, relax, jnp.where(sources, 0, inf))
+
+
+def _best_direction_toward(field, passable):
+    inf = jnp.int32(field.size + 13)
+    values = jnp.stack(
+        [
+            jnp.where(_shift_grid(passable, False, 1, 0), _shift_grid(field, inf, 1, 0), inf),
+            jnp.where(_shift_grid(passable, False, -1, 0), _shift_grid(field, inf, -1, 0), inf),
+            jnp.where(_shift_grid(passable, False, 1, 1), _shift_grid(field, inf, 1, 1), inf),
+            jnp.where(_shift_grid(passable, False, -1, 1), _shift_grid(field, inf, -1, 1), inf),
+        ],
+        axis=0,
+    )
+    return jnp.argmin(values, axis=0).astype(jnp.int32), jnp.min(values, axis=0)
+
+
+class _PathingBaselineAgent(Agent):
+    """Judge-only pathing baseline with tuned hunting and exploration pressure.
+
+    Every turn it picks one goal (a visible enemy general, else visible enemy
+    land, else fog, else unowned reachable land) and moves a single stack one
+    step closer to it along a BFS distance field.
+    """
+
+    def __init__(
+        self,
+        *,
+        id: str = "Pathing",
+        release_threshold: int = 6,
+        scout_far: bool = True,
+        block_neutral_cities: bool = True,
+        convoy_weight: float = 100.0,
+    ):
+        super().__init__(id=id)
+        # Minimum army on the own general before it is moved off the general.
+        self.release_threshold = int(release_threshold)
+        # True: explore toward the farthest fog/open cell; False: the nearest.
+        self.scout_far = bool(scout_far)
+        # True: treat every city not owned by this player as impassable.
+        self.block_neutral_cities = bool(block_neutral_cities)
+        # Weight of army size against goal distance when choosing a convoy stack.
+        self.convoy_weight = float(convoy_weight)
+
+    # `self` is a static jit argument, so the instance attributes read below are
+    # fixed at trace time.
+    @partial(jax.jit, static_argnums=0)
+    def act(self, observation, key):
+        """Return one action as a length-5 int32 array; ``key`` is unused."""
+        del key
+        army = observation.armies
+        mine = observation.owned_cells
+        h, w = army.shape
+        # Same sentinel that _bfs_distance uses for unreachable cells.
+        reach = jnp.int32(h * w + 7)
+        city_block = observation.cities & ~mine if self.block_neutral_cities else jnp.zeros_like(mine)
+        passable = ~(observation.mountains | observation.structures_in_fog | city_block)
+        # Only owned cells holding more than one army are considered as sources.
+        movable = mine & (army > 1)
+        mine_army = jnp.where(mine, army, 0)
+
+        own_general = mine & observation.generals
+        own_general_army = jnp.sum(jnp.where(own_general, army, 0))
+        own_general_idx = jnp.argmax(own_general.reshape(-1).astype(jnp.int32))
+        dist_from_general = _bfs_distance(passable, own_general)
+
+        enemy_general = observation.opponent_cells & observation.generals
+        enemy_land = observation.opponent_cells & ~observation.cities
+        # Candidate exploration targets are limited to cells reachable from the general.
+        fog = observation.fog_cells & passable & (dist_from_general < reach)
+        open_land = passable & ~mine & (dist_from_general < reach)
+
+        def farthest(mask):
+            # Cells of `mask` at the maximum distance from the own general.
+            return mask & (dist_from_general == jnp.max(jnp.where(mask, dist_from_general, -1)))
+
+        def nearest(mask):
+            # Cells of `mask` at the minimum distance from the own general.
+            return mask & (dist_from_general == jnp.min(jnp.where(mask, dist_from_general, reach)))
+
+        fog_goal = farthest(fog) if self.scout_far else nearest(fog)
+        open_goal = farthest(open_land) if self.scout_far else nearest(open_land)
+        # Goal priority: enemy general > enemy land > fog > open land.
+        goal = jnp.where(
+            jnp.any(enemy_general),
+            enemy_general,
+            jnp.where(jnp.any(enemy_land), enemy_land, jnp.where(jnp.any(fog), fog_goal, open_goal)),
+        )
+
+        dist_to_goal = _bfs_distance(passable, goal)
+        direction, neighbor_dist = _best_direction_toward(dist_to_goal, passable)
+        # A cell "advances" when its best neighbour is strictly closer to the goal.
+        advances = neighbor_dist < dist_to_goal
+        flat_direction = direction.reshape(-1)
+
+        enemy_general_army = jnp.sum(jnp.where(enemy_general, army, 0))
+        # Stacks adjacent to a visible enemy general whose army, minus the one
+        # unit kept out of the comparison, exceeds the general's army.
+        killing_move = (
+            jnp.any(enemy_general)
+            & movable
+            & (dist_to_goal == 1)
+            & advances
+            & (army - 1 > enemy_general_army)
+        )
+        kill_idx = jnp.argmax(jnp.where(killing_move, mine_army, -1).reshape(-1))
+
+        feed_from_general = (own_general_army >= self.release_threshold) & advances.reshape(-1)[own_general_idx]
+        convoy_move = movable & ~own_general & advances
+        # Prefer large stacks; distance to the goal is subtracted as a penalty.
+        convoy_score = mine_army.astype(jnp.float32) * self.convoy_weight - dist_to_goal.astype(jnp.float32)
+        convoy_idx = jnp.argmax(jnp.where(convoy_move, convoy_score, -1.0).reshape(-1))
+
+        # Action priority: kill the general, else feed from own general, else convoy.
+        do_kill = jnp.any(killing_move)
+        do_feed = (~do_kill) & feed_from_general
+        do_convoy = (~do_kill) & (~do_feed) & jnp.any(convoy_move)
+        idx = jnp.where(do_kill, kill_idx, jnp.where(do_feed, own_general_idx, convoy_idx))
+        should_pass = ~(do_kill | do_feed | do_convoy)
+        # NOTE(review): layout is presumably [pass, row, col, direction, split],
+        # with the last slot set only for feed moves -- confirm against the
+        # generals action format.
+        return jnp.array(
+            [should_pass, idx // w, idx % w, flat_direction[idx], do_feed],
+            dtype=jnp.int32,
+        )
+
+
+class _FlobotStyleAgent(Agent):
+    """Judge-only Flobot-inspired baseline: spread, infiltrate, then end-game push.
+
+    Every turn it picks one goal and moves a single stack one step closer to it
+    along a BFS distance field. Unlike the pathing baseline, cities are not
+    treated as obstacles and the nearest neutral city can become the goal.
+    """
+
+    def __init__(self, *, id: str = "FlobotStyle", early_threshold: int = 12):
+        super().__init__(id=id)
+        # General army size that unlocks both city targeting and launching
+        # from the own general.
+        self.early_threshold = int(early_threshold)
+
+    # `self` is a static jit argument, so early_threshold is fixed at trace time.
+    @partial(jax.jit, static_argnums=0)
+    def act(self, observation, key):
+        """Return one action as a length-5 int32 array; ``key`` is unused."""
+        del key
+        army = observation.armies
+        mine = observation.owned_cells
+        h, w = army.shape
+        # Same sentinel that _bfs_distance uses for unreachable cells.
+        reach = jnp.int32(h * w + 7)
+        passable = ~(observation.mountains | observation.structures_in_fog)
+        # Only owned cells holding more than one army are considered as sources.
+        movable = mine & (army > 1)
+        mine_army = jnp.where(mine, army, 0)
+
+        own_general = mine & observation.generals
+        own_general_idx = jnp.argmax(own_general.reshape(-1).astype(jnp.int32))
+        own_general_army = jnp.sum(jnp.where(own_general, army, 0))
+        dist_from_general = _bfs_distance(passable, own_general)
+
+        enemy_general = observation.opponent_cells & observation.generals
+        enemy_land = observation.opponent_cells & ~observation.cities
+        visible_enemy = enemy_general | enemy_land
+        neutral_city = observation.cities & ~mine & ~observation.opponent_cells
+        # Candidate exploration targets are limited to cells reachable from the general.
+        fog = observation.fog_cells & passable & (dist_from_general < reach)
+        open_land = passable & ~mine & (dist_from_general < reach)
+
+        # Candidate goals: farthest fog, enemy cells plus the nearest fog,
+        # the nearest neutral city, and the farthest unowned reachable cell.
+        far_fog = fog & (dist_from_general == jnp.max(jnp.where(fog, dist_from_general, -1)))
+        border_target = visible_enemy | (fog & (dist_from_general == jnp.min(jnp.where(fog, dist_from_general, reach))))
+        economy_target = neutral_city & (dist_from_general == jnp.min(jnp.where(neutral_city, dist_from_general, reach)))
+        spread_target = open_land & (dist_from_general == jnp.max(jnp.where(open_land, dist_from_general, -1)))
+        # Goal priority: enemy general > border (when enemy land is visible) >
+        # nearest neutral city (once the general army reaches early_threshold) >
+        # farthest fog > farthest open land.
+        goal = jnp.where(
+            jnp.any(enemy_general),
+            enemy_general,
+            jnp.where(
+                jnp.any(enemy_land),
+                border_target,
+                jnp.where(jnp.any(neutral_city) & (own_general_army >= self.early_threshold), economy_target, jnp.where(jnp.any(fog), far_fog, spread_target)),
+            ),
+        )
+
+        dist_to_goal = _bfs_distance(passable, goal)
+        direction, neighbor_dist = _best_direction_toward(dist_to_goal, passable)
+        # A cell "advances" when its best neighbour is strictly closer to the goal.
+        advances = neighbor_dist < dist_to_goal
+        flat_direction = direction.reshape(-1)
+
+        enemy_general_army = jnp.sum(jnp.where(enemy_general, army, 0))
+        # End-game push: a stack whose army, less one, exceeds the enemy
+        # general's army plus its own distance to the goal.
+        can_end = (
+            jnp.any(enemy_general)
+            & movable
+            & advances
+            & ((army - 1) > (enemy_general_army + dist_to_goal))
+        )
+        end_idx = jnp.argmax(jnp.where(can_end, mine_army - dist_to_goal, -1).reshape(-1))
+
+        early_launch = (own_general_army >= self.early_threshold) & advances.reshape(-1)[own_general_idx]
+        spread = movable & advances
+        # Owned cells within two steps of the goal have their army counted double.
+        border = mine & (dist_to_goal <= 2)
+        spread_score = mine_army.astype(jnp.float32) * jnp.where(border, 2.0, 1.0) - dist_to_goal.astype(jnp.float32)
+        spread_idx = jnp.argmax(jnp.where(spread, spread_score, -1.0).reshape(-1))
+
+        # Action priority: end-game push, else launch from own general, else spread.
+        do_end = jnp.any(can_end)
+        do_early = (~do_end) & early_launch
+        do_spread = (~do_end) & (~do_early) & jnp.any(spread)
+        idx = jnp.where(do_end, end_idx, jnp.where(do_early, own_general_idx, spread_idx))
+        should_pass = ~(do_end | do_early | do_spread)
+        # NOTE(review): layout is presumably [pass, row, col, direction, split],
+        # with the last slot set only for launches from the general -- confirm
+        # against the generals action format.
+        return jnp.array(
+            [should_pass, idx // w, idx % w, flat_direction[idx], do_early],
+            dtype=jnp.int32,
+        )
+
+
+class _HiddenFrontierAgent(Agent):
+    """Judge-only greedy baseline that scores every valid move and plays the best."""
+
+    def __init__(self, id: str = "Frontier"):
+        super().__init__(id=id)
+
+    def act(self, observation, key):
+        """Return the highest-scoring capturing move, or a pass; ``key`` is unused."""
+        del key
+        valid = compute_valid_move_mask_obs(observation)
+        h, w = observation.armies.shape
+        # Fixed-size list of valid moves, padded with -1 rows.
+        # NOTE(review): assumes `valid` has shape (h, w, 4), so each row is
+        # (row, col, direction) -- confirm against generals.core.action.
+        positions = jnp.argwhere(valid, size=h * w * 4, fill_value=-1)
+        num_valid = jnp.sum(jnp.all(positions >= 0, axis=-1))
+
+        def score_move(idx):
+            move = positions[idx]
+            # False for the -1 padding rows.
+            ok = jnp.all(move >= 0)
+            r, c, d = move[0], move[1], move[2]
+            # Direction index order matches _DIRECTIONS: row-1, row+1, col-1, col+1.
+            dr = jnp.array([-1, 1, 0, 0], dtype=jnp.int32)[d]
+            dc = jnp.array([0, 0, -1, 1], dtype=jnp.int32)[d]
+            # Clip so that padding rows still index inside the grid.
+            nr = jnp.clip(r + dr, 0, h - 1)
+            nc = jnp.clip(c + dc, 0, w - 1)
+            dest_owned = observation.owned_cells[nr, nc]
+            dest_opponent = observation.opponent_cells[nr, nc]
+            dest_neutral = observation.neutral_cells[nr, nc]
+            dest_general = observation.generals[nr, nc] & dest_opponent
+            source_army = observation.armies[r, c]
+            dest_army = observation.armies[nr, nc]
+            can_capture = source_army > dest_army + 1
+            # Base value is the source army, with bonuses for neutral cells,
+            # opponent cells and, above all, the opponent's general.
+            value = source_army.astype(jnp.float32)
+            value += jnp.where(dest_neutral, 20.0, 0.0)
+            value += jnp.where(dest_opponent, 60.0, 0.0)
+            value += jnp.where(dest_general, 10000.0, 0.0)
+            # Moves onto cells this player already owns are scaled down.
+            value = jnp.where(dest_owned, value * 0.1, value)
+            # Padding rows and moves failing the army comparison score -1.
+            value = jnp.where(can_capture & ok, value, -1.0)
+            return value
+
+        scores = jax.vmap(score_move)(jnp.arange(h * w * 4))
+        best = jnp.argmax(scores)
+        move = positions[best]
+        # Pass when there is no valid move or no move reached a non-negative score.
+        should_pass = (num_valid == 0) | (scores[best] < 0)
+        # The last slot is always 0 for this agent.
+        return jnp.array([should_pass, move[0], move[1], move[2], 0], dtype=jnp.int32)
+
+
+def _make_hidden_baseline(name: str, *, player: int):
+    if name == "random_low_split":
+        return RandomAgent(id=f"RandomLowSplit-{player}", idle_prob=0.03, split_prob=0.10)
+    if name == "random_high_split":
+        return RandomAgent(id=f"RandomHighSplit-{player}", idle_prob=0.08, split_prob=0.45)
+    if name == "expander":
+        return ExpanderAgent(id=f"Expander-{player}")
+    if name in {"hunter", "hunter_pressure", "hunter_mirror"}:
+        return HunterAgent(id=f"Hunter-{player}")
+    if name == "fast_pathing":
+        return _PathingBaselineAgent(id=f"FastPathing-{player}", release_threshold=6, scout_far=True)
+    if name == "near_scout_pathing":
+        return _PathingBaselineAgent(id=f"NearScoutPathing-{player}", release_threshold=8, scout_far=False)
+    if name == "flobot_style":
+        return _FlobotStyleAgent(id=f"FlobotStyle-{player}", early_threshold=12)
+    if name == "flobot_fast":
+        return _FlobotStyleAgent(id=f"FlobotFast-{player}", early_threshold=8)
+    if name == "strongest_frontier":
+        return _HiddenFrontierAgent(id=f"Frontier-{player}")
+    raise ValueError(f"unknown baseline {name!r}")
+
+
+def _play_hidden_game(
+    candidate_factory,
+    *,
+    baseline_name: str,
+    candidate_player: int,
+    seed: int,
+    grid_size: int,
+    truncation: int,
+    pool_size: int,
+) -> tuple[bool, int]:
+    env = GeneralsEnv(grid_dims=(grid_size, grid_size), truncation=truncation, pool_size=pool_size)
+    key = jrandom.PRNGKey(seed)
+    pool, state = env.reset(key)
+    candidate = candidate_factory()
+    baseline = _make_hidden_baseline(baseline_name, player=1 - candidate_player)
+
+    agents = [None, None]
+    agents[candidate_player] = candidate
+    agents[1 - candidate_player] = baseline
+    for agent in agents:
+        reset = getattr(agent, "reset", None)
+        if callable(reset):
+            reset()
+
+    terminated = truncated = False
+    turns = 0
+    while not (terminated or truncated):
+        obs_0 = get_observation(state, 0)
+        obs_1 = get_observation(state, 1)
+        key, k0, k1 = jrandom.split(key, 3)
+        actions = jnp.stack([agents[0].act(obs_0, k0), agents[1].act(obs_1, k1)])
+        timestep, state = env.step(state, actions, pool)
+        terminated = bool(timestep.terminated)
+        truncated = bool(timestep.truncated)
+        turns += 1
+
+    return int(timestep.info.winner) == candidate_player, turns
+
+
+def _evaluate_hidden_agent(
+    candidate_factory,
+    *,
+    games_per_matchup: int,
+    seed: int,
+    grid_sizes: tuple[int, ...],
+    truncation: int,
+    baselines: tuple[str, ...],
+    pool_size: int,
+    speed_weight: float,
+) -> dict[str, Any]:
+    """Play the candidate against every baseline from both seats and aggregate the metrics.
+
+    Speed credit accrues only on wins, as ``(truncation - turns) / truncation`` floored at 0.
+    """
+    total = 0
+    wins = 0
+    speed_credit = 0.0
+    by_baseline: dict[str, dict[str, float]] = {}
+
+    for baseline in baselines:
+        b_total = b_wins = 0
+        # The name-derived offset is constant per baseline, so fold it into the seed once here.
+        baseline_seed = seed + 7919 * sum((idx + 1) * ord(ch) for idx, ch in enumerate(baseline))
+        for grid_size in grid_sizes:
+            for game_idx in range(games_per_matchup):
+                for candidate_player in (0, 1):
+                    game_seed = baseline_seed + 100003 * game_idx + 1009 * grid_size + 17 * candidate_player
+                    candidate_won, turns = _play_hidden_game(
+                        candidate_factory,
+                        baseline_name=baseline,
+                        candidate_player=candidate_player,
+                        seed=game_seed,
+                        grid_size=grid_size,
+                        truncation=truncation,
+                        pool_size=pool_size,
+                    )
+                    total += 1
+                    b_total += 1
+                    if candidate_won:
+                        wins += 1
+                        b_wins += 1
+                        speed_credit += max(0.0, (truncation - turns) / truncation)
+        by_baseline[baseline] = {
+            "games": float(b_total),
+            "wins": float(b_wins),
+            "win_rate": float(b_wins / b_total if b_total else 0.0),
+        }
+
+    win_rate = wins / total if total else 0.0
+    speed_tiebreak = speed_credit / total if total else 0.0
+    baseline_rates = [item["win_rate"] for item in by_baseline.values()]
+    mean_baseline_win_rate = sum(baseline_rates) / len(baseline_rates) if baseline_rates else 0.0
+    speed_weight = max(0.0, min(1.0, speed_weight))
+    win_weight = 1.0 - speed_weight
+    score = 100.0 * (win_weight * mean_baseline_win_rate + speed_weight * speed_tiebreak)
+    return {
+        "score": score,
+        "win_rate": win_rate,
+        "mean_baseline_win_rate": mean_baseline_win_rate,
+        "speed_tiebreak": speed_tiebreak,
+        "win_weight": win_weight,
+        "speed_weight": speed_weight,
+        "games": float(total),
+        "wins": float(wins),
+        "by_baseline": by_baseline,
+    }
+
+
+def evaluate(solution_path: str) -> tuple[float, float, str]:
+    """Score a submitted patch in the hidden arena under a wall-clock budget.
+
+    Returns ``(score, score_unbounded, message)``: both scores are the same value
+    clamped to [0, 100], and ``message`` is a JSON string with status or metrics.
+    """
+    patch_path = Path(solution_path).resolve()
+    work, prep_message = _prepare_candidate(patch_path)
+    if work is None:
+        return 0.0, 0.0, json.dumps({"status": "invalid", "reason": prep_message}, sort_keys=True)
+
+    try:
+        candidate_factory = _load_candidate_factory(work)
+        timeout_seconds = max(1, _config_int("max_eval_seconds", 240))
+
+        def _raise_timeout(_signum, _frame):
+            raise _EvaluationTimeout()
+
+        # Install the handler before arming the timer: SIGALRM's default action kills the process.
+        previous_handler = signal.signal(signal.SIGALRM, _raise_timeout)
+        previous_timer = (0.0, 0.0)
+        try:
+            previous_timer = signal.setitimer(signal.ITIMER_REAL, timeout_seconds)
+            metrics = _evaluate_hidden_agent(
+                candidate_factory,
+                games_per_matchup=max(1, _config_int("games_per_matchup", 1)),
+                seed=_config_int("arena_seed", 20260608),
+                grid_sizes=_config_tuple_int("grid_sizes", (10,)),
+                truncation=_config_int("truncation", 180),
+                baselines=_config_tuple_str("baselines", DEFAULT_BASELINES),
+                pool_size=max(2, _config_int("pool_size", 2)),
+                speed_weight=_config_float("speed_weight", 0.25),
+            )
+        finally:
+            signal.setitimer(signal.ITIMER_REAL, *previous_timer)
+            signal.signal(signal.SIGALRM, previous_handler)
+    except _EvaluationTimeout:
+        message = json.dumps(
+            {"status": "timeout", "reason": "evaluation exceeded task time budget"},
+            sort_keys=True,
+        )
+        return 0.0, 0.0, message
+    except Exception as exc:
+        # Report only the exception type name, not its message.
+        message = json.dumps({"status": "error", "reason": type(exc).__name__}, sort_keys=True)
+        return 0.0, 0.0, message
+
+    score = max(0.0, min(100.0, float(metrics["score"])))
+    public_metrics = {
+        "status": "scored",
+        "score": score,
+        "win_rate": round(float(metrics["win_rate"]), 4),
+        "mean_baseline_win_rate": round(float(metrics["mean_baseline_win_rate"]), 4),
+        "speed_tiebreak": round(float(metrics["speed_tiebreak"]), 4),
+        "speed_weight": round(float(metrics["speed_weight"]), 4),
+        "games": int(metrics["games"]),
+        "wins": int(metrics["wins"]),
+    }
+    return score, score, json.dumps(public_metrics, sort_keys=True)
+
+
+def main() -> int:  # CLI entry point; the returned int becomes the process exit status.
+    if len(sys.argv) != 2:  # Exactly one argument (the patch path) is required.
+        print("usage: evaluator.py /path/to/solution.patch", file=sys.stderr)
+        return 2
+    score, score_unbounded, message = evaluate(sys.argv[1])
+    print(json.dumps({"score": score, "score_unbounded": score_unbounded, "message": message}))
+    return 0  # Exit 0 whenever evaluate() returned, whatever the score.
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # Propagate main()'s return value as the exit status.
diff --git a/2.0/problems/generals_io_bot/harbor/app/LICENSE.generals-bots b/2.0/problems/generals_io_bot/harbor/app/LICENSE.generals-bots
new file mode 100644
index 00000000..27f2f9fc
--- /dev/null
+++ b/2.0/problems/generals_io_bot/harbor/app/LICENSE.generals-bots
@@ -0,0 +1,23 @@
+MIT License
+
+Copyright (c) 2024, Matej Straka
+
+Author: Matej Straka
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/2.0/problems/generals_io_bot/harbor/app/README.md b/2.0/problems/generals_io_bot/harbor/app/README.md
new file mode 100644
index 00000000..d925183c
--- /dev/null
+++ b/2.0/problems/generals_io_bot/harbor/app/README.md
@@ -0,0 +1,22 @@
+# Generals.io Bot Arena
+
+Work in `/app/generals_agent`, then run:
+
+```bash
+bash /app/make_submission.sh
+bash /app/submit.sh
+```
+
+Submit the baseline skeleton once before running long local experiments, then
+submit every meaningful improvement. Local simulations are useful only for tiny
+sanity checks; the black-box judge is the scoring feedback for this task, and
+submissions run asynchronously while you keep improving.
+
+Submission is asynchronous. Use:
+
+```bash
+bash /app/submissions.sh
+bash /app/wait_submission.sh <submission_uuid>
+```
+
+to inspect judge results while continuing to improve the bot.
diff --git a/2.0/problems/generals_io_bot/harbor/app/generals_agent/README.md b/2.0/problems/generals_io_bot/harbor/app/generals_agent/README.md
new file mode 100644
index 00000000..47854f1f
--- /dev/null
+++ b/2.0/problems/generals_io_bot/harbor/app/generals_agent/README.md
@@ -0,0 +1,46 @@
+# Generals Agent Skeleton
+
+Edit `bot.py` and implement `FrontierAgent`.
+
+The judge-side arena is black-box. This workspace contains the bot skeleton and
+the public `generals-bots` package, but no Frontier-CS evaluation harness,
+baseline ensemble, seeds, or match runner.
+
+Useful imports:
+
+```python
+import jax.numpy as jnp
+from generals.core.action import create_action, compute_valid_move_mask_obs
+from generals.agents import Agent
+```
+
+Action format:
+
+```text
+[pass, row, col, direction, split]
+```
+
+Directions:
+
+```text
+0 up, 1 down, 2 left, 3 right
+```
+
+Create a patch submission:
+
+```bash
+bash /app/make_submission.sh
+bash /app/submit.sh
+```
+
+Submit the baseline skeleton once before running long local experiments, then
+submit every meaningful improvement. Local simulations are useful only for tiny
+sanity checks; the black-box judge is the scoring feedback for this task, and
+submissions run asynchronously while you keep improving.
+
+The judge accepts patches touching only `bot.py`, `strategy.py`, and `utils.py`.
+Do not read files, launch subprocesses, open network sockets, or inspect
+environment variables; the evaluator rejects these patterns.
+
+The simulator is JAX-based. Compact array logic usually runs faster than large
+Python-heavy policies.
diff --git a/2.0/problems/generals_io_bot/harbor/app/generals_agent/bot.py b/2.0/problems/generals_io_bot/harbor/app/generals_agent/bot.py
new file mode 100644
index 00000000..d611e9b8
--- /dev/null
+++ b/2.0/problems/generals_io_bot/harbor/app/generals_agent/bot.py
@@ -0,0 +1,12 @@
+from generals.agents import ExpanderAgent
+
+
+class FrontierAgent(ExpanderAgent):
+    """Baseline expanding bot.
+
+    Improve this class or replace it with your own implementation. The judge
+    instantiates FrontierAgent() and calls act(observation, key) every turn.
+    """
+
+    def __init__(self, id: str = "FrontierAgent"):
+        super().__init__(id=id)
diff --git a/2.0/problems/generals_io_bot/harbor/app/make_submission.sh b/2.0/problems/generals_io_bot/harbor/app/make_submission.sh
new file mode 100755
index 00000000..b503257a
--- /dev/null
+++ b/2.0/problems/generals_io_bot/harbor/app/make_submission.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+REPO="${GENERALS_AGENT_DIR:-/app/generals_agent}"
+OUT="${1:-/app/solution.patch}"
+
+if [[ ! -d "$REPO/.git" ]]; then
+  echo "Generals agent checkout not found at $REPO" >&2
+  exit 2
+fi
+# `git diff` skips untracked files, so mark newly created modules as intent-to-add first.
+for f in strategy.py utils.py; do if [[ -e "$REPO/$f" ]]; then git -C "$REPO" add -N -- "$f"; fi; done
+git -C "$REPO" diff --binary -- bot.py strategy.py utils.py > "$OUT"
+echo "Wrote $OUT ($(wc -c < "$OUT" | tr -d ' ') bytes)"
diff --git a/2.0/problems/generals_io_bot/harbor/app/solution.patch b/2.0/problems/generals_io_bot/harbor/app/solution.patch
new file mode 100644
index 00000000..d406071d
--- /dev/null
+++ b/2.0/problems/generals_io_bot/harbor/app/solution.patch
@@ -0,0 +1,12 @@
+diff --git a/bot.py b/bot.py
+index d611e9b..cd85a3a 100644
+--- a/bot.py
++++ b/bot.py
+@@ -2,6 +2,7 @@ from generals.agents import ExpanderAgent
+ 
+ 
+ class FrontierAgent(ExpanderAgent):
++    # Reference patch keeps the public baseline behavior.
+     """Baseline expanding bot.
+ 
+     Improve this class or replace it with your own implementation. The judge
diff --git a/2.0/problems/generals_io_bot/readme b/2.0/problems/generals_io_bot/readme
new file mode 100644
index 00000000..91ae0b66
--- /dev/null
+++ b/2.0/problems/generals_io_bot/readme
@@ -0,0 +1,113 @@
+# Generals.io Bot Arena
+
+## Problem
+
+Implement a bot for a local Generals.io-style arena. Your bot plays repeated
+two-player games against fixed baseline bots in the `generals-bots` simulator.
+
+Each game is played on a square grid with fog of war. A player wins by capturing
+the opponent's general. If no general is captured before the truncation limit,
+the game is scored as a draw for win-rate purposes.
+
+The environment is the local `generals-bots` simulator, not the online
+generals.io service. Each turn your bot receives an observation containing:
+
+```text
+armies, generals, cities, mountains, neutral_cells, owned_cells,
+opponent_cells, fog_cells, structures_in_fog, owned/opponent land and army
+counts, and timestep
+```
+
+Fog hides cells outside the visibility radius around your territory. Mountains
+are impassable. Cities and generals produce armies over time. A valid move sends
+army from one owned cell to an adjacent passable cell; moving into enemy or
+neutral territory captures it only when the moving army is larger than the
+defending army.
+
+## Submission
+
+Submit a patch against the public `generals_agent` skeleton. In Harbor, edit the
+repository under:
+
+```text
+/app/generals_agent
+```
+
+Then run:
+
+```bash
+bash /app/make_submission.sh
+bash /app/submit.sh
+```
+
+Start by submitting the baseline skeleton once before running long local
+experiments. This establishes black-box feedback early; later submissions can
+replace it as you improve the bot.
+
+The patch must produce a Python module with:
+
+```python
+class FrontierAgent:
+    def act(self, observation, key):
+        ...
+```
+
+`act` must return a `generals-bots` action array:
+
+```text
+[pass, row, col, direction, split]
+```
+
+where `direction` is `0=up`, `1=down`, `2=left`, `3=right`, and `split`
+selects whether to move half the army instead of all-but-one.
+
+Patches may modify only these files:
+
+```text
+bot.py
+strategy.py
+utils.py
+```
+
+The judge rejects binary patches, oversized patches, path traversal, and common
+file/network/process access tokens. This is a bot-policy benchmark, not an
+environment inspection task.
+
+The agent workspace intentionally does not include a Frontier-CS match runner,
+baseline ensemble, hidden seeds, or evaluator implementation. Use the black-box
+submission interface for scoring feedback.
+
+## Scoring
+
+Every submission is evaluated against the same baseline families used by final
+verification. These include random, expansion, hunting/pathing, and
+strategy-inspired rule-based opponents, so exploiting only one weak bot is not
+enough for a high score. Faster wins also matter: the score gives substantial
+credit for capturing the enemy general in fewer turns.
+
+The default Harbor configuration is intentionally lightweight so agents can
+iterate quickly: it uses one game per matchup and an internal evaluator time
+budget. Increase `games_per_matchup`, `grid_sizes`, `truncation`, `pool_size`,
+and `max_eval_seconds` together in `config.yaml` for a heavier run. Adjust
+`speed_weight` if you want fast wins to matter more or less relative to raw win
+rate.
+
+Practical tip: the simulator is JAX-based. Simple array programs compile and
+run much faster than large Python control-flow policies, so keep `act` compact
+and vectorized when possible.
+
+The reported score is scaled to `[0, 100]`:
+
+```text
+score = 100 * ((1 - speed_weight) * mean_baseline_win_rate + speed_weight * speed_tiebreak)
+```
+
+The default `speed_weight` is `0.25`. `speed_tiebreak` averages, over all games,
+`(truncation - turns) / truncation` for wins (earlier captures earn more) and `0` otherwise.
+
+## Notes
+
+- The online generals.io service is not used.
+- The hidden evaluator and hidden seeds are not visible in the agent workspace.
+- The task uses `strakam/generals-bots` at pinned commit
+  `c2b77bf72812ec91fb2024d80d90112b961dfa7e` under the MIT license.
diff --git a/2.0/problems/generals_io_bot/reference.patch b/2.0/problems/generals_io_bot/reference.patch
new file mode 100644
index 00000000..d406071d
--- /dev/null
+++ b/2.0/problems/generals_io_bot/reference.patch
@@ -0,0 +1,12 @@
+diff --git a/bot.py b/bot.py
+index d611e9b..cd85a3a 100644
+--- a/bot.py
++++ b/bot.py
+@@ -2,6 +2,7 @@ from generals.agents import ExpanderAgent
+ 
+ 
+ class FrontierAgent(ExpanderAgent):
++    # Reference patch keeps the public baseline behavior.
+     """Baseline expanding bot.
+ 
+     Improve this class or replace it with your own implementation. The judge
diff --git a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/Dockerfile b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/Dockerfile
index 8f3f6233..2cc631d4 100644
--- a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/Dockerfile
+++ b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/Dockerfile
@@ -30,4 +30,16 @@ COPY readme config.yaml task_config.json submission_config.json AGENT.md \
     wait_submission.py wait_submission.sh cancel_submission.py cancel_submission.sh /app/
 COPY harbor_app/ /app/
 {visible_input_copies}
+RUN if command -v git >/dev/null 2>&1; then \
+        find /app -mindepth 2 -maxdepth 4 -type d -name .git -print | \
+        while read -r gitdir; do \
+            repo="$(dirname "$gitdir")"; \
+            git -C "$repo" config user.email frontier-cs@example.invalid; \
+            git -C "$repo" config user.name "Frontier-CS"; \
+            git -C "$repo" add -A; \
+            if ! git -C "$repo" diff --cached --quiet; then \
+                git -C "$repo" commit -q --amend --no-edit || git -C "$repo" commit -q -m base; \
+            fi; \
+        done; \
+    fi
 RUN chmod +x /app/submit.sh /app/submissions.sh /app/wait_submission.sh /app/cancel_submission.sh
diff --git a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/judge_server.py b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/judge_server.py
index 83cb02a2..a26c9506 100644
--- a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/judge_server.py
+++ b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/judge_server.py
@@ -72,6 +72,28 @@ def configured_max_queue_size() -> int:
     return DEFAULT_MAX_QUEUE_SIZE
 
 
+def configured_allow_empty_submission() -> bool:
+    """Return True when the task config's ``submission.allow_empty`` value is truthy."""
+    section = load_task_config().get("submission", {})
+    return isinstance(section, dict) and bool(section.get("allow_empty"))
+
+
+def configured_async_start_method() -> str:
+    """Pick the multiprocessing start method used for evaluation child processes.
+
+    Precedence: the ``FRONTIER_ASYNC_EVAL_START_METHOD`` environment variable, then
+    the task config's ``evaluation.async_start_method``, then ``"fork"``. A name
+    that this platform does not support also resolves to ``"fork"``.
+    """
+    section = load_task_config().get("evaluation", {})
+    from_config = section.get("async_start_method") if isinstance(section, dict) else None
+    # Empty or missing values fall through to the next source in the chain.
+    requested = str(os.environ.get("FRONTIER_ASYNC_EVAL_START_METHOD") or from_config or "fork")
+    # Validate up front instead of handing an unknown name to get_context().
+    supported = multiprocessing.get_all_start_methods()
+    return requested if requested in supported else "fork"
+
+
 MAX_QUEUE_SIZE = configured_max_queue_size()
 
 
@@ -341,7 +363,7 @@ def validate_payload(payload: dict[str, Any], *, allow_final: bool, role_token:
             raise ValueError("directory submission must include archive_b64")
     else:
         code = payload.get("code")
-        if not isinstance(code, str) or not code.strip():
+        if not isinstance(code, str) or (not configured_allow_empty_submission() and not code.strip()):
             raise ValueError("file submission must include non-empty string field 'code'")
         submission_kind = "file"
     return submission_uuid, submission_role, submission_kind
@@ -359,7 +381,34 @@ def run_payload(payload: dict[str, Any], *, submission_role: str) -> dict[str, A
     if not acquired:
         raise TimeoutError("timed out waiting for evaluator lock")
     try:
-        return evaluate_payload_direct(payload, submission_role=submission_role)
+        ctx = multiprocessing.get_context(configured_async_start_method())
+        with tempfile.TemporaryDirectory(prefix="frontier_cs_2_0_final_result_") as tmp:
+            result_path = Path(tmp) / "result.json"
+            process = ctx.Process(
+                target=_async_evaluate_child,
+                args=(payload, submission_role, str(result_path)),
+            )
+            process.start()
+            deadline = time.time() + EVALUATION_LOCK_TIMEOUT_SECONDS
+            while process.is_alive():
+                process.join(timeout=0.2)
+                if time.time() >= deadline:
+                    terminate_process_group(process)
+                    raise TimeoutError("final evaluation timed out")
+
+            if not result_path.exists():
+                raise RuntimeError(
+                    f"evaluation process exited without a result (exitcode={process.exitcode})"
+                )
+            output = json.loads(result_path.read_text(encoding="utf-8"))
+            if not isinstance(output, dict):
+                raise RuntimeError("evaluation process returned invalid output")
+            if not output.get("ok"):
+                raise RuntimeError(str(output.get("detail") or "evaluation failed"))
+            result = output.get("result")
+            if not isinstance(result, dict):
+                raise RuntimeError("evaluation process returned invalid result")
+            return result
     finally:
         EVALUATION_LOCK.release()
 
@@ -369,12 +418,15 @@ def _async_evaluate_child(
     submission_role: str,
     result_path: str,
 ) -> None:
+    global EVALUATOR
     if os.name == "posix":
         try:
             os.setsid()
         except OSError:
             pass
     try:
+        if EVALUATOR is None:
+            EVALUATOR = load_problem_evaluator()
         result = evaluate_payload_direct(payload, submission_role=submission_role)
         output = {"ok": True, "result": result}
     except BaseException:
@@ -390,7 +442,7 @@ def terminate_process_group(process: multiprocessing.Process) -> None:
         try:
             os.killpg(process.pid, signal.SIGTERM)
         except ProcessLookupError:
-            pass
+            process.terminate()
         except OSError:
             process.terminate()
     else:
@@ -404,7 +456,7 @@ def terminate_process_group(process: multiprocessing.Process) -> None:
         try:
             os.killpg(process.pid, signal.SIGKILL)
         except ProcessLookupError:
-            pass
+            process.kill()
         except OSError:
             process.kill()
     else:
@@ -431,7 +483,7 @@ def run_async_payload(
         if submission_is_cancelling(submission_uuid):
             raise SubmissionCancelled()
 
-        ctx = multiprocessing.get_context("fork")
+        ctx = multiprocessing.get_context(configured_async_start_method())
         with tempfile.TemporaryDirectory(prefix="frontier_cs_2_0_async_result_") as tmp:
             result_path = Path(tmp) / "result.json"
             process = ctx.Process(
diff --git a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/submit.py b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/submit.py
index 3fca5b46..311f818d 100644
--- a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/submit.py
+++ b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/environment/submit.py
@@ -148,6 +148,7 @@ def main() -> int:
     default_path = str(config.get("path") or SOLUTION_PATH)
     solution_path = Path(sys.argv[1] if len(sys.argv) > 1 else default_path)
     exclude = list(config.get("exclude", []) or [])
+    allow_empty = bool(config.get("allow_empty", False))
     sub_uuid = str(uuid.uuid4())
     code_chars = 0
     file_count = 0
@@ -208,7 +209,7 @@ def main() -> int:
     else:
         code = solution_path.read_text(encoding="utf-8")
         code_chars = len(code)
-        if not code.strip():
+        if not allow_empty and not code.strip():
             msg = f"Solution file {solution_path} is empty"
             print(f"[submit] ERROR: {msg}", file=sys.stderr)
             log_record(
diff --git a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/tests/evaluate.py b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/tests/evaluate.py
index 4e91d8db..eef882fe 100644
--- a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/tests/evaluate.py
+++ b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/task-template/tests/evaluate.py
@@ -34,50 +34,11 @@
 FINAL_ROLE_TOKEN = "{verifier_token}"
 
 
-def submission_reward(record: dict) -> float | None:
-    try:
-        return float(record.get("score", 0.0)) / 100.0
-    except (TypeError, ValueError):
-        return None
-
-
 def result_score_key(record: dict) -> tuple[float, float]:
     score = float(record.get("score", 0.0))
     return (score, float(record.get("score_unbounded", score)))
 
 
-def best_submission() -> dict | None:
-    submissions_log = (
-        VERIFIER_SUBMISSIONS_LOG
-        if VERIFIER_SUBMISSIONS_LOG.exists()
-        else JUDGE_SUBMISSIONS_LOG
-    )
-    if not submissions_log.exists():
-        return None
-
-    best: dict | None = None
-    for line in submissions_log.read_text(encoding="utf-8").splitlines():
-        if not line.strip():
-            continue
-        try:
-            record = json.loads(line)
-            reward = submission_reward(record)
-            if reward is None:
-                continue
-        except json.JSONDecodeError:
-            continue
-        if record.get("submission_role", "agent") != "agent":
-            continue
-        if record.get("status") != "done":
-            continue
-        metrics = record.get("metrics", {})
-        if isinstance(metrics, dict) and metrics.get("evaluation_scope") == "quick_feedback":
-            continue
-        if best is None or result_score_key(record) > result_score_key(best):
-            best = record
-    return best
-
-
 def write_reward(reward: float, detail: str = "", extra: dict | None = None) -> None:
     REWARD_TXT.parent.mkdir(parents=True, exist_ok=True)
     reward = max(0.0, min(1.0, float(reward)))
@@ -391,7 +352,8 @@ def try_write_best_final_result(reason: str, final_key: tuple[float, float] | No
                 return
             write_reward(0.0, f"{solution_path} not found")
             return
-        if solution_path.is_file() and not solution_path.read_text(encoding="utf-8").strip():
+        allow_empty = bool(config.get("allow_empty", False))
+        if solution_path.is_file() and not allow_empty and not solution_path.read_text(encoding="utf-8").strip():
             print(f"ERROR: {solution_path} is empty")
             if try_write_best_final_result(f"{solution_path} is empty"):
                 return