diff --git a/scripts/run_anvil.py b/scripts/run_anvil.py index b35b921..62443cc 100644 --- a/scripts/run_anvil.py +++ b/scripts/run_anvil.py @@ -1023,6 +1023,23 @@ def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, "session tongs require the anvil '--name' as a session handle" ) + # An org-layer `shared` tong is partitioned onto its own isolated network, + # which the anvil joins by name -- so a scoped launch needs the anvil --name + # for the same reason a session launch does. Derive the org scope token from + # the org layer's directory (None when no org layer was passed, leaving every + # shared tong on today's global, unscoped naming). + org_token = tongs.org_scope_token(dict(opts.layer_dirs).get(tongs.ORG)) + has_org_shared = bool(org_token) and any( + merged[name]["definition"].get("lifecycle") != "session" + and merged[name]["source"] == tongs.ORG + for name in merged + ) + if has_org_shared and not session_id: + raise OrchestrationError( + "org-scoped shared tongs require the anvil '--name' as a handle to " + "join their isolated network" + ) + plan = tongs.plan_network(merged, base_network, session_id) # `volume` tongs are refused upstream, so the injection is reachability for # the network-facing kinds only: `port` env vars and, for `mcp` tongs, the @@ -1032,13 +1049,15 @@ def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, created_network = None started_sessions = [] connected_shared = [] + joined_shared_networks = [] # isolated per-scope networks the anvil must join + anvil_multi = False # the anvil was created via the multi-network path mcp_dir = None # host temp dir holding the generated MCP config, if any try: if plan["create"]: docker.ensure_network(plan["create"]) created_network = plan["create"] - ready_checks = [] + ready_checks = [] # (name, defn, alias, container, probe_network) for name in sorted(merged): defn = merged[name]["definition"] alias = tongs.canonical_alias(name, defn) @@ -1051,15 +1070,29 @@ 
def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, label_hash=tongs.config_hash(defn), make_channel=make_channel, ) started_sessions.append(container) + probe_network = plan["network"] else: - container = tongs.shared_container_name(name) + # An org-sourced shared tong is partitioned onto an isolated + # per-org network and a scoped container name; every other shared + # tong stays on the shared base network, unscoped, as before. + scope = org_token if merged[name]["source"] == tongs.ORG else None + container = tongs.shared_container_name(name, scope=scope) + if scope: + tong_network = tongs.shared_network_name(scope) + docker.ensure_network(tong_network) + if tong_network not in joined_shared_networks: + joined_shared_networks.append(tong_network) + probe_network = tong_network + else: + tong_network = base_network + probe_network = plan["network"] _ensure_shared_tong( docker, name, defn, - container=container, network=base_network, alias=alias, + container=container, network=tong_network, alias=alias, resolver=resolver, workspace=opts.workspace, label_hash=tongs.config_hash(defn), make_channel=make_channel, ) - ready_checks.append((name, defn, alias, container)) + ready_checks.append((name, defn, alias, container, probe_network)) # Attach each network-facing `shared` tong to the per-session network under # its canonical alias, so the anvil reaches it there without the long-lived @@ -1068,6 +1101,12 @@ def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, # because a `none` session tong with no alias must still be started; only the # network-facing `shared` tongs in plan["shared_connect"] are connected here.) 
for name, alias in plan["shared_connect"]: + if org_token and merged[name]["source"] == tongs.ORG: + # An org-scoped shared tong is isolated on its own network, which + # the anvil joins directly -- it is deliberately never attached to + # the per-session network, so the session reaches it only through + # that org network and never via the shared base/session fabric. + continue container = tongs.shared_container_name(name) # ensure_network may have reused a network left by a hard-killed prior # session whose teardown never ran, with this shared tong still attached; @@ -1077,11 +1116,13 @@ def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, docker.network_connect(plan["network"], container, alias=alias) connected_shared.append((plan["network"], container)) - # Probe readiness on the network the anvil will use, so a `shared` tong is - # checked at the alias the anvil dials (connected to that network above). - for name, defn, alias, container in ready_checks: + # Probe readiness on the network the anvil will reach each tong over: the + # session/base network for ordinary tongs, but the isolated org network + # for a scoped shared tong (it lives only there, never on the session + # fabric), so each is checked at the alias the anvil actually dials. 
+ for name, defn, alias, container, probe_network in ready_checks: if not wait_ready( - docker, container, defn, alias, plan["network"], + docker, container, defn, alias, probe_network, anvil_image=opts.anvil_image, sleep=sleep, monotonic=monotonic, ): raise OrchestrationError("tong '%s' did not become ready in time" % name) @@ -1102,12 +1143,17 @@ def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, anvil_cmd, network=plan["network"], pre_image_args=pre_image_args, post_image_args=post_image_args, ) - if plan["extra_networks"]: + # The anvil joins the base network (the `NETWORK=` escape hatch) when a + # per-session network is its primary, plus every isolated org network its + # scoped shared tongs live on. + extra_networks = list(plan["extra_networks"]) + joined_shared_networks + if extra_networks: # The anvil joins more than one network, which docker run cannot do at # creation, so create -> connect the extras -> start it attached. - return docker.run_foreground_multi( - injected, plan["extra_networks"], session_id - ) + # (session_id is guaranteed here: a session network or an org network + # both require the anvil --name, checked above.) + anvil_multi = True + return docker.run_foreground_multi(injected, extra_networks, session_id) return docker.run_foreground(injected) finally: # Tear down per-session state, leaving the long-lived `shared` tongs @@ -1116,11 +1162,20 @@ def run_with_tongs(merged, anvil_cmd, opts, *, docker, providers=None, # refuses to delete a network while endpoints remain. for container in started_sessions: docker.rm_force(container) - if created_network: + # A multi-network anvil is an explicitly-created container (left for us so + # a failed connect/start is not orphaned); the plain single-network run + # uses `--rm` and self-removes, so it is only force-removed here. 
+ if anvil_multi: docker.rm_force(session_id) - for network, container in connected_shared: - docker.network_disconnect(network, container) + for network, container in connected_shared: + docker.network_disconnect(network, container) + if created_network: docker.network_rm(created_network) + # Best-effort prune of each isolated org network: docker refuses while the + # long-lived shared tong is still attached, so the network persists with + # its tong and is reclaimed only once nothing is on it. + for network in joined_shared_networks: + docker.network_rm(network) # The generated MCP config was bind-mounted into the anvil, which has now # exited; remove the host temp file holding it. if mcp_dir: diff --git a/scripts/test_run_anvil.py b/scripts/test_run_anvil.py index 941939e..fe726ce 100644 --- a/scripts/test_run_anvil.py +++ b/scripts/test_run_anvil.py @@ -877,6 +877,15 @@ def __call__(self): "readiness": {"mode": "none"}, } +# An org-owned credential-holding MCP tong: the user's reported case. Two orgs +# ship this same file with different credentials; each must run partitioned. +ORG_ASANA = { + "lifecycle": "shared", + "image": "asana-mcp:latest", + "interface": {"kind": "mcp", "name": "asana-mcp", "port": 3000}, + "readiness": {"mode": "none"}, +} + # A per-session network service (a throwaway fixture DB) reached by host+port. 
SESSION_PORT = { "lifecycle": "session", @@ -1555,6 +1564,112 @@ def test_session_tong_without_anvil_name_raises_before_any_docker_call(self): ) self.assertEqual(docker.calls, []) # nothing created => nothing to tear down + # --- Per-org isolation of `shared` tongs -------------------------------- + + _ACME = "/orgs/acme/.swarmforge/tongs" + _GLOBEX = "/orgs/globex/.swarmforge/tongs" + + def _run_org(self, docker, merged, org_dir, harness="opencode", anvil=None): + """Drive run_with_tongs with an org layer dir wired into the options.""" + opts = run_anvil.LauncherOptions( + layer_dirs=[(tongs.ORG, org_dir)], workspace=None, approvals=None, + providers=None, harness=harness, anvil_image="anvil:img", no_prompt=False, + ) + return run_anvil.run_with_tongs( + merged, anvil or ANVIL_ARGV, opts, + docker=docker, sleep=lambda _s: None, monotonic=_Clock(), + ) + + def test_org_shared_tong_isolated_on_per_org_network(self): + # An org-owned shared tong starts on its own per-org network (never the + # shared base network), and the anvil joins that network as an extra. + docker = FakeDocker() + merged = {"asana": {"source": tongs.ORG, "definition": ORG_ASANA}} + self._run_org(docker, merged, self._ACME) + token = tongs.org_scope_token(self._ACME) + net = tongs.shared_network_name(token) + container = tongs.shared_container_name("asana", scope=token) + self.assertIn(("ensure_network", net), docker.calls) + started = docker.run_argvs[0] + self.assertIn(container, started) + self.assertEqual(started[started.index("--network") + 1], net) + self.assertNotEqual(started[started.index("--network") + 1], "opencode-net") + # The anvil keeps opencode-net as its primary (for the model backend) and + # joins the org network as an extra via the multi-network path. 
+ self.assertEqual(docker.anvil_extra_networks, [net]) + self.assertEqual( + docker.anvil_argv[docker.anvil_argv.index("--network") + 1], "opencode-net" + ) + + def test_org_shared_tong_readiness_probes_on_org_network(self): + # A scoped shared tong with a tcp probe is checked on its org network -- + # the only network it lives on -- not on the anvil's base network. + docker = FakeDocker() + defn = { + "lifecycle": "shared", "image": "asana-mcp:latest", + "interface": {"kind": "mcp", "name": "asana-mcp", "port": 3000}, + "readiness": {"mode": "tcp"}, + } + merged = {"asana": {"source": tongs.ORG, "definition": defn}} + self._run_org(docker, merged, self._ACME) + net = tongs.shared_network_name(tongs.org_scope_token(self._ACME)) + self.assertIn(("tcp_probe", net, "asana-mcp", 3000, "anvil:img"), docker.calls) + + def test_two_orgs_partition_into_distinct_containers_and_networks(self): + # The crux: the same tong file in two orgs yields distinct containers and + # distinct networks (so neither tears the other down, and neither is + # reachable from the other), while the agent-facing MCP server name + # (interface.name) stays identical in both. + merged = {"asana": {"source": tongs.ORG, "definition": ORG_ASANA}} + d1 = FakeDocker() + self._run_org(d1, merged, self._ACME) + d2 = FakeDocker() + self._run_org(d2, merged, self._GLOBEX) + + s1, s2 = d1.run_argvs[0], d2.run_argvs[0] + self.assertNotEqual( + s1[s1.index("--name") + 1], s2[s2.index("--name") + 1] + ) + self.assertNotEqual(d1.anvil_extra_networks, d2.anvil_extra_networks) + # Same agent-facing MCP name on each org's isolated network. 
+ self.assertEqual(s1[s1.index("--network-alias") + 1], "asana-mcp") + self.assertEqual(s2[s2.index("--network-alias") + 1], "asana-mcp") + + def test_non_org_shared_tong_stays_global_even_with_org_layer(self): + # A repo-sourced shared tong keeps the base network and unscoped name even + # when the launch also carries an org layer dir -- only org-owned shared + # tongs are partitioned. + docker = FakeDocker() + merged = {"ollama": {"source": tongs.REPO, "definition": SHARED_OLLAMA}} + self._run_org(docker, merged, self._ACME) + started = docker.run_argvs[0] + self.assertIn("swarmforge-shared-ollama", started) + self.assertEqual(started[started.index("--network") + 1], "opencode-net") + self.assertNotIn("ensure_network", [c[0] for c in docker.calls]) + self.assertIsNone(docker.anvil_extra_networks) + + def test_org_shared_network_pruned_best_effort_and_tong_left_running(self): + # On teardown the org network is pruned best-effort (docker refuses while + # the long-lived tong is attached, so it persists), and the shared tong is + # force-removed only once -- at start, to clear a leftover -- never as a + # teardown step. 
+ docker = FakeDocker() + merged = {"asana": {"source": tongs.ORG, "definition": ORG_ASANA}} + self._run_org(docker, merged, self._ACME) + token = tongs.org_scope_token(self._ACME) + net = tongs.shared_network_name(token) + container = tongs.shared_container_name("asana", scope=token) + self.assertIn(("network_rm", net), docker.calls) + self.assertEqual(docker.calls.count(("rm_force", container)), 1) + + def test_org_shared_tong_without_anvil_name_raises_before_any_docker_call(self): + docker = FakeDocker() + anvil = ["docker", "run", "-it", "--rm", "--network", "opencode-net", "img"] + merged = {"asana": {"source": tongs.ORG, "definition": ORG_ASANA}} + with self.assertRaises(run_anvil.OrchestrationError): + self._run_org(docker, merged, self._ACME, anvil=anvil) + self.assertEqual(docker.calls, []) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/scripts/test_tongs.py b/scripts/test_tongs.py index c4d6f59..e5a39a3 100644 --- a/scripts/test_tongs.py +++ b/scripts/test_tongs.py @@ -869,6 +869,46 @@ def test_shared_container_name_sanitizes_and_prefixes(self): self.assertEqual(tongs.shared_container_name("ollama"), "swarmforge-shared-ollama") self.assertEqual(tongs.shared_container_name("my tong/x"), "swarmforge-shared-my-tong-x") + def test_shared_container_name_scope_partitions_identical_names(self): + # Two orgs shipping the same tong name get distinct container names so + # they never collide on one daemon-global name (the teardown bug). + a = tongs.shared_container_name("asana", scope="acme-1a2b3c4d") + b = tongs.shared_container_name("asana", scope="globex-9f8e7d6c") + self.assertEqual(a, "swarmforge-shared-acme-1a2b3c4d-asana") + self.assertEqual(b, "swarmforge-shared-globex-9f8e7d6c-asana") + self.assertNotEqual(a, b) + # No scope is byte-identical to the unscoped name (today's behavior). 
+ self.assertEqual( + tongs.shared_container_name("asana"), "swarmforge-shared-asana" + ) + + def test_shared_network_name_is_scope_prefixed(self): + self.assertEqual( + tongs.shared_network_name("acme-1a2b3c4d"), + "swarmforge-shared-net-acme-1a2b3c4d", + ) + + def test_org_scope_token_none_without_org_dir(self): + self.assertIsNone(tongs.org_scope_token(None)) + self.assertIsNone(tongs.org_scope_token("")) + + def test_org_scope_token_stable_per_path_and_distinct_per_org(self): + # Same org path (e.g. two repos under one org) => same token; different + # orgs => different tokens. Path is normalized so trailing slashes and + # `.`/`..` segments do not change identity. + acme = tongs.org_scope_token("/home/me/orgs/acme/.swarmforge/tongs") + acme_again = tongs.org_scope_token("/home/me/orgs/acme/.swarmforge/tongs/") + acme_dotted = tongs.org_scope_token("/home/me/orgs/acme/./.swarmforge/tongs") + globex = tongs.org_scope_token("/home/me/orgs/globex/.swarmforge/tongs") + self.assertEqual(acme, acme_again) + self.assertEqual(acme, acme_dotted) + self.assertNotEqual(acme, globex) + + def test_org_scope_token_carries_readable_org_root_hint(self): + # The org root (parent of `.swarmforge/`) is prefixed for `docker ps`. + token = tongs.org_scope_token("/home/me/orgs/acme/.swarmforge/tongs") + self.assertTrue(token.startswith("acme-"), token) + def test_session_container_name_carries_session_and_sanitizes(self): self.assertEqual(tongs.session_container_name("claude-proj", "github"), "claude-proj-tong-github") self.assertEqual(tongs.session_container_name("claude-proj", "my tong/x"), "claude-proj-tong-my-tong-x") diff --git a/scripts/tongs.py b/scripts/tongs.py index 4752ca8..1177439 100644 --- a/scripts/tongs.py +++ b/scripts/tongs.py @@ -1052,19 +1052,70 @@ def readiness_settings(defn): # long-lived container is found (and staleness-checked) across sessions. 
SHARED_CONTAINER_PREFIX = "swarmforge-shared"

# A scoped shared tong is isolated on its own docker network (this prefix +
# scope token) instead of the shared base network, so another scope's anvil has
# no interface on it and cannot reach the tong even by raw IP.
SHARED_NETWORK_PREFIX = "swarmforge-shared-net"


def _sanitize_container_token(name):
    """Reduce `name` to the characters docker permits in a container name.

    Every run of characters outside `[A-Za-z0-9_.-]` collapses to one `-`, then
    leading/trailing `-`, `_` and `.` are stripped. The result may be "".
    """
    return re.sub(r"[^A-Za-z0-9_.-]+", "-", name).strip("-_.")


def org_scope_token(org_tongs_dir):
    """Stable short token identifying one org by its tongs directory.

    A `shared` tong owned by the org layer must be partitioned per org: two orgs
    that ship the same tong (same filename, same `interface.name`) but different
    credentials would otherwise collide on one daemon-global container name --
    each launch tearing the other's container down -- and sit reachable side by
    side on the shared base network. The token scopes both the container name and
    the isolating network so neither collides across orgs.

    Derived from the absolute org-tongs directory path, so every launch pointed
    at the same org (e.g. different repos under one org) shares a token while
    different orgs differ. A readable hint from the org root (the parent of
    `.swarmforge/`) is prefixed for `docker ps`; the hash is what guarantees
    uniqueness. The path is normalized lexically only (trailing slashes and
    `.`/`..` segments collapse); symlinks are not resolved.

    Returns None when no org layer path is given (None or ""), leaving a launch
    with no org tongs on today's global, unscoped naming. Otherwise returns
    "<hint>-<8 hex chars>", or just the 8 hex chars when the hint sanitizes to
    nothing.
    """
    if not org_tongs_dir:
        return None
    canonical = os.path.normpath(os.path.abspath(org_tongs_dir))
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:8]
    # The org root is two levels above `<org>/.swarmforge/tongs`.
    org_root = os.path.dirname(os.path.dirname(canonical))
    hint = _sanitize_container_token(os.path.basename(org_root))
    return "%s-%s" % (hint, digest) if hint else digest


def shared_container_name(name, scope=None):
    """Stable container name for a `shared` tong (session-independent).

    Sanitized to the characters docker permits in a container name and prefixed
    so the container is recognizable as a Swarmforge-managed shared tong. An
    optional `scope` token (see `org_scope_token`) partitions otherwise
    identically-named shared tongs owned by different scopes -- so two orgs
    shipping the same tong do not collide on one daemon-global container name.

    `scope` is sanitized exactly like `name`, so a hand-built scope cannot
    smuggle characters docker rejects into the name; a token produced by
    `org_scope_token` passes through unchanged. A falsy `scope` (the default)
    yields the unscoped name. A component that sanitizes to nothing is omitted.
    """
    parts = [SHARED_CONTAINER_PREFIX]
    if scope:
        parts.append(_sanitize_container_token(scope))
    parts.append(_sanitize_container_token(name))
    return "-".join(part for part in parts if part)


def shared_network_name(scope):
    """Isolated docker network hosting one scope's `shared` tongs.

    A scoped `shared` tong lives alone on this network instead of the shared base
    network, and only the matching scope's anvil joins it -- so another scope's
    anvil has no interface on it and cannot reach the tong even by dialing a raw
    IP. The scope token (see `org_scope_token`) keeps two orgs' networks distinct.

    `scope` is sanitized like a container-name token. Raises ValueError when it
    is missing or sanitizes to nothing: there is no unscoped isolated network,
    and formatting None into the name would silently funnel every such caller
    onto one shared "...-None" network, defeating the isolation.
    """
    token = _sanitize_container_token(scope) if scope else ""
    if not token:
        raise ValueError(
            "shared network name requires a non-empty scope token, got %r" % (scope,)
        )
    return "%s-%s" % (SHARED_NETWORK_PREFIX, token)


# Unchanged context resumes here with
# `def session_container_name(session_id, name):` (its body lies outside this excerpt).