From 6355f3c5e7d36fa1122d104ed18fa0adf164abd3 Mon Sep 17 00:00:00 2001 From: CrypticSwarm Date: Tue, 16 Jun 2026 23:05:12 -0500 Subject: [PATCH] Resolve and deliver tong secrets through host provider CLIs Tong definitions reference secrets as ${secret:<provider>:<ref>}. Resolve them on the host by shelling out to a provider CLI declared once in the user layer (~/.swarmforge/secret-providers.yaml), the docker-credential-helper pattern, so Swarmforge knows nothing about any individual secret manager. Loading the provider table and building the argv are pure (in tongs.py); the subprocess that runs the CLI lives in the launcher's resolver closure, which surfaces a clear error -- never the secret -- on an unknown provider, an unrunnable CLI, or a non-zero exit. Only the CLI's stdout is captured; its stderr passes through to the user's terminal, so interactive unlocks (op signin, biometrics) stay visible. Deliver resolved secrets to a tong through an in-memory tmpfs file rather than a docker -e env var: anything holding the docker socket could read an env value back via docker inspect. Each secret-bearing env var becomes a tmpfs file plus a _FILE pointer the tong reads; plain env keeps flowing through -e. The resolved value only ever appears in the file plan, never in the env the launcher passes as -e, and the launcher never writes a secret to host disk. An env name that would escape the tmpfs dir as a path component is refused, and a declared env var that collides with a secret's _FILE pointer fails the launch (and validation) unless it already names the generated path, so a tong is never started unable to find its secret. These are host-side helpers exercised only when a tong carries a secret reference; the empty-discovery launch path is unchanged. 
--- scripts/run_anvil.py | 50 +++++++++++ scripts/test_run_anvil.py | 67 +++++++++++++++ scripts/test_tongs.py | 167 ++++++++++++++++++++++++++++++++++++ scripts/tongs.py | 176 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 460 insertions(+) diff --git a/scripts/run_anvil.py b/scripts/run_anvil.py index 95303c3..67baa53 100644 --- a/scripts/run_anvil.py +++ b/scripts/run_anvil.py @@ -41,6 +41,7 @@ import collections import importlib.util import os +import subprocess import sys # Load the pure core (layer discovery + name-based merge) by path, the same way @@ -248,6 +249,55 @@ def gate_workspace_tongs(merged, workspace, approvals_path, prompt=True, out=Non tongs.save_approvals(approvals_path, approvals) +# --- Secret resolution -------------------------------------------------------- + + +class SecretResolutionError(Exception): + """A secret reference could not be resolved; the launch must not proceed.""" + + +def make_secret_resolver(providers): + """Build the impure resolver closure over a configured provider table. + + Returns `resolve(provider, ref) -> str`, the side-effectful counterpart to + the pure `tongs.substitute_secrets`/`tongs.plan_tong_secrets`: it shells out + to the provider CLI built by `tongs.secret_provider_command` and returns the + secret printed on stdout. Interactive unlocks (`op signin`, biometrics) work + because the launcher runs in the user's terminal before the anvil starts. A + single trailing newline -- which provider CLIs conventionally append -- is + stripped; any other whitespace is preserved verbatim. + + Raises `SecretResolutionError` (naming the provider and reference, never the + secret) for an unknown provider, a CLI that cannot be run, or a non-zero + exit, so a misconfigured secret stops the launch rather than handing the tong + an empty or partial value. 
+ """ + + def resolve(provider, ref): + try: + command = tongs.secret_provider_command(providers, provider, ref) + except KeyError: + raise SecretResolutionError( + "no secret provider %r is configured; declare it in " + "secret-providers.yaml" % provider + ) + try: + completed = subprocess.run(command, stdout=subprocess.PIPE, check=False) + except OSError as exc: + raise SecretResolutionError( + "secret provider %r could not run: %s" % (provider, exc) + ) + if completed.returncode != 0: + raise SecretResolutionError( + "secret provider %r failed for %r (exit %d)" + % (provider, ref, completed.returncode) + ) + value = completed.stdout.decode("utf-8") + return value[:-1] if value.endswith("\n") else value + + return resolve + + def exec_anvil(anvil_cmd): """Exec the anvil argv, replacing this process. diff --git a/scripts/test_run_anvil.py b/scripts/test_run_anvil.py index 9455e25..deddba7 100644 --- a/scripts/test_run_anvil.py +++ b/scripts/test_run_anvil.py @@ -9,6 +9,7 @@ import sys import tempfile import unittest +from unittest import mock HERE = os.path.dirname(os.path.abspath(__file__)) MODULE_PATH = os.path.join(HERE, "run_anvil.py") @@ -329,6 +330,72 @@ def test_missing_workspace_path_fails_closed(self): self._gate(merged, answer="y\n", workspace="") +class SecretResolverTests(unittest.TestCase): + """make_secret_resolver shells out to the provider CLI and reports failures.""" + + # Portable provider commands built on the test interpreter so the suite does + # not depend on op/pass/echo being installed. "{ref}" is substituted by + # tongs.secret_provider_command before exec. 
+ def _writes(self, expr): + return [sys.executable, "-c", "import sys; sys.stdout.write(%s)" % expr, "{ref}"] + + def test_resolves_ref_via_provider_cli(self): + resolve = run_anvil.make_secret_resolver({"echo": self._writes("sys.argv[1]")}) + self.assertEqual(resolve("echo", "op://Work/secret"), "op://Work/secret") + + def test_provider_stderr_inherits_terminal(self): + with mock.patch.object(run_anvil.subprocess, "run") as run: + run.return_value = subprocess.CompletedProcess(["provider"], 0, stdout=b"secret\n") + resolve = run_anvil.make_secret_resolver({"p": ["provider", "{ref}"]}) + self.assertEqual(resolve("p", "ref"), "secret") + self.assertIsNone(run.call_args.kwargs.get("stderr")) + + def test_strips_single_trailing_newline(self): + resolve = run_anvil.make_secret_resolver({"echo": self._writes("sys.argv[1] + '\\n'")}) + self.assertEqual(resolve("echo", "token"), "token") + + def test_preserves_inner_and_other_whitespace(self): + # Only one trailing newline is stripped; interior/extra newlines survive. 
+ resolve = run_anvil.make_secret_resolver({"echo": self._writes("sys.argv[1] + '\\n\\n'")}) + self.assertEqual(resolve("echo", "a\nb"), "a\nb\n") + + def test_unknown_provider_raises(self): + resolve = run_anvil.make_secret_resolver({"op": ["op", "read", "{ref}"]}) + with self.assertRaises(run_anvil.SecretResolutionError): + resolve("vault", "x") + + def test_nonzero_exit_raises(self): + resolve = run_anvil.make_secret_resolver( + {"boom": [sys.executable, "-c", "import sys; sys.exit(3)"]} + ) + with self.assertRaises(run_anvil.SecretResolutionError): + resolve("boom", "x") + + def test_unrunnable_provider_raises(self): + resolve = run_anvil.make_secret_resolver({"missing": ["/no/such/binary-xyz", "{ref}"]}) + with self.assertRaises(run_anvil.SecretResolutionError): + resolve("missing", "x") + + def test_error_message_never_contains_the_secret(self): + # A failing CLI must not surface the resolved value; here it prints the + # ref to stderr and fails, and the error names provider/ref (which are + # not secret) -- the resolver never reaches a secret value on failure. + resolve = run_anvil.make_secret_resolver( + {"boom": [sys.executable, "-c", "import sys; sys.exit(1)"]} + ) + with self.assertRaises(run_anvil.SecretResolutionError) as ctx: + resolve("boom", "ref-token") + self.assertIn("boom", str(ctx.exception)) + + def test_drives_plan_tong_secrets_end_to_end(self): + # The resolver is the impure half of tongs.plan_tong_secrets: a secret + # env var ends up as a tmpfs file, never as a -e value. 
+ resolve = run_anvil.make_secret_resolver({"echo": self._writes("sys.argv[1]")}) + plan = tongs.plan_tong_secrets({"TOKEN": "${secret:echo:s3cr3t}"}, resolve) + self.assertEqual(plan["files"], {"/run/swarmforge/secrets/TOKEN": "s3cr3t"}) + self.assertEqual(plan["env"], {"TOKEN_FILE": "/run/swarmforge/secrets/TOKEN"}) + + class MainGateTests(unittest.TestCase): """main() stops before exec when a workspace tong is unapproved.""" diff --git a/scripts/test_tongs.py b/scripts/test_tongs.py index c8b697d..c0500bd 100644 --- a/scripts/test_tongs.py +++ b/scripts/test_tongs.py @@ -200,6 +200,26 @@ def test_bad_interface_kind(self): errors = tongs.validate_tong("t", {"lifecycle": "session", "image": "x", "interface": {"kind": "socket"}}) self.assertTrue(any("interface.kind" in e for e in errors)) + def test_rejects_secret_file_pointer_collision(self): + errors = tongs.validate_tong("t", { + "lifecycle": "session", + "image": "x", + "interface": {"kind": "none"}, + "readiness": {"mode": "none"}, + "env": {"TOKEN": "${secret:op:t}", "TOKEN_FILE": "/declared/path"}, + }) + self.assertTrue(any("TOKEN_FILE" in e for e in errors)) + + def test_rejects_invalid_secret_env_name(self): + errors = tongs.validate_tong("t", { + "lifecycle": "session", + "image": "x", + "interface": {"kind": "none"}, + "readiness": {"mode": "none"}, + "env": {"a/b": "${secret:op:t}"}, + }) + self.assertTrue(any("a/b" in e for e in errors)) + class SecretRefTests(unittest.TestCase): def test_parse_single_ref_with_inner_colons(self): @@ -239,6 +259,153 @@ def test_substitute_uses_injected_resolver(self): self.assertIn("${secret", defn["env"]["A"]) # original not mutated +PROVIDERS_YAML = """\ +providers: + op: ["op", "read", "{ref}"] + pass: ["pass", "show", "{ref}"] +""" + + +class SecretProviderTests(unittest.TestCase): + def test_loads_provider_table(self): + with tempfile.TemporaryDirectory() as tmp: + path = os.path.join(tmp, "secret-providers.yaml") + with open(path, "w") as f: + 
f.write(PROVIDERS_YAML) + providers = tongs.load_secret_providers(path) + self.assertEqual( + providers, + {"op": ["op", "read", "{ref}"], "pass": ["pass", "show", "{ref}"]}, + ) + + def test_missing_file_yields_empty(self): + self.assertEqual(tongs.load_secret_providers("/no/such/file.yaml"), {}) + self.assertEqual(tongs.load_secret_providers(""), {}) + + def test_file_without_providers_block_yields_empty(self): + with tempfile.TemporaryDirectory() as tmp: + path = os.path.join(tmp, "p.yaml") + with open(path, "w") as f: + f.write("unrelated: true\n") + self.assertEqual(tongs.load_secret_providers(path), {}) + + def test_non_mapping_providers_raises(self): + with tempfile.TemporaryDirectory() as tmp: + path = os.path.join(tmp, "p.yaml") + with open(path, "w") as f: + f.write("providers: nope\n") + with self.assertRaises(ValueError): + tongs.load_secret_providers(path) + + def test_non_list_command_raises(self): + with tempfile.TemporaryDirectory() as tmp: + path = os.path.join(tmp, "p.yaml") + with open(path, "w") as f: + f.write('providers:\n op: "op read {ref}"\n') + with self.assertRaises(ValueError): + tongs.load_secret_providers(path) + + def test_command_substitutes_ref_in_every_element(self): + providers = {"op": ["op", "read", "{ref}", "--prefix={ref}"]} + self.assertEqual( + tongs.secret_provider_command(providers, "op", "op://Work/x"), + ["op", "read", "op://Work/x", "--prefix=op://Work/x"], + ) + + def test_command_unknown_provider_raises_keyerror(self): + with self.assertRaises(KeyError): + tongs.secret_provider_command({"op": ["op"]}, "vault", "x") + + +class SecretDeliveryTests(unittest.TestCase): + def test_partition_splits_plain_from_secret_bearing_env(self): + env = { + "PLAIN": "value", + "TOKEN": "${secret:op:op://Work/github/token}", + "MIXED": "Bearer ${secret:pass:db/pw}", + } + plain, secret = tongs.partition_secret_env(env) + self.assertEqual(plain, {"PLAIN": "value"}) + self.assertEqual( + secret, + {"TOKEN": 
"${secret:op:op://Work/github/token}", "MIXED": "Bearer ${secret:pass:db/pw}"}, + ) + + def test_partition_empty_env(self): + self.assertEqual(tongs.partition_secret_env(None), ({}, {})) + self.assertEqual(tongs.partition_secret_env({}), ({}, {})) + + def test_delivery_plan_routes_secrets_to_tmpfs_files(self): + plan = tongs.secret_delivery_plan({"TOKEN": "s3cr3t", "API_KEY": "k3y"}) + self.assertEqual(plan["tmpfs"], "/run/swarmforge/secrets") + self.assertEqual( + plan["files"], + {"/run/swarmforge/secrets/API_KEY": "k3y", "/run/swarmforge/secrets/TOKEN": "s3cr3t"}, + ) + self.assertEqual( + plan["env"], + { + "API_KEY_FILE": "/run/swarmforge/secrets/API_KEY", + "TOKEN_FILE": "/run/swarmforge/secrets/TOKEN", + }, + ) + + def test_delivery_plan_empty_has_no_tmpfs(self): + plan = tongs.secret_delivery_plan({}) + self.assertEqual(plan, {"tmpfs": None, "files": {}, "env": {}}) + + def test_plan_tong_secrets_keeps_secret_values_out_of_env(self): + env = {"REGION": "us", "TOKEN": "${secret:op:op://Work/github/token}"} + plan = tongs.plan_tong_secrets(env, lambda p, r: "RESOLVED-%s" % r) + # Plain env passes through; the secret becomes a _FILE pointer, never a value. + self.assertEqual( + plan["env"], + {"REGION": "us", "TOKEN_FILE": "/run/swarmforge/secrets/TOKEN"}, + ) + self.assertEqual(plan["tmpfs"], "/run/swarmforge/secrets") + self.assertEqual( + plan["files"], + {"/run/swarmforge/secrets/TOKEN": "RESOLVED-op://Work/github/token"}, + ) + # The resolved secret value reaches files only -- never the -e env plan. + self.assertNotIn("RESOLVED-op://Work/github/token", json.dumps(plan["env"])) + + def test_delivery_plan_rejects_traversal_env_name(self): + # An env name that would escape the tmpfs dir as a path component is + # refused rather than baked into a file path. 
+ with self.assertRaises(ValueError): + tongs.secret_delivery_plan({"../../etc/cron.d/x": "v"}) + with self.assertRaises(ValueError): + tongs.secret_delivery_plan({"a/b": "v"}) + + def test_plan_tong_secrets_rejects_pointer_collision(self): + # A plain TOKEN_FILE that disagrees with the synthesized pointer would + # make the tong unable to find TOKEN, so the plan fails closed. + env = {"TOKEN_FILE": "/declared/path", "TOKEN": "${secret:op:t}"} + with self.assertRaises(ValueError): + tongs.plan_tong_secrets(env, lambda p, r: "SECRET") + + def test_plan_tong_secrets_allows_matching_declared_pointer(self): + env = { + "TOKEN_FILE": "/run/swarmforge/secrets/TOKEN", + "TOKEN": "${secret:op:t}", + } + plan = tongs.plan_tong_secrets(env, lambda p, r: "SECRET") + self.assertEqual(plan["env"]["TOKEN_FILE"], "/run/swarmforge/secrets/TOKEN") + self.assertEqual(plan["files"], {"/run/swarmforge/secrets/TOKEN": "SECRET"}) + self.assertNotIn("SECRET", json.dumps(plan["env"])) + + def test_plan_tong_secrets_inert_without_secrets(self): + plan = tongs.plan_tong_secrets({"REGION": "us"}, lambda p, r: "x") + self.assertEqual(plan, {"env": {"REGION": "us"}, "tmpfs": None, "files": {}}) + + def test_plan_tong_secrets_resolves_each_provider_with_its_ref(self): + env = {"A": "${secret:op:a}", "B": "${secret:pass:b}"} + seen = [] + tongs.plan_tong_secrets(env, lambda p, r: seen.append((p, r)) or "v") + self.assertEqual(sorted(seen), [("op", "a"), ("pass", "b")]) + + class EnvNamingTests(unittest.TestCase): def test_prefix_sanitizes_name(self): self.assertEqual(tongs.tong_env_prefix("github-creds"), "SWARMFORGE_TONG_GITHUB_CREDS") diff --git a/scripts/tongs.py b/scripts/tongs.py index ae5b4ac..a8af847 100644 --- a/scripts/tongs.py +++ b/scripts/tongs.py @@ -252,6 +252,19 @@ def err(msg): env = defn.get("env") if env is not None and not isinstance(env, dict): err("'env' must be a mapping of name -> value") + elif isinstance(env, dict): + plain, secret = partition_secret_env(env) + for 
secret_name in sorted(secret): + if not ENV_NAME_RE.match(secret_name): + err("invalid secret env name %r (must be a valid identifier)" % secret_name) + continue + pointer_name = secret_name + SECRET_FILE_ENV_SUFFIX + pointer_value = "%s/%s" % (SECRET_TMPFS_DIR, secret_name) + if pointer_name in plain and plain[pointer_name] != pointer_value: + err( + "env %r collides with the secret file pointer for %r" + % (pointer_name, secret_name) + ) for listish in ("mounts", "networks"): value = defn.get(listish) @@ -328,6 +341,169 @@ def substitute_secrets(value, resolver): return value +# --- Secret providers --------------------------------------------------------- +# A secret reference (${secret::}) is resolved on the host by +# shelling out to a provider CLI -- the docker-credential-helper pattern, so +# Swarmforge knows nothing about any individual secret manager. Providers are +# declared once in the user layer (~/.swarmforge/secret-providers.yaml): +# +# providers: +# op: ["op", "read", "{ref}"] +# pass: ["pass", "show", "{ref}"] +# +# Each value is an argv template; the literal token "{ref}" in any element is +# replaced with the reference. Loading the table and building the argv are pure +# and live here; the subprocess that actually runs the CLI is the caller's (see +# run_anvil.make_secret_resolver), keeping this module side-effect free. + +SECRET_REF_TOKEN = "{ref}" + + +def load_secret_providers(path): + """Load the user-layer secret-provider table. + + Returns `{provider: [argv template, ...]}`. A missing file (or one without a + `providers:` block) yields `{}` -- no providers configured, so resolving any + secret reference later fails loudly rather than silently. Raises `ValueError` + if the file is present but malformed, so a typo surfaces at load time instead + of dropping a provider. + + Command templates must be single-line flow lists; the dependency-free YAML + subset parser does not join a list wrapped across lines. 
+ """ + if not path or not os.path.isfile(path): + return {} + data = load_tong_file(path) + providers = data.get("providers") if isinstance(data, dict) else None + if providers is None: + return {} + if not isinstance(providers, dict): + raise ValueError("secret-providers: 'providers' must be a mapping") + out = {} + for name, template in providers.items(): + if not isinstance(template, list) or not template: + raise ValueError( + "secret-providers: provider %r must be a non-empty command list" % name + ) + if not all(isinstance(part, str) for part in template): + raise ValueError( + "secret-providers: provider %r command must be a list of strings" % name + ) + out[name] = list(template) + return out + + +def secret_provider_command(providers, provider, ref): + """Concrete argv that resolves `ref` through `provider`. + + Substitutes the literal `{ref}` token in every element of the provider's argv + template. Raises `KeyError` if the provider is not declared (the caller turns + this into a clean launch error naming the missing provider). + """ + return [part.replace(SECRET_REF_TOKEN, ref) for part in providers[provider]] + + +# --- Secret delivery ---------------------------------------------------------- +# A resolved secret must never reach a tong as a docker `-e` env var: anything +# holding the docker socket (the broker tong) could read it back via +# `docker inspect`. Instead each secret-bearing env var is delivered as a file on +# an in-memory tmpfs the launcher populates at startup, and the tong is pointed +# at the file with a `_FILE` env var (the conventional docker-secret +# indirection). Plain (non-secret) env keeps flowing through `-e` unchanged. + +SECRET_TMPFS_DIR = "/run/swarmforge/secrets" +SECRET_FILE_ENV_SUFFIX = "_FILE" + +# A secret's file lands at SECRET_TMPFS_DIR/, so the env name becomes a +# path component. 
Restricting it to the POSIX env-name grammar keeps that path +# inside the tmpfs dir -- a name like "../../etc/foo" from an untrusted workspace +# tong cannot escape it -- and is exactly what docker accepts for an env var. +ENV_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + +def partition_secret_env(env): + """Split a tong's env into `(plain, secret)` by secret-reference presence. + + `env` is the tong definition's `env` mapping (values may be unresolved + `${secret:...}` references). `plain` holds values with no secret reference + (safe to pass straight through as `-e`); `secret` holds the keys whose value + contains at least one reference (routed to tmpfs delivery so the resolved + value never appears in `docker inspect`). Order within each is preserved. + """ + plain, secret = {}, {} + for key, value in (env or {}).items(): + if find_secret_refs(value): + secret[key] = value + else: + plain[key] = value + return plain, secret + + +def secret_delivery_plan(resolved_secrets): + """Plan tmpfs delivery for already-resolved secret env values. + + `resolved_secrets` is `{env_name: secret_value}`. Returns plain data the + launcher applies when it starts the tong: + + * `tmpfs` -- the in-tong tmpfs mountpoint to create (`--tmpfs`), so the + secret files live in memory and never touch disk, or `None` + when there are no secrets. + * `files` -- `{absolute_path: secret_value}` the launcher writes into the + running container's tmpfs (never to the host). + * `env` -- `{_FILE: absolute_path}` pointing the tong at each file; + these are paths, not secrets, so they are safe as `-e`. + + With no secrets the tmpfs/files/env are all empty, so a tong without secrets + gets no tmpfs mount and no indirection. Raises `ValueError` for an env name + that is not a valid identifier, so a name that would escape the tmpfs dir + when used as a path component stops the launch rather than reaching disk. 
+ """ + files = {} + env = {} + for name in sorted(resolved_secrets): + if not ENV_NAME_RE.match(name): + raise ValueError("invalid secret env name %r (must be a valid identifier)" % name) + path = "%s/%s" % (SECRET_TMPFS_DIR, name) + files[path] = resolved_secrets[name] + env[name + SECRET_FILE_ENV_SUFFIX] = path + return {"tmpfs": SECRET_TMPFS_DIR if files else None, "files": files, "env": env} + + +def plan_tong_secrets(env, resolver): + """Resolve a tong's secret env and plan how the tong receives it. + + Combines the steps the launcher performs for one tong's environment: + partition `env` into plain and secret, resolve only the secret-bearing values + through the injected `resolver(provider, ref) -> str` (keeping this function + pure), then deliver those via tmpfs. Returns: + + * `env` -- plain env vars plus the `_FILE` pointers (never a secret + value). + * `tmpfs` -- the tmpfs mountpoint to create, or `None` when no secrets. + * `files` -- `{absolute_path: secret_value}` to write into the tmpfs. + + The resolved secret values appear only under `files`, never under `env`, so + nothing the launcher passes as `-e` is readable through `docker inspect`. + """ + plain, secret = partition_secret_env(env) + resolved = {key: substitute_secrets(value, resolver) for key, value in secret.items()} + delivery = secret_delivery_plan(resolved) + merged_env = dict(plain) + for key, value in delivery["env"].items(): + # A file pointer (TOKEN_FILE) can collide with a plain env var of the same + # name. Unless it already points at the generated path, fail rather than + # launching a tong that cannot find its secret. 
+ if key in merged_env: + if merged_env[key] == value: + continue + raise ValueError( + "tong env %r collides with the secret file pointer for %r" + % (key, key[:-len(SECRET_FILE_ENV_SUFFIX)]) + ) + merged_env[key] = value + return {"env": merged_env, "tmpfs": delivery["tmpfs"], "files": delivery["files"]} + + # --- Environment-variable naming ----------------------------------------------