diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 381c802..e552908 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -5,13 +5,13 @@ }, "metadata": { "description": "Polycli host adapters for Claude Code and related agent CLIs", - "version": "0.6.24" + "version": "0.6.25" }, "plugins": [ { "name": "polycli", "description": "Claude Code adapter for the shared polycli companion", - "version": "0.6.24", + "version": "0.6.25", "source": "./plugins/polycli" } ] diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json index f0a0a42..cd5f72d 100644 --- a/.github/plugin/marketplace.json +++ b/.github/plugin/marketplace.json @@ -5,13 +5,13 @@ }, "metadata": { "description": "Polycli marketplace for GitHub Copilot CLI", - "version": "0.6.24" + "version": "0.6.25" }, "plugins": [ { "name": "polycli-copilot", "description": "Run the shared polycli companion from GitHub Copilot CLI", - "version": "0.6.24", + "version": "0.6.25", "source": "./plugins/polycli-copilot" } ] diff --git a/CHANGELOG.md b/CHANGELOG.md index c8a7408..dd1b790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,24 @@ Separate from `docs/release.md` (release-focused) and `docs/archive/session-memo --- +## 2026-06-19 — Claude — docs: cc-X domestic-model endpoint recipes (Path-B docs + reference data) + +- Added `docs/cc-x-endpoints.md` (human reference) + `docs/cc-x-recipes.json` (machine-readable source of truth) encoding the cc-X pattern: point the EXISTING `claude` runtime (BYOK) or `opencode` (OpenAI-compatible) at a domestic vendor's Anthropic-compatible endpoint via `ANTHROPIC_BASE_URL` + `ANTHROPIC_AUTH_TOKEN` + `ANTHROPIC_MODEL`. 
Covers 9 entries: 7 PRC core labs (MiniMax, Moonshot Kimi, Zhipu GLM, Alibaba Qwen, DeepSeek, ByteDance Doubao, StepFun) plus 2 marketplace/resale gateways (Baidu Qianfan, Tencent), each with per-vendor base URL, model-id family, native-CLI grouping, context-window (`autoCompactWindow`), caching note, and a `source` URL+date per entry. +- Encoded the operational gotchas: silent prompt-cache degradation on shim endpoints (dual cache-breakpoint; DeepSeek is the auto-prefix-caching exception), pin a known-good Claude Code version + `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`, size `CLAUDE_CODE_AUTO_COMPACT_WINDOW` to the model's context, marketplace (Baidu/Tencent) model-identity instability, and the PRC data-sovereignty/Entity-List gate as SEPARATE from harness choice. +- Honest-default: marketplace/resale endpoints carry `status: "marketplace-unstable"` and `autoCompactWindow: null` (no fabricated model/version pin), mirroring the gemini attempted-vs-used-model caveat in `docs/model-fallback-policy.md`. Enforced by `scripts/validate-cc-x-recipes.mjs` (a pure validator modeled on `validate-fixture-metadata.mjs`) + `scripts/tests/validate-cc-x-recipes.test.mjs` (auto-joined by the npm-test glob); added `npm run validate:cc-x-recipes` for standalone use. +- Documented that cc-X is NOT a polycli provider/adapter/runtime — it rides the existing runtimes via standard env vars; the `claude -p` path forwards them via full `process.env` inheritance, while the tmux allowlist (`CLAUDE_TMUX_ENV_EXACT`) forwards the `ANTHROPIC_*` trio but NOT the `CLAUDE_CODE_*` knobs (documented, not fixed). Clarified that the polycli `minimax`/`mmx-cli` provider is a stateless text/media call, not the MiniMax cc-X coding path. +- Cross-linked from `docs/provider-paths.md` (new subsection + Official-references bullet) and `docs/polycli-v1-public-surface.md` (one out-of-contract sentence). Recorded the no-adapter decision in `docs/roadmap.md` as closed Q10 + an Explicit-non-goals bullet. 
Zero runtime/production-path code change; `claude.js` env behavior left untouched by design. Verification: `node scripts/validate-cc-x-recipes.mjs` ok (9 entries), `node --test scripts/tests/validate-cc-x-recipes.test.mjs` 5/5, `npm test` + `npm run release:check` green. Snapshot facts are 2026-06-19; the validator guards structure + source-anchoring, not current-truth. + +## 2026-06-19 — Claude — adversarial re-verification of the workflow-review remediation + +- Independently re-verified the committed remediation sweep (d272042 + 03ae92d) with a Workflow fan-out (9 adversarial auditors -> double-refutation -> completeness critic). 18 raw findings -> 7 confirmed + 1 critic-confirmed; 11 refuted. Confirmed the prior fixes are sound and re-ran full validation (the prior round's open residual #1): `npm test` 544/544, `npm run release:check` exit 0. +- Closed residual #3 (state-root permissions) with real-filesystem evidence: under permissive umask 000, stateRoot/stateDir/jobsDir resolve to 0700 and state.json/job-config to 0600, enforced by explicit chmod (not umask). Characterized residual #4 (orphan `.json` result files leak after MAX_JOBS pruning) as a PRE-EXISTING latent issue — `removeJobFile` is a dead export and the old code pruned identically — so it is out of scope for this remediation and left flagged, not fixed. +- Fixed 2 confirmed regressions introduced by the remediation: (1) the opencode host adapter threw on exit code 2, but 2 is the companion's documented soft signal (`health` with no healthy provider, `status --wait` timeout) that still emits a valid JSON envelope on stdout — extracted `isHardCompanionFailure(status)` so exit 2 returns the envelope while exit 1/4/5/crash still reject; (2) `cancelJob` ran `cleanupRuntimePaths` (which deletes a review job's live cwd via cleanupPaths) BEFORE killing the worker — reordered to kill first, then clean up, and skip the runtime-path deletion entirely when the kill fails (worker may still be alive). 
+- Fixed 2 confirmed incomplete fixes: (1) Grok `SUCCESS_STOP_REASONS` omitted `MaxTokens`, so a truncated-but-visible answer was wrongly marked ok=false — added maxtokens/max_tokens/length (grok-build's real StopReason enum is {EndTurn, MaxTokens, MaxTurnRequests, Refusal, ToolUse, Cancelled}, verified against the installed binary); refusal/cancelled/tool_use/max_turn_requests stay non-success; (2) the run-ledger append path created `~/.polycli/state/` world-traversable (0o755) via the mode-less ensureParentDir on the run_started event that fires before any other state write — `appendRunLedgerEvent` now calls `ensureStateDir` first to land it 0o700. +- Closed 4 confirmed test gaps (all mutation/RED-proven): pre-existing-0755 dir hardening test for `ensureStateDir` (state-1); state-dir-0700-after-append-only test for the run-ledger path (pwp-2, RED-proven); Grok non-success-stopReason-ALONE failure tests for both parseGrokJsonResult and runGrokPromptStreaming plus a MaxTokens-success test (test-1 + grok-1, RED-proven); sync `runProviderPrompt` explicit-model-before-default fallback test mirroring the streaming case (qwen-model-1); new `scripts/tests/opencode-host.test.mjs` pinning the exit-2 soft-signal contract (oc-status-1). +- All changes respect the Path B architecture boundary: no shared runtime base class, no provider parser promotion into polycli-utils, timing four-state untouched, cleanupPaths still sourced only from internal review temp dirs. +- Verification: focused RED/GREEN proofs for grok-1 and pwp-1 (reverting each fix turns its new test red); focused suite 66/66; `npm test` 544/544 (535 + 9 new tests); `npm run release:check` exit 0 (plugin bundles 5, fixture metadata 17, codex adapter 5; one tmux.jsonl ENOENT flake on the first run was the known full-suite-parallel-load flake — claude.test.js passes 28/28 in isolation, and the re-run was clean). Not published; current unreleased workspace work after v0.6.24. 
+ ## 2026-06-16 — Codex — Grok fixture residual cleanup - Closed the remaining workflow-review residual risk by capturing a real Grok streaming fixture with `grok 0.2.51 (f4f85a6492e) [stable]`: `grok -p 'Reply with exactly HELLO_GROK_FIXTURE and nothing else.' --output-format streaming-json -m grok-build --permission-mode plan --disable-web-search --max-turns 1`. diff --git a/docs/cc-x-endpoints.md b/docs/cc-x-endpoints.md new file mode 100644 index 0000000..d9697a3 --- /dev/null +++ b/docs/cc-x-endpoints.md @@ -0,0 +1,54 @@ +# cc-X endpoint recipes (no native CLI cluster) + +Snapshot: 2026-06-19. Reference only — not a routing oracle, and **not a polycli runtime**. Review monthly, before release, and whenever a vendor endpoint changes. The machine-readable source of truth is [`cc-x-recipes.json`](./cc-x-recipes.json); this page is its human narration. Re-verify any row against its `source` URL before relying on it. + +## What cc-X is + +"cc-X" is the pattern of pointing a top-tier agentic-coding harness at a domestic LLM vendor's **Anthropic-compatible** endpoint with three standard environment variables: + +```bash +export ANTHROPIC_BASE_URL="https://api.<vendor-domain>/anthropic" +export ANTHROPIC_AUTH_TOKEN="<your-api-key>" # BYOK +export ANTHROPIC_MODEL="<vendor-model-id>" +``` + +The harness is **Claude Code** for vendors with no competitive native coding CLI, or **opencode** when the target is an OpenAI-compatible model. cc-X wins for the no-native-CLI cluster because it is the best-AVAILABLE, co-designed, and 5-18x-cheaper scaffold — **not** because Claude Code is the highest-scoring harness (controlled ablations show other open models score higher under other harnesses; that nuance lives in the vendor system cards, not here, per the `docs/roadmap.md` Q7 source discipline that forbids citing un-sourced benchmark scores). + +Provider grouping: + +- **No competitive native coding CLI → cc-X is the path:** MiniMax, DeepSeek, Zhipu/GLM, StepFun. 
+- **Has a native CLI → cc-X is a choice, not a default:** Moonshot (Kimi Code), Alibaba (Qwen Code), ByteDance (Trae / trae-agent), Baidu (Comate Zulu-CLI), Tencent (CodeBuddy Code), Xiaomi (MiMo Code). + +## How this rides existing polycli runtimes + +cc-X is **not** a polycli provider, adapter, or runtime, and this PR adds none. The recipe runs through the EXISTING `claude` runtime (BYOK env, no vendor CLI) or `opencode` (OpenAI-compatible models). polycli already forwards `ANTHROPIC_BASE_URL` / `ANTHROPIC_AUTH_TOKEN` / `ANTHROPIC_MODEL`: + +- On the default headless `claude -p` path, the runtime inherits the full `process.env`, so all three (and the `CLAUDE_CODE_*` knobs below) pass through unchanged. +- On the explicit/internal tmux TUI path, the runtime forwards only an `ANTHROPIC_*` allowlist (`CLAUDE_TMUX_ENV_EXACT` in `packages/polycli-runtime/src/claude.js`). The three `ANTHROPIC_*` vars pass through there too, **but `CLAUDE_CODE_AUTO_COMPACT_WINDOW` / `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS` are NOT in that allowlist and will not reach a tmux session.** Set those two knobs on the default `claude -p` path, or export them inside the tmux session itself. + +There is no code to add: set the env vars and run `claude` (or polycli's `claude` provider) normally. + +## Operational gotchas (durable) + +These are the hard-won knobs the recipes encode. Per-entry specifics (base URLs, model-id families, per-vendor context window) live in `cc-x-recipes.json`. + +1. **Prompt caching is silently degraded on shim endpoints.** Claude Code's single cache-breakpoint produces a near-zero hit rate against MiniMax / Kimi shims, so the system prompt + tool schemas get re-billed every turn. Mitigation: use a dual cache-breakpoint and verify the gateway does not gate caching on whether the model is literally named `claude`. **DeepSeek is the exception** — it does automatic server-side prefix caching, so no client mitigation is needed. +2. 
**Pin a known-good Claude Code version and set `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`.** Claude Code auto-attaches experimental `anthropic-beta` headers that periodically 400 third-party endpoints on upgrade. +3. **Set `CLAUDE_CODE_AUTO_COMPACT_WINDOW` to the model's real context** or Claude Code compacts prematurely. Per-model values are in `cc-x-recipes.json` (`autoCompactWindow`): e.g. DeepSeek 128000, Kimi 262144, MiniMax-M3 512000. `null` means we deliberately did not pin one. +4. **Marketplace endpoints have no stable model identity.** See the next section. + +## Marketplace endpoints: honest-default refusal to pin + +Baidu Qianfan and Tencent's coding gateway are **resale/marketplace** endpoints (`marketplace: true`, `status: "marketplace-unstable"`). One `ANTHROPIC_MODEL` string can silently resolve to a different vendor or version, there is no client-side version pinning, and 2026 price hikes mean model identity is not stable over time. The recipe file deliberately leaves `autoCompactWindow: null` and ships no pinned model id for these entries — fabricating a stable pin would repeat exactly the "attempted vs used model" dishonesty already documented for gemini in [`docs/model-fallback-policy.md`](./model-fallback-policy.md). Treat the model string you send as a *request*, not a guarantee. + +## Data sovereignty is a separate gate + +PRC data-residency and Entity-List exposure are a **separate** decision from harness choice. The levers are intl endpoints, zero-retention terms, or self-hosted open weights (GLM-5.x MIT, Kimi mod-MIT, Qwen Apache-2.0) — not anything polycli does. China ToS does **not** make cc-X fragile: BYOK + a non-Anthropic base URL is documented and supported by Anthropic. The residual risk is indirect (export-screening could kill the native-Claude fallback; Anthropic could later gate the client), not a ToS trap. 
+ +## Not the same as the polycli `minimax` provider + +polycli already has a `minimax` provider that calls official `mmx-cli` (`mmx text chat --output json --non-interactive`). That is a **stateless text/media call**, not the MiniMax cc-X coding path. If you want MiniMax-M2/M3 as a coding agent, use the cc-X recipe above (Claude Code against `ANTHROPIC_BASE_URL=https://api.minimax.io/anthropic`), not the `minimax` provider. The `MiniMax text / multimodal` row in [`provider-paths.md`](./provider-paths.md) is the stateless-call path; this page is the coding-agent path. + +## Official references checked + +Each recipe entry in `cc-x-recipes.json` carries its own `source` URL + date. Re-verify there before relying on a base URL or model id. diff --git a/docs/cc-x-recipes.json b/docs/cc-x-recipes.json new file mode 100644 index 0000000..10de933 --- /dev/null +++ b/docs/cc-x-recipes.json @@ -0,0 +1,133 @@ +{ + "schemaVersion": 1, + "collectedAt": "2026-06-19", + "disclaimer": "Reference snapshot, not a routing oracle and not a polycli runtime. cc-X means pointing the EXISTING claude runtime (BYOK, no native vendor CLI) or opencode (OpenAI-compatible models) at a vendor's Anthropic-compatible endpoint via ANTHROPIC_BASE_URL + ANTHROPIC_AUTH_TOKEN + ANTHROPIC_MODEL. polycli adds NO cc-X provider, adapter, or runtime. Endpoints, model-id families, regions, pricing, and SKU availability change frequently; every entry carries a source URL + date — re-verify against the source before relying on it. Marketplace/resale endpoints (marketplace:true) resolve one ANTHROPIC_MODEL string to a vendor-chosen model/version with no client-side pinning; autoCompactWindow is null for them on purpose (we refuse to fabricate a stable context window, same honest-default as the gemini attempted-vs-used-model caveat in docs/model-fallback-policy.md). 
Claude Code is NOT asserted to be the highest-scoring harness; it is the best-AVAILABLE + co-designed + cheaper scaffold for the no-native-CLI cluster. cc-X is documented and supported by Anthropic (BYOK + non-Anthropic base URL); the residual risk is indirect (export-screening on the native-Claude fallback, or Anthropic later gating the client), not a China-ToS fragility.", + "recipes": [ + { + "vendor": "MiniMax", + "nativeCli": false, + "runtime": "claude", + "baseUrlIntl": "https://api.minimax.io/anthropic", + "baseUrlCN": "https://api.minimaxi.com/anthropic", + "modelIds": ["MiniMax-M2", "MiniMax-M2.5", "MiniMax-M3"], + "marketplace": false, + "autoCompactWindow": 512000, + "cachingNote": "Prompt caching silently degraded on the Claude Code single-breakpoint path (re-bills system prompt + tool schemas each turn). Use dual cache-breakpoint; verify the gateway does not gate caching on whether the model is named claude.", + "dataResidency": "intl endpoint (api.minimax.io) vs CN endpoint (api.minimaxi.com) is a separate sovereignty/Entity-List gate from harness choice.", + "status": "verified", + "source": { "url": "https://platform.minimax.io/docs", "date": "2026-06-19" } + }, + { + "vendor": "Moonshot Kimi", + "nativeCli": true, + "runtime": "claude", + "baseUrlIntl": "https://api.moonshot.ai/anthropic", + "baseUrlCN": "https://api.moonshot.cn/anthropic", + "modelIds": ["kimi-k2.7-code"], + "marketplace": false, + "autoCompactWindow": 262144, + "cachingNote": "Same single-breakpoint degradation as MiniMax; use dual cache-breakpoint. 
Moonshot also ships a native Kimi Code CLI, so cc-X is a choice here, not the only path.", + "dataResidency": "global (api.moonshot.ai) vs CN (api.moonshot.cn).", + "status": "verified", + "source": { "url": "https://platform.moonshot.ai/docs", "date": "2026-06-19" } + }, + { + "vendor": "Zhipu GLM", + "nativeCli": false, + "runtime": "claude", + "baseUrlIntl": "https://api.z.ai/api/anthropic", + "baseUrlCN": null, + "modelIds": ["GLM-4.7", "GLM-4.5-Air"], + "marketplace": false, + "autoCompactWindow": null, + "cachingNote": "Official z.ai config maps Opus/Sonnet -> GLM-4.7 and Haiku -> GLM-4.5-Air via ANTHROPIC_MODEL / ANTHROPIC_SMALL_FAST_MODEL. GLM-5.x open weights (MIT) are a self-host lever for sovereignty-sensitive use.", + "dataResidency": "z.ai is the intl gateway; open-weights self-host is the strongest sovereignty lever.", + "status": "verified", + "source": { "url": "https://docs.z.ai", "date": "2026-06-19" } + }, + { + "vendor": "Alibaba Qwen", + "nativeCli": true, + "runtime": "claude", + "baseUrlIntl": "https://dashscope-intl.aliyuncs.com/apps/anthropic", + "baseUrlCN": null, + "modelIds": ["qwen3-coder"], + "marketplace": false, + "autoCompactWindow": null, + "cachingNote": "DashScope Anthropic endpoint is Singapore-region only; the Anthropic parser is preserved server-side. Alibaba ships native Qwen Code (already a polycli `qwen` provider via its own CLI), so cc-X is a choice. Open weights are Apache-2.0.", + "dataResidency": "Singapore region only for this endpoint.", + "status": "verified", + "source": { "url": "https://www.alibabacloud.com/help/en/model-studio", "date": "2026-06-19" } + }, + { + "vendor": "DeepSeek", + "nativeCli": false, + "runtime": "claude", + "baseUrlIntl": "https://api.deepseek.com/anthropic", + "baseUrlCN": null, + "modelIds": ["deepseek-v4-pro", "deepseek-v4-flash"], + "marketplace": false, + "autoCompactWindow": 128000, + "cachingNote": "Auto-maps opus -> deepseek-v4-pro and haiku/sonnet -> deepseek-v4-flash. 
Automatic prefix caching server-side — the dual-breakpoint mitigation is NOT needed here (DeepSeek is the caching exception).", + "dataResidency": "Single api.deepseek.com endpoint.", + "status": "verified", + "source": { "url": "https://api-docs.deepseek.com", "date": "2026-06-19" } + }, + { + "vendor": "ByteDance Doubao", + "nativeCli": true, + "runtime": "claude", + "baseUrlIntl": null, + "baseUrlCN": "https://ark.cn-beijing.volces.com/api/coding", + "modelIds": ["doubao-seed-code", "doubao-seed-2.0-code"], + "marketplace": false, + "autoCompactWindow": null, + "cachingNote": "Region-locked to cn-beijing; access requires PRC real-name verification + a mainland phone number. ByteDance also ships native Trae / trae-agent, so cc-X is a choice.", + "dataResidency": "Mainland-only access gate (real-name + mainland phone); high sovereignty exposure.", + "status": "verified", + "source": { "url": "https://www.volcengine.com/docs", "date": "2026-06-19" } + }, + { + "vendor": "StepFun", + "nativeCli": false, + "runtime": "claude", + "baseUrlIntl": "https://api.stepfun.ai/step_plan", + "baseUrlCN": null, + "modelIds": ["step-3.5-flash", "step-3.7-flash"], + "marketplace": false, + "autoCompactWindow": null, + "cachingNote": "Genuine single-vendor self-routing (step_plan). 
Reachable via OpenRouter for non-PRC users, in which case use the opencode runtime against the OpenAI-compatible OpenRouter endpoint instead of claude.", + "dataResidency": "Direct StepFun endpoint or OpenRouter relay for non-PRC.", + "status": "verified", + "source": { "url": "https://platform.stepfun.com/docs", "date": "2026-06-19" } + }, + { + "vendor": "Baidu Qianfan", + "nativeCli": true, + "runtime": "claude", + "baseUrlIntl": null, + "baseUrlCN": "https://qianfan.baidubce.com/anthropic", + "modelIds": [], + "marketplace": true, + "autoCompactWindow": null, + "cachingNote": "MARKETPLACE / RESALE: serves Kimi / DeepSeek / GLM / MiniMax plus only ERNIE-4.5-Turbo of Baidu's own line (flagship ERNIE 5.x is UNREACHABLE here). Docs default to deepseek-v3.2. One ANTHROPIC_MODEL string can silently resolve to a different vendor/version; no model-version pinning. Baidu ships native Comate Zulu-CLI as the alternative.", + "dataResidency": "Mainland gateway; resold-model identity is not stable.", + "status": "marketplace-unstable", + "source": { "url": "https://cloud.baidu.com/doc/qianfan-docs", "date": "2026-06-19" } + }, + { + "vendor": "Tencent", + "nativeCli": true, + "runtime": "claude", + "baseUrlIntl": null, + "baseUrlCN": "https://api.lkeap.cloud.tencent.com/coding/anthropic", + "modelIds": [], + "marketplace": true, + "autoCompactWindow": null, + "cachingNote": "MARKETPLACE: serves Hunyuan SKUs plus minimax-m2.5 / kimi-k2.5 / glm-5. One ANTHROPIC_MODEL string can silently resolve to a different vendor/version; no client-side version pinning. 
Tencent ships native CodeBuddy Code CLI as the alternative.", + "dataResidency": "Mainland gateway; resold-model identity is not stable.", + "status": "marketplace-unstable", + "source": { "url": "https://cloud.tencent.com/document/product/1772", "date": "2026-06-19" } + } + ] +} diff --git a/docs/polycli-v1-public-surface.md b/docs/polycli-v1-public-surface.md index 46d3d30..56c2f8c 100644 --- a/docs/polycli-v1-public-surface.md +++ b/docs/polycli-v1-public-surface.md @@ -6,6 +6,8 @@ The repo now contains provider runtime code for host plugin builds, but that code is outside the v1 public package surface. The public contract is intentionally limited to utility helpers, timing semantics, and the terminal CLI's command vocabulary. +The cc-X domestic-model endpoint recipes (`docs/cc-x-endpoints.md` / `docs/cc-x-recipes.json`) are out-of-contract reference documentation: they ride the existing `claude`/`opencode` runtimes via standard `ANTHROPIC_*` env vars and add no new public package, runtime, or command surface. + ## v1 Package Surface ### `@bbingz/polycli-utils` diff --git a/docs/provider-paths.md b/docs/provider-paths.md index 4530946..744c0a6 100644 --- a/docs/provider-paths.md +++ b/docs/provider-paths.md @@ -21,6 +21,12 @@ This table is a routing reference for humans and host adapters. It is not an aut | Copilot / Codex-backed fallback | `copilot` | OpenAI Responses API / Agents SDK for new direct integrations | Keep Copilot provider as a fallback, but Polycli ask/review must not pass allow-all tool/path/url flags. Use restricted `--excluded-tools` and retain `--no-ask-user` only for programmatic execution. | | OpenAI GPT / Codex direct programmatic work | not a Polycli CLI provider today | OpenAI Responses API, Agents SDK | For new stateless direct integrations, official SDK/API is more appropriate than wrapping another CLI. 
| +## cc-X endpoint recipes (no native CLI cluster) + +For domestic/Chinese LLMs with no competitive native coding CLI (MiniMax, DeepSeek, Zhipu/GLM, StepFun) the path is **cc-X**: point the existing `claude` runtime (BYOK) or `opencode` (OpenAI-compatible models) at the vendor's Anthropic-compatible endpoint via `ANTHROPIC_BASE_URL` + `ANTHROPIC_AUTH_TOKEN` + `ANTHROPIC_MODEL`. This is documented in [`cc-x-endpoints.md`](./cc-x-endpoints.md) (machine-readable source: [`cc-x-recipes.json`](./cc-x-recipes.json)). cc-X is **not** a polycli provider — it rides the existing runtimes with standard env vars; polycli adds no cc-X adapter. + +Note: the `MiniMax text / multimodal` row above is the polycli `minimax` provider (stateless `mmx-cli` text/media call), **not** the MiniMax cc-X coding path. For MiniMax as a coding agent, use the cc-X recipe, not the `minimax` provider. + ## Review procedure Run the automated review-flag subset first, then the manual provider-path probes: @@ -58,3 +64,4 @@ If a CLI is not installed locally, record it as skipped rather than failing the - MiniMax CLI docs: https://platform.minimax.io/docs/token-plan/minimax-cli and https://github.com/MiniMax-AI/cli - xAI Grok Build CLI docs: https://docs.x.ai/docs/grok-build/introduction - OpenAI Responses API: https://platform.openai.com/docs/api-reference/responses/create +- cc-X endpoint recipes (no native CLI cluster): ./cc-x-endpoints.md (per-vendor source URLs live in ./cc-x-recipes.json) diff --git a/docs/roadmap.md b/docs/roadmap.md index 1b88d55..37ffb45 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -121,6 +121,12 @@ Items: - **Q9b — `polycli sessions` / `polycli purge` command** (landed 2026-05-29): a manual, on-demand command that lists/deletes ONLY upstream session artifacts whose path appears in polycli's run ledger. No daemon; honest-default — never auto-deletes, requires an explicit purge invocation. 
- **Q9c — opt-in per-run session isolation** (deferred, needs design): scoped `HOME`/`XDG_CONFIG_HOME`/config-dir env at the `spawn.js` boundary, gated on `runtimePersistence==='session'`, default OFF. DEFERRED because a naive `HOME` override also hides credentials (breaks auth) and prior sessions (breaks `--resume`); needs a per-provider design that relocates only session state while preserving auth. `codex --ephemeral` exists but codex lives in the separate `polycli-codex` plugin. +### Q10 — cc-X domestic-model endpoint recipes + +Source: 2026-06-19 cc-X research (point Claude Code / opencode at a domestic vendor's Anthropic-compatible endpoint via `ANTHROPIC_BASE_URL` + `ANTHROPIC_AUTH_TOKEN` + `ANTHROPIC_MODEL`). + +Status: landed as docs + reference data, Path-B-pure. `docs/cc-x-endpoints.md` + machine-readable `docs/cc-x-recipes.json` (guarded by `scripts/validate-cc-x-recipes.mjs` + its paired test) encode the core-lab endpoint matrix, the operational gotchas (silent prompt-cache degradation, `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS` pin, `CLAUDE_CODE_AUTO_COMPACT_WINDOW` sizing, marketplace model-identity instability, data-sovereignty gate), and the honest-default refusal to pin a model/version for the Baidu/Tencent marketplace endpoints. cc-X rides the EXISTING `claude`/`opencode` runtimes via standard env vars — verified zero runtime change needed (`claude.js` already forwards the `ANTHROPIC_*` trio on both the `claude -p` and tmux paths). NO cc-X provider/adapter/runtime was added; that refusal is the decision, recorded in Explicit non-goals below. + --- ## Explicit non-goals @@ -131,6 +137,7 @@ These are principled refusals, not backlog. Do not schedule them without an expl - **No unified event schema that collapses provider-specific semantics.** `extractProviderEventText` dispatches per provider for a reason. - **No `cold` / `retry` timing metrics.** Upstream CLIs do not emit stable signals; any implementation would be a fake. Stays `unsupported`. 
(Memory: `project_cold_retry_unmeasured.md`.) - **No "monitor" / daemon / long-lived polycli process.** Each invocation is a short-lived CLI run against a live provider. Daemon mode would compress orthogonal axes (runtimePersistence / measurementScope) that the current timing contract explicitly keeps separate. +- **No per-vendor cc-X "coding plan" provider adapter/runtime.** Domestic-model Anthropic-compatible endpoints are DOCUMENTED `ANTHROPIC_BASE_URL` recipes (`docs/cc-x-endpoints.md` / `docs/cc-x-recipes.json`) that ride the existing `claude`/`opencode` runtimes, not new providers. Building a glm/deepseek/minimax "coding" adapter would violate Path B; do not "finish" the recipes into a framework. --- diff --git a/package.json b/package.json index ea2e853..0611df5 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "validate:manifests": "node scripts/validate-release-manifests.mjs", "validate:host-map": "node scripts/validate-host-command-map.mjs", "validate:codex-adapter": "node scripts/validate-codex-adapter.mjs", + "validate:cc-x-recipes": "node scripts/validate-cc-x-recipes.mjs", "check:review-drift": "node scripts/check-review-cli-drift.mjs", "check:provider-paths": "node scripts/check-review-cli-drift.mjs", "check:fixture-freshness": "node scripts/check-fixture-freshness.mjs", diff --git a/packages/polycli-runtime/src/grok.js b/packages/polycli-runtime/src/grok.js index 3c355ef..fa39ac9 100644 --- a/packages/polycli-runtime/src/grok.js +++ b/packages/polycli-runtime/src/grok.js @@ -9,7 +9,12 @@ const AUTH_CHECK_TIMEOUT_MS = 30_000; // `grok models` reports `Default model: grok-build`; callers pass `-m ` to switch. const DEFAULT_GROK_MODEL = "grok-build"; const GROK_EXPLICIT_AUTH_ERROR_RE = /\b(unauthenticated|unauthorized|not authenticated|not authorized|login required|log in|sign in|not logged in|invalid api key|missing api key|api key required|token expired|invalid token|credential(?:s)? 
(?:missing|invalid|expired)|permission denied|access denied|forbidden|401|403)\b/i; -const SUCCESS_STOP_REASONS = new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished"]); +// grok-build's StopReason serde enum is {EndTurn, MaxTokens, MaxTurnRequests, Refusal, ToolUse, +// Cancelled} (verified against the installed binary). A MaxTokens stop means the answer was merely +// truncated at the output-token cap — a complete, visible answer from the user's perspective — so it +// must stay ok=true. Genuine non-success reasons (refusal, cancelled, tool_use, max_turn_requests) +// are deliberately excluded so they still fail the run while partial text is preserved. +const SUCCESS_STOP_REASONS = new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished", "maxtokens", "max_tokens", "length"]); export const TRANSIENT_PROBE_ERROR_PATTERNS = [ /\b(timed out|timeout|429|rate limit|no capacity available|temporar(?:y|ily)|service unavailable|overloaded|try again|econnreset|econnrefused|enotfound|network|socket hang up)\b/i, ]; diff --git a/packages/polycli-runtime/test/claude.test.js b/packages/polycli-runtime/test/claude.test.js index 7255d32..aeaaf5c 100644 --- a/packages/polycli-runtime/test/claude.test.js +++ b/packages/polycli-runtime/test/claude.test.js @@ -429,7 +429,7 @@ process.exit(0); tmuxBin: bin, tmuxSessionName: "polycli-claude-folded-paste", executionMode: "tmux-tui", - timeout: 2_000, + timeout: TMUX_TEST_TIMEOUT_MS, env: { ...process.env, TMUX_ARGV_LOG: logFile }, }); diff --git a/packages/polycli-runtime/test/grok.test.js b/packages/polycli-runtime/test/grok.test.js index 2f2a409..a5bee6b 100644 --- a/packages/polycli-runtime/test/grok.test.js +++ b/packages/polycli-runtime/test/grok.test.js @@ -73,6 +73,34 @@ test("parseGrokJsonResult fails on terminal error metadata even with visible tex assert.equal(parsed.error, "permission denied"); }); +test("parseGrokJsonResult fails on 
a non-success stopReason alone (no error metadata) and keeps partial text", () => { + // stopReason-only failure: no error field/event, so providerError is null and the failure must be + // driven solely by isNonSuccessStopReason. Reverting that branch would flip ok back to true. + const parsed = parseGrokJsonResult( + JSON.stringify({ text: "partial answer", stopReason: "Cancelled", sessionId: "019e8685-1031-70a0-9ac4-37dcbcefc163" }), + "", + 0 + ); + + assert.equal(parsed.ok, false); + assert.equal(parsed.error, "grok stopped with Cancelled"); + assert.equal(parsed.response, "partial answer"); +}); + +test("parseGrokJsonResult treats a MaxTokens truncation stopReason as success", () => { + // grok emits MaxTokens when the answer is truncated at the output-token cap — a complete, visible + // answer from the user's perspective, so it must stay ok=true rather than being marked failed. + const parsed = parseGrokJsonResult( + JSON.stringify({ text: "a long but truncated answer", stopReason: "MaxTokens", sessionId: "019e8685-1031-70a0-9ac4-37dcbcefc163" }), + "", + 0 + ); + + assert.equal(parsed.ok, true); + assert.equal(parsed.error, null); + assert.equal(parsed.response, "a long but truncated answer"); +}); + test("parseGrokStreamText concatenates text deltas and reads sessionId from the end event", () => { const parsed = parseGrokStreamText( [ @@ -215,3 +243,30 @@ test("runGrokPromptStreaming fails when partial text is followed by a terminal e assert.equal(result.error, "permission denied"); assert.equal(result.stopReason, "Cancelled"); }); + +test("runGrokPromptStreaming fails on a non-success stopReason alone (no error event) and keeps partial text", async () => { + const child = new EventEmitter(); + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + child.stdin = { write() {}, end() {}, on() {} }; + child.kill = () => {}; + + // No error event — only a Cancelled stopReason. 
providerError is null so the failure must come + // solely from isNonSuccessStopReason; this is the streaming mirror of the json stopReason-only path. + const result = await runGrokPromptStreaming({ + prompt: "ping", + spawnImpl() { + queueMicrotask(() => { + child.stdout.emit("data", '{"type":"text","data":"partial"}\n'); + child.stdout.emit("data", '{"type":"end","stopReason":"Cancelled","sessionId":"019e862e-63fd-7333-8f4c-4add60220323"}\n'); + child.emit("close", 0, null); + }); + return child; + }, + }); + + assert.equal(result.ok, false); + assert.equal(result.response, "partial"); + assert.equal(result.error, "grok stopped with Cancelled"); + assert.equal(result.stopReason, "Cancelled"); +}); diff --git a/packages/polycli-runtime/test/registry.test.js b/packages/polycli-runtime/test/registry.test.js index f4d9801..b96f014 100644 --- a/packages/polycli-runtime/test/registry.test.js +++ b/packages/polycli-runtime/test/registry.test.js @@ -285,6 +285,30 @@ test("runProviderPromptStreaming prefers explicit model before defaultModel fall assert.equal(result.model, "explicit-model"); }); +test("runProviderPrompt prefers explicit model before defaultModel fallback", async () => { + const result = await runProviderPrompt({ + provider: "qwen", + prompt: "ping", + cwd: process.cwd(), + timeout: 5_000, + model: "explicit-model", + defaultModel: "cached-default-model", + runtime: { + runPrompt: async ({ model, defaultModel }) => { + assert.equal(model, "explicit-model"); + assert.equal(defaultModel, "cached-default-model"); + return { + ok: true, + response: "pong", + }; + }, + }, + }); + + assert.equal(result.ok, true); + assert.equal(result.model, "explicit-model"); +}); + test("runProviderPromptStreaming marks claude tmux TUI text timings as unsupported", async () => { let now = 1_000; const result = await runProviderPromptStreaming({ diff --git a/packages/polycli-terminal/bin/polycli-companion.bundle.mjs b/packages/polycli-terminal/bin/polycli-companion.bundle.mjs 
index c46c71d..9afefb0 100755 --- a/packages/polycli-terminal/bin/polycli-companion.bundle.mjs +++ b/packages/polycli-terminal/bin/polycli-companion.bundle.mjs @@ -3874,7 +3874,7 @@ var DEFAULT_TIMEOUT_MS11 = 9e5; var AUTH_CHECK_TIMEOUT_MS11 = 3e4; var DEFAULT_GROK_MODEL = "grok-build"; var GROK_EXPLICIT_AUTH_ERROR_RE = /\b(unauthenticated|unauthorized|not authenticated|not authorized|login required|log in|sign in|not logged in|invalid api key|missing api key|api key required|token expired|invalid token|credential(?:s)? (?:missing|invalid|expired)|permission denied|access denied|forbidden|401|403)\b/i; -var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished"]); +var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished", "maxtokens", "max_tokens", "length"]); var TRANSIENT_PROBE_ERROR_PATTERNS11 = [ /\b(timed out|timeout|429|rate limit|no capacity available|temporar(?:y|ily)|service unavailable|overloaded|try again|econnreset|econnrefused|enotfound|network|socket hang up)\b/i ]; @@ -5460,6 +5460,7 @@ function createRunLedgerEvent(event = {}) { }; } function appendRunLedgerEvent(workspaceRoot, event) { + if (workspaceRoot) ensureStateDir(workspaceRoot); const file = resolveRunLedgerFile(workspaceRoot); const workspaceSlug = workspaceRoot ? 
computeWorkspaceSlug(workspaceRoot) : null; const full = createRunLedgerEvent({ @@ -5803,18 +5804,18 @@ function enrichJob(workspaceRoot, job) { function hasLedgerPhase(events, runId, jobId, phase) { return events.some((event) => event.runId === runId && event.jobId === jobId && event.phase === phase); } -function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited" } = {}) { +function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const config = readJobConfigFile(resolveJobConfigFile(workspaceRoot, job.jobId)); const runContext = config?.runContext; if (!runContext?.runId) { - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); return; } const recoverLock = `${resolveRunLedgerFile(workspaceRoot)}.recover.lock`; - withLockfile(recoverLock, () => writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason })); + withLockfile(recoverLock, () => writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason, skipRuntimeCleanup })); } -function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited" } = {}) { +function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const events = readRunLedgerEvents(workspaceRoot); const command = runContext.command || config?.execution?.kind || job.kind || null; const provider = runContext.provider || config?.execution?.provider || job.provider || null; @@ -5877,7 +5878,7 @@ function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { reason: decisionReason }); } - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); } function 
cleanupRuntimePaths(config) { @@ -5972,7 +5973,7 @@ async function waitForJob(workspaceRoot, jobId, { timeoutMs = 24e4, pollInterval const timed = getJob(workspaceRoot, jobId); return { job: timed ? refreshJob(workspaceRoot, timed) : null, waitTimedOut: true }; } -async function cancelJob(workspaceRoot, jobId) { +async function cancelJob(workspaceRoot, jobId, { terminate = terminateProcessTree } = {}) { let pidToKill = null; let reason = null; const finishedAt = (/* @__PURE__ */ new Date()).toISOString(); @@ -6006,22 +6007,24 @@ async function cancelJob(workspaceRoot, jobId) { if (!write.written) { return { cancelled: false, reason: reason || "not_cancellable", jobId }; } - recoverLedgerTerminalEvents(workspaceRoot, write.job, { - result: write.envelope?.result || { ok: false, error: "cancelled" }, - reason: "cancelled" - }); + let killWarning = null; if (pidToKill) { try { - await terminateProcessTree(pidToKill, { + await terminate(pidToKill, { signal: "SIGINT", forceSignal: "SIGKILL", forceAfterMs: 2e3 }); } catch (error) { - return { cancelled: true, jobId, killWarning: error.message }; + killWarning = error.message; } } - return { cancelled: true, jobId }; + recoverLedgerTerminalEvents(workspaceRoot, write.job, { + result: write.envelope?.result || { ok: false, error: "cancelled" }, + reason: "cancelled", + skipRuntimeCleanup: killWarning != null + }); + return killWarning ? 
{ cancelled: true, jobId, killWarning } : { cancelled: true, jobId }; } // plugins/polycli/scripts/lib/prompt-runtime.mjs diff --git a/packages/polycli-terminal/package.json b/packages/polycli-terminal/package.json index 71e68c5..4551328 100644 --- a/packages/polycli-terminal/package.json +++ b/packages/polycli-terminal/package.json @@ -1,6 +1,6 @@ { "name": "@bbingz/polycli", - "version": "0.6.24", + "version": "0.6.25", "description": "Terminal CLI for Polycli provider diagnostics and host-compatible commands.", "type": "module", "bin": { diff --git a/plugins/polycli-codex/.codex-plugin/plugin.json b/plugins/polycli-codex/.codex-plugin/plugin.json index 321d539..c47b6bd 100644 --- a/plugins/polycli-codex/.codex-plugin/plugin.json +++ b/plugins/polycli-codex/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "polycli-codex", - "version": "0.6.24", + "version": "0.6.25", "description": "Codex skill adapter that routes provider CLI work through the shared polycli companion.", "author": { "name": "bing" diff --git a/plugins/polycli-codex/scripts/polycli-companion.bundle.mjs b/plugins/polycli-codex/scripts/polycli-companion.bundle.mjs index c46c71d..9afefb0 100755 --- a/plugins/polycli-codex/scripts/polycli-companion.bundle.mjs +++ b/plugins/polycli-codex/scripts/polycli-companion.bundle.mjs @@ -3874,7 +3874,7 @@ var DEFAULT_TIMEOUT_MS11 = 9e5; var AUTH_CHECK_TIMEOUT_MS11 = 3e4; var DEFAULT_GROK_MODEL = "grok-build"; var GROK_EXPLICIT_AUTH_ERROR_RE = /\b(unauthenticated|unauthorized|not authenticated|not authorized|login required|log in|sign in|not logged in|invalid api key|missing api key|api key required|token expired|invalid token|credential(?:s)? 
(?:missing|invalid|expired)|permission denied|access denied|forbidden|401|403)\b/i; -var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished"]); +var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished", "maxtokens", "max_tokens", "length"]); var TRANSIENT_PROBE_ERROR_PATTERNS11 = [ /\b(timed out|timeout|429|rate limit|no capacity available|temporar(?:y|ily)|service unavailable|overloaded|try again|econnreset|econnrefused|enotfound|network|socket hang up)\b/i ]; @@ -5460,6 +5460,7 @@ function createRunLedgerEvent(event = {}) { }; } function appendRunLedgerEvent(workspaceRoot, event) { + if (workspaceRoot) ensureStateDir(workspaceRoot); const file = resolveRunLedgerFile(workspaceRoot); const workspaceSlug = workspaceRoot ? computeWorkspaceSlug(workspaceRoot) : null; const full = createRunLedgerEvent({ @@ -5803,18 +5804,18 @@ function enrichJob(workspaceRoot, job) { function hasLedgerPhase(events, runId, jobId, phase) { return events.some((event) => event.runId === runId && event.jobId === jobId && event.phase === phase); } -function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited" } = {}) { +function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const config = readJobConfigFile(resolveJobConfigFile(workspaceRoot, job.jobId)); const runContext = config?.runContext; if (!runContext?.runId) { - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); return; } const recoverLock = `${resolveRunLedgerFile(workspaceRoot)}.recover.lock`; - withLockfile(recoverLock, () => writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason })); + withLockfile(recoverLock, () => 
writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason, skipRuntimeCleanup })); } -function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited" } = {}) { +function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const events = readRunLedgerEvents(workspaceRoot); const command = runContext.command || config?.execution?.kind || job.kind || null; const provider = runContext.provider || config?.execution?.provider || job.provider || null; @@ -5877,7 +5878,7 @@ function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { reason: decisionReason }); } - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); } function cleanupRuntimePaths(config) { @@ -5972,7 +5973,7 @@ async function waitForJob(workspaceRoot, jobId, { timeoutMs = 24e4, pollInterval const timed = getJob(workspaceRoot, jobId); return { job: timed ? 
refreshJob(workspaceRoot, timed) : null, waitTimedOut: true }; } -async function cancelJob(workspaceRoot, jobId) { +async function cancelJob(workspaceRoot, jobId, { terminate = terminateProcessTree } = {}) { let pidToKill = null; let reason = null; const finishedAt = (/* @__PURE__ */ new Date()).toISOString(); @@ -6006,22 +6007,24 @@ async function cancelJob(workspaceRoot, jobId) { if (!write.written) { return { cancelled: false, reason: reason || "not_cancellable", jobId }; } - recoverLedgerTerminalEvents(workspaceRoot, write.job, { - result: write.envelope?.result || { ok: false, error: "cancelled" }, - reason: "cancelled" - }); + let killWarning = null; if (pidToKill) { try { - await terminateProcessTree(pidToKill, { + await terminate(pidToKill, { signal: "SIGINT", forceSignal: "SIGKILL", forceAfterMs: 2e3 }); } catch (error) { - return { cancelled: true, jobId, killWarning: error.message }; + killWarning = error.message; } } - return { cancelled: true, jobId }; + recoverLedgerTerminalEvents(workspaceRoot, write.job, { + result: write.envelope?.result || { ok: false, error: "cancelled" }, + reason: "cancelled", + skipRuntimeCleanup: killWarning != null + }); + return killWarning ? 
{ cancelled: true, jobId, killWarning } : { cancelled: true, jobId }; } // plugins/polycli/scripts/lib/prompt-runtime.mjs diff --git a/plugins/polycli-copilot/plugin.json b/plugins/polycli-copilot/plugin.json index 4acf1a4..1e16bfa 100644 --- a/plugins/polycli-copilot/plugin.json +++ b/plugins/polycli-copilot/plugin.json @@ -1,7 +1,7 @@ { "name": "polycli-copilot", "description": "GitHub Copilot CLI adapter for the shared polycli companion.", - "version": "0.6.24", + "version": "0.6.25", "author": { "name": "bing" }, diff --git a/plugins/polycli-copilot/scripts/polycli-companion.bundle.mjs b/plugins/polycli-copilot/scripts/polycli-companion.bundle.mjs index c46c71d..9afefb0 100755 --- a/plugins/polycli-copilot/scripts/polycli-companion.bundle.mjs +++ b/plugins/polycli-copilot/scripts/polycli-companion.bundle.mjs @@ -3874,7 +3874,7 @@ var DEFAULT_TIMEOUT_MS11 = 9e5; var AUTH_CHECK_TIMEOUT_MS11 = 3e4; var DEFAULT_GROK_MODEL = "grok-build"; var GROK_EXPLICIT_AUTH_ERROR_RE = /\b(unauthenticated|unauthorized|not authenticated|not authorized|login required|log in|sign in|not logged in|invalid api key|missing api key|api key required|token expired|invalid token|credential(?:s)? 
(?:missing|invalid|expired)|permission denied|access denied|forbidden|401|403)\b/i; -var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished"]); +var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished", "maxtokens", "max_tokens", "length"]); var TRANSIENT_PROBE_ERROR_PATTERNS11 = [ /\b(timed out|timeout|429|rate limit|no capacity available|temporar(?:y|ily)|service unavailable|overloaded|try again|econnreset|econnrefused|enotfound|network|socket hang up)\b/i ]; @@ -5460,6 +5460,7 @@ function createRunLedgerEvent(event = {}) { }; } function appendRunLedgerEvent(workspaceRoot, event) { + if (workspaceRoot) ensureStateDir(workspaceRoot); const file = resolveRunLedgerFile(workspaceRoot); const workspaceSlug = workspaceRoot ? computeWorkspaceSlug(workspaceRoot) : null; const full = createRunLedgerEvent({ @@ -5803,18 +5804,18 @@ function enrichJob(workspaceRoot, job) { function hasLedgerPhase(events, runId, jobId, phase) { return events.some((event) => event.runId === runId && event.jobId === jobId && event.phase === phase); } -function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited" } = {}) { +function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const config = readJobConfigFile(resolveJobConfigFile(workspaceRoot, job.jobId)); const runContext = config?.runContext; if (!runContext?.runId) { - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); return; } const recoverLock = `${resolveRunLedgerFile(workspaceRoot)}.recover.lock`; - withLockfile(recoverLock, () => writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason })); + withLockfile(recoverLock, () => 
writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason, skipRuntimeCleanup })); } -function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited" } = {}) { +function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const events = readRunLedgerEvents(workspaceRoot); const command = runContext.command || config?.execution?.kind || job.kind || null; const provider = runContext.provider || config?.execution?.provider || job.provider || null; @@ -5877,7 +5878,7 @@ function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { reason: decisionReason }); } - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); } function cleanupRuntimePaths(config) { @@ -5972,7 +5973,7 @@ async function waitForJob(workspaceRoot, jobId, { timeoutMs = 24e4, pollInterval const timed = getJob(workspaceRoot, jobId); return { job: timed ? 
refreshJob(workspaceRoot, timed) : null, waitTimedOut: true }; } -async function cancelJob(workspaceRoot, jobId) { +async function cancelJob(workspaceRoot, jobId, { terminate = terminateProcessTree } = {}) { let pidToKill = null; let reason = null; const finishedAt = (/* @__PURE__ */ new Date()).toISOString(); @@ -6006,22 +6007,24 @@ async function cancelJob(workspaceRoot, jobId) { if (!write.written) { return { cancelled: false, reason: reason || "not_cancellable", jobId }; } - recoverLedgerTerminalEvents(workspaceRoot, write.job, { - result: write.envelope?.result || { ok: false, error: "cancelled" }, - reason: "cancelled" - }); + let killWarning = null; if (pidToKill) { try { - await terminateProcessTree(pidToKill, { + await terminate(pidToKill, { signal: "SIGINT", forceSignal: "SIGKILL", forceAfterMs: 2e3 }); } catch (error) { - return { cancelled: true, jobId, killWarning: error.message }; + killWarning = error.message; } } - return { cancelled: true, jobId }; + recoverLedgerTerminalEvents(workspaceRoot, write.job, { + result: write.envelope?.result || { ok: false, error: "cancelled" }, + reason: "cancelled", + skipRuntimeCleanup: killWarning != null + }); + return killWarning ? { cancelled: true, jobId, killWarning } : { cancelled: true, jobId }; } // plugins/polycli/scripts/lib/prompt-runtime.mjs diff --git a/plugins/polycli-opencode/index.mjs b/plugins/polycli-opencode/index.mjs index 18108de..306301c 100644 --- a/plugins/polycli-opencode/index.mjs +++ b/plugins/polycli-opencode/index.mjs @@ -13,6 +13,15 @@ function tool(input) { tool.schema = z; +// Exit code 2 is the companion's documented soft signal, not a hard failure: `health` with no +// healthy provider and `status --wait` timeouts both exit 2 while still emitting a valid JSON +// envelope on stdout. The adapter must surface that envelope so the opencode agent can reason about +// it (anyHealthy:false / waitTimedOut:true) instead of seeing a thrown tool error. 
Every other +// non-zero exit (1/4/5/crash) is a real failure and is propagated. +export function isHardCompanionFailure(status) { + return status !== 0 && status !== 2; +} + function runCompanion(argv) { const result = spawnSync(process.execPath, [COMPANION, ...argv], { cwd: process.cwd(), @@ -21,7 +30,7 @@ function runCompanion(argv) { if (result.error) { throw result.error; } - if (result.status !== 0) { + if (isHardCompanionFailure(result.status)) { const detail = String(result.stdout || result.stderr || "").trim() || `polycli companion exited with status ${result.status}`; const error = new Error(detail); error.status = result.status; diff --git a/plugins/polycli-opencode/package.json b/plugins/polycli-opencode/package.json index 55e3858..f73e550 100644 --- a/plugins/polycli-opencode/package.json +++ b/plugins/polycli-opencode/package.json @@ -1,6 +1,6 @@ { "name": "@bbingz/polycli-opencode", - "version": "0.6.24", + "version": "0.6.25", "type": "module", "main": "./index.mjs", "exports": { diff --git a/plugins/polycli-opencode/scripts/polycli-companion.bundle.mjs b/plugins/polycli-opencode/scripts/polycli-companion.bundle.mjs index c46c71d..9afefb0 100755 --- a/plugins/polycli-opencode/scripts/polycli-companion.bundle.mjs +++ b/plugins/polycli-opencode/scripts/polycli-companion.bundle.mjs @@ -3874,7 +3874,7 @@ var DEFAULT_TIMEOUT_MS11 = 9e5; var AUTH_CHECK_TIMEOUT_MS11 = 3e4; var DEFAULT_GROK_MODEL = "grok-build"; var GROK_EXPLICIT_AUTH_ERROR_RE = /\b(unauthenticated|unauthorized|not authenticated|not authorized|login required|log in|sign in|not logged in|invalid api key|missing api key|api key required|token expired|invalid token|credential(?:s)? 
(?:missing|invalid|expired)|permission denied|access denied|forbidden|401|403)\b/i; -var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished"]); +var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished", "maxtokens", "max_tokens", "length"]); var TRANSIENT_PROBE_ERROR_PATTERNS11 = [ /\b(timed out|timeout|429|rate limit|no capacity available|temporar(?:y|ily)|service unavailable|overloaded|try again|econnreset|econnrefused|enotfound|network|socket hang up)\b/i ]; @@ -5460,6 +5460,7 @@ function createRunLedgerEvent(event = {}) { }; } function appendRunLedgerEvent(workspaceRoot, event) { + if (workspaceRoot) ensureStateDir(workspaceRoot); const file = resolveRunLedgerFile(workspaceRoot); const workspaceSlug = workspaceRoot ? computeWorkspaceSlug(workspaceRoot) : null; const full = createRunLedgerEvent({ @@ -5803,18 +5804,18 @@ function enrichJob(workspaceRoot, job) { function hasLedgerPhase(events, runId, jobId, phase) { return events.some((event) => event.runId === runId && event.jobId === jobId && event.phase === phase); } -function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited" } = {}) { +function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const config = readJobConfigFile(resolveJobConfigFile(workspaceRoot, job.jobId)); const runContext = config?.runContext; if (!runContext?.runId) { - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); return; } const recoverLock = `${resolveRunLedgerFile(workspaceRoot)}.recover.lock`; - withLockfile(recoverLock, () => writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason })); + withLockfile(recoverLock, () => 
writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason, skipRuntimeCleanup })); } -function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited" } = {}) { +function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const events = readRunLedgerEvents(workspaceRoot); const command = runContext.command || config?.execution?.kind || job.kind || null; const provider = runContext.provider || config?.execution?.provider || job.provider || null; @@ -5877,7 +5878,7 @@ function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { reason: decisionReason }); } - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); } function cleanupRuntimePaths(config) { @@ -5972,7 +5973,7 @@ async function waitForJob(workspaceRoot, jobId, { timeoutMs = 24e4, pollInterval const timed = getJob(workspaceRoot, jobId); return { job: timed ? 
refreshJob(workspaceRoot, timed) : null, waitTimedOut: true }; } -async function cancelJob(workspaceRoot, jobId) { +async function cancelJob(workspaceRoot, jobId, { terminate = terminateProcessTree } = {}) { let pidToKill = null; let reason = null; const finishedAt = (/* @__PURE__ */ new Date()).toISOString(); @@ -6006,22 +6007,24 @@ async function cancelJob(workspaceRoot, jobId) { if (!write.written) { return { cancelled: false, reason: reason || "not_cancellable", jobId }; } - recoverLedgerTerminalEvents(workspaceRoot, write.job, { - result: write.envelope?.result || { ok: false, error: "cancelled" }, - reason: "cancelled" - }); + let killWarning = null; if (pidToKill) { try { - await terminateProcessTree(pidToKill, { + await terminate(pidToKill, { signal: "SIGINT", forceSignal: "SIGKILL", forceAfterMs: 2e3 }); } catch (error) { - return { cancelled: true, jobId, killWarning: error.message }; + killWarning = error.message; } } - return { cancelled: true, jobId }; + recoverLedgerTerminalEvents(workspaceRoot, write.job, { + result: write.envelope?.result || { ok: false, error: "cancelled" }, + reason: "cancelled", + skipRuntimeCleanup: killWarning != null + }); + return killWarning ? 
{ cancelled: true, jobId, killWarning } : { cancelled: true, jobId }; } // plugins/polycli/scripts/lib/prompt-runtime.mjs diff --git a/plugins/polycli/.claude-plugin/plugin.json b/plugins/polycli/.claude-plugin/plugin.json index c6aed2f..a3b994f 100644 --- a/plugins/polycli/.claude-plugin/plugin.json +++ b/plugins/polycli/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "polycli", - "version": "0.6.24", + "version": "0.6.25", "description": "Use multiple provider CLIs from one Claude Code plugin, including background job lifecycle.", "author": { "name": "bing" diff --git a/plugins/polycli/scripts/lib/job-control.mjs b/plugins/polycli/scripts/lib/job-control.mjs index 1d3ef3d..1221adb 100644 --- a/plugins/polycli/scripts/lib/job-control.mjs +++ b/plugins/polycli/scripts/lib/job-control.mjs @@ -73,11 +73,11 @@ function hasLedgerPhase(events, runId, jobId, phase) { return events.some((event) => event.runId === runId && event.jobId === jobId && event.phase === phase); } -function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited" } = {}) { +function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const config = readJobConfigFile(resolveJobConfigFile(workspaceRoot, job.jobId)); const runContext = config?.runContext; if (!runContext?.runId) { - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); return; } @@ -88,10 +88,10 @@ function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason // recover lock is a distinct path from the ndjson append lock, so there is no deadlock. 
const recoverLock = `${resolveRunLedgerFile(workspaceRoot)}.recover.lock`; withLockfile(recoverLock, () => - writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason })); + writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason, skipRuntimeCleanup })); } -function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited" } = {}) { +function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const events = readRunLedgerEvents(workspaceRoot); const command = runContext.command || config?.execution?.kind || job.kind || null; const provider = runContext.provider || config?.execution?.provider || job.provider || null; @@ -167,7 +167,7 @@ function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { }); } - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); } @@ -276,7 +276,7 @@ export async function waitForJob(workspaceRoot, jobId, { timeoutMs = 240_000, po return { job: timed ? refreshJob(workspaceRoot, timed) : null, waitTimedOut: true }; } -export async function cancelJob(workspaceRoot, jobId) { +export async function cancelJob(workspaceRoot, jobId, { terminate = terminateProcessTree } = {}) { // Flip the job to cancelled and capture its pid atomically under the state lock FIRST, then // signal that pid. 
Previously cancelJob read the job WITHOUT a lock and killed job.pid before // re-validating, so a stale pre-lock snapshot could signal a pid the worker had already freed @@ -314,22 +314,27 @@ export async function cancelJob(workspaceRoot, jobId) { if (!write.written) { return { cancelled: false, reason: reason || "not_cancellable", jobId }; } - recoverLedgerTerminalEvents(workspaceRoot, write.job, { - result: write.envelope?.result || { ok: false, error: "cancelled" }, - reason: "cancelled", - }); - + // Kill the worker FIRST, then clean up its runtime paths. cleanupRuntimePaths deletes + // config.execution.runtimeOptions.cleanupPaths, which for a review job IS the worker's live cwd + // (review.mjs sets cleanupPaths:[cwd]); deleting it before the kill lands would yank the working + // directory out from under a still-running process. If the kill fails the worker may still be + // alive, so skip the runtime-path deletion and only record the cancellation. + let killWarning = null; if (pidToKill) { try { - await terminateProcessTree(pidToKill, { + await terminate(pidToKill, { signal: "SIGINT", forceSignal: "SIGKILL", forceAfterMs: 2_000, }); } catch (error) { - // The job is already recorded as cancelled; surface the kill problem without un-cancelling. - return { cancelled: true, jobId, killWarning: error.message }; + killWarning = error.message; } } - return { cancelled: true, jobId }; + recoverLedgerTerminalEvents(workspaceRoot, write.job, { + result: write.envelope?.result || { ok: false, error: "cancelled" }, + reason: "cancelled", + skipRuntimeCleanup: killWarning != null, + }); + return killWarning ? 
{ cancelled: true, jobId, killWarning } : { cancelled: true, jobId }; } diff --git a/plugins/polycli/scripts/lib/run-ledger.mjs b/plugins/polycli/scripts/lib/run-ledger.mjs index b4f6e3a..3721f33 100644 --- a/plugins/polycli/scripts/lib/run-ledger.mjs +++ b/plugins/polycli/scripts/lib/run-ledger.mjs @@ -2,7 +2,7 @@ import { randomUUID } from 'node:crypto'; import path from 'node:path'; import { appendNdjson, readNdjson } from '@bbingz/polycli-utils/ndjson'; -import { computeWorkspaceSlug, resolveStateDir } from './state.mjs'; +import { computeWorkspaceSlug, ensureStateDir, resolveStateDir } from './state.mjs'; const MAX_LEDGER_BYTES = 2_000_000; const KEEP_RATIO = 0.5; @@ -194,6 +194,10 @@ export function createRunLedgerEvent(event = {}) { } export function appendRunLedgerEvent(workspaceRoot, event) { + // Create the state dir privately (0o700) BEFORE the ndjson append lands. appendNdjson reaches the + // directory via the mode-less ensureParentDir, which would otherwise create ~/.polycli/state/ + // world-traversable (0o755) on the run_started event that main() fires before any other state write. + if (workspaceRoot) ensureStateDir(workspaceRoot); const file = resolveRunLedgerFile(workspaceRoot); const workspaceSlug = workspaceRoot ? computeWorkspaceSlug(workspaceRoot) : null; const full = createRunLedgerEvent({ diff --git a/plugins/polycli/scripts/polycli-companion.bundle.mjs b/plugins/polycli/scripts/polycli-companion.bundle.mjs index c46c71d..9afefb0 100755 --- a/plugins/polycli/scripts/polycli-companion.bundle.mjs +++ b/plugins/polycli/scripts/polycli-companion.bundle.mjs @@ -3874,7 +3874,7 @@ var DEFAULT_TIMEOUT_MS11 = 9e5; var AUTH_CHECK_TIMEOUT_MS11 = 3e4; var DEFAULT_GROK_MODEL = "grok-build"; var GROK_EXPLICIT_AUTH_ERROR_RE = /\b(unauthenticated|unauthorized|not authenticated|not authorized|login required|log in|sign in|not logged in|invalid api key|missing api key|api key required|token expired|invalid token|credential(?:s)? 
(?:missing|invalid|expired)|permission denied|access denied|forbidden|401|403)\b/i; -var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished"]); +var SUCCESS_STOP_REASONS = /* @__PURE__ */ new Set(["endturn", "end_turn", "stop", "stop_sequence", "complete", "completed", "done", "finished", "maxtokens", "max_tokens", "length"]); var TRANSIENT_PROBE_ERROR_PATTERNS11 = [ /\b(timed out|timeout|429|rate limit|no capacity available|temporar(?:y|ily)|service unavailable|overloaded|try again|econnreset|econnrefused|enotfound|network|socket hang up)\b/i ]; @@ -5460,6 +5460,7 @@ function createRunLedgerEvent(event = {}) { }; } function appendRunLedgerEvent(workspaceRoot, event) { + if (workspaceRoot) ensureStateDir(workspaceRoot); const file = resolveRunLedgerFile(workspaceRoot); const workspaceSlug = workspaceRoot ? computeWorkspaceSlug(workspaceRoot) : null; const full = createRunLedgerEvent({ @@ -5803,18 +5804,18 @@ function enrichJob(workspaceRoot, job) { function hasLedgerPhase(events, runId, jobId, phase) { return events.some((event) => event.runId === runId && event.jobId === jobId && event.phase === phase); } -function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited" } = {}) { +function recoverLedgerTerminalEvents(workspaceRoot, job, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const config = readJobConfigFile(resolveJobConfigFile(workspaceRoot, job.jobId)); const runContext = config?.runContext; if (!runContext?.runId) { - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); return; } const recoverLock = `${resolveRunLedgerFile(workspaceRoot)}.recover.lock`; - withLockfile(recoverLock, () => writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason })); + withLockfile(recoverLock, () => 
writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result, reason, skipRuntimeCleanup })); } -function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited" } = {}) { +function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { result = null, reason = "worker_exited", skipRuntimeCleanup = false } = {}) { const events = readRunLedgerEvents(workspaceRoot); const command = runContext.command || config?.execution?.kind || job.kind || null; const provider = runContext.provider || config?.execution?.provider || job.provider || null; @@ -5877,7 +5878,7 @@ function writeRecoveredTerminalEvents(workspaceRoot, job, config, runContext, { reason: decisionReason }); } - cleanupRuntimePaths(config); + if (!skipRuntimeCleanup) cleanupRuntimePaths(config); removeJobConfigFile(workspaceRoot, job.jobId); } function cleanupRuntimePaths(config) { @@ -5972,7 +5973,7 @@ async function waitForJob(workspaceRoot, jobId, { timeoutMs = 24e4, pollInterval const timed = getJob(workspaceRoot, jobId); return { job: timed ? 
refreshJob(workspaceRoot, timed) : null, waitTimedOut: true }; } -async function cancelJob(workspaceRoot, jobId) { +async function cancelJob(workspaceRoot, jobId, { terminate = terminateProcessTree } = {}) { let pidToKill = null; let reason = null; const finishedAt = (/* @__PURE__ */ new Date()).toISOString(); @@ -6006,22 +6007,24 @@ async function cancelJob(workspaceRoot, jobId) { if (!write.written) { return { cancelled: false, reason: reason || "not_cancellable", jobId }; } - recoverLedgerTerminalEvents(workspaceRoot, write.job, { - result: write.envelope?.result || { ok: false, error: "cancelled" }, - reason: "cancelled" - }); + let killWarning = null; if (pidToKill) { try { - await terminateProcessTree(pidToKill, { + await terminate(pidToKill, { signal: "SIGINT", forceSignal: "SIGKILL", forceAfterMs: 2e3 }); } catch (error) { - return { cancelled: true, jobId, killWarning: error.message }; + killWarning = error.message; } } - return { cancelled: true, jobId }; + recoverLedgerTerminalEvents(workspaceRoot, write.job, { + result: write.envelope?.result || { ok: false, error: "cancelled" }, + reason: "cancelled", + skipRuntimeCleanup: killWarning != null + }); + return killWarning ? 
{ cancelled: true, jobId, killWarning } : { cancelled: true, jobId }; } // plugins/polycli/scripts/lib/prompt-runtime.mjs diff --git a/plugins/polycli/scripts/tests/job-control.test.mjs b/plugins/polycli/scripts/tests/job-control.test.mjs index 44fe151..4de261f 100644 --- a/plugins/polycli/scripts/tests/job-control.test.mjs +++ b/plugins/polycli/scripts/tests/job-control.test.mjs @@ -402,3 +402,73 @@ test("cancelJob records cancelled ledger events, removes config, and cleans runt ); }); }); + +test("cancelJob kills the worker before deleting its runtime paths", async () => { + await withWorkspace(async (workspaceRoot) => { + const cleanupDir = fs.mkdtempSync(path.join(os.tmpdir(), "polycli-cancel-order-")); + upsertJob(workspaceRoot, { + jobId: "job-order", + provider: "gemini", + kind: "review", + status: "running", + pid: 4242, + }); + writeJobConfigFile(workspaceRoot, "job-order", { + workspaceRoot, + jobId: "job-order", + execution: { + provider: "gemini", + kind: "review", + runtimeOptions: { cleanupPaths: [cleanupDir] }, + }, + runContext: { runId: "run-order", command: "review", hostSurface: "terminal", jobId: "job-order", provider: "gemini", kind: "review" }, + }); + + let dirExistedAtKill = null; + const report = await cancelJob(workspaceRoot, "job-order", { + terminate: async () => { + // The cleanup path (a review's live cwd) must still exist when the kill runs. 
+ dirExistedAtKill = fs.existsSync(cleanupDir); + }, + }); + + assert.equal(report.cancelled, true); + assert.equal(dirExistedAtKill, true); + assert.equal(fs.existsSync(cleanupDir), false); + }); +}); + +test("cancelJob preserves runtime paths when the kill fails (worker may be alive)", async () => { + await withWorkspace(async (workspaceRoot) => { + const cleanupDir = fs.mkdtempSync(path.join(os.tmpdir(), "polycli-cancel-killfail-")); + upsertJob(workspaceRoot, { + jobId: "job-killfail", + provider: "gemini", + kind: "review", + status: "running", + pid: 4242, + }); + writeJobConfigFile(workspaceRoot, "job-killfail", { + workspaceRoot, + jobId: "job-killfail", + execution: { + provider: "gemini", + kind: "review", + runtimeOptions: { cleanupPaths: [cleanupDir] }, + }, + runContext: { runId: "run-killfail", command: "review", hostSurface: "terminal", jobId: "job-killfail", provider: "gemini", kind: "review" }, + }); + + const report = await cancelJob(workspaceRoot, "job-killfail", { + terminate: async () => { + throw new Error("kill failed"); + }, + }); + + assert.equal(report.cancelled, true); + assert.equal(report.killWarning, "kill failed"); + // Worker may still be alive, so its runtime paths must NOT be deleted. 
+ assert.equal(fs.existsSync(cleanupDir), true); + fs.rmSync(cleanupDir, { recursive: true, force: true }); + }); +}); diff --git a/plugins/polycli/scripts/tests/run-ledger.test.mjs b/plugins/polycli/scripts/tests/run-ledger.test.mjs index 5a5e98f..589bc97 100644 --- a/plugins/polycli/scripts/tests/run-ledger.test.mjs +++ b/plugins/polycli/scripts/tests/run-ledger.test.mjs @@ -15,6 +15,7 @@ import { resolveRunLedgerFile, summarizeRunLedger, } from '../lib/run-ledger.mjs'; +import { resolveStateDir } from '../lib/state.mjs'; async function fileMode(filePath) { return (await stat(filePath)).mode & 0o777; @@ -158,6 +159,23 @@ test('appendRunLedgerEvent writes the run ledger privately', async () => { }); }); +test('appendRunLedgerEvent hardens the containing state directory to 0700', async () => { + await withTempWorkspace(async (workspaceRoot) => { + // No prior ensureStateDir: this is the run_started path that fires before any other state write. + // The directory holding the ledger/timing/model-cache must be created private, not 0o755. 
+ await appendRunLedgerEvent(workspaceRoot, { + runId: 'run-dir-mode', + command: 'ask', + phase: 'run_started', + provider: 'qwen', + status: 'started', + hostSurface: 'terminal', + }); + + assert.equal(await fileMode(resolveStateDir(workspaceRoot)), 0o700); + }); +}); + test('appendRunLedgerEvent round-trips sessionId through NDJSON read-back', async () => { await withTempWorkspace(async (workspaceRoot) => { await appendRunLedgerEvent(workspaceRoot, { diff --git a/plugins/polycli/scripts/tests/state.test.mjs b/plugins/polycli/scripts/tests/state.test.mjs index 6ddc3ad..5cebde9 100644 --- a/plugins/polycli/scripts/tests/state.test.mjs +++ b/plugins/polycli/scripts/tests/state.test.mjs @@ -112,6 +112,30 @@ test("state directories and sensitive state files use private permissions", () = }); }); +test("ensureStateDir tightens pre-existing world-readable state directories to 0700", () => { + withPluginData((pluginData) => { + const workspaceRoot = "/tmp/polycli-loose-state"; + const stateRoot = path.join(pluginData, "state"); + const stateDir = resolveStateDir(workspaceRoot); + const jobsDir = resolveJobsDir(workspaceRoot); + + // Simulate a host-managed / permissive-umask layout where the dirs already exist at 0755 before + // polycli runs (e.g. ~/.polycli created by another tool). Only the force-chmod in + // chmodPrivateDir can tighten an EXISTING directory — recursive mkdir is a no-op on its mode — + // so this is the case that proves the security-relevant part of the hardening. 
+ fs.mkdirSync(jobsDir, { recursive: true }); + fs.chmodSync(stateRoot, 0o755); + fs.chmodSync(stateDir, 0o755); + fs.chmodSync(jobsDir, 0o755); + + ensureStateDir(workspaceRoot); + + assert.equal(fileMode(stateRoot), 0o700); + assert.equal(fileMode(stateDir), 0o700); + assert.equal(fileMode(jobsDir), 0o700); + }); +}); + test("saveState preserves active jobs while pruning terminal history", () => { withPluginData(() => { const workspaceRoot = "/tmp/polycli-active-prune"; diff --git a/scripts/tests/opencode-host.test.mjs b/scripts/tests/opencode-host.test.mjs new file mode 100644 index 0000000..5f0739c --- /dev/null +++ b/scripts/tests/opencode-host.test.mjs @@ -0,0 +1,16 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { isHardCompanionFailure } from "../../plugins/polycli-opencode/index.mjs"; + +test("opencode host treats companion exit 2 as a soft signal, not a hard failure", () => { + // Exit 2 is the companion's documented soft signal: `health` with no healthy provider and + // `status --wait` timeouts both exit 2 while still emitting a valid JSON envelope on stdout. + // The adapter must surface that envelope instead of throwing a tool error. + assert.equal(isHardCompanionFailure(2), false); + // Exit 0 is success; every other non-zero exit is a real failure that must propagate. 
+ assert.equal(isHardCompanionFailure(0), false); + assert.equal(isHardCompanionFailure(1), true); + assert.equal(isHardCompanionFailure(4), true); + assert.equal(isHardCompanionFailure(5), true); +}); diff --git a/scripts/tests/validate-cc-x-recipes.test.mjs b/scripts/tests/validate-cc-x-recipes.test.mjs new file mode 100644 index 0000000..0153a53 --- /dev/null +++ b/scripts/tests/validate-cc-x-recipes.test.mjs @@ -0,0 +1,70 @@ +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import test from "node:test"; + +import { validateCcXRecipes } from "../validate-cc-x-recipes.mjs"; + +function writeDoc(value) { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "polycli-ccx-test-")); + const filePath = path.join(dir, "cc-x-recipes.json"); + fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf8"); + return filePath; +} + +function baseRecipe(overrides = {}) { + return { + vendor: "DeepSeek", + nativeCli: false, + runtime: "claude", + baseUrlIntl: "https://api.deepseek.com/anthropic", + baseUrlCN: null, + modelIds: ["deepseek-v4-pro"], + marketplace: false, + autoCompactWindow: 128000, + cachingNote: "automatic prefix caching", + status: "verified", + source: { url: "https://api-docs.deepseek.com", date: "2026-06-19" }, + ...overrides, + }; +} + +function baseDoc(recipes) { + return { + schemaVersion: 1, + collectedAt: "2026-06-19", + disclaimer: "reference only, not a runtime", + recipes, + }; +} + +test("validateCcXRecipes accepts a structurally complete, source-anchored doc", () => { + const recipes = Array.from({ length: 7 }, (_, i) => baseRecipe({ vendor: `Vendor${i}` })); + const result = validateCcXRecipes({ recipesPath: writeDoc(baseDoc(recipes)) }); + assert.equal(result.ok, true); + assert.equal(result.checked, 7); +}); + +test("validateCcXRecipes rejects an entry missing a source URL", () => { + const recipes = Array.from({ length: 7 }, (_, i) => baseRecipe({ vendor: 
`Vendor${i}` })); + recipes[0].source = { url: "", date: "2026-06-19" }; + assert.throws(() => validateCcXRecipes({ recipesPath: writeDoc(baseDoc(recipes)) }), /source\.url/); +}); + +test("validateCcXRecipes rejects a marketplace entry with a fabricated autoCompactWindow", () => { + const recipes = Array.from({ length: 7 }, (_, i) => baseRecipe({ vendor: `Vendor${i}` })); + recipes[0] = baseRecipe({ vendor: "Marketplace", marketplace: true, status: "marketplace-unstable", modelIds: [], autoCompactWindow: 128000 }); + assert.throws(() => validateCcXRecipes({ recipesPath: writeDoc(baseDoc(recipes)) }), /autoCompactWindow null/); +}); + +test("validateCcXRecipes requires at least the 7 verified core-lab recipes", () => { + const recipes = Array.from({ length: 6 }, (_, i) => baseRecipe({ vendor: `Vendor${i}` })); + assert.throws(() => validateCcXRecipes({ recipesPath: writeDoc(baseDoc(recipes)) }), /at least the 7 verified/); +}); + +test("validateCcXRecipes passes against the shipped docs/cc-x-recipes.json", () => { + const result = validateCcXRecipes(); + assert.equal(result.ok, true); + assert.ok(result.checked >= 7); +}); diff --git a/scripts/validate-cc-x-recipes.mjs b/scripts/validate-cc-x-recipes.mjs new file mode 100644 index 0000000..d64abef --- /dev/null +++ b/scripts/validate-cc-x-recipes.mjs @@ -0,0 +1,72 @@ +#!/usr/bin/env node + +import assert from "node:assert/strict"; +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const DEFAULT_RECIPES_PATH = path.join(REPO_ROOT, "docs/cc-x-recipes.json"); + +function assertNonEmptyString(value, label) { + assert.equal(typeof value, "string", `${label} must be a non-empty string`); + assert.ok(value.trim().length > 0, `${label} must be a non-empty string`); +} + +function validateRecipe(index, recipe) { + const at = `recipe[${index}] (${recipe?.vendor ?? 
"?"})`; + assertNonEmptyString(recipe.vendor, `${at}: vendor`); + assert.equal(typeof recipe.nativeCli, "boolean", `${at}: nativeCli must be boolean`); + assertNonEmptyString(recipe.runtime, `${at}: runtime`); + assert.ok(["claude", "opencode"].includes(recipe.runtime), `${at}: runtime must be claude or opencode`); + const hasBaseUrl = typeof recipe.baseUrlIntl === "string" || typeof recipe.baseUrlCN === "string"; + assert.ok(hasBaseUrl, `${at}: at least one of baseUrlIntl / baseUrlCN must be a string`); + assert.ok(Array.isArray(recipe.modelIds), `${at}: modelIds must be an array`); + assert.equal(typeof recipe.marketplace, "boolean", `${at}: marketplace must be boolean`); + assertNonEmptyString(recipe.cachingNote, `${at}: cachingNote`); + assertNonEmptyString(recipe.status, `${at}: status`); + assert.ok(recipe.source && typeof recipe.source === "object", `${at}: source must be an object`); + assertNonEmptyString(recipe.source.url, `${at}: source.url`); + assertNonEmptyString(recipe.source.date, `${at}: source.date`); + assert.doesNotThrow(() => new Date(recipe.source.date).toISOString(), `${at}: source.date must be an ISO date`); + // Honest-default: marketplace/resale endpoints have no stable model identity, + // so we refuse to fabricate a pinned context window (mirrors the gemini + // attempted-vs-used-model caveat in docs/model-fallback-policy.md). 
+ if (recipe.marketplace === true) { + assert.equal(recipe.autoCompactWindow, null, `${at}: marketplace recipes must leave autoCompactWindow null (no fabricated pin)`); + assert.equal(recipe.status, "marketplace-unstable", `${at}: marketplace recipes must declare status "marketplace-unstable"`); + } else if (recipe.autoCompactWindow !== null) { + assert.ok(Number.isInteger(recipe.autoCompactWindow) && recipe.autoCompactWindow > 0, `${at}: autoCompactWindow must be null or a positive integer`); + } +} + +export function validateCcXRecipes({ recipesPath = DEFAULT_RECIPES_PATH } = {}) { + assert.ok(fs.existsSync(recipesPath), `cc-x recipes file does not exist: ${recipesPath}`); + const doc = JSON.parse(fs.readFileSync(recipesPath, "utf8")); + assert.equal(typeof doc.schemaVersion, "number", "schemaVersion must be a number"); + assertNonEmptyString(doc.collectedAt, "collectedAt"); + assertNonEmptyString(doc.disclaimer, "disclaimer"); + assert.ok(Array.isArray(doc.recipes), "recipes must be an array"); + assert.ok(doc.recipes.length >= 7, "expected at least the 7 verified core-lab recipes"); + const vendors = new Set(); + doc.recipes.forEach((recipe, index) => { + validateRecipe(index, recipe); + assert.ok(!vendors.has(recipe.vendor), `duplicate vendor: ${recipe.vendor}`); + vendors.add(recipe.vendor); + }); + return { ok: true, checked: doc.recipes.length, vendors: [...vendors] }; +} + +function main() { + const result = validateCcXRecipes(); + console.log(`cc-x recipes ok: ${result.checked} entries (${result.vendors.join(", ")})`); +} + +if (process.argv[1] === fileURLToPath(import.meta.url)) { + try { + main(); + } catch (error) { + console.error(error instanceof Error ? error.message : error); + process.exit(1); + } +}