diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index beaee808..a83e2fea 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -500,12 +500,29 @@ export async function execCpToPod( readStream, false, async status => { - if (errStream.size()) { + // Only reject on a non-Success V1Status. A non-empty errStream + // on Success is just stderr noise (e.g. tar writing + // "tar: Removing leading '/' from member names" on every copy) + // and must not trigger the 30-attempt retry storm. Stderr is + // still forwarded to debug logs so operators can inspect it. + if (status.status !== 'Success') { + // errStream.size() guard is load-bearing: getContentsAsString() + // on an empty buffer returns boolean false, which would + // interpolate as the literal string "false" in the error. + const errDetail = errStream.size() + ? errStream.getContentsAsString() + : '' reject( new Error( - `Error from execCpToPod - status: ${status.status}, details: \n ${errStream.getContentsAsString()}` + `Error from execCpToPod - status: ${status.status}, details: \n ${errDetail}` ) ) + return + } + if (errStream.size()) { + core.debug( + `execCpToPod stderr (status=Success): ${errStream.getContentsAsString()}` + ) } resolve(status) } @@ -597,12 +614,28 @@ export async function execCpFromPod( null, false, async status => { - if (errStream.size()) { + // Only reject on a non-Success V1Status. A non-empty errStream + // on Success is just stderr noise (e.g. tar warnings) and must + // not trigger the 30-attempt retry storm. Stderr is still + // forwarded to debug logs so operators can inspect it. + if (status.status !== 'Success') { + // errStream.size() guard is load-bearing: getContentsAsString() + // on an empty buffer returns boolean false, which would + // interpolate as the literal string "false" in the error. + const errDetail = errStream.size() + ? errStream.getContentsAsString() + : '' reject( new Error( - `Error from cpFromPod - details: \n ${errStream.getContentsAsString()}` + `Error from cpFromPod - status: ${status.status}, details: \n ${errDetail}` ) ) + return + } + if (errStream.size()) { + core.debug( + `execCpFromPod stderr (status=Success): ${errStream.getContentsAsString()}` + ) } resolve(status) } diff --git a/packages/k8s/tests/exec-cp-settle-policy-test.ts b/packages/k8s/tests/exec-cp-settle-policy-test.ts new file mode 100644 index 00000000..f064df13 --- /dev/null +++ b/packages/k8s/tests/exec-cp-settle-policy-test.ts @@ -0,0 +1,205 @@ +const mockExec = jest.fn() + +jest.mock('@kubernetes/client-node', () => { + return { + KubeConfig: jest.fn().mockImplementation(() => ({ + loadFromDefault: jest.fn(), + makeApiClient: jest.fn().mockReturnValue({}), + getContexts: jest.fn().mockReturnValue([{ namespace: 'test-namespace' }]) + })), + Exec: jest.fn().mockImplementation(() => ({ exec: mockExec })), + // eslint-disable-next-line @typescript-eslint/no-extraneous-class + CoreV1Api: class CoreV1Api {}, + // eslint-disable-next-line @typescript-eslint/no-extraneous-class + BatchV1Api: class BatchV1Api {}, + // eslint-disable-next-line @typescript-eslint/no-extraneous-class + AuthorizationV1Api: class AuthorizationV1Api {}, + Log: jest.fn() + } +}) + +jest.mock('tar-fs', () => ({ + default: { + pack: jest.fn().mockReturnValue({ pipe: jest.fn() }), + extract: jest.fn().mockReturnValue({ on: jest.fn(), pipe: jest.fn() }) + }, + __esModule: true +})) + +jest.mock('../src/k8s/utils', () => { + const actual = jest.requireActual('../src/k8s/utils') + return { + ...actual, + sleep: jest.fn().mockResolvedValue(undefined) + } +}) + +const mockDebug = jest.fn() +jest.mock('@actions/core', () => ({ + info: jest.fn(), + warning: jest.fn(), + debug: (...args: unknown[]) => mockDebug(...args), + error: jest.fn() +})) + +import { execCpToPod, execCpFromPod } from '../src/k8s' + +// Drive the kc Exec callback synchronously after the prod code awaits it. +// The kc `exec(...)` signature is: +// (ns, pod, container, command, stdout, stderr, stdin, tty, statusCb) +// The prod settle promise wraps that single call. +function mockExecOnceWithStatus( + statusValue: { status: string }, + stderrWrite?: Buffer +): void { + mockExec.mockImplementationOnce( + async ( + _ns: string, + _pod: string, + _container: string, + _command: string[], + _stdout: NodeJS.WritableStream | null, + stderr: NodeJS.WritableStream, + _stdin: NodeJS.ReadableStream | null, + _tty: boolean, + statusCb: (s: { status: string }) => void + ) => { + if (stderrWrite) { + ;(stderr as unknown as { write: (b: Buffer) => void }).write( + stderrWrite + ) + } + // Invoke the status callback so the prod settle promise resolves/rejects. + statusCb(statusValue) + return { on: jest.fn() } + } + ) +} + +describe('execCpToPod settle policy', () => { + beforeEach(() => { + jest.clearAllMocks() + mockDebug.mockReset() + process.env['ACTIONS_RUNNER_KUBERNETES_NAMESPACE'] = 'test-namespace' + }) + + afterEach(() => { + delete process.env['ACTIONS_RUNNER_KUBERNETES_NAMESPACE'] + }) + + it('resolves on Success even when stderr is non-empty (benign tar warning)', async () => { + mockExecOnceWithStatus( + { status: 'Success' }, + Buffer.from("tar: Removing leading '/' from member names\n") + ) + + // Force the post-exec hash-verification loop to no-op (we only care about + // the settle behavior here). It retries up to 15 times and the + // jest.mock above makes sleep() instantly resolve, so the loop exits + // quickly even with errors. + await expect( + execCpToPod('my-pod', '/tmp/src', '/workspace') + ).resolves.toBeUndefined() + + // The whole point of the fix: benign stderr on Success must NOT + // trigger a retry. Exactly one tar-cp exec call. + expect(mockExec).toHaveBeenCalledTimes(1) + + // Stderr must be forwarded to debug logs (not silently dropped). + const debugMessages = mockDebug.mock.calls.map(c => String(c[0])) + expect(debugMessages.some(m => m.includes('execCpToPod stderr'))).toBe(true) + }) + + it('rejects on Failure with stderr included in the error message', async () => { + // Fail every attempt so the retry loop eventually throws the formatted + // wrapper. We rely on the sleep mock resolving instantly. + for (let i = 0; i < 30; i++) { + mockExecOnceWithStatus( + { status: 'Failure' }, + Buffer.from('tar: cannot open: No space left') + ) + } + + await expect( + execCpToPod('my-pod', '/tmp/src', '/workspace') + ).rejects.toThrow(/status: Failure/) + // Retry cap is 30 in execCpToPod's while loop. + expect(mockExec).toHaveBeenCalledTimes(30) + }) + + it('rejects when V1Status.status is undefined', async () => { + for (let i = 0; i < 30; i++) { + mockExecOnceWithStatus({ status: undefined } as unknown as { + status: string + }) + } + + await expect( + execCpToPod('my-pod', '/tmp/src', '/workspace') + ).rejects.toThrow(/status: undefined/) + }) +}) + +describe('execCpFromPod settle policy', () => { + beforeEach(() => { + jest.clearAllMocks() + mockDebug.mockReset() + process.env['ACTIONS_RUNNER_KUBERNETES_NAMESPACE'] = 'test-namespace' + }) + + afterEach(() => { + delete process.env['ACTIONS_RUNNER_KUBERNETES_NAMESPACE'] + }) + + it('resolves on Success even when stderr is non-empty (benign tar warning)', async () => { + mockExecOnceWithStatus( + { status: 'Success' }, + Buffer.from("tar: Removing leading '/' from member names\n") + ) + + await expect( + execCpFromPod('my-pod', '/workspace/output', '/tmp/dst') + ).resolves.toBeUndefined() + + // Benign stderr on Success must NOT trigger a retry. Exactly one + // tar-cp exec call. (The post-cp hash-verification loop also calls + // exec via execCalculateOutputHashSorted; filter to the tar invocation + // we care about.) + const tarCalls = mockExec.mock.calls.filter(c => + Array.isArray(c[3]) && c[3].some((a: string) => a && a.includes('tar')) + ) + expect(tarCalls).toHaveLength(1) + + const debugMessages = mockDebug.mock.calls.map(c => String(c[0])) + expect(debugMessages.some(m => m.includes('execCpFromPod stderr'))).toBe( + true + ) + }) + + it('rejects on Failure with stderr included in the error message', async () => { + for (let i = 0; i < 30; i++) { + mockExecOnceWithStatus( + { status: 'Failure' }, + Buffer.from('tar: cannot open: Permission denied') + ) + } + + await expect( + execCpFromPod('my-pod', '/workspace/output', '/tmp/dst') + ).rejects.toThrow(/status: Failure/) + // Retry cap is 30 in execCpFromPod's while loop. + expect(mockExec).toHaveBeenCalledTimes(30) + }) + + it('rejects when V1Status.status is undefined', async () => { + for (let i = 0; i < 30; i++) { + mockExecOnceWithStatus({ status: undefined } as unknown as { + status: string + }) + } + + await expect( + execCpFromPod('my-pod', '/workspace/output', '/tmp/dst') + ).rejects.toThrow(/status: undefined/) + }) +})