From d6a87047f6d664aaab89435636346e46462ceea2 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 2 May 2026 23:11:35 +0000 Subject: [PATCH 1/9] Move K&R function definitions onto the grammar path K&R-style definitions (`int f(a, b) int a; long b; { ... }`) used to fall through to the legacy chomp, where the chomp's top-level `;` terminator fragmented them into multiple `declKind: 'unknown'` external declarations. The validator now accepts the shape, the grammar dispatches into a new `kr_declaration_list` rule between the parameter-list `)` and the body `{`, and the result is a single structured `function_definition` external declaration. The declaration-list child preserves the source as flat token refs to match the legacy CST shape. https://claude.ai/code/session_01DEdkKecwpq59ydTqZ7Aobv --- README.md | 12 ++++-- c-grammar.jsonic | 37 ++++++++++++++++++ src/c.ts | 99 +++++++++++++++++++++++++++++++++++++++++++++++- test/c.test.ts | 46 ++++++++++++++++++++++ 4 files changed, 189 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b5a2f08..0ae73a1 100644 --- a/README.md +++ b/README.md @@ -92,13 +92,15 @@ positions are preserved on token spans). - all expression and statement forms Shapes still on the legacy path: - - K&R parameter lists (`int f(a, b) int a; long b; { … }`) — - rare in modern code; csmith never generates them - complex compound declarators beyond simple function pointers (`int (*arr[N])(int);` arrays-of-fn-ptrs, `int (*(*fpp))(int);` ptr-to-fn-ptr). Plain function pointers `int (*fp)(int);` and top-level `static_assert(cond, msg);` - moved onto the grammar path in 2.0. + moved onto the grammar path in 2.0. K&R-style function + definitions (`int f(a, b) int a; long b; { … }`) moved onto + the grammar path in 2.1; the inter-paren-and-brace + declaration list is preserved as a flat `kr_declaration_list` + node, matching the legacy CST shape. 
Both paths produce identical CST shapes; the `@jsonic/expr`-driven `val` handles initializer expressions in @@ -131,6 +133,9 @@ translation_unit identifier_list (K&R) asm_label?, attribute_spec? '=' initializer + kr_declaration_list (K&R fn-def: flat token-refs + between the param `)` and + the body `{`) static_assert_declaration { condition, message? } define_directive { macroName, macroKind, macroParams?, macroVariadic? } include_directive { includeForm, headerKind, headerName } @@ -263,7 +268,6 @@ Known fall-throughs that produce a `declKind: 'unknown'` external declaration rather than a structured one (still parseable, source fidelity preserved): -- K&R-style parameter declarations (`int f(a, b) int a; long b; { … }`). - GCC `__extern_inline` declarations gated on a `__USE_EXTERN_INLINES` feature macro that hasn't been `#define`d. diff --git a/c-grammar.jsonic b/c-grammar.jsonic index fe7acb4..601579f 100644 --- a/c-grammar.jsonic +++ b/c-grammar.jsonic @@ -205,6 +205,18 @@ { s: 'PUNC_LBRACE' b: 1 p: 'compound_statement' a: '@simple-decl-start-fn-body' g: 'simple-decl-fn-body' } + # K&R-style function definition: between the `)` of an + # identifier-list parameter list and the `{` of the body, + # a sequence of parameter declarations may appear (`int + # f(a, b) int a; long b; { … }`). Descend into + # kr_declaration_list which absorbs flat token-refs until + # `{`; on return the LBRACE alt above picks up the body. + # Gated on @kr-not-yet so we don't re-fire after the list + # has already been attached. + { s: '#SIMPLE_TYPE_HEAD' c: '@kr-not-yet' b: 1 + p: 'kr_declaration_list' g: 'simple-decl-kr-type' } + { s: '#STORAGE_PREFIX' c: '@kr-not-yet' b: 1 + p: 'kr_declaration_list' g: 'simple-decl-kr-storage' } # First declarator (after specs). Backstep the head token so # init_declarator's open sees it; descend into the sub-rule. # ID head: plain declarator. STAR head: pointer prefix. 
@@ -500,6 +512,31 @@ ] } + # kr_declaration_list: the K&R parameter-declaration list that + # appears between the closing `)` of an identifier-list parameter + # list and the opening `{` of the function body. The legacy + # post-process produced a flat node containing the raw token refs + # (no inner declaration structuring); we mirror that shape so + # consumers see the same CST regardless of which path fired. + # Open absorbs the first token; close keeps absorbing until `{`. + kr_declaration_list: { + open: [ + # Defensive: empty list (caller already guarded). + { s: 'PUNC_LBRACE' b: 1 g: 'kr-empty' } + { s: '#ZZ' b: 1 g: 'kr-eof' } + { s: '#ANY_C_TOKEN' a: '@kr-take' g: 'kr-first' } + ] + close: [ + # Stop at the body-opening `{` (don't consume — let + # simple_declaration's PUNC_LBRACE alt drive + # compound_statement). + { s: 'PUNC_LBRACE' b: 1 g: 'kr-end' } + { s: '#ZZ' b: 1 g: 'kr-end-eof' } + { s: '#ANY_C_TOKEN' a: '@kr-take' r: 'kr_declaration_list' + g: 'kr-more' } + ] + } + # parameter_type_list: 1+ comma-separated parameter_declarations, # optionally terminated by `, ...` for variadic functions. parameter_type_list: { diff --git a/src/c.ts b/src/c.ts index cd530ef..03c8c4e 100644 --- a/src/c.ts +++ b/src/c.ts @@ -245,6 +245,18 @@ const grammarText = ` { s: 'PUNC_LBRACE' b: 1 p: 'compound_statement' a: '@simple-decl-start-fn-body' g: 'simple-decl-fn-body' } + # K&R-style function definition: between the \`)\` of an + # identifier-list parameter list and the \`{\` of the body, + # a sequence of parameter declarations may appear (\`int + # f(a, b) int a; long b; { … }\`). Descend into + # kr_declaration_list which absorbs flat token-refs until + # \`{\`; on return the LBRACE alt above picks up the body. + # Gated on @kr-not-yet so we don't re-fire after the list + # has already been attached. 
+ { s: '#SIMPLE_TYPE_HEAD' c: '@kr-not-yet' b: 1 + p: 'kr_declaration_list' g: 'simple-decl-kr-type' } + { s: '#STORAGE_PREFIX' c: '@kr-not-yet' b: 1 + p: 'kr_declaration_list' g: 'simple-decl-kr-storage' } # First declarator (after specs). Backstep the head token so # init_declarator's open sees it; descend into the sub-rule. # ID head: plain declarator. STAR head: pointer prefix. @@ -540,6 +552,31 @@ const grammarText = ` ] } + # kr_declaration_list: the K&R parameter-declaration list that + # appears between the closing \`)\` of an identifier-list parameter + # list and the opening \`{\` of the function body. The legacy + # post-process produced a flat node containing the raw token refs + # (no inner declaration structuring); we mirror that shape so + # consumers see the same CST regardless of which path fired. + # Open absorbs the first token; close keeps absorbing until \`{\`. + kr_declaration_list: { + open: [ + # Defensive: empty list (caller already guarded). + { s: 'PUNC_LBRACE' b: 1 g: 'kr-empty' } + { s: '#ZZ' b: 1 g: 'kr-eof' } + { s: '#ANY_C_TOKEN' a: '@kr-take' g: 'kr-first' } + ] + close: [ + # Stop at the body-opening \`{\` (don't consume — let + # simple_declaration's PUNC_LBRACE alt drive + # compound_statement). + { s: 'PUNC_LBRACE' b: 1 g: 'kr-end' } + { s: '#ZZ' b: 1 g: 'kr-end-eof' } + { s: '#ANY_C_TOKEN' a: '@kr-take' r: 'kr_declaration_list' + g: 'kr-more' } + ] + } + # parameter_type_list: 1+ comma-separated parameter_declarations, # optionally terminated by \`, ...\` for variadic functions. parameter_type_list: { @@ -2840,7 +2877,29 @@ return { j++ } if (!closed) return false - const post = fetchDeep(ctx, j + 1)?.name + let post = fetchDeep(ctx, j + 1)?.name + // K&R-style function definition: between the `)` of the + // identifier-list parameters and the `{` of the body, a + // sequence of parameter declarations may appear. 
Walk past + // them (each starts with a type/storage/attribute keyword + // and ends with a top-level `;`) so the body-validate step + // below can run. + if (post && post !== 'PUNC_SEMI' && post !== 'PUNC_LBRACE') { + if (simpleTypeHeadSet.has(post) || storagePrefixSet.has(post)) { + let p = j + 1 + while (p < j + 1 + SAFETY) { + const t2 = fetchDeep(ctx, p) + const n3 = t2?.name + if (!n3 || n3 === '#ZZ') return false + if (n3 === 'PUNC_LBRACE') break + if (n3 === 'PUNC_RBRACE') return false + p++ + } + if (p >= j + 1 + SAFETY) return false + j = p - 1 + post = 'PUNC_LBRACE' + } + } if (post !== 'PUNC_SEMI' && post !== 'PUNC_LBRACE') return false if (post === 'PUNC_LBRACE') { if (!isFunctionBodySupported(ctx, j + 1)) return false @@ -3172,6 +3231,30 @@ return { pushTokenWithTrivia(rule.parent.k.ptl, rule.c0 as Token) }, + // ---- kr_declaration_list refs (K&R fn-def declaration list) ---- + // + // Sits between the closing `)` of a function declarator's + // identifier-list parameters and the opening `{` of the body. The + // legacy structureExternalDeclaration produced a flat node carrying + // the raw token refs; the grammar rule mirrors that shape. + + // Gate so the dispatching alts in simple_declaration's close don't + // re-fire after the kr_declaration_list has been attached. + '@kr-not-yet': (rule: Rule): boolean => !rule.u.krList, + + '@kr_declaration_list-bo': (rule: Rule): void => { + if (rule.node && rule.node.kind === 'kr_declaration_list') return + rule.node = makeNode('kr_declaration_list') + }, + + // Push the just-matched token (open alt → o0; close alt's r: + // re-recursion → o0 in the recursed rule's open) onto the + // kr_declaration_list node. + '@kr-take': (rule: Rule): void => { + const tkn = (rule.state === 'o' ? rule.o0 : rule.c0) as Token + pushTokenWithTrivia(rule.node, tkn) + }, + // ---- identifier_list refs (K&R-style prototype) ---- // bo: build the identifier_list CST scaffold. 
Guard against @@ -3477,6 +3560,15 @@ return { !rule.u.fnBody) { rule.u.fnBody = rule.child.node } + // K&R declaration list (between the parameter `)` and the body + // `{`). Stash on u so @simple-decl-finalize-fn can splice it + // ahead of the compound_statement in the function_definition + // CST. + if (rule.child && rule.child.name === 'kr_declaration_list' && + rule.child.node && rule.child.node.kind === 'kr_declaration_list' && + !rule.u.krList) { + rule.u.krList = rule.child.node + } }, // close action: matched `;`, finish the declaration. The @@ -3527,6 +3619,11 @@ return { rule.node.children.push(firstId.children[0]) } } + // K&R: declaration list lives between the declarator and the + // body in the legacy CST shape. + if (rule.u.krList) { + rule.node.children.push(rule.u.krList) + } rule.node.children.push(rule.u.fnBody) void ctx }, diff --git a/test/c.test.ts b/test/c.test.ts index 3e34442..be72d02 100644 --- a/test/c.test.ts +++ b/test/c.test.ts @@ -659,6 +659,52 @@ describe('c parser smoke', () => { assert.ok(findKind(fp, 'identifier_list')) }) + test('parameters: K&R function definition flows through grammar', () => { + // Pre-ANSI K&R definition: declarations of the parameter types + // appear between the `)` of the identifier-list and the `{` of + // the body. The whole thing is a single function_definition + // external_declaration with a kr_declaration_list child holding + // the flat token sequence. + const src = 'int f(a, b) int a; long b; { return a + b; }' + const out = j(src) + assert.equal(out.children.length, 1) + const ed = out.children[0] + assert.equal(ed.declKind, 'function_definition') + assert.equal(ed.viaPath, 'grammar') + // Identifier list inside the function_postfix. + assert.ok(findKind(ed, 'identifier_list')) + // K&R declaration list between the declarator and the body. + const krl = findKind(ed, 'kr_declaration_list') + assert.ok(krl) + // Children are flat token refs covering the param-decls. 
+ assert.deepEqual(tokenSrcs(krl), + ['int', 'a', ';', 'long', 'b', ';']) + // Body is structured as a compound_statement. + assert.ok(findKind(ed, 'compound_statement')) + }) + + test('parameters: K&R definition with no inline declarations', () => { + // `int f(a, b) { … }` — identifier-list params with no + // declarations between `)` and `{`. Must still produce a + // function_definition (and no kr_declaration_list). + const src = 'int f(a, b) { return 0; }' + const out = j(src) + assert.equal(out.children.length, 1) + const ed = out.children[0] + assert.equal(ed.declKind, 'function_definition') + assert.equal(findKind(ed, 'kr_declaration_list'), null) + }) + + test('parameters: K&R definition with pointer-typed parameter', () => { + const src = 'void g(p) char *p; { *p = 0; }' + const out = j(src) + const ed = out.children[0] + assert.equal(ed.declKind, 'function_definition') + const krl = findKind(ed, 'kr_declaration_list') + assert.ok(krl) + assert.deepEqual(tokenSrcs(krl), ['char', '*', 'p', ';']) + }) + // ---- Macro-name tagging (slice 9) ----------------------------------- test('macro tagging: identifier in #define body becomes MACRO_NAME later', () => { From 0466b0f1177c889af768e7bf9cf4d94860d60131 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 2 May 2026 23:49:18 +0000 Subject: [PATCH 2/9] Move complex compound declarators onto the grammar path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase P landed simple function-pointer declarators on the grammar but left the complex shapes — arrays of fn-pointers, fn returning ptr-to- array, nested paren-forms, leading-pointer types with paren-form declarators — on the legacy chomp + structure.ts path. Worse, the validator silently accepted `int (*arr[3])(int);` while the grammar emitted a structurally wrong CST (the inner `[3]` postfix sat as a sibling of the inner declarator instead of inside it). 
This change extends the grammar so all four shapes parse correctly: int (*arr[3])(int); // inner array postfix on inner DD int (*get())[10]; // inner function postfix on inner DD int (*(*fpp))(int); // nested paren-form (recursive PID) char *(*foo[3])(int); // leading-pointer-type with paren-form `paren_inner_declarator` now dispatches `array_postfix` / `function_postfix` for inner postfixes (they attach to its own direct_declarator via rule.parent.k.directDeclarator), recurses into itself for nested paren-forms, and tracks paren-pending state separately from `init_declarator`'s. `@pid-paren-close` performs the declarator-attachment that `@pid-name` does for non-nested PIDs. `init_declarator` close gains a paren-form alt gated on `!named` so the leading-pointer-type case routes here instead of falling into function_postfix. The validator factors out a `walkParenFormDeclarator` helper that recursively validates the new shapes. https://claude.ai/code/session_01DEdkKecwpq59ydTqZ7Aobv --- README.md | 24 ++-- c-grammar.jsonic | 33 ++++- src/c.ts | 255 +++++++++++++++++++++++++++++++----- test/c.test.ts | 84 ++++++++++++ test/spec/path-dispatch.tsv | 4 + 5 files changed, 354 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 0ae73a1..8f71473 100644 --- a/README.md +++ b/README.md @@ -91,16 +91,20 @@ positions are preserved on token spans). - top-level GCC `__asm__` - all expression and statement forms - Shapes still on the legacy path: - - complex compound declarators beyond simple function pointers - (`int (*arr[N])(int);` arrays-of-fn-ptrs, - `int (*(*fpp))(int);` ptr-to-fn-ptr). Plain function pointers - `int (*fp)(int);` and top-level `static_assert(cond, msg);` - moved onto the grammar path in 2.0. K&R-style function - definitions (`int f(a, b) int a; long b; { … }`) moved onto - the grammar path in 2.1; the inter-paren-and-brace - declaration list is preserved as a flat `kr_declaration_list` - node, matching the legacy CST shape. 
+ Plain function pointers `int (*fp)(int);` and top-level + `static_assert(cond, msg);` moved onto the grammar path in 2.0. + K&R-style function definitions (`int f(a, b) int a; long b; { + … }`) moved onto the grammar path in 2.1; the inter-paren-and- + brace declaration list is preserved as a flat + `kr_declaration_list` node, matching the legacy CST shape. + Complex compound declarators — arrays of fn-pointers (`int + (*arr[N])(int);`), function returning ptr-to-array (`int + (*get())[10];`), nested paren-forms (`int (*(*fpp))(int);`), + and leading-pointer types with paren-form declarators (`char + *(*foo[3])(int);`) — moved onto the grammar path in 2.2; + `paren_inner_declarator` now recurses, dispatches inner + postfixes, and accepts a leading-pointer-type entry from + `init_declarator`'s close. Both paths produce identical CST shapes; the `@jsonic/expr`-driven `val` handles initializer expressions in diff --git a/c-grammar.jsonic b/c-grammar.jsonic index 601579f..d602465 100644 --- a/c-grammar.jsonic +++ b/c-grammar.jsonic @@ -358,6 +358,13 @@ # Returning from pointer_list, capture the ID, then re-enter # to check for postfix / initializer. { s: 'ID' a: '@idecl-name' r: 'init_declarator' g: 'idecl-id-after-ptrs' } + # Leading-pointer-type with paren-form declarator + # (`char *(*foo)(int);`): after pointer_list returned, we see + # `(` instead of an ID. Treat it as a paren-form sub-declarator + # and descend into paren_inner_declarator. Gated on !named so + # this doesn't fire for the function-postfix `(` after an ID. + { s: 'PUNC_LPAREN' c: '@idecl-not-named' a: '@idecl-paren-open' + p: 'paren_inner_declarator' g: 'idecl-paren-after-ptrs' } # Array postfix `[ … ]` (one or more dimensions). Each one # re-enters init_declarator so additional postfixes can stack. { s: 'PUNC_LBRACKET' b: 1 p: 'array_postfix' @@ -408,8 +415,32 @@ # After pointer_list returns, capture the ID then re-enter. 
{ s: 'ID' a: '@pid-name' r: 'paren_inner_declarator' g: 'pid-id-after-ptrs' } + # Nested paren-form: `int (*(*fpp))(int)` — after the + # pointer_list returned, another `(` opens a deeper inner + # declarator. Push `(` onto our direct_declarator and + # recurse; the matching `)` is consumed by the rparen alt + # below (gated on @pid-paren-pending). + { s: 'PUNC_LPAREN' c: '@pid-not-named' a: '@pid-paren-open' + p: 'paren_inner_declarator' g: 'pid-nested' } + # Returning from a nested paren_inner_declarator: consume + # the matching `)`, mark named/parenClosed, and recurse so + # any inner postfix (`[…]` / `(…)`) can still attach. + { s: 'PUNC_RPAREN' c: '@pid-paren-pending' a: '@pid-paren-close' + r: 'paren_inner_declarator' g: 'pid-nested-rparen' } + # Inner array postfix on the just-named declarator: + # `int (*arr[3])(int);` — the `[3]` belongs to the inner + # direct_declarator, not the outer one. array_postfix + # attaches to rule.parent.k.directDeclarator which is our + # inner direct_declarator at this point. + { s: 'PUNC_LBRACKET' c: '@pid-named' b: 1 p: 'array_postfix' + r: 'paren_inner_declarator' g: 'pid-arr' } + # Inner function postfix on the just-named declarator: + # `int (*get())[10];` — the inner `()` is a function postfix. + { s: 'PUNC_LPAREN' c: '@pid-named' b: 1 p: 'function_postfix' + r: 'paren_inner_declarator' g: 'pid-fn' } # Stop before `)` so the outer init_declarator's close can - # consume it. + # consume it (only when this PID isn't itself paren-pending, + # handled by the priority of the @pid-paren-pending alt above). { s: 'PUNC_RPAREN' b: 1 g: 'pid-end-rparen' } { s: [] g: 'pid-end' } ] diff --git a/src/c.ts b/src/c.ts index 03c8c4e..5a70a8d 100644 --- a/src/c.ts +++ b/src/c.ts @@ -398,6 +398,13 @@ const grammarText = ` # Returning from pointer_list, capture the ID, then re-enter # to check for postfix / initializer. 
{ s: 'ID' a: '@idecl-name' r: 'init_declarator' g: 'idecl-id-after-ptrs' } + # Leading-pointer-type with paren-form declarator + # (\`char *(*foo)(int);\`): after pointer_list returned, we see + # \`(\` instead of an ID. Treat it as a paren-form sub-declarator + # and descend into paren_inner_declarator. Gated on !named so + # this doesn't fire for the function-postfix \`(\` after an ID. + { s: 'PUNC_LPAREN' c: '@idecl-not-named' a: '@idecl-paren-open' + p: 'paren_inner_declarator' g: 'idecl-paren-after-ptrs' } # Array postfix \`[ … ]\` (one or more dimensions). Each one # re-enters init_declarator so additional postfixes can stack. { s: 'PUNC_LBRACKET' b: 1 p: 'array_postfix' @@ -448,8 +455,32 @@ const grammarText = ` # After pointer_list returns, capture the ID then re-enter. { s: 'ID' a: '@pid-name' r: 'paren_inner_declarator' g: 'pid-id-after-ptrs' } + # Nested paren-form: \`int (*(*fpp))(int)\` — after the + # pointer_list returned, another \`(\` opens a deeper inner + # declarator. Push \`(\` onto our direct_declarator and + # recurse; the matching \`)\` is consumed by the rparen alt + # below (gated on @pid-paren-pending). + { s: 'PUNC_LPAREN' c: '@pid-not-named' a: '@pid-paren-open' + p: 'paren_inner_declarator' g: 'pid-nested' } + # Returning from a nested paren_inner_declarator: consume + # the matching \`)\`, mark named/parenClosed, and recurse so + # any inner postfix (\`[…]\` / \`(…)\`) can still attach. + { s: 'PUNC_RPAREN' c: '@pid-paren-pending' a: '@pid-paren-close' + r: 'paren_inner_declarator' g: 'pid-nested-rparen' } + # Inner array postfix on the just-named declarator: + # \`int (*arr[3])(int);\` — the \`[3]\` belongs to the inner + # direct_declarator, not the outer one. array_postfix + # attaches to rule.parent.k.directDeclarator which is our + # inner direct_declarator at this point. 
+ { s: 'PUNC_LBRACKET' c: '@pid-named' b: 1 p: 'array_postfix' + r: 'paren_inner_declarator' g: 'pid-arr' } + # Inner function postfix on the just-named declarator: + # \`int (*get())[10];\` — the inner \`()\` is a function postfix. + { s: 'PUNC_LPAREN' c: '@pid-named' b: 1 p: 'function_postfix' + r: 'paren_inner_declarator' g: 'pid-fn' } # Stop before \`)\` so the outer init_declarator's close can - # consume it. + # consume it (only when this PID isn't itself paren-pending, + # handled by the priority of the @pid-paren-pending alt above). { s: 'PUNC_RPAREN' b: 1 g: 'pid-end-rparen' } { s: [] g: 'pid-end' } ] @@ -2296,6 +2327,60 @@ function fetchDeep(ctx: Context, idx: number): Token | undefined { return isReal(result) ? result : undefined } +// Validate a parenthesised compound declarator starting at `lparenIdx` +// (the index of `(`). Returns the index AFTER the matching `)` on +// success, or -1 on failure. Shape: +// ( pointer-list ( paren-form | ID-like ) inner-postfix* ) +// where pointer-list = `*` (KW_CONST | KW_VOLATILE | KW_RESTRICT | +// KW__ATOMIC)*, and inner-postfix is `[...]` or `(...)` with balanced +// brackets/parens. The recursion depth is bounded by FETCH_DEEP_CAP. +function walkParenFormDeclarator(ctx: Context, lparenIdx: number): number { + if (fetchDeep(ctx, lparenIdx)?.name !== 'PUNC_LPAREN') return -1 + let p = lparenIdx + 1 + // One or more `*` (with optional pointer qualifiers between them). + let starCount = 0 + while (p < FETCH_DEEP_CAP) { + const n = fetchDeep(ctx, p)?.name + if (n === 'PUNC_STAR') { p++; starCount++; continue } + if (starCount > 0 && ( + n === 'KW_CONST' || n === 'KW_VOLATILE' || + n === 'KW_RESTRICT' || n === 'KW__ATOMIC')) { p++; continue } + break + } + if (starCount === 0) return -1 + // Inner: another paren-form, or an ID-like name. 
+ const inner = fetchDeep(ctx, p)?.name + if (inner === 'PUNC_LPAREN') { + const r = walkParenFormDeclarator(ctx, p) + if (r < 0) return -1 + p = r + } else if (inner === 'ID' || inner === 'TYPEDEF_NAME' || + inner === 'MACRO_NAME') { + p++ + } else { + return -1 + } + // Inner postfixes: `[...]` or `(...)` chains. Just balance them — + // the grammar's array_postfix / function_postfix rules will parse + // the contents at parse time. + while (p < FETCH_DEEP_CAP) { + const n = fetchDeep(ctx, p)?.name + if (n !== 'PUNC_LBRACKET' && n !== 'PUNC_LPAREN') break + const closer = n === 'PUNC_LBRACKET' ? 'PUNC_RBRACKET' : 'PUNC_RPAREN' + let depth = 1; p++ + while (p < FETCH_DEEP_CAP && depth > 0) { + const m = fetchDeep(ctx, p)?.name + if (!m || m === '#ZZ') return -1 + if (m === n) depth++ + else if (m === closer) depth-- + p++ + } + if (depth !== 0) return -1 + } + if (fetchDeep(ctx, p)?.name !== 'PUNC_RPAREN') return -1 + return p + 1 +} + function isFunctionBodySupported(ctx: Context, lbraceI: number): boolean { // Walk forward from `{` to its matching `}`, fetching tokens as // we go via fetchDeep. Reject on the first unsupported keyword we @@ -2739,38 +2824,21 @@ return { // (`struct S;`, `enum E;`) and pre-existing simple cases still // flow via the SEMI check below using ctx.t (no fetchDeep). if (ctx.t[i]?.name === 'PUNC_SEMI') return true - // Phase P: parenthesised sub-declarator (function pointer). - // Shape: `+ ( * + ID ) ( ? ) ;`. No initializer - // for now — initialised forms still flow through the legacy - // chomp because val doesn't yet handle every initializer form. + // Parenthesised compound declarator. 
Covers: + // `int (*p)(int);` — function pointer + // `int (*p)[10];` — pointer to array + // `int (*arr[3])(int);` — array of fn-pointers (inner array + // postfix attaches to inner decl) + // `int (*get())[10];` — fn returning ptr-to-array (inner + // function postfix) + // `int (*(*fpp))(int);` — pointer-to-function-pointer + // (nested paren-form) + // No initializer for now — initialised forms still flow through + // the legacy chomp because val doesn't yet handle every + // initializer form. if (ctx.t[i]?.name === 'PUNC_LPAREN') { - // Parenthesised compound declarator. Three shapes: - // `int (*p)(int);` — function pointer - // `int (*p)[10];` — pointer to array - // `int (*arr[3])(int);` — array of fn-pointers (inner has - // its own array postfix) - let p = i + 1 - // Require at least one `*` inside the parens. - if (fetchDeep(ctx, p)?.name !== 'PUNC_STAR') return false - while (p < i + 8 && fetchDeep(ctx, p)?.name === 'PUNC_STAR') p++ - const innerName = fetchDeep(ctx, p)?.name - if (innerName !== 'ID' && innerName !== 'TYPEDEF_NAME' && - innerName !== 'MACRO_NAME') return false - p++ - // Optional inner array postfix(es): `(*arr[3])`. - while (fetchDeep(ctx, p)?.name === 'PUNC_LBRACKET') { - let bd = 1; p++ - while (p < FETCH_DEEP_CAP && bd > 0) { - const n2 = fetchDeep(ctx, p)?.name - if (!n2) return false - if (n2 === 'PUNC_LBRACKET') bd++ - else if (n2 === 'PUNC_RBRACKET') bd-- - p++ - } - if (bd !== 0) return false - } - if (fetchDeep(ctx, p)?.name !== 'PUNC_RPAREN') return false - p++ + let p = walkParenFormDeclarator(ctx, i) + if (p < 0) return false // Trailing postfix: `(...)` (function postfix), `[...]` (array // postfix), or chain of either. Walk balanced parens / brackets. 
const post1 = fetchDeep(ctx, p)?.name @@ -2814,6 +2882,37 @@ return { break } const idName = fetchDeep(ctx, i)?.name + // Leading-pointer-type with paren-form declarator: + // `char *(*foo)(int);` — pointer to function returning char* + // `char *(*foo[3])(int);` — array of pointers to function ... + // After the leading `*`s, accept a paren-form declarator instead + // of an ID. The paren-form contents are handled by the same rule + // as the type-headed case above. + if (sawPointer && idName === 'PUNC_LPAREN') { + let p = walkParenFormDeclarator(ctx, i) + if (p < 0) return false + const post1 = fetchDeep(ctx, p)?.name + if (post1 !== 'PUNC_LPAREN' && post1 !== 'PUNC_LBRACKET') return false + while (true) { + const start = fetchDeep(ctx, p)?.name + if (start !== 'PUNC_LPAREN' && start !== 'PUNC_LBRACKET') break + const closer = start === 'PUNC_LPAREN' ? 'PUNC_RPAREN' : 'PUNC_RBRACKET' + let depth = 0 + let closed = false + while (p < FETCH_DEEP_CAP) { + const n2 = fetchDeep(ctx, p)?.name + if (!n2) return false + if (n2 === start) depth++ + else if (n2 === closer) depth-- + if (depth === 0 && n2 !== start) { closed = true; break } + p++ + } + if (!closed) return false + p++ + } + const post = fetchDeep(ctx, p)?.name + return post === 'PUNC_SEMI' + } if (idName !== 'ID' && idName !== 'TYPEDEF_NAME' && idName !== 'MACRO_NAME') return false i++ @@ -3067,16 +3166,30 @@ return { // declarator, and marks the rule as paren-pending so the close // alt for `)` fires after paren_inner_declarator returns. '@idecl-paren-open': (rule: Rule): void => { - const lparen = rule.o0 as Token + // Fires from open (the bare `(` form `int (*p)(…)`) AND from + // close (the leading-pointer `(` form `char *(*p)(…)`); pick + // the matched token from the right slot. + const lparen = (rule.state === 'c' ? 
rule.c0 : rule.o0) as Token pushTokenWithTrivia(rule.k.directDeclarator, lparen) - rule.k.declarator.children.push(rule.k.directDeclarator) - rule.node.children.push(rule.k.declarator) + if (rule.k.declarator.children.indexOf(rule.k.directDeclarator) < 0) { + rule.k.declarator.children.push(rule.k.directDeclarator) + } + if (rule.node.children.indexOf(rule.k.declarator) < 0) { + rule.node.children.push(rule.k.declarator) + } rule.k.idclParenPending = true }, '@idecl-paren-pending': (rule: Rule): boolean => rule.k.idclParenPending === true && rule.k.parenClosed !== true, + // Used to gate the paren-form alt that fires when init_declarator's + // close sees `(` after pointer_list returned but before any ID was + // captured. Distinguishes the leading-pointer-type paren-form + // (`char *(*foo)(int)`) from the function-postfix `(` that follows + // a captured ID. + '@idecl-not-named': (rule: Rule): boolean => rule.k.named !== true, + // Consume the matching `)` after paren_inner_declarator returns, // append it to the outer direct_declarator, and latch named so // r:-recursion's reentry-gate accepts the next iteration. @@ -3099,6 +3212,25 @@ return { // when @pid-name attaches `rule.k.declarator` into the outer // direct_declarator. Use a paren_inner-specific marker instead. '@paren_inner_declarator-bo': (rule: Rule): void => { + // First p:-descent into a deeper paren_inner_declarator (nested + // paren-form `int (*(*fpp))(int)`): the parent rule is also a + // paren_inner_declarator and its k has been shallow-copied here + // (so .pidInit is already true). Override and rebuild fresh + // scaffolding so the inner declarator doesn't alias the outer + // one's nodes. Distinguish from r:-re-entry on the SAME level by + // checking k.named: at first descent the inner hasn't named yet. 
+ if (rule.parent && rule.parent.name === 'paren_inner_declarator' && + !rule.k.named) { + rule.k.declarator = makeNode('declarator') + rule.k.directDeclarator = makeNode('direct_declarator') + rule.k.attached = false + rule.k.pidInit = true + rule.k.pidParenPending = false + rule.k.pidParenClosed = false + return + } + // r:-recursion shallow-copies k from the previous instance; + // skip re-init so the in-flight inner declarator isn't reset. if (rule.k.pidInit) return rule.k.pidInit = true rule.k.declarator = makeNode('declarator') @@ -3107,6 +3239,59 @@ return { '@pid-named': (rule: Rule): boolean => rule.k.named === true, + '@pid-not-named': (rule: Rule): boolean => rule.k.named !== true, + + // Paren-pending state for nested paren-forms (`int (*(*fpp))(int)`): + // when this PID has descended into a deeper PID via a `(` in its + // close, the matching `)` must be consumed onto OUR direct_declarator + // (not the outer init_declarator's). Tracked separately from + // idclParenPending so the two layers don't collide. + '@pid-paren-pending': (rule: Rule): boolean => + rule.k.pidParenPending === true && rule.k.pidParenClosed !== true, + + // Take the opening `(` of a nested paren-form onto our inner + // direct_declarator, mark paren-pending, and descend. Fires from + // close state so the matched token is in c0. + '@pid-paren-open': (rule: Rule): void => { + const lparen = (rule.state === 'c' ? rule.c0 : rule.o0) as Token + pushTokenWithTrivia(rule.k.directDeclarator, lparen) + rule.k.pidParenPending = true + }, + + // Consume the matching `)` for a nested paren-form. Mark + // pidParenClosed so the gate above stops firing, and latch named + // so the open's reentry-gate accepts the next iteration where + // any inner postfix `[…]` / `(…)` can attach. 
Also attach our + // (now-complete) declarator onto the outer's direct_declarator — + // nested paren-forms never run @pid-name at THIS level (they + // descend into a deeper PID instead), so the attachment that + // @pid-name normally performs has to happen here. + '@pid-paren-close': (rule: Rule): void => { + pushTokenWithTrivia(rule.k.directDeclarator, rule.c0 as Token) + rule.k.pidParenClosed = true + rule.k.named = true + if (!rule.k.attached) { + // Splice OUR direct_declarator into OUR declarator.children + // (the @pid-name path does this when an ID is captured; for + // nested paren-forms no ID is captured at this level so we + // need to do it ourselves). + if (rule.k.declarator.children.indexOf(rule.k.directDeclarator) < 0) { + rule.k.declarator.children.push(rule.k.directDeclarator) + } + const outer = rule.parent as Rule + if (outer && outer.k && outer.k.directDeclarator) { + outer.k.directDeclarator.children.push(rule.k.declarator) + const dn = rule.k.directDeclarator.declaredName + if (dn) { + outer.k.directDeclarator.declaredName = dn + outer.k.declarator.declaredName = dn + if (outer.node) outer.node.declaredName = dn + } + rule.k.attached = true + } + } + }, + // Capture the inner declarator's name. 
Mirrors @idecl-name but // attaches the resulting inner declarator onto the outer (parent) // init_declarator's directDeclarator children rather than diff --git a/test/c.test.ts b/test/c.test.ts index be72d02..a3409b3 100644 --- a/test/c.test.ts +++ b/test/c.test.ts @@ -705,6 +705,90 @@ describe('c parser smoke', () => { assert.deepEqual(tokenSrcs(krl), ['char', '*', 'p', ';']) }) + // ---- Complex compound declarators ---------------------------------- + + function topInitDeclarator(ed: any): any { + return ed.children + .find((c: any) => c.kind === 'init_declarator_list') + .children[0] + } + function topDeclarator(ed: any): any { + const id = topInitDeclarator(ed) + return id.children.find((c: any) => c.kind === 'declarator') + } + function topDirectDeclarator(ed: any): any { + return topDeclarator(ed).children + .find((c: any) => c.kind === 'direct_declarator') + } + + test('compound decl: array of fn-pointers structures inner postfix', () => { + // `int (*arr[3])(int);` — `arr` is an array of 3 pointers to + // functions taking int returning int. The inner array_postfix + // [3] must sit INSIDE the inner direct_declarator (alongside + // `arr`), NOT as a sibling of the inner declarator at the + // outer paren level. + const ed = j('int (*arr[3])(int);').children[0] + assert.equal(ed.viaPath, 'grammar') + const outerDD = topDirectDeclarator(ed) + const innerDecl = outerDD.children.find((c: any) => c.kind === 'declarator') + assert.ok(innerDecl) + const innerDD = innerDecl.children.find((c: any) => c.kind === 'direct_declarator') + assert.ok(innerDD) + // The array_postfix must be INSIDE innerDD, not a sibling at outerDD. + assert.ok(innerDD.children.some((c: any) => c.kind === 'array_postfix')) + assert.equal(outerDD.children.filter((c: any) => c.kind === 'array_postfix').length, 0) + // Outer level still carries the function_postfix `(int)`. 
+ assert.ok(outerDD.children.some((c: any) => c.kind === 'function_postfix')) + }) + + test('compound decl: fn returning ptr-to-array (inner fn postfix)', () => { + // `int (*get())[10];` — the inner function_postfix `()` attaches + // to the inner direct_declarator; the outer `[10]` is a sibling + // postfix at the outer paren level. + const ed = j('int (*get())[10];').children[0] + assert.equal(ed.viaPath, 'grammar') + const outerDD = topDirectDeclarator(ed) + const innerDecl = outerDD.children.find((c: any) => c.kind === 'declarator') + const innerDD = innerDecl.children.find((c: any) => c.kind === 'direct_declarator') + assert.ok(innerDD.children.some((c: any) => c.kind === 'function_postfix')) + assert.ok(outerDD.children.some((c: any) => c.kind === 'array_postfix')) + }) + + test('compound decl: nested paren-form (ptr-to-fn-ptr)', () => { + // `int (*(*fpp))(int);` — two levels of paren-form. Each level + // contributes its own declarator + direct_declarator pair, each + // carrying one `*`. + const ed = j('int (*(*fpp))(int);').children[0] + assert.equal(ed.viaPath, 'grammar') + const outerDD = topDirectDeclarator(ed) + assert.equal(outerDD.declaredName, 'fpp') + const midDecl = outerDD.children.find((c: any) => c.kind === 'declarator') + const midDD = midDecl.children.find((c: any) => c.kind === 'direct_declarator') + const innerDecl = midDD.children.find((c: any) => c.kind === 'declarator') + const innerDD = innerDecl.children.find((c: any) => c.kind === 'direct_declarator') + assert.equal(innerDD.declaredName, 'fpp') + assert.ok(midDecl.children.some((c: any) => c.kind === 'pointer')) + assert.ok(innerDecl.children.some((c: any) => c.kind === 'pointer')) + }) + + test('compound decl: leading-pointer type with paren-form', () => { + // `char *(*foo)(int);` — type `char *`, then a paren-form + // declarator. The outer `*` sits at the OUTER declarator + // level (not inside the paren-form). 
+ const ed = j('char *(*foo)(int);').children[0] + assert.equal(ed.viaPath, 'grammar') + const outerDeclarator = topDeclarator(ed) + const outerPointers = outerDeclarator.children.filter( + (c: any) => c.kind === 'pointer') + assert.equal(outerPointers.length, 1) + const outerDD = outerDeclarator.children.find( + (c: any) => c.kind === 'direct_declarator') + const innerDecl = outerDD.children.find((c: any) => c.kind === 'declarator') + const innerPointers = innerDecl.children.filter( + (c: any) => c.kind === 'pointer') + assert.equal(innerPointers.length, 1) + }) + // ---- Macro-name tagging (slice 9) ----------------------------------- test('macro tagging: identifier in #define body becomes MACRO_NAME later', () => { diff --git a/test/spec/path-dispatch.tsv b/test/spec/path-dispatch.tsv index 5f5b3ac..6d452a7 100644 --- a/test/spec/path-dispatch.tsv +++ b/test/spec/path-dispatch.tsv @@ -73,6 +73,10 @@ typedef int Arr[10]; grammar declaration typedef of array # ---- grammar path: complex compound declarators ---- int (*p)[10]; grammar declaration pointer to array int (*arr[3])(int); grammar declaration array of fn-pointers +int (*get())[10]; grammar declaration fn returning ptr-to-array +int (*(*fpp))(int); grammar declaration nested paren-form (ptr to fn-ptr) +char *(*foo)(int); grammar declaration leading-ptr-type ptr-to-fn +char *(*foo[3])(int); grammar declaration leading-ptr-type array of fn-ptrs # ---- legacy chomp path: brace initializers ---- int a[3] = { 1, 2, 3 }; grammar declaration brace init From 84d19eb464fd71ebdd9240ab0b1197271f63a458 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 3 May 2026 09:04:02 +0000 Subject: [PATCH 3/9] Remove legacy chomp + extended-mode infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plain-C was already 100% covered by the grammar path; the chomp + structureExternalDeclaration safety net was reachable only for malformed input, and extended-mode (preprocessor, GCC 
__attribute__ / __asm__, MSVC __declspec, in-body #-lines, etc.) depended on the legacy post-process for anything the grammar didn't structure. Per the upcoming clean-slate rewrite of extended mode, both go. Removed: - src/structure.ts (~1975 lines), src/conditional-groups.ts, src/expr.ts (the latter two were already dead code in the grammar path) - The chomp wildcard alt + finalize-extdecl close alts in external_declaration; the wildcard cascade and @looks-simple-decl validator - All extension grammar rules: preprocessor_directive / define_directive / undef_directive / include_directive / conditional_directive / simple_directive / macro_parameter_list / macro_body / header_form / preprocessor_line, asm_statement / asm_template / asm_section / asm_operand / asm_clobber / asm_label_ref, attribute_spec_gcc / attribute_spec_msvc - COptions.extended, EXTENSION_RULES stripping, @extended-on / @extended-off / @plain-and-first-iter / @ext-and-first-iter / @plain-as23-and-first / @new-path / @mark-new-path / @absorb-token / @finalize-extdecl / @terminated / @just-closed-and-decl-ahead - isFunctionBodySupported, fetchDeep, walkParenFormDeclarator, skipLeadingAttributes, skipTaggedSpec, UNSUPPORTED_BODY_TOKENS, plus the registerTypedefIfApplicable / finalizeExternalDeclaration / registerMacrosFromTree / firstNonTriviaIs / startsNewExternalDeclaration helpers and the legacy declarator-walk helpers (findDeclaredName, splitDeclarators, declaratorPart, findSpecBoundary, isSpecifierKw, matchClose) plus the unused TYPE_SPEC_KEYWORD_NAMES / STORAGE_CLASS_NAMES / etc. classification sets - csmith corpus + fixture tests + generator - test/spec/path-dispatch.tsv (single path → no dispatch to track) - ~24 extension-feature tests in c.test.ts (preprocessor, GCC asm, GCC __attribute__, MSVC __declspec, conditional_group, macro tagging in #define bodies, ...) 
and the viaPath assertions external_declaration's open now dispatches statically: KW_STATIC_ASSERT / KW__STATIC_ASSERT into static_assert_declaration, #SIMPLE_TYPE_HEAD / #STORAGE_PREFIX / KW__BITINT / `[[` into simple_declaration. Anything else is a parse error. external_declaration's close runs @finalize-new-path unconditionally. Tests: 78/78 pass. https://claude.ai/code/session_01DEdkKecwpq59ydTqZ7Aobv --- README.md | 256 +- c-grammar.jsonic | 547 +-- src/c.ts | 1694 +--------- src/conditional-groups.ts | 156 - src/expr.ts | 714 ---- src/structure.ts | 1975 ----------- test/c.test.ts | 566 +--- test/csmith-common.ts | 51 - test/csmith-corpus/seed-001.c | 1607 --------- test/csmith-corpus/seed-002.c | 1724 ---------- test/csmith-corpus/seed-003.c | 1112 ------ test/csmith-corpus/seed-004.c | 2474 -------------- test/csmith-corpus/seed-005.c | 121 - test/csmith-corpus/seed-006.c | 2125 ------------ test/csmith-corpus/seed-007.c | 1589 --------- test/csmith-corpus/seed-008.c | 1355 -------- test/csmith-corpus/seed-009.c | 1749 ---------- test/csmith-corpus/seed-010.c | 3289 ------------------ test/csmith-corpus/seed-011.c | 3034 ----------------- test/csmith-corpus/seed-012.c | 914 ----- test/csmith-corpus/seed-013.c | 124 - test/csmith-corpus/seed-014.c | 121 - test/csmith-corpus/seed-015.c | 2478 -------------- test/csmith-corpus/seed-016.c | 2526 -------------- test/csmith-corpus/seed-017.c | 142 - test/csmith-corpus/seed-018.c | 115 - test/csmith-corpus/seed-019.c | 1118 ------ test/csmith-corpus/seed-020.c | 1856 ---------- test/csmith-corpus/seed-021.c | 98 - test/csmith-corpus/seed-022.c | 3691 -------------------- test/csmith-corpus/seed-023.c | 1554 --------- test/csmith-corpus/seed-024.c | 1125 ------- test/csmith-corpus/seed-025.c | 1924 ----------- test/csmith-corpus/seed-026.c | 2878 ---------------- test/csmith-corpus/seed-027.c | 2824 ---------------- test/csmith-corpus/seed-028.c | 852 ----- test/csmith-corpus/seed-029.c | 2379 ------------- 
test/csmith-corpus/seed-030.c | 2771 --------------- test/csmith-corpus/seed-031.c | 1372 -------- test/csmith-corpus/seed-032.c | 1954 ----------- test/csmith-corpus/seed-033.c | 140 - test/csmith-corpus/seed-034.c | 98 - test/csmith-corpus/seed-035.c | 4497 ------------------------- test/csmith-corpus/seed-036.c | 1687 ---------- test/csmith-corpus/seed-037.c | 1526 --------- test/csmith-corpus/seed-038.c | 2504 -------------- test/csmith-corpus/seed-039.c | 2303 ------------- test/csmith-corpus/seed-040.c | 719 ---- test/csmith-corpus/seed-041.c | 1166 ------- test/csmith-corpus/seed-042.c | 2577 -------------- test/csmith-corpus/seed-043.c | 1691 ---------- test/csmith-corpus/seed-044.c | 895 ----- test/csmith-corpus/seed-045.c | 2322 ------------- test/csmith-corpus/seed-046.c | 301 -- test/csmith-corpus/seed-047.c | 1485 -------- test/csmith-corpus/seed-048.c | 1280 ------- test/csmith-corpus/seed-049.c | 3603 -------------------- test/csmith-corpus/seed-050.c | 986 ------ test/csmith-corpus/seed-051.c | 2259 ------------- test/csmith-corpus/seed-052.c | 131 - test/csmith-corpus/seed-053.c | 1380 -------- test/csmith-corpus/seed-054.c | 128 - test/csmith-corpus/seed-055.c | 2307 ------------- test/csmith-corpus/seed-056.c | 1984 ----------- test/csmith-corpus/seed-057.c | 1536 --------- test/csmith-corpus/seed-058.c | 1633 --------- test/csmith-corpus/seed-059.c | 1453 -------- test/csmith-corpus/seed-060.c | 2060 ----------- test/csmith-corpus/seed-061.c | 1336 -------- test/csmith-corpus/seed-062.c | 1205 ------- test/csmith-corpus/seed-063.c | 139 - test/csmith-corpus/seed-064.c | 129 - test/csmith-corpus/seed-065.c | 123 - test/csmith-corpus/seed-066.c | 2551 -------------- test/csmith-corpus/seed-067.c | 1133 ------- test/csmith-corpus/seed-068.c | 2424 ------------- test/csmith-corpus/seed-069.c | 124 - test/csmith-corpus/seed-070.c | 876 ----- test/csmith-corpus/seed-071.c | 1931 ----------- test/csmith-corpus/seed-072.c | 97 - 
test/csmith-corpus/seed-073.c | 1680 --------- test/csmith-corpus/seed-074.c | 2667 --------------- test/csmith-corpus/seed-075.c | 196 -- test/csmith-corpus/seed-076.c | 96 - test/csmith-corpus/seed-077.c | 1957 ----------- test/csmith-corpus/seed-078.c | 131 - test/csmith-corpus/seed-079.c | 1588 --------- test/csmith-corpus/seed-080.c | 2518 -------------- test/csmith-corpus/seed-081.c | 1105 ------ test/csmith-corpus/seed-082.c | 3866 --------------------- test/csmith-corpus/seed-083.c | 1828 ---------- test/csmith-corpus/seed-084.c | 121 - test/csmith-corpus/seed-085.c | 1539 --------- test/csmith-corpus/seed-086.c | 1473 -------- test/csmith-corpus/seed-087.c | 2001 ----------- test/csmith-corpus/seed-088.c | 1868 ---------- test/csmith-corpus/seed-089.c | 1592 --------- test/csmith-corpus/seed-090.c | 2417 ------------- test/csmith-corpus/seed-091.c | 2530 -------------- test/csmith-corpus/seed-092.c | 1247 ------- test/csmith-corpus/seed-093.c | 3950 ---------------------- test/csmith-corpus/seed-094.c | 2303 ------------- test/csmith-corpus/seed-095.c | 2040 ----------- test/csmith-corpus/seed-096.c | 96 - test/csmith-corpus/seed-097.c | 2332 ------------- test/csmith-corpus/seed-098.c | 2211 ------------ test/csmith-corpus/seed-099.c | 1089 ------ test/csmith-corpus/seed-100.c | 107 - test/csmith-fixture.ts | 94 - test/csmith-fixtures/seed-001.json.gz | Bin 72669 -> 0 bytes test/csmith-fixtures/seed-002.json.gz | Bin 86959 -> 0 bytes test/csmith-fixtures/seed-003.json.gz | Bin 50159 -> 0 bytes test/csmith-fixtures/seed-004.json.gz | Bin 163397 -> 0 bytes test/csmith-fixtures/seed-005.json.gz | Bin 1566 -> 0 bytes test/csmith-fixtures/seed-006.json.gz | Bin 104848 -> 0 bytes test/csmith-fixtures/seed-007.json.gz | Bin 74833 -> 0 bytes test/csmith-fixtures/seed-008.json.gz | Bin 59546 -> 0 bytes test/csmith-fixtures/seed-009.json.gz | Bin 83642 -> 0 bytes test/csmith-fixtures/seed-010.json.gz | Bin 183762 -> 0 bytes test/csmith-fixtures/seed-011.json.gz | 
Bin 160792 -> 0 bytes test/csmith-fixtures/seed-012.json.gz | Bin 35386 -> 0 bytes test/csmith-fixtures/seed-013.json.gz | Bin 1320 -> 0 bytes test/csmith-fixtures/seed-014.json.gz | Bin 1276 -> 0 bytes test/csmith-fixtures/seed-015.json.gz | Bin 125525 -> 0 bytes test/csmith-fixtures/seed-016.json.gz | Bin 126851 -> 0 bytes test/csmith-fixtures/seed-017.json.gz | Bin 1824 -> 0 bytes test/csmith-fixtures/seed-018.json.gz | Bin 2133 -> 0 bytes test/csmith-fixtures/seed-019.json.gz | Bin 52205 -> 0 bytes test/csmith-fixtures/seed-020.json.gz | Bin 104187 -> 0 bytes test/csmith-fixtures/seed-021.json.gz | Bin 1162 -> 0 bytes test/csmith-fixtures/seed-022.json.gz | Bin 198027 -> 0 bytes test/csmith-fixtures/seed-023.json.gz | Bin 74869 -> 0 bytes test/csmith-fixtures/seed-024.json.gz | Bin 55094 -> 0 bytes test/csmith-fixtures/seed-025.json.gz | Bin 102415 -> 0 bytes test/csmith-fixtures/seed-026.json.gz | Bin 165712 -> 0 bytes test/csmith-fixtures/seed-027.json.gz | Bin 139151 -> 0 bytes test/csmith-fixtures/seed-028.json.gz | Bin 44813 -> 0 bytes test/csmith-fixtures/seed-029.json.gz | Bin 125620 -> 0 bytes test/csmith-fixtures/seed-030.json.gz | Bin 149360 -> 0 bytes test/csmith-fixtures/seed-031.json.gz | Bin 63350 -> 0 bytes test/csmith-fixtures/seed-032.json.gz | Bin 102810 -> 0 bytes test/csmith-fixtures/seed-033.json.gz | Bin 1849 -> 0 bytes test/csmith-fixtures/seed-034.json.gz | Bin 1153 -> 0 bytes test/csmith-fixtures/seed-035.json.gz | Bin 223525 -> 0 bytes test/csmith-fixtures/seed-036.json.gz | Bin 72936 -> 0 bytes test/csmith-fixtures/seed-037.json.gz | Bin 65507 -> 0 bytes test/csmith-fixtures/seed-038.json.gz | Bin 118405 -> 0 bytes test/csmith-fixtures/seed-039.json.gz | Bin 125842 -> 0 bytes test/csmith-fixtures/seed-040.json.gz | Bin 33597 -> 0 bytes test/csmith-fixtures/seed-041.json.gz | Bin 58680 -> 0 bytes test/csmith-fixtures/seed-042.json.gz | Bin 132607 -> 0 bytes test/csmith-fixtures/seed-043.json.gz | Bin 83870 -> 0 bytes 
test/csmith-fixtures/seed-044.json.gz | Bin 38624 -> 0 bytes test/csmith-fixtures/seed-045.json.gz | Bin 118420 -> 0 bytes test/csmith-fixtures/seed-046.json.gz | Bin 6212 -> 0 bytes test/csmith-fixtures/seed-047.json.gz | Bin 78036 -> 0 bytes test/csmith-fixtures/seed-048.json.gz | Bin 56063 -> 0 bytes test/csmith-fixtures/seed-049.json.gz | Bin 251172 -> 0 bytes test/csmith-fixtures/seed-050.json.gz | Bin 36214 -> 0 bytes test/csmith-fixtures/seed-051.json.gz | Bin 112331 -> 0 bytes test/csmith-fixtures/seed-052.json.gz | Bin 1616 -> 0 bytes test/csmith-fixtures/seed-053.json.gz | Bin 60388 -> 0 bytes test/csmith-fixtures/seed-054.json.gz | Bin 1711 -> 0 bytes test/csmith-fixtures/seed-055.json.gz | Bin 105869 -> 0 bytes test/csmith-fixtures/seed-056.json.gz | Bin 90451 -> 0 bytes test/csmith-fixtures/seed-057.json.gz | Bin 78613 -> 0 bytes test/csmith-fixtures/seed-058.json.gz | Bin 84948 -> 0 bytes test/csmith-fixtures/seed-059.json.gz | Bin 59784 -> 0 bytes test/csmith-fixtures/seed-060.json.gz | Bin 95636 -> 0 bytes test/csmith-fixtures/seed-061.json.gz | Bin 61560 -> 0 bytes test/csmith-fixtures/seed-062.json.gz | Bin 55866 -> 0 bytes test/csmith-fixtures/seed-063.json.gz | Bin 1961 -> 0 bytes test/csmith-fixtures/seed-064.json.gz | Bin 1584 -> 0 bytes test/csmith-fixtures/seed-065.json.gz | Bin 1954 -> 0 bytes test/csmith-fixtures/seed-066.json.gz | Bin 124321 -> 0 bytes test/csmith-fixtures/seed-067.json.gz | Bin 52465 -> 0 bytes test/csmith-fixtures/seed-068.json.gz | Bin 122695 -> 0 bytes test/csmith-fixtures/seed-069.json.gz | Bin 1571 -> 0 bytes test/csmith-fixtures/seed-070.json.gz | Bin 43314 -> 0 bytes test/csmith-fixtures/seed-071.json.gz | Bin 94747 -> 0 bytes test/csmith-fixtures/seed-072.json.gz | Bin 1123 -> 0 bytes test/csmith-fixtures/seed-073.json.gz | Bin 93031 -> 0 bytes test/csmith-fixtures/seed-074.json.gz | Bin 136956 -> 0 bytes test/csmith-fixtures/seed-075.json.gz | Bin 4242 -> 0 bytes test/csmith-fixtures/seed-076.json.gz | Bin 1102 
-> 0 bytes test/csmith-fixtures/seed-077.json.gz | Bin 85449 -> 0 bytes test/csmith-fixtures/seed-078.json.gz | Bin 1626 -> 0 bytes test/csmith-fixtures/seed-079.json.gz | Bin 91928 -> 0 bytes test/csmith-fixtures/seed-080.json.gz | Bin 123806 -> 0 bytes test/csmith-fixtures/seed-081.json.gz | Bin 50285 -> 0 bytes test/csmith-fixtures/seed-082.json.gz | Bin 218973 -> 0 bytes test/csmith-fixtures/seed-083.json.gz | Bin 100610 -> 0 bytes test/csmith-fixtures/seed-084.json.gz | Bin 1313 -> 0 bytes test/csmith-fixtures/seed-085.json.gz | Bin 69831 -> 0 bytes test/csmith-fixtures/seed-086.json.gz | Bin 67433 -> 0 bytes test/csmith-fixtures/seed-087.json.gz | Bin 107907 -> 0 bytes test/csmith-fixtures/seed-088.json.gz | Bin 93705 -> 0 bytes test/csmith-fixtures/seed-089.json.gz | Bin 80837 -> 0 bytes test/csmith-fixtures/seed-090.json.gz | Bin 120285 -> 0 bytes test/csmith-fixtures/seed-091.json.gz | Bin 118761 -> 0 bytes test/csmith-fixtures/seed-092.json.gz | Bin 64325 -> 0 bytes test/csmith-fixtures/seed-093.json.gz | Bin 188133 -> 0 bytes test/csmith-fixtures/seed-094.json.gz | Bin 114947 -> 0 bytes test/csmith-fixtures/seed-095.json.gz | Bin 96237 -> 0 bytes test/csmith-fixtures/seed-096.json.gz | Bin 1106 -> 0 bytes test/csmith-fixtures/seed-097.json.gz | Bin 119619 -> 0 bytes test/csmith-fixtures/seed-098.json.gz | Bin 121515 -> 0 bytes test/csmith-fixtures/seed-099.json.gz | Bin 55579 -> 0 bytes test/csmith-fixtures/seed-100.json.gz | Bin 1540 -> 0 bytes test/csmith-gen.ts | 71 - test/csmith.test.ts | 85 - test/spec/path-dispatch.tsv | 87 - 212 files changed, 141 insertions(+), 162417 deletions(-) delete mode 100644 src/conditional-groups.ts delete mode 100644 src/expr.ts delete mode 100644 src/structure.ts delete mode 100644 test/csmith-common.ts delete mode 100644 test/csmith-corpus/seed-001.c delete mode 100644 test/csmith-corpus/seed-002.c delete mode 100644 test/csmith-corpus/seed-003.c delete mode 100644 test/csmith-corpus/seed-004.c delete mode 100644 
test/csmith-corpus/seed-005.c delete mode 100644 test/csmith-corpus/seed-006.c delete mode 100644 test/csmith-corpus/seed-007.c delete mode 100644 test/csmith-corpus/seed-008.c delete mode 100644 test/csmith-corpus/seed-009.c delete mode 100644 test/csmith-corpus/seed-010.c delete mode 100644 test/csmith-corpus/seed-011.c delete mode 100644 test/csmith-corpus/seed-012.c delete mode 100644 test/csmith-corpus/seed-013.c delete mode 100644 test/csmith-corpus/seed-014.c delete mode 100644 test/csmith-corpus/seed-015.c delete mode 100644 test/csmith-corpus/seed-016.c delete mode 100644 test/csmith-corpus/seed-017.c delete mode 100644 test/csmith-corpus/seed-018.c delete mode 100644 test/csmith-corpus/seed-019.c delete mode 100644 test/csmith-corpus/seed-020.c delete mode 100644 test/csmith-corpus/seed-021.c delete mode 100644 test/csmith-corpus/seed-022.c delete mode 100644 test/csmith-corpus/seed-023.c delete mode 100644 test/csmith-corpus/seed-024.c delete mode 100644 test/csmith-corpus/seed-025.c delete mode 100644 test/csmith-corpus/seed-026.c delete mode 100644 test/csmith-corpus/seed-027.c delete mode 100644 test/csmith-corpus/seed-028.c delete mode 100644 test/csmith-corpus/seed-029.c delete mode 100644 test/csmith-corpus/seed-030.c delete mode 100644 test/csmith-corpus/seed-031.c delete mode 100644 test/csmith-corpus/seed-032.c delete mode 100644 test/csmith-corpus/seed-033.c delete mode 100644 test/csmith-corpus/seed-034.c delete mode 100644 test/csmith-corpus/seed-035.c delete mode 100644 test/csmith-corpus/seed-036.c delete mode 100644 test/csmith-corpus/seed-037.c delete mode 100644 test/csmith-corpus/seed-038.c delete mode 100644 test/csmith-corpus/seed-039.c delete mode 100644 test/csmith-corpus/seed-040.c delete mode 100644 test/csmith-corpus/seed-041.c delete mode 100644 test/csmith-corpus/seed-042.c delete mode 100644 test/csmith-corpus/seed-043.c delete mode 100644 test/csmith-corpus/seed-044.c delete mode 100644 test/csmith-corpus/seed-045.c delete 
mode 100644 test/csmith-corpus/seed-046.c delete mode 100644 test/csmith-corpus/seed-047.c delete mode 100644 test/csmith-corpus/seed-048.c delete mode 100644 test/csmith-corpus/seed-049.c delete mode 100644 test/csmith-corpus/seed-050.c delete mode 100644 test/csmith-corpus/seed-051.c delete mode 100644 test/csmith-corpus/seed-052.c delete mode 100644 test/csmith-corpus/seed-053.c delete mode 100644 test/csmith-corpus/seed-054.c delete mode 100644 test/csmith-corpus/seed-055.c delete mode 100644 test/csmith-corpus/seed-056.c delete mode 100644 test/csmith-corpus/seed-057.c delete mode 100644 test/csmith-corpus/seed-058.c delete mode 100644 test/csmith-corpus/seed-059.c delete mode 100644 test/csmith-corpus/seed-060.c delete mode 100644 test/csmith-corpus/seed-061.c delete mode 100644 test/csmith-corpus/seed-062.c delete mode 100644 test/csmith-corpus/seed-063.c delete mode 100644 test/csmith-corpus/seed-064.c delete mode 100644 test/csmith-corpus/seed-065.c delete mode 100644 test/csmith-corpus/seed-066.c delete mode 100644 test/csmith-corpus/seed-067.c delete mode 100644 test/csmith-corpus/seed-068.c delete mode 100644 test/csmith-corpus/seed-069.c delete mode 100644 test/csmith-corpus/seed-070.c delete mode 100644 test/csmith-corpus/seed-071.c delete mode 100644 test/csmith-corpus/seed-072.c delete mode 100644 test/csmith-corpus/seed-073.c delete mode 100644 test/csmith-corpus/seed-074.c delete mode 100644 test/csmith-corpus/seed-075.c delete mode 100644 test/csmith-corpus/seed-076.c delete mode 100644 test/csmith-corpus/seed-077.c delete mode 100644 test/csmith-corpus/seed-078.c delete mode 100644 test/csmith-corpus/seed-079.c delete mode 100644 test/csmith-corpus/seed-080.c delete mode 100644 test/csmith-corpus/seed-081.c delete mode 100644 test/csmith-corpus/seed-082.c delete mode 100644 test/csmith-corpus/seed-083.c delete mode 100644 test/csmith-corpus/seed-084.c delete mode 100644 test/csmith-corpus/seed-085.c delete mode 100644 
test/csmith-corpus/seed-086.c delete mode 100644 test/csmith-corpus/seed-087.c delete mode 100644 test/csmith-corpus/seed-088.c delete mode 100644 test/csmith-corpus/seed-089.c delete mode 100644 test/csmith-corpus/seed-090.c delete mode 100644 test/csmith-corpus/seed-091.c delete mode 100644 test/csmith-corpus/seed-092.c delete mode 100644 test/csmith-corpus/seed-093.c delete mode 100644 test/csmith-corpus/seed-094.c delete mode 100644 test/csmith-corpus/seed-095.c delete mode 100644 test/csmith-corpus/seed-096.c delete mode 100644 test/csmith-corpus/seed-097.c delete mode 100644 test/csmith-corpus/seed-098.c delete mode 100644 test/csmith-corpus/seed-099.c delete mode 100644 test/csmith-corpus/seed-100.c delete mode 100644 test/csmith-fixture.ts delete mode 100644 test/csmith-fixtures/seed-001.json.gz delete mode 100644 test/csmith-fixtures/seed-002.json.gz delete mode 100644 test/csmith-fixtures/seed-003.json.gz delete mode 100644 test/csmith-fixtures/seed-004.json.gz delete mode 100644 test/csmith-fixtures/seed-005.json.gz delete mode 100644 test/csmith-fixtures/seed-006.json.gz delete mode 100644 test/csmith-fixtures/seed-007.json.gz delete mode 100644 test/csmith-fixtures/seed-008.json.gz delete mode 100644 test/csmith-fixtures/seed-009.json.gz delete mode 100644 test/csmith-fixtures/seed-010.json.gz delete mode 100644 test/csmith-fixtures/seed-011.json.gz delete mode 100644 test/csmith-fixtures/seed-012.json.gz delete mode 100644 test/csmith-fixtures/seed-013.json.gz delete mode 100644 test/csmith-fixtures/seed-014.json.gz delete mode 100644 test/csmith-fixtures/seed-015.json.gz delete mode 100644 test/csmith-fixtures/seed-016.json.gz delete mode 100644 test/csmith-fixtures/seed-017.json.gz delete mode 100644 test/csmith-fixtures/seed-018.json.gz delete mode 100644 test/csmith-fixtures/seed-019.json.gz delete mode 100644 test/csmith-fixtures/seed-020.json.gz delete mode 100644 test/csmith-fixtures/seed-021.json.gz delete mode 100644 
test/csmith-fixtures/seed-022.json.gz delete mode 100644 test/csmith-fixtures/seed-023.json.gz delete mode 100644 test/csmith-fixtures/seed-024.json.gz delete mode 100644 test/csmith-fixtures/seed-025.json.gz delete mode 100644 test/csmith-fixtures/seed-026.json.gz delete mode 100644 test/csmith-fixtures/seed-027.json.gz delete mode 100644 test/csmith-fixtures/seed-028.json.gz delete mode 100644 test/csmith-fixtures/seed-029.json.gz delete mode 100644 test/csmith-fixtures/seed-030.json.gz delete mode 100644 test/csmith-fixtures/seed-031.json.gz delete mode 100644 test/csmith-fixtures/seed-032.json.gz delete mode 100644 test/csmith-fixtures/seed-033.json.gz delete mode 100644 test/csmith-fixtures/seed-034.json.gz delete mode 100644 test/csmith-fixtures/seed-035.json.gz delete mode 100644 test/csmith-fixtures/seed-036.json.gz delete mode 100644 test/csmith-fixtures/seed-037.json.gz delete mode 100644 test/csmith-fixtures/seed-038.json.gz delete mode 100644 test/csmith-fixtures/seed-039.json.gz delete mode 100644 test/csmith-fixtures/seed-040.json.gz delete mode 100644 test/csmith-fixtures/seed-041.json.gz delete mode 100644 test/csmith-fixtures/seed-042.json.gz delete mode 100644 test/csmith-fixtures/seed-043.json.gz delete mode 100644 test/csmith-fixtures/seed-044.json.gz delete mode 100644 test/csmith-fixtures/seed-045.json.gz delete mode 100644 test/csmith-fixtures/seed-046.json.gz delete mode 100644 test/csmith-fixtures/seed-047.json.gz delete mode 100644 test/csmith-fixtures/seed-048.json.gz delete mode 100644 test/csmith-fixtures/seed-049.json.gz delete mode 100644 test/csmith-fixtures/seed-050.json.gz delete mode 100644 test/csmith-fixtures/seed-051.json.gz delete mode 100644 test/csmith-fixtures/seed-052.json.gz delete mode 100644 test/csmith-fixtures/seed-053.json.gz delete mode 100644 test/csmith-fixtures/seed-054.json.gz delete mode 100644 test/csmith-fixtures/seed-055.json.gz delete mode 100644 test/csmith-fixtures/seed-056.json.gz delete mode 100644 
test/csmith-fixtures/seed-057.json.gz delete mode 100644 test/csmith-fixtures/seed-058.json.gz delete mode 100644 test/csmith-fixtures/seed-059.json.gz delete mode 100644 test/csmith-fixtures/seed-060.json.gz delete mode 100644 test/csmith-fixtures/seed-061.json.gz delete mode 100644 test/csmith-fixtures/seed-062.json.gz delete mode 100644 test/csmith-fixtures/seed-063.json.gz delete mode 100644 test/csmith-fixtures/seed-064.json.gz delete mode 100644 test/csmith-fixtures/seed-065.json.gz delete mode 100644 test/csmith-fixtures/seed-066.json.gz delete mode 100644 test/csmith-fixtures/seed-067.json.gz delete mode 100644 test/csmith-fixtures/seed-068.json.gz delete mode 100644 test/csmith-fixtures/seed-069.json.gz delete mode 100644 test/csmith-fixtures/seed-070.json.gz delete mode 100644 test/csmith-fixtures/seed-071.json.gz delete mode 100644 test/csmith-fixtures/seed-072.json.gz delete mode 100644 test/csmith-fixtures/seed-073.json.gz delete mode 100644 test/csmith-fixtures/seed-074.json.gz delete mode 100644 test/csmith-fixtures/seed-075.json.gz delete mode 100644 test/csmith-fixtures/seed-076.json.gz delete mode 100644 test/csmith-fixtures/seed-077.json.gz delete mode 100644 test/csmith-fixtures/seed-078.json.gz delete mode 100644 test/csmith-fixtures/seed-079.json.gz delete mode 100644 test/csmith-fixtures/seed-080.json.gz delete mode 100644 test/csmith-fixtures/seed-081.json.gz delete mode 100644 test/csmith-fixtures/seed-082.json.gz delete mode 100644 test/csmith-fixtures/seed-083.json.gz delete mode 100644 test/csmith-fixtures/seed-084.json.gz delete mode 100644 test/csmith-fixtures/seed-085.json.gz delete mode 100644 test/csmith-fixtures/seed-086.json.gz delete mode 100644 test/csmith-fixtures/seed-087.json.gz delete mode 100644 test/csmith-fixtures/seed-088.json.gz delete mode 100644 test/csmith-fixtures/seed-089.json.gz delete mode 100644 test/csmith-fixtures/seed-090.json.gz delete mode 100644 test/csmith-fixtures/seed-091.json.gz delete mode 100644 
test/csmith-fixtures/seed-092.json.gz delete mode 100644 test/csmith-fixtures/seed-093.json.gz delete mode 100644 test/csmith-fixtures/seed-094.json.gz delete mode 100644 test/csmith-fixtures/seed-095.json.gz delete mode 100644 test/csmith-fixtures/seed-096.json.gz delete mode 100644 test/csmith-fixtures/seed-097.json.gz delete mode 100644 test/csmith-fixtures/seed-098.json.gz delete mode 100644 test/csmith-fixtures/seed-099.json.gz delete mode 100644 test/csmith-fixtures/seed-100.json.gz delete mode 100644 test/csmith-gen.ts delete mode 100644 test/csmith.test.ts delete mode 100644 test/spec/path-dispatch.tsv diff --git a/README.md b/README.md index 8f71473..382ca2a 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,14 @@ # @jsonic/c A [Jsonic](https://jsonic.senecajs.org) plugin that parses **C source code** -into a **concrete syntax tree** — preserving every token, comment, macro -definition, macro use, and compiler extension as-is. +into a **concrete syntax tree** — preserving every token and comment as-is. -Targets **C23** plus the common **GCC / Clang / MSVC** extensions, with -best-effort handling of preprocessor conditional groups. +Targets **plain C23**: keywords, punctuators, literals, declarations / +definitions / statements / expressions, C23 attributes (`[[…]]`), typedef +tracking. Compiler-extension support (preprocessor, GCC `__attribute__` / +`__asm__`, MSVC `__declspec`, etc.) is not currently shipped — the +previous extended-mode implementation has been removed pending a +clean-slate rewrite. ## Quick start @@ -30,24 +33,20 @@ positions are preserved on token spans). ## Architecture - **Focused lex matchers** (`src/matchers.ts`): one matcher per concept — - whitespace, line continuation, line/block comments, preprocessor - directive opener (line-start gated), directive newline, header name, - identifier (with keyword/typedef-name/macro-name reclassification), - integer/float/char/string literals, and longest-match punctuator - dispatch. 
- -- **Symbol & macro tables** (`src/symbols.ts`): scope stack and macro - lookup live on `ctx.meta.cmeta` so both lex matchers and rule - actions share state. Lex matchers consult the tables when - classifying identifiers; rule actions register names when they - finalize a `typedef` or `#define`. Pre-lexed lookahead tokens are - reclassified in place so the very next match sees the updated - classification immediately. - -- **Token catalog** (`src/tokens.ts`): every C23 keyword, every - compiler-extension keyword, and every punctuator gets its own named - token. Grammar rules and structuring code reference these names - directly. + whitespace, line continuation, line/block comments, identifier (with + keyword/typedef-name reclassification), integer/float/char/string + literals, and longest-match punctuator dispatch. + +- **Symbol table** (`src/symbols.ts`): scope stack lives on + `ctx.meta.cmeta` so both lex matchers and rule actions share state. + Lex matchers consult the table when classifying identifiers; rule + actions register names when they finalise a `typedef`. Pre-lexed + lookahead tokens are reclassified in place so the very next match + sees the updated classification immediately. + +- **Token catalog** (`src/tokens.ts`): every C23 keyword and every + punctuator gets its own named token. Grammar rules and structuring + code reference these names directly. - **Declarative grammar** (`c-grammar.jsonic`): the rule shapes for the entire C surface — translation unit, external declarations, @@ -59,56 +58,19 @@ positions are preserved on token spans). - **Pratt-style expressions** via [`@jsonic/expr`](https://www.npmjs.com/package/@jsonic/expr): the `val` rule absorbs C atoms (`LIT_INT` / `LIT_FLOAT` / `LIT_CHAR` - / `LIT_STRING` / `ID` / `MACRO_NAME` / `TYPEDEF_NAME` / `KW_NULLPTR` - / `KW_TRUE` / `KW_FALSE`), then `@jsonic/expr`'s pratt logic - drives infix / prefix / suffix operator precedence. 
Custom val - open-alts handle the C-only constructs that aren't simple - operators: `sizeof ( type )` / cast / compound literal / `_Generic` - / GCC statement-expression / brace initializer list / adjacent - string concatenation. - -- **Conditional-group folding** (`src/conditional-groups.ts`): a - translation-unit-level post-pass that collapses contiguous runs - of `#if`/`#ifdef` … `#elif`/`#else` … `#endif` into a single - `conditional_group` node. Self-contained — operates only on - already-parsed `conditional_directive` nodes. - -- **Hybrid dispatch + legacy fallback** (`src/structure.ts`, - `src/expr.ts`): the `external_declaration` cascading wildcard - alts dispatch to `simple_declaration` (or to typed - preprocessor / asm / static_assert sub-rules) whenever - `@looks-simple-decl` recognises the head; otherwise the chomp - loop falls through to a recursive-descent post-processor in - `structure.ts`. Shapes covered by the new path: - - simple declarations (storage prefix, multi-keyword type, - pointer / array, function declarator, function definition) - - tagged-type specifiers (struct / union / enum, including - standalone definitions and C23 fixed-underlying-type enums) - - attribute specs (GCC / MSVC / C23, leading + between-specs - insertion points) - - top-level preprocessor directives (#define, #include, #if - family, #pragma / #error / #warning / #undef / #line) - - top-level GCC `__asm__` - - all expression and statement forms - - Plain function pointers `int (*fp)(int);` and top-level - `static_assert(cond, msg);` moved onto the grammar path in 2.0. - K&R-style function definitions (`int f(a, b) int a; long b; { - … }`) moved onto the grammar path in 2.1; the inter-paren-and- - brace declaration list is preserved as a flat - `kr_declaration_list` node, matching the legacy CST shape. 
- Complex compound declarators — arrays of fn-pointers (`int - (*arr[N])(int);`), function returning ptr-to-array (`int - (*get())[10];`), nested paren-forms (`int (*(*fpp))(int);`), - and leading-pointer types with paren-form declarators (`char - *(*foo[3])(int);`) — moved onto the grammar path in 2.2; - `paren_inner_declarator` now recurses, dispatches inner - postfixes, and accepts a leading-pointer-type entry from - `init_declarator`'s close. - - Both paths produce identical CST shapes; the - `@jsonic/expr`-driven `val` handles initializer expressions in - either case. + / `LIT_STRING` / `ID` / `TYPEDEF_NAME` / `KW_NULLPTR` / `KW_TRUE` / + `KW_FALSE`), then `@jsonic/expr`'s pratt logic drives infix / + prefix / suffix operator precedence. Custom val open-alts handle + the C constructs that aren't simple operators: `sizeof ( type )` / + cast / compound literal / `_Generic` / brace initialiser list / + adjacent string concatenation. + +The parser is grammar-only: every external declaration flows through +`simple_declaration` (or `static_assert_declaration` for top-level +`static_assert`). There is no chomp fallback, no post-process +structurer, and no extension dispatch — the dispatch alts at the top +of `external_declaration` accept exactly the heads the grammar can +parse, and anything else is a parse error. ## Concrete-syntax shapes @@ -117,9 +79,6 @@ fields. Highlights: ``` translation_unit - conditional_group (#if … #elif … #else … #endif folded) - branches: conditional_branch { branchKind, directive, body } - endif external_declaration { declKind: 'declaration'|'function_definition' } declaration_specifiers attribute_spec, struct_specifier, union_specifier, enum_specifier @@ -135,22 +94,17 @@ translation_unit parameter_type_list { variadic? } parameter_declaration { declaredName } identifier_list (K&R) - asm_label?, attribute_spec? 
'=' initializer kr_declaration_list (K&R fn-def: flat token-refs between the param `)` and the body `{`) static_assert_declaration { condition, message? } - define_directive { macroName, macroKind, macroParams?, macroVariadic? } - include_directive { includeForm, headerKind, headerName } - conditional_directive { directive } - pragma_directive / error_directive / warning_directive / undef_directive compound_statement declaration | statement if_statement, switch_statement, while_statement, do_statement, for_statement (for_controls), labeled_statement { labelKind, labelName? }, jump_statement { jumpKind }, - expression_statement, asm_statement, preprocessor_line + expression_statement ``` ### Expression shapes (Pratt-parsed via @jsonic/expr) @@ -168,7 +122,7 @@ the per-kind CST shapes below. literal_expression { literalKind, value } identifier_expression { name } paren_expression -call_expression { callee, isMacro } +call_expression { callee } argument_list subscript_expression { target, index_list } member_expression { object, op ('.'|'->'), memberName } @@ -182,7 +136,6 @@ comma_expression generic_selection generic_controlling_expression { expression } generic_association { associationKind, typeName?, value } -statement_expression // GCC ({ ... }) compound_literal { typeName, initializer_list } initializer_list initializer_item { designation?, value } @@ -197,58 +150,45 @@ C's classic ambiguity (an identifier may name a typedef OR a variable) is resolved at lex time. The identifier matcher consults `SymbolTable.isTypedef(word)` and emits **TYPEDEF_NAME** instead of **ID** for every typedef'd name. After a `typedef int T;` declaration -finalizes, the symbol table is updated AND any pre-fetched lookahead +finalises, the symbol table is updated AND any pre-fetched lookahead tokens carrying that name are reclassified in place, so the next declaration sees the new classification regardless of jsonic's arbitrary-lookahead. 
-A parallel **macro table** records `#define`d names. Identifiers seen -earlier in a `#define` lex as **MACRO_NAME**, and `call_expression` -nodes carrying such a callee get `isMacro: true` so consumers can -distinguish a macro invocation from a real function call without -re-querying any table. `#undef` removes the entry. - Full **nested scoping** (file / function-prototype / function-body / block / struct-or-union / enum / for-init) is implemented in `SymbolTable`. Inner non-typedef bindings shadow outer typedefs. -## Preprocessor - -Each `#-line` is its own structured directive node (see shapes above). -A translation-unit-level post-pass folds the flat sequence of -`#if`/`#ifdef`/`#ifndef` … (`#elif`…)\* (`#else`)? … `#endif` into a -single `conditional_group` containing typed branches. Best-effort: -unmatched `#endif` or unterminated `#if` leaves the surrounding -sequence flat. Nested `#if … #endif` inside a branch is recursively -grouped. - -`#define` directives populate `ctx.meta.cmeta.macros`; `#undef` -removes. The macro table is the single source of truth used by lex-time -**MACRO_NAME** tagging. - -## Attributes (all three forms structured) +## Compound declarators ``` -attribute_spec { attributeForm: 'gcc'|'msvc'|'c23', items } - attribute_item { attributeName, attributePrefix?, argumentList? } - attribute_argument_list // Pratt-parsed args +int (*fp)(int); // pointer to function +int (**fp)(int); // pointer to pointer to function +int (*p)[10]; // pointer to array +int (*arr[3])(int); // array of fn-pointers +int (*get())[10]; // fn returning ptr-to-array +int (*(*fpp))(int); // nested paren-form +char *(*foo[3])(int); // leading-pointer type with paren-form ``` -`__attribute__((noreturn, format(printf, 1, 2)))`, -`__declspec(dllexport)`, and C23 `[[gnu::pure]]` / -`[[deprecated("reason")]]` all produce the same item shape. 
+`paren_inner_declarator` recurses for nested paren-forms and +dispatches `array_postfix` / `function_postfix` for inner postfixes +that bind to the inner direct_declarator. `init_declarator`'s close +accepts a paren-form after a leading pointer prefix so the +leading-pointer-type shape (`char *(*foo)(int);`) flows through +without falling into a function-postfix interpretation. -## GCC inline assembly +## K&R function definitions ``` -asm_statement { qualifiers } - asm_template { expression } - asm_outputs? asm_operand { asmName?, constraint, value { expression } } - asm_inputs? - asm_clobbers? asm_clobber { value } - asm_labels? asm_label_ref { labelName } +int f(a, b) int a; long b; { return a + b; } ``` +The identifier-list parameter list is captured by `function_postfix`'s +`identifier_list` alt; the parameter-type declarations between `)` +and `{` are absorbed by `kr_declaration_list` as a flat token-ref +sequence (no inner declaration structuring). + ## for-loop controls ``` @@ -258,86 +198,6 @@ for_controls for_iter { value: | empty } ``` -## Coverage and known limitations - -The parser handles every shape in the CSmith-generated regression -corpus (100 random C programs) plus a hand-curated stress sweep -(GCC `__attribute__`, C23 `nullptr` / `[[nodiscard]]` / `_BitInt`, -nested preprocessor `#if` chains, line-continuation in macro -bodies, function pointers, GCC inline assembly with operand -sections, struct bitfields with anonymous unions, designated and -indexed initialisers). - -Known fall-throughs that produce a `declKind: 'unknown'` external -declaration rather than a structured one (still parseable, source -fidelity preserved): - -- GCC `__extern_inline` declarations gated on a `__USE_EXTERN_INLINES` - feature macro that hasn't been `#define`d. - -The first parse of `(struct point){ … }` (compound literal with a -struct-tagged type) inside a function body is not yet structured — -the struct-tagged type isn't in the new path's `SIMPLE_TYPE_HEAD` -set. 
Top-level brace initialisers on struct types (`struct point p -= { … };`) work because they go through the legacy fallback. - -## Architecture history - -The parser shipped through a 14-phase migration from a pure -chomp-and-post-process design to the current near-pure-grammar -hybrid: - -- **A** install `@jsonic/expr`; `val` accepts C atoms with the - evaluate callback emitting the public CST shapes. -- **B** `simple_declaration` family + statement family — - `block_item` / `statement` / `expression_statement` / - `jump_statement` / `if`/`while`/`do`/`switch`/`for` / - `labeled_statement` / `asm_statement` / `preprocessor_line`. -- **C** `val` open-alts for type-name constructs: - `type_name` / `sizeof_type_form` / `cast_or_compound_literal` / - `initializer_list` (with `designation` / `designator`) / - `generic_selection` / `statement_expression` / `string_atom` / - structured `asm_statement`. -- **D** cutover gates: deep-lookahead body validation - (`fetchDeep()` drives `ctx.lex` directly so the body-supportedness - check walks past the closing `}` of any function body), all - unit tests passing on the new path, csmith fixtures regenerated. - Shipped as `0.2.0`. -- **F** struct / union / enum specifiers + members + bitfields + - enumerators, dispatched from `simple_declaration` / `spec_loop`. -- **G** attribute specs (3 forms × leading + between-specs - insertion points). -- **H** top-level preprocessor directives — define / undef / - include / conditional / pragma / error / warning / line — with - macro registration on `cmeta.macros`, header-name lex-mode - feedback, and the typed sub-rules wrapped under - `external_declaration`. -- **I** top-level GCC `__asm__`. (`static_assert` grammar rule - defined; top-level dispatch deferred pending comma-op gating.) -- **K** `structureConditionalGroups` extracted to its own - module — a self-contained translation-unit-level post-pass. 
-- **L** standalone struct / enum definitions through grammar - (`@looks-simple-decl` walks past tagged-type bodies). -- **N** ship `1.0.0`. -- **P** parenthesised sub-declarators (function pointers): - `paren_inner_declarator` rule + `@looks-simple-decl` paren-walk - branch. Shapes like `int (*fp)(int);` and - `typedef int (*Fn)(int);` flow through the grammar. -- **O** vendor `@jsonic/expr` under `vendor/jsonic-expr/` and - add a `n.no_comma_op` bail in `val.close` / `expr.close` that - matches the comma op by src. Top-level `static_assert(cond, msg)` - dispatches into the existing `static_assert_declaration` rule - with the flag set, so the `,` lands as a separator instead of - the comma operator. -- **N₂** ship `2.0.0` declaring the hybrid as the final - architecture. - -The legacy chomp + `structureExternalDeclaration` fallback -remains by design for the long-tail shapes — K&R parameter lists -and complex compound declarators beyond simple function pointers. -Both paths emit identical CST nodes, so consumers see one tree -regardless of which path produced it. - ## License MIT. Copyright (c) 2026 Richard Rodger and contributors. diff --git a/c-grammar.jsonic b/c-grammar.jsonic index d602465..ed5baf8 100644 --- a/c-grammar.jsonic +++ b/c-grammar.jsonic @@ -52,94 +52,36 @@ # external_declaration # - # Phase B1 dispatch: if the head token is a recognised simple type - # specifier (currently only KW_INT, broadens later), descend into - # int_declaration which parses through proper grammar (with val - # for initializers via @jsonic/expr). Otherwise fall through to - # the legacy chomp path that absorbs tokens for post-process - # structuring. + # The head token's class picks the dispatch: + # - KW_STATIC_ASSERT / KW__STATIC_ASSERT → static_assert_declaration + # - KW__BITINT, #STORAGE_PREFIX, #SIMPLE_TYPE_HEAD, `[[` → simple_declaration + # Anything else fails — plain C has no chomp fallback. 
external_declaration: { open: [ { s: '#ZZ' b: 1 g: 'extdecl-eof' } - # [extension: preprocessor] PP_HASH dispatches to preprocessor_directive. - { s: 'PP_HASH PP_HASH' c: '@ext-and-first-iter' b: 2 - p: 'preprocessor_directive' a: '@mark-new-path' - g: 'extdecl-pp-2' } - { s: 'PP_HASH #ANY_C_TOKEN' c: '@ext-and-first-iter' b: 2 - p: 'preprocessor_directive' a: '@mark-new-path' - g: 'extdecl-pp' } - # Phase O: top-level static_assert dispatches into the + # Top-level static_assert dispatches into the # static_assert_declaration grammar rule. The cond / msg # vals are pushed with n.no_comma_op set so the vendored # @jsonic/expr's expr.close bails at `,` rather than # treating it as the comma operator. { s: 'KW_STATIC_ASSERT' c: '@is-first-iter' b: 1 - p: 'static_assert_declaration' a: '@mark-new-path' - g: 'extdecl-sa' } + p: 'static_assert_declaration' g: 'extdecl-sa' } { s: 'KW__STATIC_ASSERT' c: '@is-first-iter' b: 1 - p: 'static_assert_declaration' a: '@mark-new-path' - g: 'extdecl-sa-1' } - # [extension: gcc-asm] top-level inline assembly block. - { s: 'KW_ASM' c: '@ext-and-first-iter' b: 1 - p: 'asm_statement' a: '@mark-new-path' - g: 'extdecl-asm' } - { s: 'KW___ASM' c: '@ext-and-first-iter' b: 1 - p: 'asm_statement' a: '@mark-new-path' - g: 'extdecl-asm-1' } - { s: 'KW___ASM__' c: '@ext-and-first-iter' b: 1 - p: 'asm_statement' a: '@mark-new-path' - g: 'extdecl-asm-2' } - # Plain-mode direct dispatches. When the head token clearly - # starts a declaration, push simple_declaration without a - # lookahead validator — the rule's own alts and per-rule k - # state disambiguate the actual shape. These run only when - # `extended: false` so we don't bypass the wildcard alts' - # @looks-simple-decl + isFunctionBodySupported gate that - # routes asm-body / pp-line function definitions to the - # legacy structuring path (those constructs only matter in - # extended mode anyway). 
- { s: '#SIMPLE_TYPE_HEAD' c: '@plain-and-first-iter' b: 1 - p: 'simple_declaration' a: '@mark-new-path' - g: 'extdecl-plain-type' } - { s: '#STORAGE_PREFIX' c: '@plain-and-first-iter' b: 1 - p: 'simple_declaration' a: '@mark-new-path' - g: 'extdecl-plain-storage' } - { s: 'KW__BITINT' c: '@plain-and-first-iter' b: 1 - p: 'simple_declaration' a: '@mark-new-path' - g: 'extdecl-plain-bitint' } - { s: 'PUNC_LBRACKET PUNC_LBRACKET' c: '@plain-as23-and-first' - b: 2 p: 'simple_declaration' a: '@mark-new-path' - g: 'extdecl-plain-c23-attr' } - # Phase B2.3 dispatch: cascading wildcard-token alts. Each one - # matches a fixed number of tokens to force lookahead, then the - # @looks-simple-decl cond validates the actual shape — optional - # storage prefix, 1+ simple type specifiers, an ID, and a `;` or - # `=` terminator. b: N back-steps all matched tokens so - # simple_declaration sees them as t0..t(N-1). - # Longest alts first so multi-keyword forms win over shorter - # shapes that would have stopped at the wrong ID. - # Gate: only on the first iteration of an external_declaration - # so the chomp's r:-recursion doesn't re-fire mid-declaration. - { s: '#ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN' - c: '@looks-simple-decl' b: 6 - p: 'simple_declaration' a: '@mark-new-path' g: 'extdecl-new-decl-6' } - { s: '#ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN' - c: '@looks-simple-decl' b: 5 - p: 'simple_declaration' a: '@mark-new-path' g: 'extdecl-new-decl-5' } - { s: '#ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN' - c: '@looks-simple-decl' b: 4 - p: 'simple_declaration' a: '@mark-new-path' g: 'extdecl-new-decl-4' } - { s: '#ANY_C_TOKEN #ANY_C_TOKEN #ANY_C_TOKEN' - c: '@looks-simple-decl' b: 3 - p: 'simple_declaration' a: '@mark-new-path' g: 'extdecl-new-decl-3' } - { s: '#ANY_C_TOKEN' a: '@absorb-token' g: 'extdecl-tok' } + p: 'static_assert_declaration' g: 'extdecl-sa-1' } + # Direct dispatches. 
When the head token clearly starts a + # declaration, descend into simple_declaration; the rule's own + # alts and per-rule k state disambiguate the actual shape. + { s: '#SIMPLE_TYPE_HEAD' c: '@is-first-iter' b: 1 + p: 'simple_declaration' g: 'extdecl-type' } + { s: '#STORAGE_PREFIX' c: '@is-first-iter' b: 1 + p: 'simple_declaration' g: 'extdecl-storage' } + { s: 'KW__BITINT' c: '@is-first-iter' b: 1 + p: 'simple_declaration' g: 'extdecl-bitint' } + { s: 'PUNC_LBRACKET PUNC_LBRACKET' c: '@as23-and-first' + b: 2 p: 'simple_declaration' g: 'extdecl-c23-attr' } ] close: [ - { c: '@new-path' a: '@finalize-new-path' g: 'extdecl-new-end' } - { s: '#ZZ' b: 1 a: '@finalize-extdecl' g: 'extdecl-finish-eof' } - { c: '@just-closed-and-decl-ahead' a: '@finalize-extdecl' g: 'extdecl-finish-block' } - { c: '@terminated' a: '@finalize-extdecl' g: 'extdecl-finish' } - { r: 'external_declaration' g: 'extdecl-more' } + { a: '@finalize-new-path' g: 'extdecl-end' } ] } @@ -162,17 +104,9 @@ # has wired up for full C precedence). simple_declaration: { open: [ - # Leading C23 attribute spec — plain C23. + # Leading C23 attribute spec. { s: 'PUNC_LBRACKET PUNC_LBRACKET' c: '@as23-adjacent-open' b: 2 p: 'spec_loop' g: 'simple-decl-attr-c23' } - # [extension: gcc-attr] leading GCC __attribute__((…)) spec. - { s: 'KW___ATTRIBUTE__' c: '@extended-on' b: 1 p: 'spec_loop' - g: 'simple-decl-attr-gcc' } - { s: 'KW___ATTRIBUTE' c: '@extended-on' b: 1 p: 'spec_loop' - g: 'simple-decl-attr-gcc-1' } - # [extension: msvc-attr] leading __declspec(…) spec. - { s: 'KW___DECLSPEC' c: '@extended-on' b: 1 p: 'spec_loop' - g: 'simple-decl-attr-msvc' } { s: '#STORAGE_PREFIX' a: '@absorb-spec-storage' p: 'spec_loop' g: 'simple-decl-storage' } # Tagged-type heads dispatch into struct_specifier / @@ -250,19 +184,10 @@ # owning declaration_specifiers list. spec_loop: { open: [ - # Attribute specs interleave freely with simple specifiers and - # tagged-type heads. 
C23 [[…]] is plain; GCC __attribute__ / - # __attribute / MSVC __declspec are extensions. + # C23 attribute spec [[…]] interleaves with simple specifiers + # and tagged-type heads. { s: 'PUNC_LBRACKET PUNC_LBRACKET' c: '@as23-adjacent-open' b: 2 p: 'attribute_spec_c23' g: 'spec-loop-attr-c23' } - # [extension: gcc-attr] - { s: 'KW___ATTRIBUTE__' c: '@extended-on' b: 1 - p: 'attribute_spec_gcc' g: 'spec-loop-attr-gcc' } - { s: 'KW___ATTRIBUTE' c: '@extended-on' b: 1 - p: 'attribute_spec_gcc' g: 'spec-loop-attr-gcc-1' } - # [extension: msvc-attr] - { s: 'KW___DECLSPEC' c: '@extended-on' b: 1 - p: 'attribute_spec_msvc' g: 'spec-loop-attr-msvc' } # Tagged-type heads dispatch into struct_specifier / # enum_specifier. These must come BEFORE #SIMPLE_TYPE_HEAD # because KW_STRUCT/KW_UNION/KW_ENUM are members of that set @@ -283,19 +208,8 @@ { s: [] g: 'spec-loop-empty' } ] close: [ - # See open above for the plain-vs-extension split. { s: 'PUNC_LBRACKET PUNC_LBRACKET' c: '@as23-adjacent-open' b: 2 p: 'attribute_spec_c23' g: 'spec-loop-more-attr-c23' } - # [extension: gcc-attr] - { s: 'KW___ATTRIBUTE__' c: '@extended-on' b: 1 - p: 'attribute_spec_gcc' g: 'spec-loop-more-attr-gcc' } - { s: 'KW___ATTRIBUTE' c: '@extended-on' b: 1 - p: 'attribute_spec_gcc' g: 'spec-loop-more-attr-gcc-1' } - # [extension: msvc-attr] - { s: 'KW___DECLSPEC' c: '@extended-on' b: 1 - p: 'attribute_spec_msvc' g: 'spec-loop-more-attr-msvc' } - # Tagged-type heads must come before #SIMPLE_TYPE_HEAD here - # too (see open above for rationale). 
{ s: 'KW_STRUCT' b: 1 p: 'struct_specifier' g: 'spec-loop-more-struct' } { s: 'KW_UNION' b: 1 p: 'struct_specifier' g: 'spec-loop-more-union' } { s: 'KW_ENUM' b: 1 p: 'enum_specifier' g: 'spec-loop-more-enum' } @@ -753,17 +667,6 @@ { s: 'KW_BREAK' b: 1 p: 'jump_statement' g: 'stmt-break' } { s: 'KW_CONTINUE' b: 1 p: 'jump_statement' g: 'stmt-continue' } { s: 'KW_GOTO' b: 1 p: 'jump_statement' g: 'stmt-goto' } - # [extension: gcc-asm] inline assembly inside a body - { s: 'KW_ASM' c: '@extended-on' b: 1 p: 'asm_statement' - g: 'stmt-asm' } - { s: 'KW___ASM' c: '@extended-on' b: 1 p: 'asm_statement' - g: 'stmt-asm-1' } - { s: 'KW___ASM__' c: '@extended-on' b: 1 p: 'asm_statement' - g: 'stmt-asm-2' } - # [extension: preprocessor] preprocessor line inside a body - # (rare but legal). - { s: 'PP_HASH' c: '@extended-on' b: 1 p: 'preprocessor_line' - g: 'stmt-pp' } # Expression statement (default fallthrough) { p: 'expression_statement' g: 'stmt-expr' } ] @@ -966,158 +869,6 @@ ] } - # ---- asm_statement (phase B4.2.4, opaque-token form) ------------ - # - # GCC inline asm: `__asm__ volatile? goto? ( template : … ) ;`. - # Phase B4.2.4 captures the whole statement as a flat token-list - # under an asm_statement node — qualifiers / template / operand - # sections are NOT yet broken out (that's a follow-up). The shape - # is enough to unblock the body-supportedness gate. - # asm_statement (phase C.8 — structured form): - # * (