From 07c42f83a0a75a14e9c99660e547b7d64f5cb12d Mon Sep 17 00:00:00 2001 From: Alex Kroman Date: Tue, 23 Jun 2026 13:20:42 -0700 Subject: [PATCH] Fix assembly control: helper failed to build on current macOS SDK The bundled macOS UI-control helper (macos_ui_control.swift, added in #271) called CGDisplayCreateImage, which is unavailable in current macOS SDKs (removed in favor of ScreenCaptureKit). Since helper.py compiles the helper with swiftc on first use, `assembly control` died with "Could not build the macOS UI-control helper" on any modern Mac. The file was also never wired into the gate, so neither the build break nor its swiftlint violations were caught. - screenshot() now shells out to /usr/sbin/screencapture (stable across SDKs, same {ok, path} contract) instead of CGDisplayCreateImage/NSBitmapImageRep. - Fix swiftlint --strict violations: scope the CFGetTypeID-guarded force casts with a documented disable, collapse handle()'s per-action guards into a generic withArg helper (complexity 12 -> under 10), fix brace placement and trailing commas. - Add .swiftlint.yml allowing the conventional short names x/y/ok/up/id (x/y are the JSON keys shared with helper.py and can't be renamed). - Wire both swift helpers into check.sh's swiftlint + compile gates so an SDK-unavailable API can't slip in unbuilt again. Co-Authored-By: Claude Opus 4.8 (1M context) --- .swiftlint.yml | 10 ++++ aai_cli/control/macos_ui_control.swift | 65 ++++++++++++++------------ scripts/check.sh | 18 +++++-- 3 files changed, 61 insertions(+), 32 deletions(-) create mode 100644 .swiftlint.yml diff --git a/.swiftlint.yml b/.swiftlint.yml new file mode 100644 index 00000000..16c5ec68 --- /dev/null +++ b/.swiftlint.yml @@ -0,0 +1,10 @@ +# Coordinate and status names that read better short than padded to three +# characters. `x`/`y` are also the on-the-wire JSON keys the macOS UI-control +# helper shares with aai_cli/control/helper.py, so they can't be renamed. 
+identifier_name: + excluded: + - id + - x + - y + - ok + - up diff --git a/aai_cli/control/macos_ui_control.swift b/aai_cli/control/macos_ui_control.swift index 3e0225e0..fff16e9f 100644 --- a/aai_cli/control/macos_ui_control.swift +++ b/aai_cli/control/macos_ui_control.swift @@ -21,7 +21,7 @@ let keyCodes: [String: CGKeyCode] = [ "u": 32, "i": 34, "p": 35, "l": 37, "j": 38, "k": 40, "n": 45, "m": 46, "return": 36, "enter": 36, "tab": 48, "space": 49, "delete": 51, "backspace": 51, "escape": 53, "esc": 53, "left": 123, "right": 124, "down": 125, "up": 126, - "home": 115, "end": 119, "pageup": 116, "pagedown": 121, + "home": 115, "end": 119, "pageup": 116, "pagedown": 121 ] // Modifier names key_combo accepts, mapped to CGEvent flags. @@ -30,7 +30,7 @@ let modifierFlags: [String: CGEventFlags] = [ "shift": .maskShift, "ctrl": .maskControl, "control": .maskControl, "alt": .maskAlternate, "option": .maskAlternate, "opt": .maskAlternate, - "fn": .maskSecondaryFn, + "fn": .maskSecondaryFn ] // One request line: the action name plus every argument any action may carry @@ -173,7 +173,12 @@ func elementFrame(_ element: AXUIElement) -> CGRect? { else { return nil } + // The CFGetTypeID checks above are the real type guard: AXValue is a CoreFoundation + // type, so a conditional `as?` cast is a no-op that always succeeds and can't verify + // it. The force cast is therefore safe once the type ids match. + // swiftlint:disable:next force_cast let position = positionValue as! AXValue + // swiftlint:disable:next force_cast let size = sizeValue as! 
AXValue var point = CGPoint.zero var dimensions = CGSize.zero @@ -275,8 +280,8 @@ func launchApp(_ name: String) -> Response { func focusApp(_ name: String) -> Response { let lower = name.lowercased() - for app in NSWorkspace.shared.runningApplications where app.localizedName?.lowercased() == lower - { + let running = NSWorkspace.shared.runningApplications + for app in running where app.localizedName?.lowercased() == lower { app.activate(options: [.activateAllWindows]) return succeeded() } @@ -284,46 +289,48 @@ func focusApp(_ name: String) -> Response { } func screenshot() -> Response { - guard let image = CGDisplayCreateImage(CGMainDisplayID()) else { - return failure("could not capture the screen; grant Screen Recording permission") - } - let bitmap = NSBitmapImageRep(cgImage: image) - guard let data = bitmap.representation(using: .png, properties: [:]) else { - return failure("could not encode the screenshot") - } + // CoreGraphics' CGDisplayCreateImage is unavailable in current macOS SDKs + // (replaced by ScreenCaptureKit). Shell out to the system `screencapture` + // tool instead: it writes a PNG of the main display straight to disk, is + // stable across SDK versions, and triggers the same Screen Recording prompt. let path = NSTemporaryDirectory() + "aai-control-screenshot.png" + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture") + process.arguments = ["-x", "-m", path] do { - try data.write(to: URL(fileURLWithPath: path)) + try process.run() + process.waitUntilExit() } catch { - return failure("could not save the screenshot: \(error)") + return failure("could not capture the screen: \(error)") + } + guard process.terminationStatus == 0 else { + return failure("could not capture the screen; grant Screen Recording permission") } return Response(ok: true, error: nil, elements: nil, path: path) } +// Run perform with a required argument, or report it missing. 
Collapses the +// per-action "guard the field is present" boilerplate so handle() stays a flat +// dispatch table. +func withArg<T>(_ value: T?, _ missing: String, _ perform: (T) -> Response) -> Response { + guard let value else { + return failure(missing) + } + return perform(value) +} + func handle(_ request: Request) -> Response { switch request.action { case "type_text": - guard let text = request.text else { - return failure("type_text needs 'text'") - } - return typeText(text) + return withArg(request.text, "type_text needs 'text'", typeText) case "key_combo": - guard let keys = request.keys else { - return failure("key_combo needs 'keys'") - } - return keyCombo(keys) + return withArg(request.keys, "key_combo needs 'keys'", keyCombo) case "click": return click(request) case "launch_app": - guard let name = request.name else { - return failure("launch_app needs 'name'") - } - return launchApp(name) + return withArg(request.name, "launch_app needs 'name'", launchApp) case "focus_app": - guard let name = request.name else { - return failure("focus_app needs 'name'") - } - return focusApp(name) + return withArg(request.name, "focus_app needs 'name'", focusApp) case "get_ui_tree": return buildTree() case "screenshot": diff --git a/scripts/check.sh b/scripts/check.sh index dcb00306..88351624 100755 --- a/scripts/check.sh +++ b/scripts/check.sh @@ -72,14 +72,16 @@ echo "==> xenon (cyclomatic complexity gate, src only)" # Tests are excluded (not shipped); only the aai_cli package is gated. 
uv run xenon --max-absolute B --max-modules A --max-average A aai_cli -echo "==> swiftlint (macOS audio helper)" +echo "==> swiftlint (macOS helpers)" if command -v swiftlint >/dev/null 2>&1; then - swiftlint lint --no-cache --strict aai_cli/streaming/macos_system_audio.swift + swiftlint lint --no-cache --strict \ + aai_cli/streaming/macos_system_audio.swift \ + aai_cli/control/macos_ui_control.swift else echo " swiftlint not found; skipping (install with: brew install swiftlint)" fi -echo "==> swift compile (macOS audio helper)" +echo "==> swift compile (macOS helpers)" if [[ "$(uname -s)" != "Darwin" ]]; then echo " not macOS; skipping compile for macOS-only frameworks" elif command -v swiftc >/dev/null 2>&1; then @@ -102,6 +104,16 @@ elif command -v swiftc >/dev/null 2>&1; then cat "$swift_error" exit 1 fi + # The UI-control helper reads JSON requests on stdin (no argv to validate), so + # a clean compile is the gate — this is what guards against an SDK-unavailable + # API (e.g. CGDisplayCreateImage) slipping in unbuilt. + swiftc -parse-as-library aai_cli/control/macos_ui_control.swift \ + -module-cache-path "$swift_module_cache" \ + -O \ + -framework AppKit \ + -framework CoreGraphics \ + -framework ApplicationServices \ + -o "$swift_module_cache/aai-macos-ui-control-check" rm -rf "$swift_module_cache" else echo " swiftc not found; skipping (macOS system audio builds on first use)"