From e4945d73eaa58f1ec6c48d1410823f4841248f2e Mon Sep 17 00:00:00 2001 From: Kristiyan Nikolov Date: Fri, 29 May 2026 18:59:09 +0300 Subject: [PATCH] feat: add AirPods media recording mode --- app.go | 171 ++++++++++--- frontend/src/App.tsx | 69 +++++ frontend/wailsjs/go/main/App.d.ts | 8 + frontend/wailsjs/go/main/App.js | 16 ++ frontend/wailsjs/go/models.ts | 6 + internal/hotkey/hotkey_darwin.go | 383 +++++++++++++++++++++++++--- internal/hotkey/hotkey_linux.go | 69 +++-- internal/hotkey/hotkey_other.go | 13 + internal/hotkey/hotkey_windows.go | 13 + internal/models/config.go | 44 +++- internal/system/clipboard.go | 49 +++- internal/system/clipboard_darwin.go | 84 +++++- internal/system/clipboard_other.go | 6 + 13 files changed, 819 insertions(+), 112 deletions(-) diff --git a/app.go b/app.go index 119ef54..70c5932 100644 --- a/app.go +++ b/app.go @@ -92,9 +92,10 @@ type App struct { lastTranscript string lastError string recordStartTime time.Time + lastMediaToggle time.Time hotkeyEnabled bool history []HistoryItem - + // Tray callback to update icon onTrayUpdate func(recording bool) } @@ -108,17 +109,17 @@ func NewApp() *App { state: StateReady, history: make([]HistoryItem, 0), } - + // Set up overlay stop callback app.overlay.SetStopCallback(func() { app.ToggleRecording() }) - + // Set up overlay cancel callback app.overlay.SetCancelCallback(func() { app.CancelRecording() }) - + return app } @@ -182,20 +183,28 @@ func (a *App) startup(ctx context.Context) { fmt.Println("Accessibility permissions granted") } + if configManager.Get().AirPodsControlEnabled { + if !hotkey.RequestInputMonitoringPermissions() { + fmt.Println("WARNING: Input Monitoring permissions not granted; AirPods control may not work.") + } else { + fmt.Println("Input Monitoring permissions granted") + } + } + // Apply configured hotkey type before registering hotkeyType := configManager.Get().RecordingHotkey if hotkeyType == "" { hotkeyType = models.DefaultRecordingHotkey() } a.hotkeyManager.SetHotkeyType(hotkeyType) - + // Apply configured cancel key cancelKey := configManager.Get().CancelHotkey if cancelKey == "" { cancelKey = "escape" } a.hotkeyManager.SetCancelKey(cancelKey) - + // Register global hotkey if err := a.hotkeyManager.Register(func() { a.ToggleRecording() @@ -206,6 +215,10 @@ func (a *App) startup(ctx context.Context) { fmt.Printf("Global hotkey registered: %s\n", hotkey.GetHotkeyDisplayName(hotkeyType)) } + if configManager.Get().AirPodsControlEnabled { + a.hotkeyManager.SetMediaControlEnabled(true, a.ToggleRecordingFromMediaControl) + } + // Load history from disk a.loadHistory() } @@ -327,7 +340,7 @@ func (a *App) saveHistory() { func (a *App) CopyHistoryItem(id string) error { a.mu.Lock() defer a.mu.Unlock() - + for _, item := range a.history { if item.ID == id { return system.CopyToClipboard(item.Text) @@ -339,7 +352,7 @@ func (a *App) CopyHistoryItem(id string) error { // DeleteHistoryItem deletes a history item by ID func (a *App) DeleteHistoryItem(id string) error { a.mu.Lock() - + // Find and remove the item var audioPath string found := false @@ -352,18 +365,18 @@ func (a *App) DeleteHistoryItem(id string) error { } } a.mu.Unlock() - + if !found { return fmt.Errorf("history item not found") } - + // Delete the audio file if it exists if audioPath != "" { if err := os.Remove(audioPath); err != nil && !os.IsNotExist(err) { fmt.Printf("Warning: Failed to delete audio file: %v\n", err) } } - + a.saveHistory() runtime.EventsEmit(a.ctx, "historyChanged", a.history) return nil @@ -399,18 +412,18 @@ func (a *App) ShowInFolder(id string) error { func (a *App) GetAudioData(id string) (string, error) { a.mu.Lock() defer a.mu.Unlock() - + for _, item := range a.history { if item.ID == id { if !item.HasAudio || item.AudioPath == "" { return "", fmt.Errorf("no audio available for this item") } - + data, err := audio.LoadWAV(item.AudioPath) if err != nil { return "", fmt.Errorf("failed to load audio: %v", err) } - + // Return as base64 return base64.StdEncoding.EncodeToString(data), nil } @@ -430,32 +443,48 @@ func (a *App) ToggleRecording() error { return a.StartRecording() } +// ToggleRecordingFromMediaControl handles media controls, which can emit +// duplicate play/pause callbacks for a single press. +func (a *App) ToggleRecordingFromMediaControl() { + a.mu.Lock() + if time.Since(a.lastMediaToggle) < 750*time.Millisecond { + a.mu.Unlock() + return + } + a.lastMediaToggle = time.Now() + a.mu.Unlock() + + if err := a.ToggleRecording(); err != nil { + fmt.Printf("Media control toggle failed: %v\n", err) + } +} + // StartRecording begins audio capture func (a *App) StartRecording() error { runtime.LogInfo(a.ctx, "StartRecording called") fmt.Println("StartRecording: entering function") - + // Save the current frontmost app before we do anything (for auto-paste later) system.SaveFrontmostApp() - + a.mu.Lock() if a.state != StateReady { a.mu.Unlock() runtime.LogWarning(a.ctx, fmt.Sprintf("Cannot start recording in state: %s", a.state)) return fmt.Errorf("cannot start recording in state: %s", a.state) } - + // Check if sound is enabled soundEnabled := a.configManager != nil && (a.configManager.Get().SoundEnabled == nil || *a.configManager.Get().SoundEnabled) fmt.Printf("StartRecording: soundEnabled=%v\n", soundEnabled) - + // Set state to recording first a.state = StateRecording a.lastError = "" a.recordStartTime = time.Now() onTrayUpdate := a.onTrayUpdate a.mu.Unlock() - + // Play start sound (non-blocking) - afplay goes to speakers, not mic input if soundEnabled { fmt.Println("StartRecording: playing start sound") @@ -473,7 +502,7 @@ func (a *App) StartRecording() error { // Create fresh recorder for each recording session a.recorder = audio.NewRecorder() - + // Set audio device from config if available if a.configManager != nil { config := a.configManager.Get() @@ -494,6 +523,8 @@ func (a *App) StartRecording() error { return err } + a.configureAutoStopOnSilence(a.recordStartTime) + // Show native overlay a.overlay.Show() @@ -507,6 +538,51 @@ func (a *App) StartRecording() error { return nil } +func (a *App) configureAutoStopOnSilence(startedAt time.Time) { + if a.configManager == nil || !a.configManager.Get().AutoStopSilence || a.recorder == nil { + return + } + + var mu sync.Mutex + var silenceStarted time.Time + stopped := false + + a.recorder.SetLevelCallback(func(level float32) { + if time.Since(startedAt) < 1500*time.Millisecond { + return + } + + mu.Lock() + defer mu.Unlock() + + if stopped { + return + } + + if level >= 0.025 { + silenceStarted = time.Time{} + return + } + + if silenceStarted.IsZero() { + silenceStarted = time.Now() + return + } + + if time.Since(silenceStarted) < 1400*time.Millisecond { + return + } + + stopped = true + go func() { + fmt.Println("Auto-stop silence threshold reached") + if err := a.StopRecording(); err != nil { + fmt.Printf("Auto-stop failed: %v\n", err) + } + }() + }) +} + // StopRecording ends audio capture and starts transcription func (a *App) StopRecording() error { a.mu.Lock() @@ -514,6 +590,10 @@ func (a *App) StopRecording() error { a.mu.Unlock() return fmt.Errorf("not recording") } + if a.recorder == nil { + a.mu.Unlock() + return fmt.Errorf("recorder is not ready") + } recordDuration := time.Since(a.recordStartTime).Seconds() a.state = StateTranscribing a.mu.Unlock() @@ -537,6 +617,9 @@ func (a *App) StopRecording() error { a.emitState() return err } + if a.hotkeyManager != nil { + a.hotkeyManager.RefreshMediaControl() + } go a.transcribe(samples, recordDuration) @@ -560,6 +643,9 @@ func (a *App) CancelRecording() error { // Stop the recorder (discard samples) a.recorder.Stop() + if a.hotkeyManager != nil { + a.hotkeyManager.RefreshMediaControl() + } // Hide overlay a.overlay.Hide() @@ -571,7 +657,6 @@ func (a *App) CancelRecording() error { a.emitState() - return nil } @@ -638,7 +723,7 @@ func (a *App) transcribe(samples []float32, duration float64) { HasAudio: hasAudio, } a.history = append([]HistoryItem{historyItem}, a.history...) - + // Keep only last 50 items if len(a.history) > 50 { // Delete audio files for items being removed @@ -665,11 +750,11 @@ func (a *App) transcribe(samples []float32, duration float64) { // Wait for the overlay to hide and the previous app to regain focus fmt.Println("DEBUG: Waiting 500ms before paste...") time.Sleep(500 * time.Millisecond) - fmt.Println("DEBUG: Calling CopyAndPaste now") - if err := system.CopyAndPaste(textToPaste); err != nil { + fmt.Println("DEBUG: Calling CopyPasteAndSubmit now") + if err := system.CopyPasteAndSubmit(textToPaste, config.PasteSubmit); err != nil { fmt.Printf("Failed to paste: %v\n", err) } else { - fmt.Println("DEBUG: CopyAndPaste succeeded") + fmt.Println("DEBUG: CopyPasteAndSubmit succeeded") } }(text) } else { @@ -763,6 +848,30 @@ func (a *App) SetAutoPaste(enabled bool) error { return a.configManager.SetAutoPaste(enabled) } +// SetPasteSubmit enables/disables pressing Enter after auto-paste. +func (a *App) SetPasteSubmit(enabled bool) error { + return a.configManager.SetPasteSubmit(enabled) +} + +// SetAirPodsControlEnabled enables/disables media play-pause control. +func (a *App) SetAirPodsControlEnabled(enabled bool) error { + if err := a.configManager.SetAirPodsControlEnabled(enabled); err != nil { + return err + } + if enabled && !hotkey.RequestInputMonitoringPermissions() { + fmt.Println("WARNING: Input Monitoring permissions not granted; AirPods control may not work.") + } + if a.hotkeyManager != nil { + a.hotkeyManager.SetMediaControlEnabled(enabled, a.ToggleRecordingFromMediaControl) + } + return nil +} + +// SetAutoStopSilence enables/disables stopping after sustained silence. +func (a *App) SetAutoStopSilence(enabled bool) error { + return a.configManager.SetAutoStopSilence(enabled) +} + // SetSoundEnabled enables/disables recording start/stop sound func (a *App) SetSoundEnabled(enabled bool) error { return a.configManager.SetSoundEnabled(enabled) @@ -811,7 +920,7 @@ func (a *App) GetStats() UsageStats { if a.statsManager == nil { return UsageStats{} } - + stats := a.statsManager.Get() return UsageStats{ AverageWPM: a.statsManager.GetAverageWPM(), @@ -830,12 +939,12 @@ func (a *App) SetRecordingHotkey(keyName string) error { if keyName == "" { return fmt.Errorf("hotkey cannot be empty") } - + // Update the hotkey manager if a.hotkeyManager != nil { a.hotkeyManager.SetHotkeyType(keyName) } - + // Save to config return a.configManager.SetRecordingHotkey(keyName) } @@ -859,12 +968,12 @@ func (a *App) SetCancelHotkey(keyName string) error { if keyName == "" { return fmt.Errorf("cancel hotkey cannot be empty") } - + // Update the hotkey manager if a.hotkeyManager != nil { a.hotkeyManager.SetCancelKey(keyName) } - + // Save to config return a.configManager.SetCancelHotkey(keyName) } @@ -982,7 +1091,7 @@ func (a *App) CheckMicrophonePermission() string { // Returns: "granted", "denied", "undetermined" // For now, we'll trigger a permission request by attempting to list devices // The actual permission check requires calling macOS APIs - + // Try to list audio devices - this will trigger the permission prompt if needed devices, err := audio.GetAudioInputDevices() if err != nil { diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index e36cd00..ce61498 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -15,6 +15,9 @@ import { DownloadModel, GetConfig, SetAutoPaste, + SetPasteSubmit, + SetAirPodsControlEnabled, + SetAutoStopSilence, SetSoundEnabled, GetHistory, ClearHistory, @@ -60,6 +63,9 @@ interface Config { openaiApiKey?: string; audioInputDevice?: string; autoPaste: boolean; + pasteSubmit: boolean; + airPodsControlEnabled: boolean; + autoStopSilence: boolean; soundEnabled?: boolean; } @@ -340,6 +346,21 @@ function App() { GetConfig().then((c: Config) => setConfig(c)); }, []); + const handlePasteSubmitChange = useCallback(async (enabled: boolean) => { + await SetPasteSubmit(enabled); + GetConfig().then((c: Config) => setConfig(c)); + }, []); + + const handleAirPodsControlChange = useCallback(async (enabled: boolean) => { + await SetAirPodsControlEnabled(enabled); + GetConfig().then((c: Config) => setConfig(c)); + }, []); + + const handleAutoStopSilenceChange = useCallback(async (enabled: boolean) => { + await SetAutoStopSilence(enabled); + GetConfig().then((c: Config) => setConfig(c)); + }, []); + const handleSoundEnabledChange = useCallback(async (enabled: boolean) => { await SetSoundEnabled(enabled); GetConfig().then((c: Config) => setConfig(c)); @@ -850,6 +871,54 @@ function App() { +
+
+ +

Press Return after auto-pasting the transcript

+
+ +
+ + {platform === 'darwin' && ( +
+
+ +

Keep a silent media session active so AirPods/media play-pause can control recording

+
+ +
+ )} + +
+
+ +

Stop recording after a short pause in speech

+
+ +
+
diff --git a/frontend/wailsjs/go/main/App.d.ts b/frontend/wailsjs/go/main/App.d.ts index 7e6aaf2..0c76ffb 100755 --- a/frontend/wailsjs/go/main/App.d.ts +++ b/frontend/wailsjs/go/main/App.d.ts @@ -55,10 +55,14 @@ export function QuitApp():Promise; export function RequestMicrophonePermission():Promise; +export function SetAirPodsControlEnabled(arg1:boolean):Promise; + export function SetAudioInputDevice(arg1:string):Promise; export function SetAutoPaste(arg1:boolean):Promise; +export function SetAutoStopSilence(arg1:boolean):Promise; + export function SetCancelHotkey(arg1:string):Promise; export function SetModel(arg1:string):Promise; @@ -67,6 +71,8 @@ export function SetOnboardingCompleted(arg1:boolean):Promise; export function SetOpenAIKey(arg1:string):Promise; +export function SetPasteSubmit(arg1:boolean):Promise; + export function SetProvider(arg1:string):Promise; export function SetRecordingHotkey(arg1:string):Promise; @@ -84,3 +90,5 @@ export function StartRecording():Promise; export function StopRecording():Promise; export function ToggleRecording():Promise; + +export function ToggleRecordingFromMediaControl():Promise; diff --git a/frontend/wailsjs/go/main/App.js b/frontend/wailsjs/go/main/App.js index 53c9164..f615a34 100755 --- a/frontend/wailsjs/go/main/App.js +++ b/frontend/wailsjs/go/main/App.js @@ -106,6 +106,10 @@ export function RequestMicrophonePermission() { return window['go']['main']['App']['RequestMicrophonePermission'](); } +export function SetAirPodsControlEnabled(arg1) { + return window['go']['main']['App']['SetAirPodsControlEnabled'](arg1); +} + export function SetAudioInputDevice(arg1) { return window['go']['main']['App']['SetAudioInputDevice'](arg1); } @@ -114,6 +118,10 @@ export function SetAutoPaste(arg1) { return window['go']['main']['App']['SetAutoPaste'](arg1); } +export function SetAutoStopSilence(arg1) { + return window['go']['main']['App']['SetAutoStopSilence'](arg1); +} + export function SetCancelHotkey(arg1) { return window['go']['main']['App']['SetCancelHotkey'](arg1); } @@ -130,6 +138,10 @@ export function SetOpenAIKey(arg1) { return window['go']['main']['App']['SetOpenAIKey'](arg1); } +export function SetPasteSubmit(arg1) { + return window['go']['main']['App']['SetPasteSubmit'](arg1); +} + export function SetProvider(arg1) { return window['go']['main']['App']['SetProvider'](arg1); } @@ -165,3 +177,7 @@ export function StopRecording() { export function ToggleRecording() { return window['go']['main']['App']['ToggleRecording'](); } + +export function ToggleRecordingFromMediaControl() { + return window['go']['main']['App']['ToggleRecordingFromMediaControl'](); +} diff --git a/frontend/wailsjs/go/models.ts b/frontend/wailsjs/go/models.ts index 66e2a0f..b4269ce 100755 --- a/frontend/wailsjs/go/models.ts +++ b/frontend/wailsjs/go/models.ts @@ -115,6 +115,9 @@ export namespace models { openaiApiKey?: string; audioInputDevice?: string; autoPaste: boolean; + pasteSubmit: boolean; + airPodsControlEnabled: boolean; + autoStopSilence: boolean; showNotification: boolean; recordingHotkey: string; cancelHotkey: string; @@ -132,6 +135,9 @@ export namespace models { this.openaiApiKey = source["openaiApiKey"]; this.audioInputDevice = source["audioInputDevice"]; this.autoPaste = source["autoPaste"]; + this.pasteSubmit = source["pasteSubmit"]; + this.airPodsControlEnabled = source["airPodsControlEnabled"]; + this.autoStopSilence = source["autoStopSilence"]; this.showNotification = source["showNotification"]; this.recordingHotkey = source["recordingHotkey"]; this.cancelHotkey = source["cancelHotkey"]; diff --git a/internal/hotkey/hotkey_darwin.go b/internal/hotkey/hotkey_darwin.go index d2bf32a..75c2da5 100644 --- a/internal/hotkey/hotkey_darwin.go +++ b/internal/hotkey/hotkey_darwin.go @@ -4,17 +4,27 @@ package hotkey /* #cgo CFLAGS: -x objective-c -#cgo LDFLAGS: -framework Cocoa -framework Carbon -framework ApplicationServices +#cgo LDFLAGS: -framework Cocoa -framework Carbon -framework ApplicationServices -framework AVFoundation -framework MediaPlayer #import #import #import +#import +#import static id gEventMonitor = nil; static id gKeyEventMonitor = nil; static id gLocalKeyEventMonitor = nil; +static id gMediaEventMonitor = nil; +static id gPlayCommandTarget = nil; +static id gPauseCommandTarget = nil; +static id gToggleCommandTarget = nil; +static AVAudioPlayer *gSilentPlayer = nil; +static CFMachPortRef gSystemEventTap = NULL; +static CFRunLoopSourceRef gSystemEventTapSource = NULL; static BOOL gHotkeyKeyDown = NO; static BOOL gCancelKeyEnabled = NO; +static BOOL gMediaControlEnabled = NO; static UInt16 gCurrentHotkeyCode = 0x3D; // Default: Right Option static UInt16 gCurrentCancelCode = 53; // Default: Escape static BOOL gHotkeyIsModifier = YES; // Is the hotkey a modifier key? @@ -22,6 +32,10 @@ static BOOL gCancelIsModifier = NO; // Is the cancel key a modifier? extern void goHotkeyPressed(void); extern void goCancelPressed(void); +extern void goMediaPressed(void); + +#define NX_KEYTYPE_PLAY 16 +#define NX_SYSDEFINED 14 // Common key codes #define kVK_RightOption 0x3D @@ -39,6 +53,33 @@ extern void goCancelPressed(void); #define kVK_Tab 0x30 #define kVK_Return 0x24 +static void stopSystemEventTap(void); +static void stopRemoteCommandMonitoring(void); + +static CGEventRef systemEventTapCallback(CGEventTapProxy proxy, CGEventType type, CGEventRef event, void *refcon) { + if (!gMediaControlEnabled || type != NX_SYSDEFINED) { + return event; + } + + NSEvent *nsEvent = [NSEvent eventWithCGEvent:event]; + if (nsEvent == nil || [nsEvent subtype] != 8) { + return event; + } + + int keyCode = (([nsEvent data1] & 0xFFFF0000) >> 16); + int keyFlags = ([nsEvent data1] & 0x0000FFFF); + BOOL keyDown = (((keyFlags & 0xFF00) >> 8) == 0xA); + BOOL keyRepeat = (keyFlags & 0x1); + + NSLog(@"System event tap media key: code=%d down=%d repeat=%d flags=%d", keyCode, keyDown, keyRepeat, keyFlags); + + if (keyDown && !keyRepeat && keyCode == NX_KEYTYPE_PLAY) { + goMediaPressed(); + } + + return event; +} + // Check if accessibility permissions are granted (with optional prompt) static int checkAccessibilityPermissionsWithPrompt(int shouldPrompt) { NSDictionary *options = @{(__bridge NSString *)kAXTrustedCheckOptionPrompt: @(shouldPrompt ? YES : NO)}; @@ -66,6 +107,18 @@ static int requestAccessibilityPermissions(void) { return trusted; } +// Request Input Monitoring permissions for lower-level media key events. +static int requestInputMonitoringPermissions(void) { + if (@available(macOS 10.15, *)) { + if (CGPreflightListenEventAccess()) { + return 1; + } + NSLog(@"Requesting Input Monitoring permissions..."); + return CGRequestListenEventAccess() ? 1 : 0; + } + return 1; +} + // Check if a key code is a modifier key static BOOL isModifierKeyCode(UInt16 keyCode) { switch (keyCode) { @@ -110,6 +163,7 @@ static NSEventModifierFlags getModifierFlag(UInt16 keyCode) { } static void stopAllMonitoring(void) { + BOOL wasMediaControlEnabled = gMediaControlEnabled; if (gEventMonitor != nil) { [NSEvent removeMonitor:gEventMonitor]; gEventMonitor = nil; @@ -122,25 +176,245 @@ static void stopAllMonitoring(void) { [NSEvent removeMonitor:gLocalKeyEventMonitor]; gLocalKeyEventMonitor = nil; } + if (!wasMediaControlEnabled && gMediaEventMonitor != nil) { + [NSEvent removeMonitor:gMediaEventMonitor]; + gMediaEventMonitor = nil; + } gHotkeyKeyDown = NO; gCancelKeyEnabled = NO; } +static void stopAllMonitoringForShutdown(void) { + gMediaControlEnabled = NO; + stopAllMonitoring(); + stopSystemEventTap(); + stopRemoteCommandMonitoring(); +} + +static void startSystemEventTap(void) { + if (gSystemEventTap != NULL) { + return; + } + + gSystemEventTap = CGEventTapCreate(kCGSessionEventTap, + kCGHeadInsertEventTap, + kCGEventTapOptionListenOnly, + CGEventMaskBit(NX_SYSDEFINED), + systemEventTapCallback, + NULL); + if (gSystemEventTap == NULL) { + NSLog(@"Failed to create system event tap for media keys"); + return; + } + + gSystemEventTapSource = CFMachPortCreateRunLoopSource(kCFAllocatorDefault, gSystemEventTap, 0); + CFRunLoopAddSource(CFRunLoopGetMain(), gSystemEventTapSource, kCFRunLoopCommonModes); + CGEventTapEnable(gSystemEventTap, true); + NSLog(@"System event tap started for AirPods/media control"); +} + +static void stopSystemEventTap(void) { + if (gSystemEventTap != NULL) { + CGEventTapEnable(gSystemEventTap, false); + } + if (gSystemEventTapSource != NULL) { + CFRunLoopRemoveSource(CFRunLoopGetMain(), gSystemEventTapSource, kCFRunLoopCommonModes); + CFRelease(gSystemEventTapSource); + gSystemEventTapSource = NULL; + } + if (gSystemEventTap != NULL) { + CFRelease(gSystemEventTap); + gSystemEventTap = NULL; + } +} + +static void appendUInt16LE(NSMutableData *data, uint16_t value) { + uint8_t bytes[2] = { value & 0xff, (value >> 8) & 0xff }; + [data appendBytes:bytes length:2]; +} + +static void appendUInt32LE(NSMutableData *data, uint32_t value) { + uint8_t bytes[4] = { value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, (value >> 24) & 0xff }; + [data appendBytes:bytes length:4]; +} + +static NSData *silentWAVData(void) { + const uint32_t sampleRate = 16000; + const uint16_t channels = 1; + const uint16_t bitsPerSample = 16; + const uint32_t frames = sampleRate; + const uint32_t dataSize = frames * channels * (bitsPerSample / 8); + + NSMutableData *data = [NSMutableData dataWithCapacity:44 + dataSize]; + [data appendBytes:"RIFF" length:4]; + appendUInt32LE(data, 36 + dataSize); + [data appendBytes:"WAVE" length:4]; + [data appendBytes:"fmt " length:4]; + appendUInt32LE(data, 16); + appendUInt16LE(data, 1); + appendUInt16LE(data, channels); + appendUInt32LE(data, sampleRate); + appendUInt32LE(data, sampleRate * channels * (bitsPerSample / 8)); + appendUInt16LE(data, channels * (bitsPerSample / 8)); + appendUInt16LE(data, bitsPerSample); + [data appendBytes:"data" length:4]; + appendUInt32LE(data, dataSize); + [data increaseLengthBy:dataSize]; + return data; +} + +static void startSilentPlayback(void) { + if (gSilentPlayer != nil && gSilentPlayer.playing) { + return; + } + + NSError *error = nil; + gSilentPlayer = [[AVAudioPlayer alloc] initWithData:silentWAVData() error:&error]; + if (gSilentPlayer == nil) { + NSLog(@"Failed to create silent media player: %@", error); + return; + } + + gSilentPlayer.numberOfLoops = -1; + gSilentPlayer.volume = 0.0; + [gSilentPlayer prepareToPlay]; + [gSilentPlayer play]; + NSLog(@"Silent media playback started for AirPods control"); +} + +static void stopSilentPlayback(void) { + if (gSilentPlayer != nil) { + [gSilentPlayer stop]; + gSilentPlayer = nil; + NSLog(@"Silent media playback stopped"); + } +} + +static void stopRemoteCommandMonitoring(void) { + MPRemoteCommandCenter *commandCenter = [MPRemoteCommandCenter sharedCommandCenter]; + + if (gPlayCommandTarget != nil) { + [commandCenter.playCommand removeTarget:gPlayCommandTarget]; + gPlayCommandTarget = nil; + } + if (gPauseCommandTarget != nil) { + [commandCenter.pauseCommand removeTarget:gPauseCommandTarget]; + gPauseCommandTarget = nil; + } + if (gToggleCommandTarget != nil) { + [commandCenter.togglePlayPauseCommand removeTarget:gToggleCommandTarget]; + gToggleCommandTarget = nil; + } + + commandCenter.playCommand.enabled = NO; + commandCenter.pauseCommand.enabled = NO; + commandCenter.togglePlayPauseCommand.enabled = NO; + [MPNowPlayingInfoCenter defaultCenter].nowPlayingInfo = nil; + stopSilentPlayback(); +} + +static void startRemoteCommandMonitoring(void) { + if (gToggleCommandTarget != nil) { + startSilentPlayback(); + return; + } + + MPRemoteCommandCenter *commandCenter = [MPRemoteCommandCenter sharedCommandCenter]; + commandCenter.playCommand.enabled = YES; + commandCenter.pauseCommand.enabled = YES; + commandCenter.togglePlayPauseCommand.enabled = YES; + + MPRemoteCommandHandlerStatus (^handler)(MPRemoteCommandEvent *) = ^MPRemoteCommandHandlerStatus(MPRemoteCommandEvent *event) { + if (!gMediaControlEnabled) { + return MPRemoteCommandHandlerStatusCommandFailed; + } + NSLog(@"AirPods/media remote command received: %@", event.command); + goMediaPressed(); + startSilentPlayback(); + return MPRemoteCommandHandlerStatusSuccess; + }; + + gPlayCommandTarget = [commandCenter.playCommand addTargetWithHandler:handler]; + gPauseCommandTarget = [commandCenter.pauseCommand addTargetWithHandler:handler]; + gToggleCommandTarget = [commandCenter.togglePlayPauseCommand addTargetWithHandler:handler]; + + [MPNowPlayingInfoCenter defaultCenter].nowPlayingInfo = @{ + MPMediaItemPropertyTitle: @"Yap Recording Control", + MPMediaItemPropertyArtist: @"Yap", + MPNowPlayingInfoPropertyPlaybackRate: @1 + }; + + startSilentPlayback(); + NSLog(@"Remote command monitoring started for AirPods control"); +} + +static void refreshMediaControl(void) { + if (!gMediaControlEnabled) { + return; + } + startRemoteCommandMonitoring(); + startSilentPlayback(); + NSLog(@"Media control session refreshed"); +} + +static void startMediaMonitoring(void) { + if (gMediaEventMonitor != nil) { + return; + } + + gMediaEventMonitor = [NSEvent addGlobalMonitorForEventsMatchingMask:NSEventMaskSystemDefined + handler:^(NSEvent *event) { + if (!gMediaControlEnabled || [event subtype] != 8) { + return; + } + + int keyCode = (([event data1] & 0xFFFF0000) >> 16); + int keyFlags = ([event data1] & 0x0000FFFF); + BOOL keyDown = (((keyFlags & 0xFF00) >> 8) == 0xA); + BOOL keyRepeat = (keyFlags & 0x1); + + if (keyDown && !keyRepeat && keyCode == NX_KEYTYPE_PLAY) { + goMediaPressed(); + } + }]; + + NSLog(@"Media key monitoring started for AirPods control"); +} + +static void setMediaControlEnabled(int enabled) { + gMediaControlEnabled = enabled ? YES : NO; + + if (gMediaControlEnabled) { + startRemoteCommandMonitoring(); + startSystemEventTap(); + startMediaMonitoring(); + } else if (gMediaEventMonitor != nil) { + [NSEvent removeMonitor:gMediaEventMonitor]; + gMediaEventMonitor = nil; + stopRemoteCommandMonitoring(); + stopSystemEventTap(); + NSLog(@"Media key monitoring stopped"); + } else { + stopRemoteCommandMonitoring(); + stopSystemEventTap(); + } +} + static void startMonitoring(void) { stopAllMonitoring(); - + // Check accessibility permissions first if (!hasAccessibilityPermissions()) { NSLog(@"Cannot start monitoring without accessibility permissions"); return; } - + // Monitor for modifier keys (flagsChanged events) gEventMonitor = [NSEvent addGlobalMonitorForEventsMatchingMask:NSEventMaskFlagsChanged handler:^(NSEvent *event) { UInt16 keyCode = [event keyCode]; NSEventModifierFlags flags = [event modifierFlags]; - + // Check hotkey (if it's a modifier) if (gHotkeyIsModifier && keyCode == gCurrentHotkeyCode) { NSEventModifierFlags modFlag = getModifierFlag(keyCode); @@ -153,7 +427,7 @@ static void startMonitoring(void) { gHotkeyKeyDown = NO; } } - + // Check cancel key (if it's a modifier) if (gCancelKeyEnabled && gCancelIsModifier && keyCode == gCurrentCancelCode) { NSEventModifierFlags modFlag = getModifierFlag(keyCode); @@ -162,43 +436,43 @@ static void startMonitoring(void) { } } }]; - + // Monitor for regular keys (keyDown events) gKeyEventMonitor = [NSEvent addGlobalMonitorForEventsMatchingMask:NSEventMaskKeyDown handler:^(NSEvent *event) { UInt16 keyCode = [event keyCode]; - + // Check hotkey (if it's NOT a modifier) if (!gHotkeyIsModifier && keyCode == gCurrentHotkeyCode) { goHotkeyPressed(); } - + // Check cancel key (if it's NOT a modifier) if (gCancelKeyEnabled && !gCancelIsModifier && keyCode == gCurrentCancelCode) { goCancelPressed(); } }]; - + // Local monitor for when this app has focus gLocalKeyEventMonitor = [NSEvent addLocalMonitorForEventsMatchingMask:NSEventMaskKeyDown handler:^NSEvent *(NSEvent *event) { UInt16 keyCode = [event keyCode]; - + // Check hotkey (if it's NOT a modifier) if (!gHotkeyIsModifier && keyCode == gCurrentHotkeyCode) { goHotkeyPressed(); return nil; // Consume event } - + // Check cancel key (if it's NOT a modifier) if (gCancelKeyEnabled && !gCancelIsModifier && keyCode == gCurrentCancelCode) { goCancelPressed(); return nil; // Consume event } - + return event; }]; - + NSLog(@"Key monitoring started - hotkey: %d, cancel: %d", gCurrentHotkeyCode, gCurrentCancelCode); } @@ -239,6 +513,8 @@ var ( hotkeyC = make(chan struct{}, 1) cancelCallbackMu sync.Mutex cancelCallback func() + mediaCallbackMu sync.Mutex + mediaCallback func() ) //export goHotkeyPressed @@ -260,16 +536,27 @@ func goCancelPressed() { } } +//export goMediaPressed +func goMediaPressed() { + fmt.Println("AirPods/media control callback triggered") + mediaCallbackMu.Lock() + cb := mediaCallback + mediaCallbackMu.Unlock() + if cb != nil { + go cb() + } +} + // Callback is the function type for hotkey events type Callback func() // Manager handles global hotkey registration type Manager struct { - mu sync.Mutex - running bool - stopC chan struct{} - hotkeyStr string - cancelStr string + mu sync.Mutex + running bool + stopC chan struct{} + hotkeyStr string + cancelStr string } // NewManager creates a new hotkey manager @@ -296,7 +583,7 @@ func (m *Manager) Register(cb Callback) error { // Set the hotkey code keyCode := KeyNameToCode(m.hotkeyStr) C.setHotkeyCode(C.UInt16(keyCode)) - + // Set the cancel key code cancelCode := KeyNameToCode(m.cancelStr) C.setCancelCode(C.UInt16(cancelCode)) @@ -336,25 +623,50 @@ func (m *Manager) Unregister() error { } close(m.stopC) - C.stopAllMonitoring() + C.stopAllMonitoringForShutdown() m.running = false return nil } +// SetMediaControlEnabled starts or stops using AirPods/media play-pause as a recording control. +func (m *Manager) SetMediaControlEnabled(enabled bool, cb func()) { + mediaCallbackMu.Lock() + if enabled { + mediaCallback = cb + } else { + mediaCallback = nil + } + mediaCallbackMu.Unlock() + + C.setMediaControlEnabled(C.int(boolToInt(enabled))) +} + +// RefreshMediaControl re-primes the media session after recording releases the mic. +func (m *Manager) RefreshMediaControl() { + C.refreshMediaControl() +} + +func boolToInt(value bool) int { + if value { + return 1 + } + return 0 +} + // SetHotkeyType sets the recording hotkey by name func (m *Manager) SetHotkeyType(hotkeyName string) { m.mu.Lock() defer m.mu.Unlock() - + m.hotkeyStr = strings.ToLower(hotkeyName) keyCode := KeyNameToCode(m.hotkeyStr) C.setHotkeyCode(C.UInt16(keyCode)) - + if m.running { C.startMonitoring() } - + fmt.Printf("Hotkey set to: %s (code: %d)\n", m.hotkeyStr, keyCode) } @@ -362,11 +674,11 @@ func (m *Manager) SetHotkeyType(hotkeyName string) { func (m *Manager) SetCancelKey(keyName string) { m.mu.Lock() defer m.mu.Unlock() - + m.cancelStr = strings.ToLower(keyName) cancelCode := KeyNameToCode(m.cancelStr) C.setCancelCode(C.UInt16(cancelCode)) - + fmt.Printf("Cancel key set to: %s (code: %d)\n", m.cancelStr, cancelCode) } @@ -382,14 +694,14 @@ func (m *Manager) EnableCancelKey(cb func()) { cancelCallbackMu.Lock() cancelCallback = cb cancelCallbackMu.Unlock() - + C.enableCancelKey() } // DisableCancelKey stops monitoring for the cancel key func (m *Manager) DisableCancelKey() { C.disableCancelKey() - + cancelCallbackMu.Lock() cancelCallback = nil cancelCallbackMu.Unlock() @@ -405,6 +717,11 @@ func RequestAccessibilityPermissions() bool { return C.requestAccessibilityPermissions() != 0 } +// RequestInputMonitoringPermissions prompts user for Input Monitoring permissions. +func RequestInputMonitoringPermissions() bool { + return C.requestInputMonitoringPermissions() != 0 +} + // KeyNameToCode converts a key name to a macOS key code func KeyNameToCode(name string) uint16 { switch strings.ToLower(name) { @@ -429,7 +746,7 @@ func KeyNameToCode(name string) uint16 { return 0x3F case "capslock": return 0x39 - + // Special keys case "escape", "esc": return 0x35 @@ -443,7 +760,7 @@ func KeyNameToCode(name string) uint16 { return 0x33 case "forwarddelete": return 0x75 - + // Arrow keys case "left", "arrowleft": return 0x7B @@ -453,7 +770,7 @@ func KeyNameToCode(name string) uint16 { return 0x7E case "down", "arrowdown": return 0x7D - + // Function keys case "f1": return 0x7A @@ -479,7 +796,7 @@ func KeyNameToCode(name string) uint16 { return 0x67 case "f12": return 0x6F - + // Letter keys case "a": return 0x00 @@ -533,7 +850,7 @@ func KeyNameToCode(name string) uint16 { return 0x10 case "z": return 0x06 - + // Number keys case "0": return 0x1D @@ -555,7 +872,7 @@ func KeyNameToCode(name string) uint16 { return 0x1C case "9": return 0x19 - + default: return 0x3D // Default to right option } diff --git a/internal/hotkey/hotkey_linux.go b/internal/hotkey/hotkey_linux.go index e6db0fa..14fdf49 100644 --- a/internal/hotkey/hotkey_linux.go +++ b/internal/hotkey/hotkey_linux.go @@ -23,7 +23,7 @@ extern void goCancelPressed(void); static int initDisplay(void) { if (display != NULL) return 1; - + display = XOpenDisplay(NULL); if (display == NULL) { fprintf(stderr, "Cannot open X display\n"); @@ -61,10 +61,10 @@ static void disableCancel(void) { static void startMonitoring(void) { if (display == NULL) return; running = 1; - + // Grab the hotkey XGrabKey(display, hotkeyCode, AnyModifier, root, True, GrabModeAsync, GrabModeAsync); - + // Also grab common modifier combinations XGrabKey(display, hotkeyCode, Mod2Mask, root, True, GrabModeAsync, GrabModeAsync); XGrabKey(display, hotkeyCode, LockMask, root, True, GrabModeAsync, GrabModeAsync); @@ -74,7 +74,7 @@ static void startMonitoring(void) { static void stopMonitoring(void) { if (display == NULL) return; running = 0; - + XUngrabKey(display, hotkeyCode, AnyModifier, root); XUngrabKey(display, hotkeyCode, Mod2Mask, root); XUngrabKey(display, hotkeyCode, LockMask, root); @@ -83,18 +83,18 @@ static void stopMonitoring(void) { static void processEvents(void) { if (display == NULL || !running) return; - + XEvent event; while (XPending(display) > 0) { XNextEvent(display, &event); - + if (event.type == KeyPress) { KeyCode keycode = event.xkey.keycode; - + if (keycode == hotkeyCode) { goHotkeyPressed(); } - + if (cancelEnabled && keycode == cancelCode) { goCancelPressed(); } @@ -132,10 +132,10 @@ type Manager struct { } var ( - callbackMu sync.Mutex - hotkeyCallback Callback + callbackMu sync.Mutex + hotkeyCallback Callback cancelCallbackMu sync.Mutex - cancelCallback func() + cancelCallback func() ) //export goHotkeyPressed @@ -187,7 +187,7 @@ func (m *Manager) Register(cb Callback) error { // Set the hotkey keysym := KeyNameToKeysym(m.hotkeyStr) C.setHotkeyKeysym(C.KeySym(keysym)) - + // Set the cancel key cancelKeysym := KeyNameToKeysym(m.cancelStr) C.setCancelKeysym(C.KeySym(cancelKeysym)) @@ -201,7 +201,7 @@ func (m *Manager) Register(cb Callback) error { go func() { ticker := time.NewTicker(10 * time.Millisecond) defer ticker.Stop() - + for { select { case <-m.stopCh: @@ -237,10 +237,10 @@ func (m *Manager) Unregister() error { func (m *Manager) SetHotkeyType(hotkeyName string) { m.mu.Lock() defer m.mu.Unlock() - + m.hotkeyStr = strings.ToLower(hotkeyName) keysym := KeyNameToKeysym(m.hotkeyStr) - + if m.running { C.stopMonitoring() C.setHotkeyKeysym(C.KeySym(keysym)) @@ -248,7 +248,7 @@ func (m *Manager) SetHotkeyType(hotkeyName string) { } else { C.setHotkeyKeysym(C.KeySym(keysym)) } - + fmt.Printf("Hotkey set to: %s\n", m.hotkeyStr) } @@ -256,11 +256,11 @@ func (m *Manager) SetHotkeyType(hotkeyName string) { func (m *Manager) SetCancelKey(keyName string) { m.mu.Lock() defer m.mu.Unlock() - + m.cancelStr = strings.ToLower(keyName) keysym := KeyNameToKeysym(m.cancelStr) C.setCancelKeysym(C.KeySym(keysym)) - + fmt.Printf("Cancel key set to: %s\n", m.cancelStr) } @@ -276,29 +276,37 @@ func (m *Manager) EnableCancelKey(cb func()) { cancelCallbackMu.Lock() cancelCallback = cb cancelCallbackMu.Unlock() - + m.mu.Lock() m.cancelCallback = cb m.cancelEnabled = true m.mu.Unlock() - + C.enableCancel() } // DisableCancelKey stops monitoring for the cancel key func (m *Manager) DisableCancelKey() { C.disableCancel() - + cancelCallbackMu.Lock() cancelCallback = nil cancelCallbackMu.Unlock() - + m.mu.Lock() m.cancelEnabled = false m.cancelCallback = nil m.mu.Unlock() } +// SetMediaControlEnabled is not supported on Linux. +func (m *Manager) SetMediaControlEnabled(enabled bool, cb func()) { +} + +// RefreshMediaControl is not supported on Linux. +func (m *Manager) RefreshMediaControl() { +} + // GetHotkeyDisplayName returns the display name for a hotkey func GetHotkeyDisplayName(hotkeyName string) string { return KeyNameToDisplayName(hotkeyName) @@ -309,6 +317,11 @@ func RequestAccessibilityPermissions() bool { return true } +// RequestInputMonitoringPermissions is a no-op on Linux. +func RequestInputMonitoringPermissions() bool { + return true +} + // KeyNameToKeysym converts a key name to an X11 KeySym func KeyNameToKeysym(name string) uint64 { switch strings.ToLower(name) { @@ -331,7 +344,7 @@ func KeyNameToKeysym(name string) uint64 { return 0xFFEB // XK_Super_L case "capslock": return 0xFFE5 // XK_Caps_Lock - + // Special keys case "escape", "esc": return 0xFF1B // XK_Escape @@ -345,7 +358,7 @@ func KeyNameToKeysym(name string) uint64 { return 0xFF08 // XK_BackSpace case "delete": return 0xFFFF // XK_Delete - + // Arrow keys case "left", "arrowleft": return 0xFF51 // XK_Left @@ -355,7 +368,7 @@ func KeyNameToKeysym(name string) uint64 { return 0xFF52 // XK_Up case "down", "arrowdown": return 0xFF54 // XK_Down - + // Function keys case "f1": return 0xFFBE @@ -381,7 +394,7 @@ func KeyNameToKeysym(name string) uint64 { return 0xFFC8 case "f12": return 0xFFC9 - + // Letter keys (lowercase) case "a": return 0x0061 @@ -435,7 +448,7 @@ func KeyNameToKeysym(name string) uint64 { return 0x0079 case "z": return 0x007A - + // Number keys case "0": return 0x0030 @@ -457,7 +470,7 @@ func KeyNameToKeysym(name string) uint64 { return 0x0038 case "9": return 0x0039 - + default: return 0xFFEA // Default to right alt } diff --git a/internal/hotkey/hotkey_other.go b/internal/hotkey/hotkey_other.go index 52bc49b..5c2325a 100644 --- a/internal/hotkey/hotkey_other.go +++ b/internal/hotkey/hotkey_other.go @@ -66,6 +66,14 @@ func (m *Manager) DisableCancelKey() { // Not supported on this platform } +// SetMediaControlEnabled is not supported on this platform. +func (m *Manager) SetMediaControlEnabled(enabled bool, cb func()) { +} + +// RefreshMediaControl is not supported on this platform. +func (m *Manager) RefreshMediaControl() { +} + // SetHotkeyType sets the hotkey (no-op) func (m *Manager) SetHotkeyType(hotkeyType string) { // Not supported on this platform @@ -86,6 +94,11 @@ func RequestAccessibilityPermissions() bool { return true } +// RequestInputMonitoringPermissions is a no-op on unsupported platforms. +func RequestInputMonitoringPermissions() bool { + return true +} + // KeyNameToDisplayName converts a key name to a display-friendly name func KeyNameToDisplayName(name string) string { switch strings.ToLower(name) { diff --git a/internal/hotkey/hotkey_windows.go b/internal/hotkey/hotkey_windows.go index 667fd48..58247d9 100644 --- a/internal/hotkey/hotkey_windows.go +++ b/internal/hotkey/hotkey_windows.go @@ -304,6 +304,14 @@ func (m *Manager) DisableCancelKey() { m.cancelCallback = nil } +// SetMediaControlEnabled is not supported on Windows. +func (m *Manager) SetMediaControlEnabled(enabled bool, cb func()) { +} + +// RefreshMediaControl is not supported on Windows. +func (m *Manager) RefreshMediaControl() { +} + // GetHotkeyDisplayName returns the display name for a hotkey func GetHotkeyDisplayName(hotkeyName string) string { return KeyNameToDisplayName(hotkeyName) @@ -314,6 +322,11 @@ func RequestAccessibilityPermissions() bool { return true } +// RequestInputMonitoringPermissions is a no-op on Windows. +func RequestInputMonitoringPermissions() bool { + return true +} + // KeyNameToCode converts a key name to a Windows virtual key code func KeyNameToCode(name string) uint32 { switch strings.ToLower(name) { diff --git a/internal/models/config.go b/internal/models/config.go index c329d00..ced530c 100644 --- a/internal/models/config.go +++ b/internal/models/config.go @@ -24,6 +24,15 @@ type Config struct { // Auto-paste after transcription AutoPaste bool `json:"autoPaste"` + // Press Enter after auto-pasting the transcription + PasteSubmit bool `json:"pasteSubmit"` + + // Allow keyboard/media play-pause keys to start/stop recording + AirPodsControlEnabled bool `json:"airPodsControlEnabled"` + + // Automatically stop recording after sustained silence + AutoStopSilence bool `json:"autoStopSilence"` + // Show notification after transcription ShowNotification bool `json:"showNotification"` @@ -46,13 +55,16 @@ type Config struct { func DefaultConfig() *Config { soundEnabled := true return &Config{ - Provider: "local", - Model: "base.en", - AutoPaste: true, - ShowNotification: true, - RecordingHotkey: DefaultRecordingHotkey(), - CancelHotkey: "escape", - SoundEnabled: &soundEnabled, + Provider: "local", + Model: "base.en", + AutoPaste: true, + PasteSubmit: false, + AirPodsControlEnabled: false, + AutoStopSilence: false, + ShowNotification: true, + RecordingHotkey: DefaultRecordingHotkey(), + CancelHotkey: "escape", + SoundEnabled: &soundEnabled, } } @@ -175,6 +187,24 @@ func (cm *ConfigManager) SetAutoPaste(enabled bool) error { return cm.Save() } +// SetPasteSubmit updates the paste-submit setting +func (cm *ConfigManager) SetPasteSubmit(enabled bool) error { + cm.config.PasteSubmit = enabled + return cm.Save() +} + +// SetAirPodsControlEnabled updates the AirPods control setting +func (cm *ConfigManager) SetAirPodsControlEnabled(enabled bool) error { + cm.config.AirPodsControlEnabled = enabled + return cm.Save() +} + +// SetAutoStopSilence updates the auto-stop-on-silence setting +func (cm *ConfigManager) SetAutoStopSilence(enabled bool) error { + cm.config.AutoStopSilence = enabled + return cm.Save() +} + // SetAudioInputDevice updates the audio input device setting func (cm *ConfigManager) SetAudioInputDevice(deviceName string) error { cm.config.AudioInputDevice = deviceName diff --git a/internal/system/clipboard.go b/internal/system/clipboard.go index 3972406..343a1f1 100644 --- a/internal/system/clipboard.go +++ b/internal/system/clipboard.go @@ -20,13 +20,25 @@ func ReadFromClipboard() (string, error) { // CopyAndPaste copies text to clipboard and simulates Cmd+V / Ctrl+V func CopyAndPaste(text string) error { + return CopyPasteAndSubmit(text, false) +} + +// CopyPasteAndSubmit copies text, pastes it, and optionally presses Enter. +func CopyPasteAndSubmit(text string, submit bool) error { // First copy to clipboard if err := CopyToClipboard(text); err != nil { return fmt.Errorf("failed to copy to clipboard: %w", err) } // Then simulate paste - return SimulatePaste() + if err := SimulatePaste(); err != nil { + return err + } + + if submit { + return SimulateEnter() + } + return nil } // SimulatePaste simulates pressing Cmd+V (macOS) or Ctrl+V (others) @@ -43,12 +55,30 @@ func SimulatePaste() error { } } +// SimulateEnter simulates pressing Enter/Return. +func SimulateEnter() error { + switch runtime.GOOS { + case "darwin": + return simulateEnterMacOS() + case "linux": + return simulateEnterLinux() + case "windows": + return simulateEnterWindows() + default: + return fmt.Errorf("unsupported OS: %s", runtime.GOOS) + } +} + // simulatePasteMacOS uses native CGEvent to simulate Cmd+V func simulatePasteMacOS() error { // Use native CGEvent approach which works with the app's accessibility permissions return simulatePasteMacOSNative() } +func simulateEnterMacOS() error { + return simulateEnterMacOSNative() +} + // simulatePasteLinux uses xdotool to simulate Ctrl+V func simulatePasteLinux() error { cmd := exec.Command("xdotool", "key", "ctrl+v") @@ -58,6 +88,14 @@ func simulatePasteLinux() error { return nil } +func simulateEnterLinux() error { + cmd := exec.Command("xdotool", "key", "Return") + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to simulate enter (ensure xdotool is installed): %w", err) + } + return nil +} + // simulatePasteWindows uses PowerShell to simulate Ctrl+V func simulatePasteWindows() error { script := `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait("^v")` @@ -67,3 +105,12 @@ func simulatePasteWindows() error { } return nil } + +func simulateEnterWindows() error { + script := `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait("{ENTER}")` + cmd := exec.Command("powershell", "-Command", script) + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to simulate enter: %w", err) + } + return nil +} diff --git a/internal/system/clipboard_darwin.go b/internal/system/clipboard_darwin.go index a8b0bb9..e75ec64 100644 --- a/internal/system/clipboard_darwin.go +++ b/internal/system/clipboard_darwin.go @@ -49,13 +49,13 @@ void activatePreviousApp(void) { int simulatePasteKeystroke(void) { NSLog(@"simulatePasteKeystroke: starting"); NSLog(@"simulatePasteKeystroke: current frontmost = %@", getFrontmostAppName()); - + // Check accessibility first if (!AXIsProcessTrusted()) { NSLog(@"simulatePasteKeystroke: ERROR - No accessibility permissions!"); return 0; } - + // Activate the previous app first (in case our app took focus) if (gPreviousFrontApp) { NSLog(@"simulatePasteKeystroke: activating previous app %@", gPreviousFrontApp.localizedName); @@ -63,35 +63,74 @@ int simulatePasteKeystroke(void) { usleep(100000); // 100ms for activation to complete NSLog(@"simulatePasteKeystroke: after activation, frontmost = %@", getFrontmostAppName()); } - + // Create key down event for 'v' with Command modifier CGEventRef keyDown = CGEventCreateKeyboardEvent(NULL, (CGKeyCode)kVK_ANSI_V, true); CGEventRef keyUp = CGEventCreateKeyboardEvent(NULL, (CGKeyCode)kVK_ANSI_V, false); - + if (keyDown == NULL || keyUp == NULL) { NSLog(@"simulatePasteKeystroke: failed to create events"); if (keyDown) CFRelease(keyDown); if (keyUp) CFRelease(keyUp); return 0; } - - // Set Command modifier flag + + // Set Command modifier flag for key down only. Keeping Command on key up can + // leave the next synthetic key interpreted as a modified shortcut in some apps. CGEventSetFlags(keyDown, kCGEventFlagMaskCommand); - CGEventSetFlags(keyUp, kCGEventFlagMaskCommand); - + CGEventSetFlags(keyUp, 0); + // Post events NSLog(@"simulatePasteKeystroke: posting keyDown"); CGEventPost(kCGHIDEventTap, keyDown); usleep(50000); // 50ms delay NSLog(@"simulatePasteKeystroke: posting keyUp"); CGEventPost(kCGHIDEventTap, keyUp); - + // Release CFRelease(keyDown); CFRelease(keyUp); NSLog(@"simulatePasteKeystroke: done"); return 1; } + +// Simulate Return key using CGEvent. +int simulateEnterKeystroke(void) { + NSLog(@"simulateEnterKeystroke: starting"); + + if (!AXIsProcessTrusted()) { + NSLog(@"simulateEnterKeystroke: ERROR - No accessibility permissions!"); + return 0; + } + + if (gPreviousFrontApp) { + NSLog(@"simulateEnterKeystroke: activating previous app %@", gPreviousFrontApp.localizedName); + [gPreviousFrontApp activateWithOptions:NSApplicationActivateIgnoringOtherApps]; + } + + CGEventRef keyDown = CGEventCreateKeyboardEvent(NULL, (CGKeyCode)kVK_Return, true); + CGEventRef keyUp = CGEventCreateKeyboardEvent(NULL, (CGKeyCode)kVK_Return, false); + + if (keyDown == NULL || keyUp == NULL) { + NSLog(@"simulateEnterKeystroke: failed to create events"); + if (keyDown) CFRelease(keyDown); + if (keyUp) CFRelease(keyUp); + return 0; + } + + CGEventSetFlags(keyDown, 0); + CGEventSetFlags(keyUp, 0); + + usleep(300000); // Let the paste land before submitting. + CGEventPost(kCGHIDEventTap, keyDown); + usleep(50000); + CGEventPost(kCGHIDEventTap, keyUp); + + CFRelease(keyDown); + CFRelease(keyUp); + NSLog(@"simulateEnterKeystroke: done"); + return 1; +} */ import "C" @@ -107,18 +146,39 @@ func simulatePasteMacOSNative() error { fmt.Println("simulatePasteMacOSNative: checking accessibility...") hasAccess := C.hasAccessibilityForPaste() fmt.Printf("simulatePasteMacOSNative: hasAccessibility=%d\n", hasAccess) - + if hasAccess == 0 { fmt.Println("ERROR: No accessibility permissions for paste! Please grant in System Settings > Privacy & Security > Accessibility") return fmt.Errorf("no accessibility permissions") } - + fmt.Println("simulatePasteMacOSNative: calling C function") result := C.simulatePasteKeystroke() fmt.Printf("simulatePasteMacOSNative: result=%d\n", result) - + if result == 0 { return fmt.Errorf("paste keystroke failed") } return nil } + +// simulateEnterMacOSNative uses CGEvent to simulate Return. +func simulateEnterMacOSNative() error { + fmt.Println("simulateEnterMacOSNative: checking accessibility...") + hasAccess := C.hasAccessibilityForPaste() + fmt.Printf("simulateEnterMacOSNative: hasAccessibility=%d\n", hasAccess) + + if hasAccess == 0 { + fmt.Println("ERROR: No accessibility permissions for enter! Please grant in System Settings > Privacy & Security > Accessibility") + return fmt.Errorf("no accessibility permissions") + } + + fmt.Println("simulateEnterMacOSNative: calling C function") + result := C.simulateEnterKeystroke() + fmt.Printf("simulateEnterMacOSNative: result=%d\n", result) + + if result == 0 { + return fmt.Errorf("enter keystroke failed") + } + return nil +} diff --git a/internal/system/clipboard_other.go b/internal/system/clipboard_other.go index 47a3d6e..cc009b4 100644 --- a/internal/system/clipboard_other.go +++ b/internal/system/clipboard_other.go @@ -14,3 +14,9 @@ func SaveFrontmostApp() { func simulatePasteMacOSNative() error { return fmt.Errorf("simulatePasteMacOSNative is only available on macOS") } + +// simulateEnterMacOSNative is a stub for non-macOS platforms. +// This should never be called since SimulateEnter checks runtime.GOOS. +func simulateEnterMacOSNative() error { + return fmt.Errorf("simulateEnterMacOSNative is only available on macOS") +}