Skip to content

Commit f20ef9b

Browse files
authored
Merge pull request #105 from tstromberg/main
auto-recover from auth failures
2 parents df99906 + d857f1c commit f20ef9b

11 files changed

Lines changed: 321 additions & 191 deletions

File tree

cmd/review-goose/github.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -527,10 +527,7 @@ func (app *App) fetchTurnDataSync(ctx context.Context, issues []*github.Issue, u
527527
continue
528528
}
529529

530-
wg.Add(1)
531-
go func(issue *github.Issue) {
532-
defer wg.Done()
533-
530+
wg.Go(func() {
534531
// Acquire semaphore
535532
sem <- struct{}{}
536533
defer func() { <-sem }()
@@ -548,7 +545,7 @@ func (app *App) fetchTurnDataSync(ctx context.Context, issues []*github.Issue, u
548545
isOwner: issue.GetUser().GetLogin() == user,
549546
wasFromCache: wasFromCache,
550547
}
551-
}(issue)
548+
})
552549
}
553550

554551
// Close the results channel when all goroutines are done

cmd/review-goose/icons_badge.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func getIcon(iconType IconType, counts PRCounts) []byte {
5555
}
5656

5757
// Check cache
58-
if cached, ok := cache.Get(incoming, outgoing); ok {
58+
if cached, ok := cache.Lookup(incoming, outgoing); ok {
5959
return cached
6060
}
6161

cmd/review-goose/main.go

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ const (
7070
maxConcurrentTurnAPICalls = 20
7171
defaultMaxBrowserOpensDay = 20
7272
startupGracePeriod = 1 * time.Minute // Don't play sounds or auto-open for first minute
73+
authRetryInterval = 2 * time.Minute // Retry authentication periodically when in error state
7374
)
7475

7576
// PR represents a pull request with metadata.
@@ -420,6 +421,46 @@ func (app *App) handleReauthentication(ctx context.Context) {
420421
}
421422
}
422423

424+
// authRetryLoop periodically attempts to re-authenticate when in auth error state.
425+
// This ensures the app can recover from transient auth failures without user intervention.
426+
func (app *App) authRetryLoop(ctx context.Context) {
427+
ticker := time.NewTicker(authRetryInterval)
428+
defer ticker.Stop()
429+
430+
slog.Info("[AUTH] Starting auth retry loop", "interval", authRetryInterval)
431+
432+
for {
433+
select {
434+
case <-ctx.Done():
435+
slog.Info("[AUTH] Auth retry loop stopped (context cancelled)")
436+
return
437+
case <-ticker.C:
438+
app.mu.RLock()
439+
hasAuthError := app.authError != ""
440+
app.mu.RUnlock()
441+
442+
if !hasAuthError {
443+
slog.Info("[AUTH] Auth error cleared, stopping retry loop")
444+
return
445+
}
446+
447+
slog.Info("[AUTH] Attempting automatic re-authentication")
448+
app.handleReauthentication(ctx)
449+
450+
// Check if we recovered
451+
app.mu.RLock()
452+
stillHasError := app.authError != ""
453+
app.mu.RUnlock()
454+
455+
if !stillHasError {
456+
slog.Info("[AUTH] Automatic re-authentication successful, stopping retry loop")
457+
return
458+
}
459+
slog.Info("[AUTH] Re-authentication failed, will retry", "nextRetry", authRetryInterval)
460+
}
461+
}
462+
}
463+
423464
func (app *App) onReady(ctx context.Context) {
424465
slog.Info("System tray ready")
425466

@@ -501,6 +542,8 @@ func (app *App) onReady(ctx context.Context) {
501542
app.rebuildMenu(ctx)
502543
// Clean old cache on startup
503544
app.cleanupOldCache()
545+
// Start background auth retry loop
546+
go app.authRetryLoop(ctx)
504547
return
505548
}
506549

@@ -721,7 +764,7 @@ func (app *App) updatePRs(ctx context.Context) {
721764

722765
// Process notifications using the simplified state manager
723766
slog.Debug("[DEBUG] Processing PR state updates and notifications")
724-
app.updatePRStatesAndNotify(ctx)
767+
app.processNotifications(ctx)
725768
slog.Debug("[DEBUG] Completed PR state updates and notifications")
726769
}
727770

@@ -884,7 +927,7 @@ func (app *App) updatePRsWithWait(ctx context.Context) {
884927

885928
// Process notifications using the simplified state manager
886929
slog.Info("[FLOW] About to process PR state updates and notifications")
887-
app.updatePRStatesAndNotify(ctx)
930+
app.processNotifications(ctx)
888931
slog.Info("[FLOW] Completed PR state updates and notifications")
889932
// Mark initial load as complete after first successful update
890933
if !app.initialLoadComplete {
@@ -955,9 +998,7 @@ func (app *App) tryAutoOpenPR(ctx context.Context, pr *PR, autoBrowserEnabled bo
955998
}
956999
}
9571000

958-
// checkForNewlyBlockedPRs provides backward compatibility for tests
959-
// while using the new state manager internally.
1001+
// checkForNewlyBlockedPRs provides backward compatibility for tests.
9601002
func (app *App) checkForNewlyBlockedPRs(ctx context.Context) {
961-
// Simply delegate to the new implementation
962-
app.updatePRStatesAndNotify(ctx)
1003+
app.processNotifications(ctx)
9631004
}

cmd/review-goose/main_test.go

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,3 +880,154 @@ func TestNewlyBlockedPRAfterGracePeriod(t *testing.T) {
880880
t.Error("Expected FirstBlockedAt to be set for newly blocked PR in state manager")
881881
}
882882
}
883+
884+
// TestAuthRetryLoopStopsOnSuccess tests that the auth retry loop stops when auth succeeds.
885+
func TestAuthRetryLoopStopsOnSuccess(t *testing.T) {
886+
ctx := t.Context()
887+
888+
app := &App{
889+
mu: sync.RWMutex{},
890+
authError: "initial auth error",
891+
systrayInterface: &MockSystray{},
892+
}
893+
894+
// Track how many times we check authError
895+
checkCount := 0
896+
done := make(chan struct{})
897+
898+
// Start a goroutine that simulates the auth retry loop behavior
899+
go func() {
900+
defer close(done)
901+
for {
902+
select {
903+
case <-ctx.Done():
904+
return
905+
case <-time.After(10 * time.Millisecond): // Fast ticker for testing
906+
app.mu.RLock()
907+
hasError := app.authError != ""
908+
app.mu.RUnlock()
909+
910+
checkCount++
911+
912+
if !hasError {
913+
return // Loop should exit when auth succeeds
914+
}
915+
916+
// Simulate clearing auth error on 3rd attempt
917+
if checkCount >= 3 {
918+
app.mu.Lock()
919+
app.authError = ""
920+
app.mu.Unlock()
921+
}
922+
}
923+
}
924+
}()
925+
926+
// Wait for the goroutine to finish
927+
select {
928+
case <-done:
929+
// Success - loop exited
930+
case <-time.After(1 * time.Second):
931+
t.Fatal("Auth retry loop did not stop after auth succeeded")
932+
}
933+
934+
// Verify auth error was cleared
935+
app.mu.RLock()
936+
finalError := app.authError
937+
app.mu.RUnlock()
938+
939+
if finalError != "" {
940+
t.Errorf("Expected auth error to be cleared, got: %s", finalError)
941+
}
942+
943+
if checkCount < 3 {
944+
t.Errorf("Expected at least 3 retry attempts, got: %d", checkCount)
945+
}
946+
}
947+
948+
// TestAuthRetryLoopStopsOnContextCancel tests that the auth retry loop stops on context cancellation.
949+
func TestAuthRetryLoopStopsOnContextCancel(t *testing.T) {
950+
ctx, cancel := context.WithCancel(context.Background())
951+
952+
app := &App{
953+
mu: sync.RWMutex{},
954+
authError: "persistent auth error",
955+
systrayInterface: &MockSystray{},
956+
}
957+
958+
done := make(chan struct{})
959+
960+
// Start a goroutine that simulates the auth retry loop behavior
961+
go func() {
962+
defer close(done)
963+
ticker := time.NewTicker(10 * time.Millisecond)
964+
defer ticker.Stop()
965+
966+
for {
967+
select {
968+
case <-ctx.Done():
969+
return
970+
case <-ticker.C:
971+
app.mu.RLock()
972+
hasError := app.authError != ""
973+
app.mu.RUnlock()
974+
975+
if !hasError {
976+
return
977+
}
978+
// Auth error persists, loop continues
979+
}
980+
}
981+
}()
982+
983+
// Cancel context after a short delay
984+
time.Sleep(50 * time.Millisecond)
985+
cancel()
986+
987+
// Wait for the goroutine to finish
988+
select {
989+
case <-done:
990+
// Success - loop exited on context cancel
991+
case <-time.After(1 * time.Second):
992+
t.Fatal("Auth retry loop did not stop after context cancellation")
993+
}
994+
}
995+
996+
// TestAuthErrorStatePreservation tests that auth error state is correctly preserved and accessible.
997+
func TestAuthErrorStatePreservation(t *testing.T) {
998+
app := &App{
999+
mu: sync.RWMutex{},
1000+
systrayInterface: &MockSystray{},
1001+
}
1002+
1003+
// Initially no auth error
1004+
app.mu.RLock()
1005+
if app.authError != "" {
1006+
t.Errorf("Expected no initial auth error, got: %s", app.authError)
1007+
}
1008+
app.mu.RUnlock()
1009+
1010+
// Set auth error
1011+
app.mu.Lock()
1012+
app.authError = "token expired"
1013+
app.mu.Unlock()
1014+
1015+
// Verify auth error is set
1016+
app.mu.RLock()
1017+
if app.authError != "token expired" {
1018+
t.Errorf("Expected auth error 'token expired', got: %s", app.authError)
1019+
}
1020+
app.mu.RUnlock()
1021+
1022+
// Clear auth error
1023+
app.mu.Lock()
1024+
app.authError = ""
1025+
app.mu.Unlock()
1026+
1027+
// Verify auth error is cleared
1028+
app.mu.RLock()
1029+
if app.authError != "" {
1030+
t.Errorf("Expected auth error to be cleared, got: %s", app.authError)
1031+
}
1032+
app.mu.RUnlock()
1033+
}

cmd/review-goose/notifications.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,3 @@ func (app *App) sendPRNotification(ctx context.Context, pr *PR, title string, so
116116
*playedSound = true
117117
}
118118
}
119-
120-
// updatePRStatesAndNotify is the simplified replacement for checkForNewlyBlockedPRs.
121-
func (app *App) updatePRStatesAndNotify(ctx context.Context) {
122-
// Simple and clear: just process notifications
123-
app.processNotifications(ctx)
124-
}

cmd/review-goose/ratelimit.go

Lines changed: 0 additions & 52 deletions
This file was deleted.

cmd/review-goose/reliability.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ func (hm *healthMonitor) recordCacheAccess(hit bool) {
144144
}
145145
}
146146

147-
func (hm *healthMonitor) getMetrics() map[string]any {
147+
func (hm *healthMonitor) metrics() map[string]any {
148148
hm.mu.RLock()
149149
defer hm.mu.RUnlock()
150150

@@ -172,7 +172,7 @@ func (hm *healthMonitor) getMetrics() map[string]any {
172172
}
173173

174174
func (hm *healthMonitor) logMetrics() {
175-
metrics := hm.getMetrics()
175+
m := hm.metrics()
176176

177177
// Get sprinkler connection status
178178
sprinklerConnected := false
@@ -186,11 +186,11 @@ func (hm *healthMonitor) logMetrics() {
186186
}
187187

188188
slog.Info("[HEALTH] Application metrics",
189-
"uptime", metrics["uptime"],
190-
"api_calls", metrics["api_calls"],
191-
"api_errors", metrics["api_errors"],
192-
"error_rate_pct", fmt.Sprintf("%.1f", metrics["error_rate"]),
193-
"cache_hit_rate_pct", fmt.Sprintf("%.1f", metrics["cache_hit_rate"]),
189+
"uptime", m["uptime"],
190+
"api_calls", m["api_calls"],
191+
"api_errors", m["api_errors"],
192+
"error_rate_pct", fmt.Sprintf("%.1f", m["error_rate"]),
193+
"cache_hit_rate_pct", fmt.Sprintf("%.1f", m["cache_hit_rate"]),
194194
"sprinkler_connected", sprinklerConnected,
195195
"sprinkler_last_connected", sprinklerLastConnected)
196196
}

0 commit comments

Comments
 (0)