Skip to content

Commit b8165b8

Browse files
authored
Ignore TLS error when closing listener from the notifier (#1216)
Here, address a request coming from #256 in which it's fairly common to see an like this one going to the error logs: > `tls: failed to send closeNotify alert (but connection was closed anyway): write tcp 10.0.5.131:40192->10.0.7.96:5432: i/o timeout` If you read the error message carefully, it indicates that this really isn't a problem. The connection is closed already, and the TLS package just wasn't able to close it as cleanly as it wanted to. Here, modify the site that calls listener close in the notifier so that it ignores these types of errors, thereby cleaning up peoples' error logs somewhat. Fixes #256.
1 parent e3fc3c2 commit b8165b8

3 files changed

Lines changed: 60 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
12+
- Ignore errors like `tls: failed to send closeNotify alert (but connection was closed anyway)` when closing listeners. [PR #1216](https://github.com/riverqueue/river/pull/1216).
13+
1014
### Fixed
1115

1216
- Fixed leader election to track explicit database-issued leadership terms, reducing handoff flakiness and same-client reacquisition edge cases while making reelection and resign target the current leadership lease instead of a stale one. [PR #1213](https://github.com/riverqueue/river/pull/1213).

internal/notifier/notifier.go

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"log/slog"
99
"slices"
10+
"strings"
1011
"sync"
1112
"time"
1213

@@ -262,14 +263,40 @@ func (n *Notifier) listenerClose(ctx context.Context, skipLock bool) {
262263

263264
n.Logger.DebugContext(ctx, n.Name+": Listener closing")
264265
if err := n.listener.Close(ctx); err != nil {
265-
if !errors.Is(err, context.Canceled) {
266+
if shouldIgnoreListenerError(err) {
266267
n.Logger.ErrorContext(ctx, n.Name+": Error closing listener", "err", err)
267268
}
268269
}
269270

270271
n.isConnected = false
271272
}
272273

274+
// shouldIgnoreListenerError returns true if the error is a certain type of
275+
// common error that we see when trying to close a listener.
276+
//
277+
// It's probably not strictly necessary to log errors on listener close, but it
278+
// seems not ideal to ignore them completely either. If this function were ever
279+
// to become to unwieldy though, we might want to go back to the drawing board.
280+
func shouldIgnoreListenerError(err error) bool {
281+
if errors.Is(err, context.Canceled) {
282+
return true
283+
}
284+
285+
// In practice, this occurs a fair bit in some systems. See:
286+
//
287+
// https://github.com/riverqueue/river/issues/256
288+
//
289+
// There's been an issue opened to make this a well-known error type, but
290+
// it's not right now:
291+
//
292+
// https://github.com/golang/go/issues/75600
293+
if strings.Contains(err.Error(), "tls: failed to send closeNotify alert") {
294+
return true
295+
}
296+
297+
return false
298+
}
299+
273300
const listenerTimeout = 10 * time.Second
274301

275302
func (n *Notifier) listenerConnect(ctx context.Context, skipLock bool) error {

internal/notifier/notifier_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,34 @@ func topicAndPayloadNotifyFunc(notifyChan chan TopicAndPayload) NotifyFunc {
671671
}
672672
}
673673

674+
func TestShouldIgnoreListenerError(t *testing.T) {
675+
t.Parallel()
676+
677+
t.Run("ContextCanceled", func(t *testing.T) {
678+
t.Parallel()
679+
680+
require.True(t, shouldIgnoreListenerError(context.Canceled))
681+
})
682+
683+
t.Run("WrappedContextCanceled", func(t *testing.T) {
684+
t.Parallel()
685+
686+
require.True(t, shouldIgnoreListenerError(fmt.Errorf("wrapped: %w", context.Canceled)))
687+
})
688+
689+
t.Run("TLSCloseNotifyAlert", func(t *testing.T) {
690+
t.Parallel()
691+
692+
require.True(t, shouldIgnoreListenerError(errors.New("tls: failed to send closeNotify alert (but connection was closed anyway): write tcp 10.0.5.131:40192->10.0.7.96:5432: i/o timeout")))
693+
})
694+
695+
t.Run("ArbitraryError", func(t *testing.T) {
696+
t.Parallel()
697+
698+
require.False(t, shouldIgnoreListenerError(errors.New("some other error")))
699+
})
700+
}
701+
674702
func sendNotification(ctx context.Context, t *testing.T, exec riverdriver.Executor, schema, topic string, payload string) {
675703
t.Helper()
676704

0 commit comments

Comments
 (0)