Skip to content

Commit c0d3465

Browse files
committed
Widen error responses that trigger re-authentication
Apart from token expiry, we have encountered at least two other kinds of persistent error responses that are resolved by re-authentication (presumably other kinds of token invalidation). The client is effectively broken once this happens until a re-auth is done. This change widens the "requires re-auth" criteria used to catch those and potentially others. It also includes some related hardening/refactoring of how automatic re-auths are handled. The original RPC that triggered the auth failure is now included in the cause chain.
1 parent 6dff291 commit c0d3465

3 files changed

Lines changed: 162 additions & 65 deletions

File tree

src/main/java/com/ibm/etcd/client/EtcdClient.java

Lines changed: 87 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.io.InputStream;
2323
import java.util.Arrays;
2424
import java.util.List;
25+
import java.util.Set;
2526
import java.util.concurrent.BrokenBarrierException;
2627
import java.util.concurrent.Callable;
2728
import java.util.concurrent.CyclicBarrier;
@@ -42,6 +43,7 @@
4243
import com.google.common.annotations.VisibleForTesting;
4344
import com.google.common.base.Predicate;
4445
import com.google.common.base.Predicates;
46+
import com.google.common.collect.ImmutableSet;
4547
import com.google.common.io.ByteSource;
4648
import com.google.common.util.concurrent.ForwardingExecutorService;
4749
import com.google.common.util.concurrent.Futures;
@@ -289,8 +291,12 @@ public boolean requiresReauth(Throwable t) {
289291
return EtcdClient.reauthRequired(t);
290292
}
291293
@Override
294+
public CallCredentials refreshCredentials(Throwable trigger) {
295+
return EtcdClient.this.refreshCredentials(trigger);
296+
}
297+
@Override
292298
public CallCredentials refreshCredentials() {
293-
return EtcdClient.this.refreshCredentials();
299+
return refreshCredentials(null);
294300
}
295301
};
296302

@@ -385,21 +391,68 @@ public boolean isClosed() {
385391

386392
// ------ authentication logic
387393

394+
private static final Set<Code> OTHER_AUTH_FAILURE_CODES = ImmutableSet.of(
395+
Code.INVALID_ARGUMENT, Code.FAILED_PRECONDITION, Code.PERMISSION_DENIED, Code.UNKNOWN);
396+
397+
// Various different errors can imply a re-auth is required (sometimes non-obvious),
398+
// so we cover most related messages to be safe. This should not cause problems since
399+
// re-authentication attempts are rate-limited to once every 15 seconds.
400+
//
401+
// See https://godoc.org/github.com/coreos/etcd/auth#pkg-variables
402+
// and https://godoc.org/github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes#pkg-variables
403+
private static final String[] AUTH_FAILURE_MESSAGES = {
404+
// These will be prefixed with "etcdserver: "
405+
"root user does not exist",
406+
"root user does not have root role",
407+
"user name already exists",
408+
"user name is empty",
409+
"user name not found",
410+
"role name already exists",
411+
"role name not found",
412+
"role name is empty",
413+
"authentication failed, invalid user ID or password",
414+
"permission denied",
415+
"role is not granted to the user",
416+
"permission is not granted to the role",
417+
"authentication is not enabled",
418+
"invalid auth token",
419+
"invalid auth management"
420+
};
421+
422+
private Status lastAuthFailStatus;
423+
private long lastAuthFailTime;
424+
388425
protected static boolean reauthRequired(Throwable error) {
389-
Code statusCode = GrpcClient.codeFromThrowable(error);
390-
return statusCode == Code.UNAUTHENTICATED ||
391-
(statusCode == Code.INVALID_ARGUMENT
392-
&& contains(error.getMessage(), "user name is empty") ||
393-
(statusCode == Code.CANCELLED
394-
&& reauthRequired(error.getCause())));
426+
Status status = Status.fromThrowable(error);
427+
Code code = status.getCode();
428+
return code == Code.UNAUTHENTICATED
429+
|| (OTHER_AUTH_FAILURE_CODES.contains(code) &&
430+
(startsWith(status.getDescription(), "auth: ")
431+
|| endsWith(status.getDescription(), AUTH_FAILURE_MESSAGES)))
432+
|| (code == Code.CANCELLED && reauthRequired(error.getCause()));
395433
}
396434

397-
private CallCredentials refreshCredentials() {
435+
// This is only called in a synchronized context
436+
private CallCredentials refreshCredentials(Throwable trigger) {
437+
long lastAuthFailure = System.currentTimeMillis() - lastAuthFailTime;
438+
if (lastAuthFailure < 15_000L) {
439+
// Rate-limit how often re-auth attempts are made,
440+
// return last auth failure in the meantime
441+
return new CallCredentials() {
442+
@Override
443+
public void applyRequestMetadata(RequestInfo requestInfo,
444+
Executor appExecutor, MetadataApplier applier) {
445+
applier.fail(lastAuthFailStatus);
446+
}
447+
//@Override
448+
public void thisUsesUnstableApi() {}
449+
};
450+
}
398451
return new CallCredentials() {
399-
private Metadata tokenHeader; //TODO volatile TBD
400-
private final long authTime = System.currentTimeMillis();
452+
private volatile Metadata tokenHeader; //TODO volatile TBD
401453
private final ListenableFuture<Metadata> futureTokenHeader =
402454
Futures.transform(authenticate(), ar -> tokenHeader = tokenHeader(ar), directExecutor());
455+
403456
@Override
404457
public void applyRequestMetadata(RequestInfo requestInfo,
405458
Executor appExecutor, MetadataApplier applier) {
@@ -411,11 +464,14 @@ public void applyRequestMetadata(RequestInfo requestInfo,
411464
applier.apply(futureTokenHeader.get());
412465
} catch (ExecutionException | InterruptedException ee) { // (IE won't be thrown)
413466
Status failStatus = Status.fromThrowable(ee.getCause());
414-
Code code = failStatus != null ? failStatus.getCode() : null;
415-
if (code != Code.INVALID_ARGUMENT
416-
&& (System.currentTimeMillis() - authTime) > 15_000L) {
417-
// this will force another auth attempt
418-
failStatus = Status.UNAUTHENTICATED.withDescription("re-attempt re-auth");
467+
lastAuthFailStatus = failStatus;
468+
lastAuthFailTime = System.currentTimeMillis();
469+
if (trigger != null) {
470+
if (failStatus.getCause() != null) {
471+
failStatus.getCause().addSuppressed(trigger);
472+
} else {
473+
failStatus = failStatus.withCause(trigger);
474+
}
419475
}
420476
applier.fail(failStatus);
421477
}
@@ -439,16 +495,15 @@ private ListenableFuture<AuthenticateResponse> authenticate() {
439495
CallOptions callOpts = CallOptions.DEFAULT;
440496
return Futures.catchingAsync(
441497
grpc.fuCall(METHOD_AUTHENTICATE, request, callOpts, 0L),
442-
Exception.class, ex -> !retryAuthRequest(ex)
498+
Exception.class, ex -> !retryAuthRequest(ex)
443499
? Futures.immediateFailedFuture(ex)
444500
: grpc.fuCall(METHOD_AUTHENTICATE, request, callOpts, 0L),
445501
directExecutor());
446502
}
447503

448504
protected static boolean retryAuthRequest(Throwable error) {
449-
Status status = Status.fromThrowable(error);
450-
Code statusCode = status != null ? status.getCode() : null;
451-
return statusCode == Code.UNAVAILABLE && GrpcClient.isConnectException(error);
505+
return GrpcClient.codeFromThrowable(error) == Code.UNAVAILABLE
506+
&& GrpcClient.isConnectException(error);
452507
}
453508

454509
// ------ individual client getters
@@ -518,6 +573,19 @@ protected static boolean contains(String str, String subStr) {
518573
return str != null && str.contains(subStr);
519574
}
520575

576+
protected static boolean startsWith(String str, String prefix) {
577+
return str != null && str.startsWith(prefix);
578+
}
579+
580+
protected static boolean endsWith(String str, String... suffixes) {
581+
if (str != null) {
582+
for (String suffix : suffixes) {
583+
if (str.endsWith(suffix)) return true;
584+
}
585+
}
586+
return false;
587+
}
588+
521589
protected static final class EtcdEventThread extends FastThreadLocalThread {
522590
public EtcdEventThread(Runnable r) {
523591
super(r);

src/main/java/com/ibm/etcd/client/GrpcClient.java

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,16 @@ public class GrpcClient {
7575
private final long defaultTimeoutMs;
7676

7777
public interface AuthProvider {
78+
/**
79+
* Called from a synchronized context
80+
*/
7881
CallCredentials refreshCredentials();
82+
default CallCredentials refreshCredentials(Throwable trigger) {
83+
return refreshCredentials();
84+
}
85+
/**
86+
* Not called from a synchronized context
87+
*/
7988
boolean requiresReauth(Throwable t);
8089
}
8190

@@ -166,7 +175,7 @@ public Executor getResponseExecutor() {
166175

167176
public void authenticateNow() {
168177
assert authProvider != NO_AUTH;
169-
reauthenticate(getCallOptions());
178+
reauthenticate(getCallOptions(), null);
170179
}
171180

172181
protected CallOptions getCallOptions() {
@@ -201,13 +210,13 @@ public static <R> RetryDecision<R> retryDecision(boolean idempotent) {
201210
public <ReqT,R> ListenableFuture<R> call(MethodDescriptor<ReqT,R> method,
202211
ReqT request, boolean idempotent) {
203212
return call(method, null, request, null, retryDecision(idempotent),
204-
0, false, null, 0L);
213+
0, false, false, null, 0L);
205214
}
206215

207216
public <ReqT,R> ListenableFuture<R> call(MethodDescriptor<ReqT,R> method,
208217
ReqT request, boolean idempotent, long timeoutMillis, Executor executor) {
209218
return call(method, null, request, executor, retryDecision(idempotent),
210-
0, false, null, timeoutMillis);
219+
0, false, false, null, timeoutMillis);
211220
}
212221

213222
//TODO probably move this
@@ -218,14 +227,14 @@ public static interface RetryDecision<ReqT> {
218227
public <ReqT,R> ListenableFuture<R> call(MethodDescriptor<ReqT,R> method, Condition precondition,
219228
ReqT request, Executor executor, RetryDecision<ReqT> retry, boolean backoff,
220229
Deadline deadline, long timeoutMs) {
221-
return call(method, precondition, request, executor, retry, 0, backoff, deadline, timeoutMs);
230+
return call(method, precondition, request, executor, retry, 0, false, backoff, deadline, timeoutMs);
222231
}
223232

224233
// deadline is for entire request (including retry pauses),
225234
// timeout is per-attempt and 0 means not specified
226235
private <ReqT,R> ListenableFuture<R> call(MethodDescriptor<ReqT,R> method,
227236
Condition precondition, ReqT request, Executor executor, RetryDecision<ReqT> retry,
228-
int attempt, boolean backoff, Deadline deadline, long timeoutMs) {
237+
int attempt, boolean afterReauth, boolean backoff, Deadline deadline, long timeoutMs) {
229238
if (precondition != null && !precondition.satisfied()) {
230239
return failInExecutor(new CancellationException("precondition false"), executor);
231240
}
@@ -237,18 +246,29 @@ private <ReqT,R> ListenableFuture<R> call(MethodDescriptor<ReqT,R> method,
237246
callOpts = callOpts.withExecutor(executor);
238247
}
239248
return Futures.catchingAsync(fuCall(method, request, callOpts, timeoutMs), Exception.class, t -> {
240-
boolean reauth;
241-
// first case: if no retries to do then fail
242-
if ((!backoff && attempt > 0) || (deadline != null && deadline.isExpired())
243-
|| (!(reauth = reauthIfRequired(t, baseCallOpts))
244-
&& !retry.retry(t, request))) {
249+
// first cases: determine if we fail immediately
250+
if ((!backoff && attempt > 0) || (deadline != null && deadline.isExpired())) {
251+
// multiple retries disabled or deadline expired
252+
return Futures.immediateFailedFuture(t);
253+
}
254+
boolean reauth = false;
255+
if (authProvider.requiresReauth(t)) {
256+
if (afterReauth) {
257+
// if we have an auth failure immediately following a reauth, give up
258+
// (important to avoid infinite loop of auth failures)
259+
return Futures.immediateFailedFuture(t);
260+
}
261+
reauthenticate(baseCallOpts, t);
262+
reauth = true;
263+
} else if (!retry.retry(t, request)) {
264+
// retry predicate says no (non retryable request and/or error)
245265
return Futures.immediateFailedFuture(t);
246266
}
247267

248268
// second case: immediate retry (first failure or after auth failure + reauth)
249269
if (reauth || attempt == 0 && immediateRetryLimiter.tryAcquire()) {
250270
return call(method, precondition, request, executor, retry,
251-
reauth ? attempt : 1, backoff, deadline, timeoutMs);
271+
reauth ? attempt : 1, reauth, backoff, deadline, timeoutMs);
252272
}
253273
int nextAttempt = attempt <= 1 ? 2 : attempt + 1; // skip attempt if we were rate-limited
254274

@@ -257,8 +277,8 @@ private <ReqT,R> ListenableFuture<R> call(MethodDescriptor<ReqT,R> method,
257277
if (deadline != null && deadline.timeRemaining(MILLISECONDS) < delayMs) {
258278
return Futures.immediateFailedFuture(t);
259279
}
260-
return Futures.scheduleAsync(() -> call(method, precondition, request, executor,
261-
retry, nextAttempt, backoff, deadline, timeoutMs), delayMs, MILLISECONDS, ses);
280+
return Futures.scheduleAsync(() -> call(method, precondition, request, executor, retry,
281+
nextAttempt, false, backoff, deadline, timeoutMs), delayMs, MILLISECONDS, ses);
262282
}, executor != null ? executor : directExecutor());
263283
}
264284

@@ -313,19 +333,15 @@ protected <ReqT,R> ListenableFuture<R> fuCall(MethodDescriptor<ReqT,R> method, R
313333

314334
protected boolean retryableStreamError(Throwable error) {
315335
return (Status.fromThrowable(error).getCode() != Code.INVALID_ARGUMENT
316-
&& !causedByJavaError(error));
317-
}
318-
319-
protected static boolean causedByJavaError(Throwable t) {
320-
return causedBy(t, Error.class);
336+
&& !causedBy(error, Error.class));
321337
}
322338

323339
/**
324340
* @return true if reauthentication was required and attempted
325341
*/
326342
protected boolean reauthIfRequired(Throwable error, CallOptions callOpts) {
327343
if (authProvider.requiresReauth(error)) {
328-
reauthenticate(callOpts);
344+
reauthenticate(callOpts, error);
329345
return true;
330346
}
331347
return false;
@@ -336,18 +352,18 @@ public static boolean isConnectException(Throwable t) {
336352
}
337353

338354
public static Code codeFromThrowable(Throwable t) {
339-
Status status = Status.fromThrowable(t);
340-
return status != null ? status.getCode() : null;
355+
return Status.fromThrowable(t).getCode(); // fromThrowable won't return null
341356
}
342357

343358

344-
private void reauthenticate(CallOptions failedOpts) {
359+
private void reauthenticate(CallOptions failedOpts, Throwable authFailure) {
345360
// assert name != null && password != null;
346361
if (getCallOptions() == failedOpts) { // obj identity comparison intentional
347362
synchronized (this) {
348363
CallOptions callOpts = getCallOptions();
349364
if (callOpts == failedOpts) {
350-
callOptions = callOpts.withCallCredentials(authProvider.refreshCredentials());
365+
callOptions = callOpts.withCallCredentials(
366+
authProvider.refreshCredentials(authFailure));
351367
}
352368
}
353369
}
@@ -413,6 +429,8 @@ final class ResilientBiDiStream<ReqT,RespT> {
413429
private boolean finished;
414430
private Throwable error;
415431

432+
private boolean lastAuthFailed;
433+
416434
/**
417435
*
418436
* @param method
@@ -605,6 +623,7 @@ public void beforeStart(ClientCallStreamObserver<ReqT> rs) {
605623
// called from grpc response thread
606624
@Override
607625
public void onNext(RespT value) {
626+
lastAuthFailed = false;
608627
respStream.onNext(value);
609628
}
610629
// called from grpc response thread
@@ -614,9 +633,10 @@ public void onError(Throwable t) {
614633
if (finished) {
615634
finalError = true;
616635
} else {
617-
reauthed = reauthIfRequired(t, sentCallOptions);
636+
reauthed = !lastAuthFailed && reauthIfRequired(t, sentCallOptions);
618637
finalError = !reauthed && !retryableStreamError(t);
619638
}
639+
lastAuthFailed = reauthed;
620640
if (!finalError) {
621641
int errCount = -1;
622642
String msg;
@@ -670,6 +690,7 @@ public void onError(Throwable t) {
670690
// called from grpc response thread
671691
@Override
672692
public void onCompleted() {
693+
lastAuthFailed = false;
673694
if (!finished) {
674695
logger.warn("Unexpected onCompleted received"
675696
+ " for stream of method " + method.getFullMethodName());

0 commit comments

Comments
 (0)