3030 r ,
3131 args ,
3232 reqs ,
33- workers
33+ workers ,
34+ errors = []
3435}).
3536
3637go (_DbName , [], _Options ) ->
@@ -72,6 +73,9 @@ handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _Worker, #st{} = St) ->
7273 Reqs1 = maps :fold (FoldFun , Reqs , DeadWorkers ),
7374 Error = {error , {nodedown , <<" progress not possible" >>}},
7475 handle_error (Error , St # st {workers = Workers1 , reqs = Reqs1 });
76+ handle_message ({rexi_EXIT , {maintenance_mode , _Node }}, Worker , # st {} = St ) ->
77+ % Remove the node to make it easier to de-duplicate later
78+ handle_message (maintenance_mode , Worker , St );
7579handle_message ({rexi_EXIT , Reason }, Worker , # st {} = St ) ->
7680 handle_message (Reason , Worker , St );
7781handle_message ({error , Reason }, Worker , # st {} = St ) ->
@@ -112,7 +116,7 @@ responses_fold({ArgRef, NewResp}, #{} = Reqs) ->
112116 }
113117 }.
114118
115- handle_error (Error , # st {workers = Workers , reqs = Reqs } = St ) ->
119+ handle_error (Error , # st {workers = Workers , errors = Errors , reqs = Reqs } = St ) ->
116120 case success_possible (Reqs ) of
117121 true ->
118122 case have_viable_workers (Workers ) of
@@ -124,7 +128,8 @@ handle_error(Error, #st{workers = Workers, reqs = Reqs} = St) ->
124128 end ;
125129 false ->
126130 stop_workers (Workers ),
127- {error , Error }
131+ % We may have multiple errors but need to pick one, so pick the first
132+ {error , hd (merge_errors (Errors , Error ))}
128133 end .
129134
130135% De-duplicate identical responses as we go along
@@ -142,6 +147,17 @@ sort_key({ok, #doc{id = Id, revs = Revs, deleted = Deleted}}) ->
142147sort_key (NotFound ) ->
143148 NotFound .
144149
150+ % We're trying to hide maintenance mode if possible. So if there are
151+ % non-maintenance mode errors, such as timeouts, etc, we remove the maintenance
152+ % mode from the list, otherwise, we keep it.
153+ %
154+ merge_errors (Errors , Error ) ->
155+ Errors1 = lists :uniq ([Error | Errors ]),
156+ case Errors1 of
157+ [maintenance_mode ] -> [maintenance_mode ];
158+ [_ | _ ] -> lists :delete (maintenance_mode , Errors1 )
159+ end .
160+
145161% Build a #{ArgRef => #req{}} map. ArgRef references the initial {{Id, Revs},
146162% Opts} tuples and the #req{} is a record keeping track of response for just
147163% that {Id, Revs} pair.
@@ -206,11 +222,17 @@ have_viable_workers(#{} = Workers) ->
206222 map_size (Workers ) > 0 .
207223
208224% We can still return success if we either have some waiting workers, or at
209- % least one response already for each {Id, Revs} pair.
225+ % least one response already for each {Id, Revs} pair. We don't simply check
226+ % for W + R > 0 but check that responses have any entries, as not_found entries
227+ % won't bump the R values.
210228%
211229success_possible (#{} = Reqs ) ->
212- Fun = fun (_ , # req {wcnt = W , rcnt = R }, Acc ) -> Acc andalso W + R > 0 end ,
213- maps :fold (Fun , true , Reqs ).
230+ maps :fold (fun success_possible_fold /3 , true , Reqs ).
231+
232+ success_possible_fold (_Key , # req {}, _Acc = false ) ->
233+ false ;
234+ success_possible_fold (_Key , # req {wcnt = W , responses = Resps }, _Acc ) ->
235+ W > 0 orelse Resps =/= [].
214236
215237r_met (#{} = Reqs , ExpectedR ) ->
216238 Fun = fun (_ , # req {rcnt = R }, Acc ) -> min (R , Acc ) end ,
@@ -316,6 +338,8 @@ open_revs_quorum_test_() ->
316338 ? TDEF_FE (t_finish_quorum ),
317339 ? TDEF_FE (t_no_quorum_on_different_responses ),
318340 ? TDEF_FE (t_no_quorum_on_not_found ),
341+ ? TDEF_FE (t_not_found_and_maintenance_mode ),
342+ ? TDEF_FE (t_all_maintenance_mode ),
319343 ? TDEF_FE (t_done_on_third ),
320344 ? TDEF_FE (t_all_different_responses ),
321345 ? TDEF_FE (t_ancestors_merge_correctly ),
@@ -400,6 +424,22 @@ t_no_quorum_on_not_found(_) ->
400424 Res2 = handle_message ([[foo2 (), bar1 ()]], W3 , S2 ),
401425 ? assertEqual ({stop , [[foo2 (), bar1 ()]]}, Res2 ).
402426
427+ t_not_found_and_maintenance_mode (_ ) ->
428+ S0 = # st {workers = Workers0 } = st0 (),
429+ [W1 , W2 , W3 ] = lists :sort (maps :keys (Workers0 )),
430+ {ok , S1 } = handle_message ([[bazNF ()]], W1 , S0 ),
431+ {ok , S2 } = handle_message ({error , timeout }, W2 , S1 ),
432+ Res = handle_message ({rexi_EXIT , {maintenance_mode , foo }}, W3 , S2 ),
433+ ? assertEqual ({stop , [[bazNF ()]]}, Res ).
434+
435+ t_all_maintenance_mode (_ ) ->
436+ S0 = # st {workers = Workers0 } = st0 (),
437+ [W1 , W2 , W3 ] = lists :sort (maps :keys (Workers0 )),
438+ {ok , S1 } = handle_message ({rexi_EXIT , {maintenance_mode , foo }}, W1 , S0 ),
439+ {ok , S2 } = handle_message ({rexi_EXIT , {maintenance_mode , foo }}, W2 , S1 ),
440+ Res = handle_message ({rexi_EXIT , {maintenance_mode , foo }}, W3 , S2 ),
441+ ? assertEqual ({error , maintenance_mode }, Res ).
442+
403443t_done_on_third (_ ) ->
404444 S0 = # st {workers = Workers0 } = st0 (),
405445 [W1 , W2 , W3 ] = lists :sort (maps :keys (Workers0 )),
0 commit comments