1515 */
1616package io .javaoperatorsdk .operator ;
1717
18- import java .util .Arrays ;
1918import java .util .Collection ;
19+ import java .util .List ;
2020import java .util .UUID ;
2121import java .util .concurrent .CompletableFuture ;
22+ import java .util .concurrent .atomic .AtomicBoolean ;
2223import java .util .function .Predicate ;
2324import java .util .stream .Collectors ;
2425
3637import io .javaoperatorsdk .operator .api .config .ConfigurationService ;
3738import io .javaoperatorsdk .operator .api .config .LeaderElectionConfiguration ;
3839
40+ /**
41+ * Manages the leader-election lifecycle for an {@link Operator} instance. Leader election ensures
42+ * that, in a high-availability setup with multiple replicas of the same operator, only one replica
43+ * at a time actively reconciles resources. The replica currently holding the lease is referred to
44+ * as the leader, and the others stand by until the lease becomes available.
45+ *
46+ * <p>Leader election is opt-in. It is enabled when a {@link LeaderElectionConfiguration} is
47+ * supplied via {@link
48+ * io.javaoperatorsdk.operator.api.config.ConfigurationServiceOverrider#withLeaderElectionConfiguration(LeaderElectionConfiguration)
49+ * ConfigurationServiceOverrider#withLeaderElectionConfiguration(LeaderElectionConfiguration)}. The
50+ * configuration controls the lease name, namespace, durations, and optional user-supplied {@link
51+ * LeaderCallbacks}.
52+ *
53+ * <p>{@link #stopLeading()} behaves differently depending on how it was triggered:
54+ *
55+ * <ul>
56+ * <li>If {@link #stop()} has already been called (graceful shutdown), it logs and returns without
57+ * exiting. This avoids deadlocking against the JVM shutdown hook lock when {@link
58+ * Operator#stop()} is invoked from a JVM shutdown hook.
59+ * <li>Otherwise, if the configured {@link LeaderElectionConfiguration#isExitOnStopLeading()} is
60+ * {@code true} (the default), it calls {@code System.exit(1)} so the process restarts and
61+ * another replica can take over.
62+ * <li>If {@code isExitOnStopLeading()} is {@code false}, it only logs and returns.
63+ * </ul>
64+ *
65+ * <p>The lifecycle methods {@link #start()} and {@link #stop()} are called by {@link Operator} as
66+ * part of {@link Operator#start()} and {@link Operator#stop()} respectively. Users typically do not
67+ * interact with this class directly.
68+ */
3969public class LeaderElectionManager {
4070
4171 private static final Logger log = LoggerFactory .getLogger (LeaderElectionManager .class );
72+ private static final List <String > REQUIRED_VERBS = List .of ("create" , "update" , "get" );
4273
4374 public static final String NO_PERMISSION_TO_LEASE_RESOURCE_MESSAGE =
4475 "No permission to lease resource." ;
@@ -53,6 +84,10 @@ public class LeaderElectionManager {
5384 private final ConfigurationService configurationService ;
5485 private String leaseNamespace ;
5586 private String leaseName ;
87+ // Set in stop() before cancelling the leader-election future. Checked in stopLeading() so that
88+ // a graceful shutdown does not call System.exit, which would otherwise deadlock against the
89+ // JVM shutdown hook lock when stop() is invoked from a JVM shutdown hook.
90+ private final AtomicBoolean stoppingGracefully = new AtomicBoolean (false );
5691
5792 LeaderElectionManager (
5893 ControllerManager controllerManager , ConfigurationService configurationService ) {
@@ -118,7 +153,11 @@ private void startLeading() {
118153 controllerManager .startEventProcessing ();
119154 }
120155
121- private void stopLeading () {
156+ void stopLeading () {
157+ if (stoppingGracefully .get ()) {
158+ log .info ("Stopped leading for identity: {} during graceful shutdown." , identity );
159+ return ;
160+ }
122161 if (configurationService .getLeaderElectionConfiguration ().orElseThrow ().isExitOnStopLeading ()) {
123162 log .info ("Stopped leading for identity: {}. Exiting." , identity );
124163 // When leader stops leading the process ends immediately to prevent multiple reconciliations
@@ -147,13 +186,13 @@ public void start() {
147186 }
148187
149188 public void stop () {
189+ stoppingGracefully .set (true );
150190 if (leaderElectionFuture != null ) {
151191 leaderElectionFuture .cancel (false );
152192 }
153193 }
154194
155195 private void checkLeaseAccess () {
156- var verbsRequired = Arrays .asList ("create" , "update" , "get" );
157196 SelfSubjectRulesReview review = new SelfSubjectRulesReview ();
158197 review .setSpec (new SelfSubjectRulesReviewSpecBuilder ().withNamespace (leaseNamespace ).build ());
159198 var reviewResult = configurationService .getKubernetesClient ().resource (review ).create ();
@@ -168,16 +207,15 @@ private void checkLeaseAccess() {
168207 || rule .getResourceNames ().contains (leaseName ))
169208 .map (ResourceRule ::getVerbs )
170209 .flatMap (Collection ::stream )
171- .distinct ()
172- .collect (Collectors .toList ());
173- if (verbsAllowed .contains (UNIVERSAL_VALUE ) || verbsAllowed .containsAll (verbsRequired )) {
210+ .collect (Collectors .toUnmodifiableSet ());
211+ if (verbsAllowed .contains (UNIVERSAL_VALUE ) || verbsAllowed .containsAll (REQUIRED_VERBS )) {
174212 return ;
175213 }
176214
177215 var missingVerbs =
178- verbsRequired .stream ()
216+ REQUIRED_VERBS .stream ()
179217 .filter (Predicate .not (verbsAllowed ::contains ))
180- .collect (Collectors .toList ( ));
218+ .collect (Collectors .joining ( "," ));
181219
182220 throw new OperatorException (
183221 NO_PERMISSION_TO_LEASE_RESOURCE_MESSAGE
0 commit comments