From 601908d0f5e528a3ddfc7cefb2bbfd9f99e7a39f Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 9 Mar 2010 13:07:08 +0100
Subject: [PATCH] Rework the node modify for mc-demotion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current code in LUSetNodeParams regarding demotion from the master
candidate role is complicated and duplicates the code in ConfigWriter,
where such decisions should be made. Furthermore, we still cannot
demote nodes (not even with force) if other regular nodes exist.

This patch adds a new opcode attribute 'auto_promote', and changes the
decision tree as follows:

- if the node will be set to offline or drained or explicitly demoted
  from master candidate, and this parameter is set, then we lock all
  nodes in ExpandNames()
- later, in CheckPrereq(), if the node is indeed a master candidate,
  and the future state (as computed via GetMasterCandidateStats with
  the current node in the exception list) has fewer nodes than it
  should, and we didn't lock all nodes, we exit with an exception
- in Exec, if we locked all nodes, we do an AdjustCandidatePool() run,
  to ensure nodes are promoted as needed (we do it before updating the
  node both to avoid a warning and so that, if the LU fails between the
  two steps, we're not left with an inconsistent state); a condensed
  sketch of this flow follows the list
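
A condensed, standalone sketch of the decision tree above (illustration
only, not the actual LU code: plan_node_modify and its arguments are
made-up names, and GetMasterCandidateStats is reduced to two plain
numbers passed in by the caller):

  # Hypothetical helper mirroring the ExpandNames/CheckPrereq decisions
  def plan_node_modify(auto_promote, offline, drained, master_candidate,
                       node_is_mc, mc_remaining, mc_should):
    """Return (lock_all, error) for a requested node modification."""
    offline_or_drain = offline is True or drained is True
    might_demote = master_candidate is False or offline_or_drain
    # ExpandNames(): take the cluster-wide node lock only if we might
    # have to promote other nodes afterwards
    lock_all = bool(auto_promote and might_demote)
    # CheckPrereq(): without the cluster-wide lock, refuse if the pool
    # would end up short once this node is taken out of it
    if node_is_mc and might_demote and not lock_all:
      if mc_remaining != mc_should:
        return lock_all, "Not enough master candidates, pass auto_promote"
    return lock_all, None

  # Demoting the last spare candidate: refused without auto_promote,
  # accepted (with all nodes locked) when auto_promote is set
  print(plan_node_modify(False, True, None, None, True, 9, 10))
  print(plan_node_modify(True, True, None, None, True, 9, 10))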

Note that in Exec we run the AdjustCP irrespective of any node state
change (just based on lock status), so we could simplify CheckPrereq
even further by not checking the future state at all and simply
requiring auto_promote/lock_all for master candidates, since the case
where we have more master candidates than needed is rarer. OTOH, that
would prevent manually promoting another node ahead of time, which is
why I didn't choose this way.
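
For completeness, this is roughly how a caller would set the new
attribute (a sketch only: the node name is invented, and the job
submission plumbing around it is not part of this patch):

  from ganeti import opcodes

  # Drain a master candidate and let the LU promote a replacement
  op = opcodes.OpSetNodeParams(node_name="node3.example.com",
                               drained=True,
                               auto_promote=True)
  # the opcode would then be submitted as a job in the usual way,
  # e.g. through the luxi client's SubmitJob([op])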

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
---
 lib/cmdlib.py  | 54 ++++++++++++++++++++++++++++++--------------------
 lib/opcodes.py |  1 +
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 693736fbc..39e7d0651 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -3158,6 +3158,7 @@ class LUSetNodeParams(LogicalUnit):
     _CheckBooleanOpField(self.op, 'master_candidate')
     _CheckBooleanOpField(self.op, 'offline')
     _CheckBooleanOpField(self.op, 'drained')
+    _CheckBooleanOpField(self.op, 'auto_promote')
     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
     if all_mods.count(None) == 3:
       raise errors.OpPrereqError("Please pass at least one modification",
@@ -3167,8 +3168,22 @@ class LUSetNodeParams(LogicalUnit):
                                  " state at the same time",
                                  errors.ECODE_INVAL)
 
+    # Flags telling us whether we're offlining, draining or demoting the node
+    self.offline_or_drain = (self.op.offline == True or
+                             self.op.drained == True)
+    self.deoffline_or_drain = (self.op.offline == False or
+                               self.op.drained == False)
+    self.might_demote = (self.op.master_candidate == False or
+                         self.offline_or_drain)
+
+    self.lock_all = self.op.auto_promote and self.might_demote
+
+
   def ExpandNames(self):
-    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+    if self.lock_all:
+      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
+    else:
+      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -3203,25 +3218,17 @@ class LUSetNodeParams(LogicalUnit):
                                    " only via masterfailover",
                                    errors.ECODE_INVAL)
 
-    # Boolean value that tells us whether we're offlining or draining the node
-    offline_or_drain = self.op.offline == True or self.op.drained == True
-    deoffline_or_drain = self.op.offline == False or self.op.drained == False
-
-    if (node.master_candidate and
-        (self.op.master_candidate == False or offline_or_drain)):
-      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
-      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
-      if mc_now <= cp_size:
-        msg = ("Not enough master candidates (desired"
-               " %d, new value will be %d)" % (cp_size, mc_now-1))
-        # Only allow forcing the operation if it's an offline/drain operation,
-        # and we could not possibly promote more nodes.
-        # FIXME: this can still lead to issues if in any way another node which
-        # could be promoted appears in the meantime.
-        if self.op.force and offline_or_drain and mc_should == mc_max:
-          self.LogWarning(msg)
-        else:
-          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
+
+    if node.master_candidate and self.might_demote and not self.lock_all:
+      assert not self.op.auto_promote, "auto-promote set but lock_all not"
+      # check if after removing the current node, we're missing master
+      # candidates
+      (mc_remaining, mc_should, _) = \
+          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
+      if mc_remaining != mc_should:
+        raise errors.OpPrereqError("Not enough master candidates, please"
+                                   " pass auto_promote to allow promotion",
+                                   errors.ECODE_INVAL)
 
     if (self.op.master_candidate == True and
         ((node.offline and not self.op.offline == False) or
@@ -3231,7 +3238,7 @@ class LUSetNodeParams(LogicalUnit):
                                  errors.ECODE_INVAL)
 
     # If we're being deofflined/drained, we'll MC ourself if needed
-    if (deoffline_or_drain and not offline_or_drain and not
+    if (self.deoffline_or_drain and not self.offline_or_drain and not
         self.op.master_candidate == True and not node.master_candidate):
       self.op.master_candidate = _DecideSelfPromotion(self)
       if self.op.master_candidate:
@@ -3286,8 +3293,13 @@ class LUSetNodeParams(LogicalUnit):
           node.offline = False
           result.append(("offline", "clear offline status due to drain"))
 
+    # we locked all nodes, so adjust the CP before updating this node
+    if self.lock_all:
+      _AdjustCandidatePool(self, [node.name])
+
     # this will trigger configuration file update, if needed
     self.cfg.Update(node, feedback_fn)
+
     # this will trigger job queue propagation or cleanup
     if changed_mc:
       self.context.ReaddNode(node)
diff --git a/lib/opcodes.py b/lib/opcodes.py
index 0799f8ca4..dd5ff095d 100644
--- a/lib/opcodes.py
+++ b/lib/opcodes.py
@@ -413,6 +413,7 @@ class OpSetNodeParams(OpCode):
     "master_candidate",
     "offline",
     "drained",
+    "auto_promote",
     ]
 
 
-- 
GitLab