From 44485f49ac9d7e4169f81a28545d1aec5955d01e Mon Sep 17 00:00:00 2001
From: Guido Trotter <ultrotter@google.com>
Date: Thu, 1 Oct 2009 17:13:41 +0100
Subject: [PATCH] Fix master candidate removal

Currently, during a master candidate removal, when it is possible to
promote another node, the removal operation fails because of a corrupt
config before the promotion can even take place. Fix this by doing the
promotion first, excluding the node that is being removed.

Signed-off-by: Guido Trotter <ultrotter@google.com>
Reviewed-by: Olivier Tharan <olive@google.com>
---
 lib/cmdlib.py | 13 ++++++-------
 lib/config.py |  9 ++++++---
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index e1056903c..3ce419c95 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -670,17 +670,17 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
   return _BuildInstanceHookEnv(**args)
 
 
-def _AdjustCandidatePool(lu):
+def _AdjustCandidatePool(lu, exceptions):
   """Adjust the candidate pool after node operations.
 
   """
-  mod_list = lu.cfg.MaintainCandidatePool()
+  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
   if mod_list:
     lu.LogInfo("Promoted nodes to master candidate role: %s",
                ", ".join(node.name for node in mod_list))
     for name in mod_list:
       lu.context.ReaddNode(name)
-  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats()
+  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
   if mc_now > mc_max:
     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
                (mc_now, mc_max))
@@ -1946,7 +1946,7 @@ class LUSetClusterParams(LogicalUnit):
     if self.op.candidate_pool_size is not None:
       self.cluster.candidate_pool_size = self.op.candidate_pool_size
       # we need to update the pool size here, otherwise the save will fail
-      _AdjustCandidatePool(self)
+      _AdjustCandidatePool(self, [])
 
     self.cfg.Update(self.cluster)
 
@@ -2282,6 +2282,8 @@ class LURemoveNode(LogicalUnit):
     logging.info("Stopping the node daemon and removing configs from node %s",
                  node.name)
 
+    # Promote nodes to master candidate as needed
+    _AdjustCandidatePool(self, exceptions=[node.name])
     self.context.RemoveNode(node.name)
 
     # Run post hooks on the node before it's removed
@@ -2297,9 +2299,6 @@ class LURemoveNode(LogicalUnit):
       self.LogWarning("Errors encountered on the remote node while leaving"
                       " the cluster: %s", msg)
 
-    # Promote nodes to master candidate as needed
-    _AdjustCandidatePool(self)
-
 
 class LUQueryNodes(NoHooksLU):
   """Logical unit for querying nodes.
diff --git a/lib/config.py b/lib/config.py
index 41fcf094d..54f80702a 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -1044,14 +1044,16 @@ class ConfigWriter:
     return self._UnlockedGetMasterCandidateStats(exceptions)
 
   @locking.ssynchronized(_config_lock)
-  def MaintainCandidatePool(self):
+  def MaintainCandidatePool(self, exceptions):
     """Try to grow the candidate pool to the desired size.
 
+    @type exceptions: list
+    @param exceptions: list of node names that should be ignored
     @rtype: list
     @return: list with the adjusted nodes (L{objects.Node} instances)
 
     """
-    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
+    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
     mod_list = []
     if mc_now < mc_max:
       node_list = self._config_data.nodes.keys()
@@ -1060,7 +1062,8 @@ class ConfigWriter:
         if mc_now >= mc_max:
           break
         node = self._config_data.nodes[name]
-        if node.master_candidate or node.offline or node.drained:
+        if (node.master_candidate or node.offline or node.drained or
+            node.name in exceptions):
           continue
         mod_list.append(node)
         node.master_candidate = True
-- 
GitLab