From 44485f49ac9d7e4169f81a28545d1aec5955d01e Mon Sep 17 00:00:00 2001 From: Guido Trotter <ultrotter@google.com> Date: Thu, 1 Oct 2009 17:13:41 +0100 Subject: [PATCH] Fix master candidate removal Currently, during a master candidate removal, when it's possible to promote another node, the removal operation fails because of a corrupt config before it's even possible to do the promotion. Fix this by doing the promotion before the removal, excluding the node being removed. Signed-off-by: Guido Trotter <ultrotter@google.com> Reviewed-by: Olivier Tharan <olive@google.com> --- lib/cmdlib.py | 13 ++++++------- lib/config.py | 9 ++++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/cmdlib.py b/lib/cmdlib.py index e1056903c..3ce419c95 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -670,17 +670,17 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None): return _BuildInstanceHookEnv(**args) -def _AdjustCandidatePool(lu): +def _AdjustCandidatePool(lu, exceptions): """Adjust the candidate pool after node operations. 
""" - mod_list = lu.cfg.MaintainCandidatePool() + mod_list = lu.cfg.MaintainCandidatePool(exceptions) if mod_list: lu.LogInfo("Promoted nodes to master candidate role: %s", ", ".join(node.name for node in mod_list)) for name in mod_list: lu.context.ReaddNode(name) - mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats() + mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) if mc_now > mc_max: lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % (mc_now, mc_max)) @@ -1946,7 +1946,7 @@ class LUSetClusterParams(LogicalUnit): if self.op.candidate_pool_size is not None: self.cluster.candidate_pool_size = self.op.candidate_pool_size # we need to update the pool size here, otherwise the save will fail - _AdjustCandidatePool(self) + _AdjustCandidatePool(self, []) self.cfg.Update(self.cluster) @@ -2282,6 +2282,8 @@ class LURemoveNode(LogicalUnit): logging.info("Stopping the node daemon and removing configs from node %s", node.name) + # Promote nodes to master candidate as needed + _AdjustCandidatePool(self, exceptions=[node.name]) self.context.RemoveNode(node.name) # Run post hooks on the node before it's removed @@ -2297,9 +2299,6 @@ class LURemoveNode(LogicalUnit): self.LogWarning("Errors encountered on the remote node while leaving" " the cluster: %s", msg) - # Promote nodes to master candidate as needed - _AdjustCandidatePool(self) - class LUQueryNodes(NoHooksLU): """Logical unit for querying nodes. diff --git a/lib/config.py b/lib/config.py index 41fcf094d..54f80702a 100644 --- a/lib/config.py +++ b/lib/config.py @@ -1044,14 +1044,16 @@ class ConfigWriter: return self._UnlockedGetMasterCandidateStats(exceptions) @locking.ssynchronized(_config_lock) - def MaintainCandidatePool(self): + def MaintainCandidatePool(self, exceptions): """Try to grow the candidate pool to the desired size. 
+ @type exceptions: list + @param exceptions: if passed, list of nodes that should be ignored @rtype: list @return: list with the adjusted nodes (L{objects.Node} instances) """ - mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats() + mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions) mod_list = [] if mc_now < mc_max: node_list = self._config_data.nodes.keys() @@ -1060,7 +1062,8 @@ class ConfigWriter: if mc_now >= mc_max: break node = self._config_data.nodes[name] - if node.master_candidate or node.offline or node.drained: + if (node.master_candidate or node.offline or node.drained or + node.name in exceptions): continue mod_list.append(node) node.master_candidate = True -- GitLab