Commit 44485f49 authored by Guido Trotter
Browse files

Fix master candidate removal



Currently, during the removal of a master candidate, when it's possible
to promote another node, the removal operation fails because of a corrupt
config before the promotion can even take place. Fix this by doing the
promotion before the removal, excluding the node being removed.
Signed-off-by: Guido Trotter <ultrotter@google.com>
Reviewed-by: Olivier Tharan <olive@google.com>
parent 8fbf5ac7
......@@ -670,17 +670,17 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
return _BuildInstanceHookEnv(**args)
def _AdjustCandidatePool(lu):
def _AdjustCandidatePool(lu, exceptions):
"""Adjust the candidate pool after node operations.
"""
mod_list = lu.cfg.MaintainCandidatePool()
mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list:
lu.LogInfo("Promoted nodes to master candidate role: %s",
", ".join(node.name for node in mod_list))
for name in mod_list:
lu.context.ReaddNode(name)
mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats()
mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max:
lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
(mc_now, mc_max))
......@@ -1946,7 +1946,7 @@ class LUSetClusterParams(LogicalUnit):
if self.op.candidate_pool_size is not None:
self.cluster.candidate_pool_size = self.op.candidate_pool_size
# we need to update the pool size here, otherwise the save will fail
_AdjustCandidatePool(self)
_AdjustCandidatePool(self, [])
self.cfg.Update(self.cluster)
......@@ -2282,6 +2282,8 @@ class LURemoveNode(LogicalUnit):
logging.info("Stopping the node daemon and removing configs from node %s",
node.name)
# Promote nodes to master candidate as needed
_AdjustCandidatePool(self, exceptions=[node.name])
self.context.RemoveNode(node.name)
# Run post hooks on the node before it's removed
......@@ -2297,9 +2299,6 @@ class LURemoveNode(LogicalUnit):
self.LogWarning("Errors encountered on the remote node while leaving"
" the cluster: %s", msg)
# Promote nodes to master candidate as needed
_AdjustCandidatePool(self)
class LUQueryNodes(NoHooksLU):
"""Logical unit for querying nodes.
......
......@@ -1044,14 +1044,16 @@ class ConfigWriter:
return self._UnlockedGetMasterCandidateStats(exceptions)
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self):
def MaintainCandidatePool(self, exceptions):
"""Try to grow the candidate pool to the desired size.
@type exceptions: list
@param exceptions: if passed, list of nodes that should be ignored
@rtype: list
@return: list with the adjusted nodes (L{objects.Node} instances)
"""
mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
mod_list = []
if mc_now < mc_max:
node_list = self._config_data.nodes.keys()
......@@ -1060,7 +1062,8 @@ class ConfigWriter:
if mc_now >= mc_max:
break
node = self._config_data.nodes[name]
if node.master_candidate or node.offline or node.drained:
if (node.master_candidate or node.offline or node.drained or
node.name in exceptions):
continue
mod_list.append(node)
node.master_candidate = True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment