Commit 44485f49 authored by Guido Trotter
Browse files

Fix master candidate removal



Currently, during a master candidate removal, when it's possible to
promote another node, the removal operation fails because of a corrupt
config before it's even possible to do the promotion. Fix this by
doing the promotion first, excluding the node that is being removed.
Signed-off-by: Guido Trotter <ultrotter@google.com>
Reviewed-by: Olivier Tharan <olive@google.com>
parent 8fbf5ac7
...@@ -670,17 +670,17 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None): ...@@ -670,17 +670,17 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
return _BuildInstanceHookEnv(**args) return _BuildInstanceHookEnv(**args)
def _AdjustCandidatePool(lu): def _AdjustCandidatePool(lu, exceptions):
"""Adjust the candidate pool after node operations. """Adjust the candidate pool after node operations.
""" """
mod_list = lu.cfg.MaintainCandidatePool() mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list: if mod_list:
lu.LogInfo("Promoted nodes to master candidate role: %s", lu.LogInfo("Promoted nodes to master candidate role: %s",
", ".join(node.name for node in mod_list)) ", ".join(node.name for node in mod_list))
for name in mod_list: for name in mod_list:
lu.context.ReaddNode(name) lu.context.ReaddNode(name)
mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats() mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max: if mc_now > mc_max:
lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
(mc_now, mc_max)) (mc_now, mc_max))
...@@ -1946,7 +1946,7 @@ class LUSetClusterParams(LogicalUnit): ...@@ -1946,7 +1946,7 @@ class LUSetClusterParams(LogicalUnit):
if self.op.candidate_pool_size is not None: if self.op.candidate_pool_size is not None:
self.cluster.candidate_pool_size = self.op.candidate_pool_size self.cluster.candidate_pool_size = self.op.candidate_pool_size
# we need to update the pool size here, otherwise the save will fail # we need to update the pool size here, otherwise the save will fail
_AdjustCandidatePool(self) _AdjustCandidatePool(self, [])
self.cfg.Update(self.cluster) self.cfg.Update(self.cluster)
...@@ -2282,6 +2282,8 @@ class LURemoveNode(LogicalUnit): ...@@ -2282,6 +2282,8 @@ class LURemoveNode(LogicalUnit):
logging.info("Stopping the node daemon and removing configs from node %s", logging.info("Stopping the node daemon and removing configs from node %s",
node.name) node.name)
# Promote nodes to master candidate as needed
_AdjustCandidatePool(self, exceptions=[node.name])
self.context.RemoveNode(node.name) self.context.RemoveNode(node.name)
# Run post hooks on the node before it's removed # Run post hooks on the node before it's removed
...@@ -2297,9 +2299,6 @@ class LURemoveNode(LogicalUnit): ...@@ -2297,9 +2299,6 @@ class LURemoveNode(LogicalUnit):
self.LogWarning("Errors encountered on the remote node while leaving" self.LogWarning("Errors encountered on the remote node while leaving"
" the cluster: %s", msg) " the cluster: %s", msg)
# Promote nodes to master candidate as needed
_AdjustCandidatePool(self)
class LUQueryNodes(NoHooksLU): class LUQueryNodes(NoHooksLU):
"""Logical unit for querying nodes. """Logical unit for querying nodes.
......
...@@ -1044,14 +1044,16 @@ class ConfigWriter: ...@@ -1044,14 +1044,16 @@ class ConfigWriter:
return self._UnlockedGetMasterCandidateStats(exceptions) return self._UnlockedGetMasterCandidateStats(exceptions)
@locking.ssynchronized(_config_lock) @locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self): def MaintainCandidatePool(self, exceptions):
"""Try to grow the candidate pool to the desired size. """Try to grow the candidate pool to the desired size.
@type exceptions: list
@param exceptions: if passed, list of nodes that should be ignored
@rtype: list @rtype: list
@return: list with the adjusted nodes (L{objects.Node} instances) @return: list with the adjusted nodes (L{objects.Node} instances)
""" """
mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats() mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
mod_list = [] mod_list = []
if mc_now < mc_max: if mc_now < mc_max:
node_list = self._config_data.nodes.keys() node_list = self._config_data.nodes.keys()
...@@ -1060,7 +1062,8 @@ class ConfigWriter: ...@@ -1060,7 +1062,8 @@ class ConfigWriter:
if mc_now >= mc_max: if mc_now >= mc_max:
break break
node = self._config_data.nodes[name] node = self._config_data.nodes[name]
if node.master_candidate or node.offline or node.drained: if (node.master_candidate or node.offline or node.drained or
node.name in exceptions):
continue continue
mod_list.append(node) mod_list.append(node)
node.master_candidate = True node.master_candidate = True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment