Commit a8ae3eb5 authored by Iustin Pop
Browse files

Fix node readd issues



This patch fixes a few node readd issues.

Currently, the node readd consists of two opcodes:
  - OpSetNodeParams, which resets the offline/drained flags
  - OpAddNode (with readd=True), which reconfigures the node

The problem is that between these two, the configuration is inconsistent
for certain cluster configurations. Thus, this patch removes the first
opcode and modifies LUAddNode to deal with this case too.

The patch also modifies the computation of the intended master_candidate
status, and actually sets the readded node to master candidate if
needed. Previously, we didn't modify the existing node at all.

Finally, the patch modifies the bottom of the Exec() function for this
LU to:
  - trigger a node update, which in turn redistributes the ssconf files
    to all nodes (and thus the new node too)
  - if the new node is not a master candidate, then call the
    node_demote_from_mc RPC so that old master files are cleared

My testing shows this behaves correctly for various cases.
Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
parent 9a5cb537
......@@ -2202,14 +2202,24 @@ class LUAddNode(LogicalUnit):
" based ping to noded port")
cp_size = self.cfg.GetClusterInfo().candidate_pool_size
mc_now, _ = self.cfg.GetMasterCandidateStats()
master_candidate = mc_now < cp_size
if self.op.readd:
exceptions = [node]
else:
exceptions = []
mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max with one, so:
mc_max = min(mc_max + 1, cp_size)
self.master_candidate = mc_now < mc_max
self.new_node = objects.Node(name=node,
primary_ip=primary_ip,
secondary_ip=secondary_ip,
master_candidate=master_candidate,
offline=False, drained=False)
if self.op.readd:
self.new_node = self.cfg.GetNodeInfo(node)
assert self.new_node is not None, "Can't retrieve locked node %s" % node
else:
self.new_node = objects.Node(name=node,
primary_ip=primary_ip,
secondary_ip=secondary_ip,
master_candidate=self.master_candidate,
offline=False, drained=False)
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
......@@ -2218,6 +2228,20 @@ class LUAddNode(LogicalUnit):
new_node = self.new_node
node = new_node.name
# for re-adds, reset the offline/drained/master-candidate flags;
# we need to reset here, otherwise offline would prevent RPC calls
# later in the procedure; this also means that if the re-add
# fails, we are left with a non-offlined, broken node
if self.op.readd:
new_node.drained = new_node.offline = False
self.LogInfo("Readding a node, the offline/drained flags were reset")
# if we demote the node, we do cleanup later in the procedure
new_node.master_candidate = self.master_candidate
# notify the user about any possible mc promotion
if new_node.master_candidate:
self.LogInfo("Node will be a master candidate")
# check connectivity
result = self.rpc.call_version([node])[node]
result.Raise()
......@@ -2313,6 +2337,15 @@ class LUAddNode(LogicalUnit):
if self.op.readd:
self.context.ReaddNode(new_node)
# make sure we redistribute the config
self.cfg.Update(new_node)
# and make sure the new node will not have old files around
if not new_node.master_candidate:
result = self.rpc.call_node_demote_from_mc(new_node.name)
msg = result.RemoteFailMsg()
if msg:
self.LogWarning("Node failed to demote itself from master"
" candidate status: %s" % msg)
else:
self.context.AddNode(new_node)
......
......@@ -100,18 +100,7 @@ def AddNode(opts, args):
output = cl.QueryConfigValues(['cluster_name'])
cluster_name = output[0]
if readd:
# clear the offline and drain flags on the node
ToStdout("Resetting the 'offline' and 'drained' flags due to re-add")
op = opcodes.OpSetNodeParams(node_name=node, force=True,
offline=False, drained=False)
result = SubmitOpCode(op, cl=cl)
if result:
ToStdout("Modified:")
for param, data in result:
ToStdout(" - %-5s -> %s", param, data)
else:
if not readd:
ToStderr("-- WARNING -- \n"
"Performing this operation is going to replace the ssh daemon"
" keypair\n"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment