Commit c06e0c83 authored by Andrea Spadaccini, committed by Guido Trotter
Browse files

Split starting and stopping master IP and daemons



lib/backend.py
* split StartMaster() into ActivateMasterIp() and StartMasterDaemons()
* split StopMaster() into DeactivateMasterIp() and StopMasterDaemons()

lib/server/noded.py, lib/rpc.py
* adapt the call chains to the new functions, define new RPCs

lib/bootstrap.py, lib/cmdlib.py, lib/server/masterd.py
* use the new RPCs
Signed-off-by: Andrea Spadaccini <spadaccio@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
(cherry picked from commit fb460cf7)
parent 34fbc862
......@@ -246,93 +246,79 @@ def GetMasterInfo():
return (master_netdev, master_ip, master_node, primary_ip_family)
def StartMaster(start_daemons, no_voting):
def ActivateMasterIp():
  """Bring up the master IP address on this node.

  The address is first probed on the node daemon port. If it answers,
  nothing is configured: either this node already owns the address (only
  a debug message is logged) or another host does (recorded as a
  failure). If it does not answer, the address is added to the master
  network device and arping (IPv4) or ndisc6 (IPv6) is run for it.

  A failure message collected along the way is handed to _Fail at the
  very end.

  """
  # GetMasterInfo raises on its own when it cannot provide the data
  netdev, address, _, ip_family = GetMasterInfo()

  failure = None
  if not netutils.TcpPing(address, constants.DEFAULT_NODED_PORT):
    if ip_family == netutils.IP6Address.family:
      ipcls = netutils.IP6Address
    else:
      ipcls = netutils.IP4Address

    add_cmd = [
      constants.IP_COMMAND_PATH, "address", "add",
      "%s/%d" % (address, ipcls.iplen),
      "dev", netdev,
      "label", "%s:0" % netdev,
      ]
    added = utils.RunCmd(add_cmd)
    if added.failed:
      failure = "Can't activate master IP: %s" % added.output
      logging.error(failure)

    # The exit code of the commands below is deliberately ignored.
    # NOTE(review): nesting was reconstructed from a flattened diff; confirm
    # these are meant to run even when adding the address failed.
    if ipcls == netutils.IP4Address:
      arping_cmd = ["arping", "-q", "-U", "-c 3", "-I", netdev, "-s",
                    address, address]
      utils.RunCmd(arping_cmd)
    elif ipcls == netutils.IP6Address:
      try:
        utils.RunCmd(["ndisc6", "-q", "-r 3", address, netdev])
      except errors.OpExecError:
        # TODO: Better error reporting
        logging.warning("Can't execute ndisc6, please install if missing")
  elif netutils.IPAddress.Own(address):
    # the address is already configured on this node
    logging.debug("Master IP already configured, doing nothing")
  else:
    failure = "Someone else has the master ip, not activating"
    logging.error(failure)

  if failure:
    _Fail(failure)
def StartMasterDaemons(no_voting):
"""Activate local node as master node.
The function will either try activate the IP address of the master
(unless someone else has it) or also start the master daemons, based
on the start_daemons parameter.
The function will start the master daemons (ganeti-masterd and ganeti-rapi).
@type start_daemons: boolean
@param start_daemons: whether to start the master daemons
(ganeti-masterd and ganeti-rapi), or (if false) activate the
master ip
@type no_voting: boolean
@param no_voting: whether to start ganeti-masterd without a node vote
(if start_daemons is True), but still non-interactively
but still non-interactively
@rtype: None
"""
# GetMasterInfo will raise an exception if not able to return data
master_netdev, master_ip, _, family = GetMasterInfo()
err_msgs = []
# either start the master and rapi daemons
if start_daemons:
if no_voting:
masterd_args = "--no-voting --yes-do-it"
else:
masterd_args = ""
env = {
"EXTRA_MASTERD_ARGS": masterd_args,
}
result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
if result.failed:
msg = "Can't start Ganeti master: %s" % result.output
logging.error(msg)
err_msgs.append(msg)
# or activate the IP
if no_voting:
masterd_args = "--no-voting --yes-do-it"
else:
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
if netutils.IPAddress.Own(master_ip):
# we already have the ip:
logging.debug("Master IP already configured, doing nothing")
else:
msg = "Someone else has the master ip, not activating"
logging.error(msg)
err_msgs.append(msg)
else:
ipcls = netutils.IP4Address
if family == netutils.IP6Address.family:
ipcls = netutils.IP6Address
result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
"%s/%d" % (master_ip, ipcls.iplen),
"dev", master_netdev, "label",
"%s:0" % master_netdev])
if result.failed:
msg = "Can't activate master IP: %s" % result.output
logging.error(msg)
err_msgs.append(msg)
# we ignore the exit code of the following cmds
if ipcls == netutils.IP4Address:
utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev, "-s",
master_ip, master_ip])
elif ipcls == netutils.IP6Address:
try:
utils.RunCmd(["ndisc6", "-q", "-r 3", master_ip, master_netdev])
except errors.OpExecError:
# TODO: Better error reporting
logging.warning("Can't execute ndisc6, please install if missing")
if err_msgs:
_Fail("; ".join(err_msgs))
masterd_args = ""
env = {
"EXTRA_MASTERD_ARGS": masterd_args,
}
def StopMaster(stop_daemons):
"""Deactivate this node as master.
result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
if result.failed:
msg = "Can't start Ganeti master: %s" % result.output
logging.error(msg)
_Fail(msg)
The function will always try to deactivate the IP address of the
master. It will also stop the master daemons depending on the
stop_daemons parameter.
@type stop_daemons: boolean
@param stop_daemons: whether to also stop the master daemons
(ganeti-masterd and ganeti-rapi)
@rtype: None
def DeactivateMasterIp():
"""Deactivate the master IP on this node.
"""
# TODO: log and report back to the caller the error failures; we
......@@ -352,12 +338,23 @@ def StopMaster(stop_daemons):
logging.error("Can't remove the master IP, error: %s", result.output)
# but otherwise ignore the failure
if stop_daemons:
result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
if result.failed:
logging.error("Could not stop Ganeti master, command %s had exitcode %s"
" and error %s",
result.cmd, result.exit_code, result.output)
def StopMasterDaemons():
  """Shut down the master daemons on this node.

  Runs the daemon utility with the "stop-master" action (covering
  ganeti-masterd and ganeti-rapi). A failing command is only logged; it
  is not reported back to the caller.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this
  outcome = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
  if not outcome.failed:
    return

  logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                " and error %s",
                outcome.cmd, outcome.exit_code, outcome.output)
def EtcHostsModify(mode, host, ip):
......
......@@ -560,7 +560,7 @@ def FinalizeClusterDestroy(master):
"""
cfg = config.ConfigWriter()
modify_ssh_setup = cfg.GetClusterInfo().modify_ssh_setup
result = rpc.RpcRunner.call_node_stop_master(master, True)
result = rpc.RpcRunner.call_node_stop_master(master)
msg = result.fail_msg
if msg:
logging.warning("Could not disable the master role: %s", msg)
......@@ -692,7 +692,7 @@ def MasterFailover(no_voting=False):
logging.info("Stopping the master daemon on node %s", old_master)
result = rpc.RpcRunner.call_node_stop_master(old_master, True)
result = rpc.RpcRunner.call_node_stop_master(old_master)
msg = result.fail_msg
if msg:
logging.error("Could not disable the master role on the old master"
......@@ -721,7 +721,7 @@ def MasterFailover(no_voting=False):
logging.info("Starting the master daemons on the new master")
result = rpc.RpcRunner.call_node_start_master(new_master, True, no_voting)
result = rpc.RpcRunner.call_node_start_master_daemons(new_master, no_voting)
msg = result.fail_msg
if msg:
logging.error("Could not start the master role on the new master"
......
......@@ -1355,7 +1355,7 @@ class LUClusterDestroy(LogicalUnit):
# Run post hooks on master node before it's removed
_RunPostHook(self, master)
result = self.rpc.call_node_stop_master(master, False)
result = self.rpc.call_node_deactivate_master_ip(master)
result.Raise("Could not disable the master role")
return master
......@@ -3297,7 +3297,7 @@ class LUClusterRename(LogicalUnit):
# shutdown the master IP
master = self.cfg.GetMasterNode()
result = self.rpc.call_node_stop_master(master, False)
result = self.rpc.call_node_deactivate_master_ip(master)
result.Raise("Could not disable the master role")
try:
......@@ -3315,7 +3315,7 @@ class LUClusterRename(LogicalUnit):
pass
_UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
finally:
result = self.rpc.call_node_start_master(master, False, False)
result = self.rpc.call_node_activate_master_ip(master)
msg = result.fail_msg
if msg:
self.LogWarning("Could not re-enable the master role on"
......@@ -3648,7 +3648,7 @@ class LUClusterSetParams(LogicalUnit):
master = self.cfg.GetMasterNode()
feedback_fn("Shutting down master ip on the current netdev (%s)" %
self.cluster.master_netdev)
result = self.rpc.call_node_stop_master(master, False)
result = self.rpc.call_node_deactivate_master_ip(master)
result.Raise("Could not disable the master ip")
feedback_fn("Changing master_netdev from %s to %s" %
(self.cluster.master_netdev, self.op.master_netdev))
......@@ -3659,7 +3659,7 @@ class LUClusterSetParams(LogicalUnit):
if self.op.master_netdev:
feedback_fn("Starting the master ip on the new master netdev (%s)" %
self.op.master_netdev)
result = self.rpc.call_node_start_master(master, False, False)
result = self.rpc.call_node_activate_master_ip(master)
if result.fail_msg:
self.LogWarning("Could not re-enable the master ip on"
" the master, please restart manually: %s",
......
......@@ -919,24 +919,44 @@ class RpcRunner(object):
@classmethod
@_RpcTimeout(_TMO_FAST)
def call_node_start_master(cls, node, start_daemons, no_voting):
"""Tells a node to activate itself as a master.
def call_node_start_master_daemons(cls, node, no_voting):
"""Starts master daemons on a node.
This is a single-node call.
"""
return cls._StaticSingleNodeCall(node, "node_start_master",
[start_daemons, no_voting])
return cls._StaticSingleNodeCall(node, "node_start_master_daemons",
[no_voting])
@classmethod
@_RpcTimeout(_TMO_FAST)
def call_node_stop_master(cls, node, stop_daemons):
"""Tells a node to demote itself from master status.
def call_node_activate_master_ip(cls, node):
"""Activates master IP on a node.
This is a single-node call.
"""
return cls._StaticSingleNodeCall(node, "node_stop_master", [stop_daemons])
return cls._StaticSingleNodeCall(node, "node_activate_master_ip", [])
@classmethod
@_RpcTimeout(_TMO_FAST)
def call_node_stop_master(cls, node):
  """Ask a node to drop the master IP and stop its master daemons.

  Issues the "node_stop_master" procedure, without arguments, to a
  single node.

  """
  no_args = []
  return cls._StaticSingleNodeCall(node, "node_stop_master", no_args)
@classmethod
@_RpcTimeout(_TMO_FAST)
def call_node_deactivate_master_ip(cls, node):
  """Ask a node to take down the master IP.

  Issues the "node_deactivate_master_ip" procedure, without arguments,
  to a single node.

  """
  no_args = []
  return cls._StaticSingleNodeCall(node, "node_deactivate_master_ip",
                                   no_args)
@classmethod
@_RpcTimeout(_TMO_URGENT)
......
......@@ -524,7 +524,7 @@ def CheckAgreement():
def ActivateMasterIP():
# activate ip
master_node = ssconf.SimpleStore().GetMasterNode()
result = rpc.RpcRunner.call_node_start_master(master_node, False, False)
result = rpc.RpcRunner.call_node_activate_master_ip(master_node)
msg = result.fail_msg
if msg:
logging.error("Can't activate master IP address: %s", msg)
......
......@@ -677,18 +677,36 @@ class NodeHttpServer(http.server.HttpServer):
return backend.VerifyNode(params[0], params[1])
@staticmethod
def perspective_node_start_master(params):
"""Promote this node to master status.
def perspective_node_start_master_daemons(params):
"""Start the master daemons on this node.
"""
return backend.StartMaster(params[0], params[1])
return backend.StartMasterDaemons(params[0])
@staticmethod
def perspective_node_activate_master_ip(params):
  """Handle a request to bring up the master IP on this node.

  The request parameters are not used; the work is delegated to
  backend.ActivateMasterIp and its result is returned.

  """
  outcome = backend.ActivateMasterIp()
  return outcome
@staticmethod
def perspective_node_deactivate_master_ip(params):
  """Handle a request to take down the master IP on this node.

  The request parameters are not used; the work is delegated to
  backend.DeactivateMasterIp and its result is returned.

  """
  outcome = backend.DeactivateMasterIp()
  return outcome
@staticmethod
def perspective_node_stop_master(params):
"""Demote this node from master status.
"""Deactivate the master IP and stops master daemons on this node.
Sometimes both operations need to be executed at the same time (doing one of
the two would make it impossible to do the other one).
"""
return backend.StopMaster(params[0])
backend.DeactivateMasterIp()
return backend.StopMasterDaemons()
@staticmethod
def perspective_node_leave_cluster(params):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment