Commit 0623d351 authored by Iustin Pop

Convert node_leave_cluster rpc to new style result

This patch converts this rpc call to the new style result and, in the
process, also changes the meaning of the QuitGanetiException's
arguments and the node daemon rpc call exception handler.

The problem with the exception handler is that we used a two-stage one,
and the inner one used to catch all exceptions (including this one), so
in the logs we always had an exception logged, instead of the normal
'leaving cluster' message. The patch also adds logging of the
exception's arguments, so that we have a trail in the logs about the
shutdown mode.

The exception's arguments were reversed from the normal RPC results
style. While it makes somewhat more sense for this exception, we change
them such that they match the rpc result format.
Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
parent 6c00d19a
......@@ -93,19 +93,23 @@ class NodeHttpServer(http.server.HttpServer):
raise http.HttpNotFound()
try:
try:
return method(req.request_body)
except backend.RPCFail, err:
# our custom failure exception; str(err) works fine if the
# exception was constructed with a single argument, and in
# this case, err.message == err.args[0] == str(err)
return (False, str(err))
except:
logging.exception("Error in RPC call")
raise
return method(req.request_body)
except backend.RPCFail, err:
# our custom failure exception; str(err) works fine if the
# exception was constructed with a single argument, and in
# this case, err.message == err.args[0] == str(err)
return (False, str(err))
except errors.QuitGanetiException, err:
# Tell parent to quit
logging.info("Shutting down the node daemon, arguments: %s",
str(err.args))
os.kill(self.noded_pid, signal.SIGTERM)
# And return the error's arguments, which must be already in
# correct tuple format
return err.args
except:
logging.exception("Error in RPC call")
raise
# the new block devices --------------------------
......
......@@ -323,21 +323,20 @@ def LeaveCluster():
try:
priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
except errors.OpExecError:
logging.exception("Error while processing ssh files")
return
f = open(pub_key, 'r')
try:
utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
finally:
f.close()
f = open(pub_key, 'r')
try:
utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
finally:
f.close()
utils.RemoveFile(priv_key)
utils.RemoveFile(pub_key)
utils.RemoveFile(priv_key)
utils.RemoveFile(pub_key)
except errors.OpExecError:
logging.exception("Error while processing ssh files")
# Return a reassuring string to the caller, and quit
raise errors.QuitGanetiException(False, 'Shutdown scheduled')
# Raise a custom exception (handled in ganeti-noded)
raise errors.QuitGanetiException(True, 'Shutdown scheduled')
def GetNodeInfo(vgname, hypervisor_type):
......
......@@ -322,8 +322,10 @@ def FinalizeClusterDestroy(master):
if msg:
logging.warning("Could not disable the master role: %s" % msg)
result = rpc.RpcRunner.call_node_leave_cluster(master)
if result.failed or not result.data:
logging.warning("Could not shutdown the node daemon and cleanup the node")
msg = result.RemoteFailMsg()
if msg:
logging.warning("Could not shutdown the node daemon and cleanup"
" the node: %s", msg)
def SetupNodeDaemon(cluster_name, node, ssh_key_check):
......
......@@ -1917,7 +1917,11 @@ class LURemoveNode(LogicalUnit):
self.context.RemoveNode(node.name)
self.rpc.call_node_leave_cluster(node.name)
result = self.rpc.call_node_leave_cluster(node.name)
msg = result.RemoteFailMsg()
if msg:
self.LogWarning("Errors encountered on the remote node while leaving"
" the cluster: %s", msg)
# Promote nodes to master candidate as needed
_AdjustCandidatePool(self)
......
......@@ -226,18 +226,21 @@ class CommandError(GenericError):
class QuitGanetiException(Exception):
"""Signal that Ganeti that it must quit.
This is not necessarily an error (and thus not a subclass of GenericError),
but it's an exceptional circumstance and it is thus treated. This instance
should be instantiated with two values. The first one will specify whether an
error should returned to the caller, and the second one will be the returned
result (either as an error or as a normal result).
This is not necessarily an error (and thus not a subclass of
GenericError), but it's an exceptional circumstance and it is thus
treated. This instance should be instantiated with two values. The
first one will specify the return code to the caller, and the second
one will be the returned result (either as an error or as a normal
result). Usually only the leave cluster rpc call should return
status True (as there it's expected we quit), every other call will
return status False (as a critical error was encountered).
Examples::
# Return a result of "True" to the caller, but quit ganeti afterwards
raise QuitGanetiException(False, True)
raise QuitGanetiException(True, None)
# Send an error to the caller, and quit ganeti
raise QuitGanetiException(True, "Fatal safety violation, shutting down")
raise QuitGanetiException(False, "Fatal safety violation, shutting down")
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment