Commit 52f7ab91 authored by Helga Velroyen
Browse files

Make LURenewCrypto handle unreachable nodes properly

Currently, a single unreachable node can make LURenewCrypto fail
completely. This patch adds a unit test for that case and
improves the error handling of unreachable nodes so that
the rest of the nodes are still handled.
Signed-off-by: Helga Velroyen <>
Reviewed-by: Petr Pudlak <>
parent c017cd6a
...@@ -152,17 +152,30 @@ class LUClusterRenewCrypto(NoHooksLU): ...@@ -152,17 +152,30 @@ class LUClusterRenewCrypto(NoHooksLU):
except IOError: except IOError:
pass pass
node_errors = {}
nodes = self.cfg.GetAllNodesInfo() nodes = self.cfg.GetAllNodesInfo()
for (node_uuid, node_info) in nodes.items(): for (node_uuid, node_info) in nodes.items():
if node_info.offline: if node_info.offline:
feedback_fn("* Skipping offline node %s" % feedback_fn("* Skipping offline node %s" %
continue continue
if node_uuid != master_uuid: if node_uuid != master_uuid:
new_digest = CreateNewClientCert(self, node_uuid) try:
if node_info.master_candidate: new_digest = CreateNewClientCert(self, node_uuid)
utils.AddNodeToCandidateCerts(node_uuid, if node_info.master_candidate:
new_digest, utils.AddNodeToCandidateCerts(node_uuid,
cluster.candidate_certs) new_digest,
except errors.OpExecError as e:
node_errors[node_uuid] = e
if node_errors:
msg = ("Some nodes' SSL client certificates could not be renewed."
" Please make sure those nodes are reachable and rerun"
" the operation. The affected nodes and their errors are:\n")
for uuid, e in node_errors.items():
msg += "Node %s: %s\n" % (uuid, e)
utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid, utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
cluster.candidate_certs) cluster.candidate_certs)
utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid, utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
...@@ -2351,6 +2351,56 @@ class TestLUClusterRenewCrypto(CmdlibTestCase): ...@@ -2351,6 +2351,56 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
cluster = self.cfg.GetClusterInfo() cluster = self.cfg.GetClusterInfo()
self.assertFalse(cluster.candidate_certs) self.assertFalse(cluster.candidate_certs)
def _partiallyFailingRpc(self, node_uuid, _):
if node_uuid == self._failed_node:
return self.RpcResultsBuilder() \
return self.RpcResultsBuilder() \
[(constants.CRYPTO_TYPE_SSL_DIGEST, self._GetFakeDigest(node_uuid))])
def testNonMasterFails(self, pathutils):
# patch pathutils to point to temporary files
pathutils.NODED_CERT_FILE = self._node_cert
pathutils.NODED_CLIENT_CERT_FILE = self._client_node_cert
# create a few non-master, online nodes
num_nodes = 3
for _ in range(num_nodes):
nodes = self.cfg.GetAllNodesInfo()
# pick one node as the failing one
master_uuid = self.cfg.GetMasterNode()
self._failed_node = [node_uuid for node_uuid in nodes
if node_uuid != master_uuid][1]
self.rpc.call_node_crypto_tokens = self._partiallyFailingRpc
op = opcodes.OpClusterRenewCrypto()
# Check if the correct certificates exist and don't exist on the master
# Check if we have the correct digests in the configuration
cluster = self.cfg.GetClusterInfo()
# There should be one digest missing.
self.assertEqual(num_nodes, len(cluster.candidate_certs))
nodes = self.cfg.GetAllNodesInfo()
for (node_uuid, _) in nodes.items():
if node_uuid == self._failed_node:
self.assertTrue(node_uuid not in cluster.candidate_certs)
expected_digest = self._GetFakeDigest(node_uuid)
self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
if __name__ == "__main__": if __name__ == "__main__":
testutils.GanetiTestProgram() testutils.GanetiTestProgram()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment