Commit 096b394e authored by Helga Velroyen

Renew-crypto: rebuild digest map of all nodes

During a renew-crypto operation, all nodes will create
new client certificates. Afterwards, the fingerprints
(digests) of the master candidate nodes need to be
collected and added to the configuration. This is done
by an RPC call, which will succeed as the master
node's certificate digest was propagated to the nodes
before.

This also removes two unit tests that are no longer
necessary, because there will be no RPC call from
the master to itself anymore.
Signed-off-by: Helga Velroyen <helgav@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parent 38c0d67d
......@@ -46,6 +46,7 @@ from ganeti.cli import *
from ganeti import bootstrap
from ganeti import compat
from ganeti import constants
from ganeti import config
from ganeti import errors
from ganeti import netutils
from ganeti import objects
......@@ -1087,6 +1088,28 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
ssh_port,
data)
# Create a temporary ssconf file using the master's client cert digest
# and the 'bootstrap' keyword to enable distribution of all nodes' digests.
master_digest = utils.GetCertificateDigest()
ssconf_master_candidate_certs_filename = os.path.join(
pathutils.DATA_DIR, "%s%s" %
(constants.SSCONF_FILEPREFIX, constants.SS_MASTER_CANDIDATES_CERTS))
utils.WriteFile(
ssconf_master_candidate_certs_filename,
data="%s=%s" % (constants.CRYPTO_BOOTSTRAP, master_digest))
for node_name in ctx.nonmaster_nodes:
port = ctx.ssh_ports[node_name]
ctx.feedback_fn("Copying %s to %s:%d" %
(ssconf_master_candidate_certs_filename, node_name, port))
ctx.ssh.CopyFileToNode(node_name, port,
ssconf_master_candidate_certs_filename)
# Write the bootstrap entry to the config using wconfd.
config_live_lock = utils.livelock.LiveLock("renew_crypto")
cfg = config.GetConfig(None, config_live_lock)
cfg.AddNodeToCandidateCerts(constants.CRYPTO_BOOTSTRAP, master_digest)
cfg.Update(cfg.GetClusterInfo(), ctx.feedback_fn)
def _RenewServerAndClientCerts(ctx):
ctx.feedback_fn("Updating the cluster SSL certificate.")
......@@ -1108,7 +1131,6 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
_RenewClientCerts(ctx)
if new_cluster_cert or new_rapi_cert or new_spice_cert \
or new_confd_hmac_key or new_cds:
RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
......@@ -1127,6 +1149,11 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
ToStdout("All requested certificates and keys have been replaced."
" Running \"gnt-cluster verify\" now is recommended.")
if new_node_cert or new_cluster_cert:
cl = GetClient()
renew_op = opcodes.OpClusterRenewCrypto()
SubmitOpCode(renew_op, cl=cl)
return 0
......
......@@ -118,60 +118,14 @@ class LUClusterRenewCrypto(NoHooksLU):
logging.debug("Renewing the master's SSL node certificate."
" Master's UUID: %s.", master_uuid)
server_digest = utils.GetCertificateDigest(
cert_filename=pathutils.NODED_CERT_FILE)
logging.debug("SSL digest of the node certificate: %s.", server_digest)
self.cfg.AddNodeToCandidateCerts("%s-SERVER" % master_uuid,
server_digest)
logging.debug("Added master's digest as *-SERVER entry to configuration."
" Current list of candidate certificates: %s.",
str(cluster.candidate_certs))
try:
old_master_digest = utils.GetCertificateDigest(
# mapping node UUIDs to client certificate digests
digest_map = {}
master_digest = utils.GetCertificateDigest(
cert_filename=pathutils.NODED_CLIENT_CERT_FILE)
logging.debug("SSL digest of old master's SSL node certificate: %s.",
old_master_digest)
self.cfg.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid,
old_master_digest)
logging.debug("Added old master's node certificate digest to config"
" as *-OLDMASTER. Current list of candidate certificates:"
" %s.", str(cluster.candidate_certs))
except IOError:
logging.info("No old master certificate available.")
last_exception = None
for i in range(self._MAX_NUM_RETRIES):
try:
# Technically it should not be necessary to set the cert
# paths. However, due to a bug in the mock library, we
# have to do this to be able to test the function properly.
_UpdateMasterClientCert(
self, self.cfg, master_uuid,
client_cert=pathutils.NODED_CLIENT_CERT_FILE,
client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
logging.debug("Successfully renewed the master's node certificate.")
break
except errors.OpExecError as e:
logging.error("Renewing the master's SSL node certificate failed"
" at attempt no. %s with error '%s'", str(i), e)
last_exception = e
else:
if last_exception:
feedback_fn("Could not renew the master's client SSL certificate."
" Cleaning up. Error: %s." % last_exception)
# Cleaning up temporary certificates
self.cfg.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid)
self.cfg.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid)
logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
" master candidate cert list. Current state of the"
" list: %s.", str(cluster.candidate_certs))
try:
utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
except IOError as e:
logging.debug("Could not clean up temporary node certificate of the"
" master node. (Possibly because it was already removed"
" properly.) Error: %s.", e)
return
digest_map[master_uuid] = master_digest
logging.debug("Adding the master's SSL node certificate digest to the"
" configuration. Master's UUID: %s, Digest: %s",
master_uuid, master_digest)
node_errors = {}
nodes = self.cfg.GetAllNodesInfo()
......@@ -183,21 +137,20 @@ class LUClusterRenewCrypto(NoHooksLU):
node_info.name, node_uuid)
continue
if node_uuid != master_uuid:
logging.debug("Renewing node certificate of node '%s'.", node_uuid)
logging.debug("Adding certificate digest of node '%s'.", node_uuid)
last_exception = None
for i in range(self._MAX_NUM_RETRIES):
try:
new_digest = GetClientCertDigest(self, node_uuid)
if node_info.master_candidate:
self.cfg.AddNodeToCandidateCerts(node_uuid,
new_digest)
node_digest = GetClientCertDigest(self, node_uuid)
digest_map[node_uuid] = node_digest
logging.debug("Added the node's certificate to candidate"
" certificate list. Current list: %s.",
str(cluster.candidate_certs))
break
except errors.OpExecError as e:
last_exception = e
logging.error("Could not renew a non-master node's SSL node"
logging.error("Could not fetch a non-master node's SSL node"
" certificate at attempt no. %s. The node's UUID"
" is %s, and the error was: %s.",
str(i), node_uuid, e)
......@@ -206,18 +159,14 @@ class LUClusterRenewCrypto(NoHooksLU):
node_errors[node_uuid] = last_exception
if node_errors:
msg = ("Some nodes' SSL client certificates could not be renewed."
msg = ("Some nodes' SSL client certificates could not be fetched."
" Please make sure those nodes are reachable and rerun"
" the operation. The affected nodes and their errors are:\n")
for uuid, e in node_errors.items():
msg += "Node %s: %s\n" % (uuid, e)
feedback_fn(msg)
self.cfg.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid)
self.cfg.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid)
logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
" master candidate cert list. Current state of the"
" list: %s.", cluster.candidate_certs)
self.cfg.SetCandidateCerts(digest_map)
class LUClusterActivateMasterIp(NoHooksLU):
......
......@@ -3411,6 +3411,16 @@ class ConfigWriter(object):
"""
return self._ConfigData().cluster.candidate_certs
@_ConfigSync()
def SetCandidateCerts(self, certs):
  """Overwrite the cluster's candidate certificate map.

  The given dictionary is assigned as-is (not copied) to the cluster
  object's C{candidate_certs} attribute, discarding the previous value.

  @type certs: dict of string to string
  @param certs: map of node UUIDs to SSL client certificate digests.

  """
  cluster = self._ConfigData().cluster
  cluster.candidate_certs = certs
@_ConfigSync()
def AddNodeToCandidateCerts(self, node_uuid, cert_digest,
info_fn=logging.info, warn_fn=logging.warn):
......
......@@ -124,7 +124,6 @@ RESTRICTED_COMMANDS_DIR = CONF_DIR + "/restricted-commands"
#: Node daemon certificate path
NODED_CERT_FILE = DATA_DIR + "/server.pem"
NODED_CLIENT_CERT_FILE = DATA_DIR + "/client.pem"
NODED_CLIENT_CERT_FILE_TMP = DATA_DIR + "/client.pem.tmp"
#: Node daemon certificate file permissions
NODED_CERT_MODE = 0440
......
......@@ -2300,7 +2300,8 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
shutil.copy(testutils.TestDataFilename("cert1.pem"), self._node_cert)
self._client_node_cert = self._CreateTempFile()
shutil.copy(testutils.TestDataFilename("cert2.pem"), self._client_node_cert)
self._client_node_cert_tmp = self._CreateTempFile()
self._client_node_cert_digest = \
"BF:24:F7:57:50:60:43:87:83:E3:0D:7E:EF:DD:14:6C:13:43:20:4E"
def tearDown(self):
super(TestLUClusterRenewCrypto, self).tearDown()
......@@ -2321,8 +2322,6 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
"""
pathutils.NODED_CERT_FILE = self._node_cert
pathutils.NODED_CLIENT_CERT_FILE = self._client_node_cert
pathutils.NODED_CLIENT_CERT_FILE_TMP = \
self._client_node_cert_tmp
def _AssertCertFiles(self, pathutils):
"""Check if the correct certificates exist and don't exist on the master.
......@@ -2330,7 +2329,6 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
"""
self.assertTrue(os.path.exists(pathutils.NODED_CERT_FILE))
self.assertTrue(os.path.exists(pathutils.NODED_CLIENT_CERT_FILE))
self.assertFalse(os.path.exists(pathutils.NODED_CLIENT_CERT_FILE_TMP))
def _CompletelySuccessfulRpc(self, node_uuid, _):
"""Fake RPC call which always returns successfully.
......@@ -2360,27 +2358,17 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
cluster = self.cfg.GetClusterInfo()
self.assertEqual(num_nodes + 1, len(cluster.candidate_certs))
nodes = self.cfg.GetAllNodesInfo()
for (node_uuid, _) in nodes.items():
expected_digest = self._GetFakeDigest(node_uuid)
self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
@patchPathutils("cluster")
def testMasterFails(self, pathutils):
self._InitPathutils(pathutils)
# make sure the RPC calls are failing for all nodes
master_uuid = self.cfg.GetMasterNode()
self.rpc.call_node_crypto_tokens.return_value = self.RpcResultsBuilder() \
.CreateFailedNodeResult(master_uuid)
op = opcodes.OpClusterRenewCrypto()
self.ExecOpCode(op)
self._AssertCertFiles(pathutils)
# Check if we correctly have no candidate certificates
cluster = self.cfg.GetClusterInfo()
self.assertFalse(cluster.candidate_certs)
for (node_uuid, _) in nodes.items():
if node_uuid == master_uuid:
# The master digest is from the actual test certificate.
self.assertEqual(self._client_node_cert_digest,
cluster.candidate_certs[node_uuid])
else:
# The non-master nodes have the fake digest from the
# mock RPC.
expected_digest = self._GetFakeDigest(node_uuid)
self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
def _partiallyFailingRpc(self, node_uuid, _):
if node_uuid == self._failed_node:
......@@ -2421,8 +2409,7 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
if node_uuid == self._failed_node:
self.assertTrue(node_uuid not in cluster.candidate_certs)
else:
expected_digest = self._GetFakeDigest(node_uuid)
self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
self.assertTrue(node_uuid in cluster.candidate_certs)
@patchPathutils("cluster")
def testOfflineNodes(self, pathutils):
......@@ -2450,8 +2437,7 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
if node_info.offline == True:
self.assertTrue(node_uuid not in cluster.candidate_certs)
else:
expected_digest = self._GetFakeDigest(node_uuid)
self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
self.assertTrue(node_uuid in cluster.candidate_certs)
def _RpcSuccessfulAfterRetries(self, node_uuid, _):
if self._retries < self._max_retries:
......@@ -2463,40 +2449,6 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
.CreateSuccessfulNodeResult(node_uuid,
[(constants.CRYPTO_TYPE_SSL_DIGEST, self._GetFakeDigest(node_uuid))])
@patchPathutils("cluster")
def testMasterRetriesSuccess(self, pathutils):
self._InitPathutils(pathutils)
self._max_retries = 2
self._retries = 0
self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetries
op = opcodes.OpClusterRenewCrypto()
self.ExecOpCode(op)
self._AssertCertFiles(pathutils)
cluster = self.cfg.GetClusterInfo()
master_uuid = self.cfg.GetMasterNode()
self.assertTrue(self._GetFakeDigest(master_uuid)
in cluster.candidate_certs.values())
@patchPathutils("cluster")
def testMasterRetriesFail(self, pathutils):
self._InitPathutils(pathutils)
self._max_retries = 5
self._retries = 0
self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetries
op = opcodes.OpClusterRenewCrypto()
self.ExecOpCode(op)
self._AssertCertFiles(pathutils)
cluster = self.cfg.GetClusterInfo()
self.assertFalse(cluster.candidate_certs)
def _RpcSuccessfulAfterRetriesNonMaster(self, node_uuid, _):
if self._retries < self._max_retries and node_uuid != self._master_uuid:
self._retries += 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment