Commit b3cc1646 authored by Helga Velroyen's avatar Helga Velroyen
Browse files

Verify incoming RPCs against candidate map

From this patch on, incoming RPC calls are checked against
the map of valid master candidate certificates. If no map
is present, the cluster is assumed to be in
bootstrap/upgrade mode and the incoming call is compared
against the server certificate instead. This is necessary
because no candidate map exists yet at cluster
initialization or during an upgrade from a pre-2.11 version.

After an upgrade, the cluster RPC communication continues
to use the server certificate until the client certificates
are created and the candidate map is populated using
'gnt-cluster renew-crypto --new-node-certificates'.

Note that a trick was necessary for updating the master's
certificate. The new certificate is first created under
a temporary name; then its digest is updated in the
configuration and distributed using the old certificate,
because distribution would otherwise fail, since the nodes
do not know the new digest yet. Only then is the
certificate moved to its proper location.
Signed-off-by: default avatarHelga Velroyen <>
Reviewed-by: default avatarHrvoje Ribicic <>
parent 28756f80
......@@ -1210,11 +1210,11 @@ def GetCryptoTokens(token_requests):
True, cert_filename,
"Create new client SSL certificate in %s." % cert_filename)
elif action == constants.CRYPTO_ACTION_GET:
return tokens
......@@ -91,12 +91,12 @@ def GenerateHmacKey(file_name):
# pylint: disable=R0913
def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert,
new_confd_hmac_key, new_cds, new_node_client_cert,
new_confd_hmac_key, new_cds,
rapi_cert_pem=None, spice_cert_pem=None,
spice_cacert_pem=None, cds=None,
......@@ -114,9 +114,6 @@ def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert,
@param new_confd_hmac_key: Whether to generate a new HMAC key
@type new_cds: bool
@param new_cds: Whether to generate a new cluster domain secret
@type new_node_client_cert: bool
@param new_node_client_cert: Whether to generate a new node (SSL)
client certificate
@type rapi_cert_pem: string
@param rapi_cert_pem: New RAPI certificate in PEM format
@type spice_cert_pem: string
......@@ -128,9 +125,6 @@ def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert,
@param cds: New cluster domain secret
@type nodecert_file: string
@param nodecert_file: optional override of the node cert file path
@type nodecert_client_file: string
@param nodecert_client_file: optional override of the node client certificate
file path
@type rapicert_file: string
@param rapicert_file: optional override of the rapi cert file path
@type spicecert_file: string
......@@ -141,16 +135,12 @@ def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert,
@param hmackey_file: optional override of the hmac key file path
# pylint: disable=R0913
# noded SSL certificate
new_cluster_cert, nodecert_file,
"Generating new cluster certificate at %s" % nodecert_file)
# noded client SSL certificate (to be used only by this very node)
new_node_client_cert, nodecert_client_file,
"Generating new node client certificate at %s" % nodecert_client_file)
# confd HMAC key
if new_confd_hmac_key or not os.path.exists(hmackey_file):
logging.debug("Writing new confd HMAC key to %s", hmackey_file)
......@@ -212,7 +202,7 @@ def _InitGanetiServerSetup(master_name):
# Generate cluster secrets
GenerateClusterCrypto(True, False, False, False, False, True)
GenerateClusterCrypto(True, False, False, False, False)
result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.NODED])
if result.failed:
......@@ -126,6 +126,7 @@ __all__ = [
......@@ -1397,6 +1398,10 @@ NEW_CLUSTER_CERT_OPT = cli_option("--new-cluster-certificate",
default=False, action="store_true",
help="Generate a new cluster certificate")
NEW_NODE_CERT_OPT = cli_option(
"--new-node-certificates", dest="new_node_cert", default=False,
action="store_true", help="Generate new node certificates (for all nodes)")
RAPI_CERT_OPT = cli_option("--rapi-certificate", dest="rapi_cert",
help="File containing new RAPI certificate")
......@@ -885,7 +885,7 @@ def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
rapi_cert_filename, new_spice_cert, spice_cert_filename,
spice_cacert_filename, new_confd_hmac_key, new_cds,
cds_filename, force):
cds_filename, force, new_node_cert):
"""Renews cluster certificates, keys and secrets.
@type new_cluster_cert: bool
......@@ -909,6 +909,8 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
@param cds_filename: Path to file containing new cluster domain secret
@type force: bool
@param force: Whether to ask user for confirmation
@type new_node_cert: bool
@param new_node_cert: Whether to generate new node certificates
if new_rapi_cert and rapi_cert_filename:
......@@ -961,13 +963,12 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
def _RenewCryptoInner(ctx):
ctx.feedback_fn("Updating certificates and keys")
# FIXME: add separate option for client certs
# Note: the node certificate will be generated in the LU
......@@ -1004,6 +1005,11 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
ToStdout("All requested certificates and keys have been replaced."
" Running \"gnt-cluster verify\" now is recommended.")
if new_node_cert:
cl = GetClient()
renew_op = opcodes.OpClusterRenewCrypto()
SubmitOpCode(renew_op, cl=cl)
return 0
......@@ -1020,7 +1026,8 @@ def RenewCrypto(opts, args):
def _GetEnabledDiskTemplates(opts):
......@@ -2137,7 +2144,8 @@ commands = {
"Renews cluster certificates, keys and secrets"),
"epo": (
......@@ -45,7 +45,8 @@ from ganeti.cmdlib.cluster import \
LUClusterVerify, \
LUClusterVerifyConfig, \
LUClusterVerifyGroup, \
LUClusterVerifyDisks, \
from import \
LUGroupAdd, \
LUGroupAssignNodes, \
......@@ -58,11 +58,75 @@ from ganeti.cmdlib.common import ShareAll, RunPostHook, \
CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \
CheckIpolicyVsDiskTemplates, CheckDiskAccessModeValidity, \
CheckDiskAccessModeConsistency, AddNodeCertToCandidateCerts
CheckDiskAccessModeConsistency, CreateNewClientCert
import ganeti.masterd.instance
def _UpdateMasterClientCert(
lu, master_uuid, cluster, feedback_fn,
"""Renews the master's client certificate and propagates the config.
@type lu: C{LogicalUnit}
@param lu: the logical unit holding the config
@type master_uuid: string
@param master_uuid: the master node's UUID
@type cluster: C{objects.Cluster}
@param cluster: the cluster's configuration
@type feedback_fn: function
@param feedback_fn: feedback functions for config updates
@type client_cert: string
@param client_cert: the path of the client certificate
@type client_cert_tmp: string
@param client_cert_tmp: the temporary path of the client certificate
@rtype: string
@return: the digest of the newly created client certificate
client_digest = CreateNewClientCert(lu, master_uuid, filename=client_cert_tmp)
utils.AddNodeToCandidateCerts(master_uuid, client_digest,
# This triggers an update of the config and distribution of it with the old
# SSL certificate
lu.cfg.Update(cluster, feedback_fn)
utils.RenameFile(client_cert_tmp, client_cert)
return client_digest
class LUClusterRenewCrypto(NoHooksLU):
"""Renew the cluster's crypto tokens.
Note that most of this operation is done on the client side (by
gnt-cluster renew-crypto); this LU only takes care of the renewal of the
client SSL certificates.
def Exec(self, feedback_fn):
master_uuid = self.cfg.GetMasterNode()
cluster = self.cfg.GetClusterInfo()
server_digest = utils.GetCertificateDigest(
utils.AddNodeToCandidateCerts("%s-SERVER" % master_uuid,
new_master_digest = _UpdateMasterClientCert(self, master_uuid, cluster,
cluster.candidate_certs = {master_uuid: new_master_digest}
nodes = self.cfg.GetAllNodesInfo()
for (node_uuid, node_info) in nodes.items():
if node_uuid != master_uuid:
new_digest = CreateNewClientCert(self, node_uuid)
if node_info.master_candidate:
cluster.candidate_certs[node_uuid] = new_digest
# Trigger another update of the config now with the new master cert
self.cfg.Update(cluster, feedback_fn)
class LUClusterActivateMasterIp(NoHooksLU):
"""Activate the master IP on the master node.
......@@ -221,8 +285,8 @@ class LUClusterPostInit(LogicalUnit):
self.master_ndparams.get(constants.ND_OVS_LINK, None))
result.Raise("Could not successully configure Open vSwitch")
AddNodeCertToCandidateCerts(self, self.master_uuid,
cluster = self.cfg.GetClusterInfo()
_UpdateMasterClientCert(self, self.master_uuid, cluster, feedback_fn)
return True
......@@ -1251,7 +1251,7 @@ def RemoveNodeCertFromCandidateCerts(node_uuid, cluster):
utils.RemoveNodeFromCandidateCerts(node_uuid, cluster.candidate_certs)
def CreateNewClientCert(self, node_uuid, filename=None):
def CreateNewClientCert(lu, node_uuid, filename=None):
"""Creates a new client SSL certificate for the node.
@type node_uuid: string
......@@ -1265,7 +1265,7 @@ def CreateNewClientCert(self, node_uuid, filename=None):
options = {}
if filename:
options[constants.CRYPTO_OPTION_CERT_FILE] = filename
result = self.rpc.call_node_crypto_tokens(
result = lu.rpc.call_node_crypto_tokens(
......@@ -567,7 +567,8 @@ class HttpBase(object):
self._ssl_key = None
self._ssl_cert = None
def _CreateSocket(self, ssl_params, ssl_verify_peer, family):
def _CreateSocket(self, ssl_params, ssl_verify_peer, family,
"""Creates a TCP socket and initializes SSL if needed.
@type ssl_params: HttpSslParams
......@@ -580,6 +581,8 @@ class HttpBase(object):
assert family in (socket.AF_INET, socket.AF_INET6)
if ssl_verify_peer:
assert ssl_verify_callback is not None
self._ssl_params = ssl_params
sock = socket.socket(family, socket.SOCK_STREAM)
......@@ -607,7 +610,7 @@ class HttpBase(object):
if ssl_verify_peer:
ctx.set_verify(OpenSSL.SSL.VERIFY_PEER |
# Also add our certificate as a trusted CA to be sent to the client.
# This is required at least for GnuTLS clients to work.
......@@ -470,7 +470,7 @@ class HttpServer(http.HttpBase, asyncore.dispatcher):
def __init__(self, mainloop, local_address, port, handler,
ssl_params=None, ssl_verify_peer=False,
request_executor_class=None, ssl_verify_callback=None):
"""Initializes the HTTP server
@type mainloop: ganeti.daemon.Mainloop
......@@ -502,7 +502,8 @@ class HttpServer(http.HttpBase, asyncore.dispatcher):
self.port = port
self.handler = handler
family = netutils.IPAddress.GetAddressFamily(local_address)
self.socket = self._CreateSocket(ssl_params, ssl_verify_peer, family)
self.socket = self._CreateSocket(ssl_params, ssl_verify_peer, family,
# Allow port to be reused
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
......@@ -36,6 +36,7 @@ import base64
import pycurl
import threading
import copy
import os
from ganeti import utils
from ganeti import objects
......@@ -97,15 +98,23 @@ def Shutdown():
def _ConfigRpcCurl(curl):
noded_cert = str(pathutils.NODED_CERT_FILE)
noded_client_cert = str(pathutils.NODED_CLIENT_CERT_FILE)
# FIXME: The next two lines are necessary to ensure upgradability from
# 2.10 to 2.11. Remove in 2.12, because this slows down RPC calls.
if not os.path.exists(noded_client_cert):"Using server certificate as client certificate for RPC"
noded_client_cert = noded_cert
curl.setopt(pycurl.FOLLOWLOCATION, False)
curl.setopt(pycurl.CAINFO, noded_cert)
curl.setopt(pycurl.SSL_VERIFYHOST, 0)
curl.setopt(pycurl.SSL_VERIFYPEER, True)
curl.setopt(pycurl.SSLCERTTYPE, "PEM")
curl.setopt(pycurl.SSLCERT, noded_cert)
curl.setopt(pycurl.SSLCERT, noded_client_cert)
curl.setopt(pycurl.SSLKEYTYPE, "PEM")
curl.setopt(pycurl.SSLKEY, noded_cert)
curl.setopt(pycurl.SSLKEY, noded_client_cert)
curl.setopt(pycurl.CONNECTTIMEOUT, constants.RPC_CONNECT_TIMEOUT)
......@@ -1182,6 +1182,53 @@ def CheckNoded(_, args):
def SSLVerifyPeer(conn, cert, errnum, errdepth, ok):
"""Callback function to verify a peer against the candidate cert map.
Note that we have a chicken-and-egg problem during cluster init and upgrade.
This method checks whether the incoming connection comes from a master
candidate by comparing it to the master certificate map in the cluster
configuration. However, during cluster init and cluster upgrade there
are various RPC calls done to the master node itself, before the candidate
certificate list is established and the cluster configuration is written.
In this case, we cannot check against the master candidate map.
This problem is solved by checking whether the candidate map is empty. An
initialized 2.11 or higher cluster has at least one entry for the master
node in the candidate map. If the map is empty, we know that we are still
in the bootstrap/upgrade phase. In this case, we read the server certificate
digest and compare it to the incoming request.
This means that after an upgrade of Ganeti, the system continues to operate
like before, using server certificates only. After the client certificates
are generated with ``gnt-cluster renew-crypto --new-node-certificates``,
RPC communication is switched to using client certificates and the trick of
using server certificates does not work anymore.
@type conn: C{OpenSSL.SSL.Connection}
@param conn: the OpenSSL connection object
@type cert: C{OpenSSL.X509}
@param cert: the peer's SSL certificate
# some parameters are unused, but this is the API
# pylint: disable=W0613
_BOOTSTRAP = "bootstrap"
sstore = ssconf.SimpleStore()
candidate_certs = sstore.GetMasterCandidatesCertMap()
except IOError:"No candidate certificates found. Switching to "
"bootstrap/update mode.")
candidate_certs = None
if not candidate_certs:
candidate_certs = {
_BOOTSTRAP: utils.GetCertificateDigest(
return cert.digest("sha1") in candidate_certs.values()
# pylint: enable=W0613
def PrepNoded(options, _):
"""Preparation node daemon function, executed with the PID file held.
......@@ -1216,7 +1263,8 @@ def PrepNoded(options, _):
server = \
http.server.HttpServer(mainloop, options.bind_address, options.port,
handler, ssl_params=ssl_params, ssl_verify_peer=True,
return (mainloop, server)
......@@ -82,14 +82,10 @@ def RemoveNodeFromCandidateCerts(node_uuid, candidate_certs,
del candidate_certs[node_uuid]
def GetClientCertificateDigest(cert_filename=pathutils.NODED_CERT_FILE):
def GetCertificateDigest(cert_filename=pathutils.NODED_CLIENT_CERT_FILE):
"""Reads the SSL certificate and returns the sha1 digest.
# FIXME: This is supposed to read the client certificate, but
# in this stage of the patch series there is no client certificate
# yet, so we return the digest of the server certificate to get
# the rest of the key management infrastructure running.
cert_plain = io.ReadFile(cert_filename)
cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
......@@ -766,7 +766,8 @@ RENEW-CRYPTO
| **renew-crypto** [-f]
| [\--new-cluster-certificate] [\--new-confd-hmac-key]
| [\--new-cluster-certificate] [\--new-node-certificates]
| [\--new-confd-hmac-key]
| [\--new-rapi-certificate] [\--rapi-certificate *rapi-cert*]
| [\--new-spice-certificate | \--spice-certificate *spice-cert*
| \--spice-ca-certificate *spice-ca-cert*]
......@@ -778,6 +779,11 @@ options ``--new-cluster-certificate`` and ``--new-confd-hmac-key``
can be used to regenerate respectively the cluster-internal SSL
certificate and the HMAC key used by **ganeti-confd**\(8).
The option ``--new-node-certificates`` generates new SSL client
certificates for all nodes. Note that the node certificates are
regenerated only after the other certificates have been created and
distributed and the Ganeti daemons have been restarted.
To generate a new self-signed RAPI certificate (used by
**ganeti-rapi**\(8)) specify ``--new-rapi-certificate``. If you want to
use your own certificate, e.g. one signed by a certificate
......@@ -132,6 +132,10 @@ opClusterDeactivateMasterIp :: String
opClusterDeactivateMasterIp =
"Deactivate the master IP on the master node."
opClusterRenewCrypto :: String
opClusterRenewCrypto =
"Renews the cluster node's SSL client certificates."
opQuery :: String
opQuery =
"Query for resources/items.\n\
......@@ -254,6 +254,11 @@ $(genOpCode "OpCode"
, ("OpClusterRenewCrypto",
[t| () |],
, ("OpQuery",
[t| QueryResponse |],
......@@ -34,6 +34,7 @@ module Ganeti.Path
, clusterConfFile
, watcherPauseFile
, nodedCertFile
, nodedClientCertFile
, queueDir
, jobQueueSerialFile
, jobQueueLockFile
......@@ -109,6 +110,10 @@ watcherPauseFile = dataDirP "watcher.pause"
nodedCertFile :: IO FilePath
nodedCertFile = dataDirP "server.pem"
-- | Path to the noded client certificate.
nodedClientCertFile :: IO FilePath
nodedClientCertFile = dataDirP "client.pem"
-- | Job queue directory.
queueDir :: IO FilePath
queueDir = dataDirP "queue"
......@@ -88,6 +88,7 @@ import Data.Maybe (fromMaybe)
import qualified Text.JSON as J
import Text.JSON.Pretty (pp_value)
import qualified Data.ByteString.Base64.Lazy as Base64
import System.Directory
import Network.Curl hiding (content)
import qualified Ganeti.Path as P
......@@ -214,20 +215,27 @@ logRpcErrors allElems =
in mapM_ logOneRpcErr allElems
-- | Get options for RPC call
getOptionsForCall :: (Rpc a b) => FilePath -> a -> [CurlOption]
getOptionsForCall certPath call =
getOptionsForCall :: (Rpc a b) => FilePath -> FilePath -> a -> [CurlOption]
getOptionsForCall cert_path client_cert_path call =
[ CurlTimeout (fromIntegral $ rpcCallTimeout call)
, CurlSSLCert certPath
, CurlSSLKey certPath
, CurlCAInfo certPath
, CurlSSLCert client_cert_path
, CurlSSLKey client_cert_path
, CurlCAInfo cert_path
-- | Execute multiple RPC calls in parallel
executeRpcCalls :: (Rpc a b) => [(Node, a)] -> IO [(Node, ERpcError b)]
executeRpcCalls nodeCalls = do
cert_file <- P.nodedCertFile
let (nodes, calls) = unzip nodeCalls
opts = map (getOptionsForCall cert_file) calls
client_cert_file_name <- P.nodedClientCertFile
client_file_exists <- doesFileExist client_cert_file_name
-- FIXME: This is needed to ensure upgradability to 2.11
-- Remove in 2.12.
let client_cert_file = if client_file_exists
then client_cert_file_name
else cert_file
(nodes, calls) = unzip nodeCalls
opts = map (getOptionsForCall cert_file client_cert_file) calls
opts_urls = zipWith3 (\n c o ->
case prepareHttpRequest o n c of
Left v -> Left v
......@@ -148,6 +148,7 @@ instance Arbitrary OpCodes.OpCode where
"OP_CLUSTER_POST_INIT" -> pure OpCodes.OpClusterPostInit
"OP_CLUSTER_RENEW_CRYPTO" -> pure OpCodes.OpClusterRenewCrypto
"OP_CLUSTER_DESTROY" -> pure OpCodes.OpClusterDestroy
"OP_CLUSTER_QUERY" -> pure OpCodes.OpClusterQuery
......@@ -33,6 +33,7 @@ import shutil
from collections import defaultdict
from ganeti.cmdlib import cluster
from ganeti import constants
from ganeti import errors
from ganeti import netutils
......@@ -41,7 +42,6 @@ from ganeti import opcodes
from ganeti import utils
from ganeti import pathutils
from ganeti import query
from ganeti.cmdlib import cluster
from ganeti.hypervisor import hv_xen
from testsupport import *
......@@ -231,7 +231,11 @@ class TestLUClusterDestroy(CmdlibTestCase):
class TestLUClusterPostInit(CmdlibTestCase):
def testExecution(self):
@testutils.patch_object(cluster, "_UpdateMasterClientCert")
def testExecution(self, update_client_cert_mock):
# mock the client certificate creation as it is tested separately
update_client_cert_mock.return_value = None
# For the purpose of this test, return the same certificate digest for all
# nodes
self.rpc.call_node_crypto_tokens = \
......@@ -68,6 +68,7 @@ RAPI_OPCODE_EXCLUDE = compat.UniqueFrozenset([
# Very sensitive in nature
# Helper opcodes (e.g. submitted by LUs)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment