Commit 20d317d4 authored by Iustin Pop's avatar Iustin Pop
Browse files

Cluster verify: check for missing bridges



Currently cluster verify doesn't check for bridge information; the
only checks are done at instance create and failover/migrate
time. This means a cluster that seems healthy will fail creation jobs.

This patch implements a simple verification that all nodes (in the
entire cluster, so it doesn't work well for multi-group setups) have
all the required bridges: the default one plus any instance bridge.
Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
parent 29b8eaee
......@@ -644,6 +644,10 @@ def VerifyNode(what, cluster_name):
if constants.NV_OSLIST in what and vm_capable:
result[constants.NV_OSLIST] = DiagnoseOS()
if constants.NV_BRIDGES in what and vm_capable:
result[constants.NV_BRIDGES] = [bridge
for bridge in what[constants.NV_BRIDGES]
if not utils.BridgeExists(bridge)]
return result
......
......@@ -1480,6 +1480,29 @@ class LUClusterVerify(LogicalUnit):
_ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
" '%s' of VG '%s'", pvname, owner_vg)
def _VerifyNodeBridges(self, ninfo, nresult, bridges):
  """Check the node bridges.

  Reports an C{ENODENET} error for the node when it did not return a
  list for the bridge check, or when the returned list of missing
  bridges is not empty.

  @type ninfo: L{objects.Node}
  @param ninfo: the node to check
  @param nresult: the remote results for the node
  @param bridges: the expected list of bridges

  """
  if not bridges:
    # Nothing is expected on the nodes, so there is nothing to verify
    return

  node = ninfo.name
  _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

  # The remote result is read as the list of bridges missing on the node
  missing = nresult.get(constants.NV_BRIDGES, None)
  test = not isinstance(missing, list)
  _ErrorIf(test, self.ENODENET, node,
           "did not return valid bridge information")
  if not test:
    # Hand the bridge names over as a format argument (the calling
    # convention used by the other checks) instead of pre-formatting
    # the message with "%"
    _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s",
             utils.CommaJoin(sorted(missing)))
def _VerifyNodeNetwork(self, ninfo, nresult):
"""Check the node time.
......@@ -2177,6 +2200,21 @@ class LUClusterVerify(LogicalUnit):
if drbd_helper:
node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
# bridge checks
# FIXME: this needs to be changed per node-group, not cluster-wide
bridges = set()
default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
bridges.add(default_nicpp[constants.NIC_LINK])
for instance in instanceinfo.values():
for nic in instance.nics:
full_nic = cluster.SimpleFillNIC(nic.nicparams)
if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
bridges.add(full_nic[constants.NIC_LINK])
if bridges:
node_verify_param[constants.NV_BRIDGES] = list(bridges)
# Build our expected cluster state
node_image = dict((node.name, self.NodeImage(offline=node.offline,
name=node.name,
......@@ -2287,6 +2325,7 @@ class LUClusterVerify(LogicalUnit):
if refos_img is None:
refos_img = nimg
self._VerifyNodeOS(node_i, nimg, refos_img)
self._VerifyNodeBridges(node_i, nresult, bridges)
feedback_fn("* Verifying instance status")
for instance in instancelist:
......
......@@ -870,6 +870,7 @@ NV_VERSION = "version"
NV_VGLIST = "vglist"
NV_VMNODES = "vmnodes"
NV_OOB_PATHS = "oob-paths"
# Node-verify key for the bridge check: the request carries the bridges to
# look for, and the reply lists those that do not exist on the node
NV_BRIDGES = "bridges"
# SSL certificate check constants (in days)
SSL_CERT_EXPIRATION_WARN = 30
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment