Commit 8964ee14 authored by Iustin Pop
Browse files

Add support for vm_capable in cluster verify



The method to make vm_capable integrate easily into cluster verify is as follows:

- we add a new NV_VMNODES verify parameter which, despite its name, lists
  the *non*-vm_capable nodes
- the LU populates this list (it's expected that non-vm_capable nodes
  are few compared to vm_capable nodes)
- the backend skips the VM-hosting checks (hypervisor, LVM, DRBD, instance
  and OS lists) when the local node appears in this list
- in the LU, we reorder the VM-related checks so that they occur after
  the non-VM (generic) tests, and we only execute them conditionally

Additionally, we extend the instance checks to report instances that live
on non-vm_capable nodes (as EINSTANCEBADNODE errors).

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
parent 53919782
......@@ -491,8 +491,9 @@ def VerifyNode(what, cluster_name):
result = {}
my_name = netutils.Hostname.GetSysName()
port = netutils.GetDaemonPort(constants.NODED)
vm_capable = my_name not in what.get(constants.NV_VMNODES, [])
if constants.NV_HYPERVISOR in what:
if constants.NV_HYPERVISOR in what and vm_capable:
result[constants.NV_HYPERVISOR] = tmp = {}
for hv_name in what[constants.NV_HYPERVISOR]:
try:
......@@ -547,14 +548,14 @@ def VerifyNode(what, cluster_name):
result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
source=source)
if constants.NV_LVLIST in what:
if constants.NV_LVLIST in what and vm_capable:
try:
val = GetVolumeList(what[constants.NV_LVLIST])
except RPCFail, err:
val = str(err)
result[constants.NV_LVLIST] = val
if constants.NV_INSTANCELIST in what:
if constants.NV_INSTANCELIST in what and vm_capable:
# GetInstanceList can fail
try:
val = GetInstanceList(what[constants.NV_INSTANCELIST])
......@@ -562,10 +563,10 @@ def VerifyNode(what, cluster_name):
val = str(err)
result[constants.NV_INSTANCELIST] = val
if constants.NV_VGLIST in what:
if constants.NV_VGLIST in what and vm_capable:
result[constants.NV_VGLIST] = utils.ListVolumeGroups()
if constants.NV_PVLIST in what:
if constants.NV_PVLIST in what and vm_capable:
result[constants.NV_PVLIST] = \
bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
filter_allocatable=False)
......@@ -574,11 +575,11 @@ def VerifyNode(what, cluster_name):
result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
constants.RELEASE_VERSION)
if constants.NV_HVINFO in what:
if constants.NV_HVINFO in what and vm_capable:
hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
result[constants.NV_HVINFO] = hyper.GetNodeInfo()
if constants.NV_DRBDLIST in what:
if constants.NV_DRBDLIST in what and vm_capable:
try:
used_minors = bdev.DRBD8.GetUsedDevs().keys()
except errors.BlockDeviceError, err:
......@@ -586,7 +587,7 @@ def VerifyNode(what, cluster_name):
used_minors = str(err)
result[constants.NV_DRBDLIST] = used_minors
if constants.NV_DRBDHELPER in what:
if constants.NV_DRBDHELPER in what and vm_capable:
status = True
try:
payload = bdev.BaseDRBD.GetUsermodeHelper()
......@@ -611,7 +612,7 @@ def VerifyNode(what, cluster_name):
if constants.NV_TIME in what:
result[constants.NV_TIME] = utils.SplitTime(time.time())
if constants.NV_OSLIST in what:
if constants.NV_OSLIST in what and vm_capable:
result[constants.NV_OSLIST] = DiagnoseOS()
return result
......
......@@ -1219,9 +1219,11 @@ class LUVerifyCluster(LogicalUnit):
@ivar os_fail: whether the RPC call didn't return valid OS data
@type oslist: list
@ivar oslist: list of OSes as diagnosed by DiagnoseOS
@type vm_capable: boolean
@ivar vm_capable: whether the node can host instances
"""
def __init__(self, offline=False, name=None):
def __init__(self, offline=False, name=None, vm_capable=True):
self.name = name
self.volumes = {}
self.instances = []
......@@ -1231,6 +1233,7 @@ class LUVerifyCluster(LogicalUnit):
self.mfree = 0
self.dfree = 0
self.offline = offline
self.vm_capable = vm_capable
self.rpc_fail = False
self.lvm_fail = False
self.hyp_fail = False
......@@ -1335,7 +1338,7 @@ class LUVerifyCluster(LogicalUnit):
code=self.ETYPE_WARNING)
hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
if isinstance(hyp_result, dict):
if ninfo.vm_capable and isinstance(hyp_result, dict):
for hv_name, hv_result in hyp_result.iteritems():
test = hv_result is not None
_ErrorIf(test, self.ENODEHV, node,
......@@ -2010,6 +2013,7 @@ class LUVerifyCluster(LogicalUnit):
constants.NV_TIME: None,
constants.NV_MASTERIP: (master_node, master_ip),
constants.NV_OSLIST: None,
constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
}
if vg_name is not None:
......@@ -2023,7 +2027,8 @@ class LUVerifyCluster(LogicalUnit):
# Build our expected cluster state
node_image = dict((node.name, self.NodeImage(offline=node.offline,
name=node.name))
name=node.name,
vm_capable=node.vm_capable))
for node in nodeinfo)
for instance in instancelist:
......@@ -2100,22 +2105,24 @@ class LUVerifyCluster(LogicalUnit):
nresult = all_nvinfo[node].payload
nimg.call_ok = self._VerifyNode(node_i, nresult)
self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
self._VerifyNodeNetwork(node_i, nresult)
self._VerifyNodeLVM(node_i, nresult, vg_name)
self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
master_files)
self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
all_drbd_map)
self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
self._UpdateNodeInstances(node_i, nresult, nimg)
self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
self._UpdateNodeOS(node_i, nresult, nimg)
if not nimg.os_fail:
if refos_img is None:
refos_img = nimg
self._VerifyNodeOS(node_i, nimg, refos_img)
if nimg.vm_capable:
self._VerifyNodeLVM(node_i, nresult, vg_name)
self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
all_drbd_map)
self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
self._UpdateNodeInstances(node_i, nresult, nimg)
self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
self._UpdateNodeOS(node_i, nresult, nimg)
if not nimg.os_fail:
if refos_img is None:
refos_img = nimg
self._VerifyNodeOS(node_i, nimg, refos_img)
feedback_fn("* Verifying instance status")
for instance in instancelist:
......@@ -2162,10 +2169,12 @@ class LUVerifyCluster(LogicalUnit):
_ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
"instance lives on offline node(s) %s",
utils.CommaJoin(inst_nodes_offline))
# ... or ghost nodes
# ... or ghost/non-vm_capable nodes
for node in inst_config.all_nodes:
_ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
"instance lives on ghost node %s", node)
_ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
instance, "instance lives on non-vm_capable node %s", node)
feedback_fn("* Verifying orphan volumes")
reserved = utils.FieldSet(*cluster.reserved_lvs)
......
......@@ -785,6 +785,7 @@ NV_PVLIST = "pvlist"
NV_TIME = "time"
NV_VERSION = "version"
NV_VGLIST = "vglist"
NV_VMNODES = "vmnodes"
# SSL certificate check constants (in days)
SSL_CERT_EXPIRATION_WARN = 30
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment