Commit d41d07d4 authored by Iustin Pop's avatar Iustin Pop
Browse files

Fix disk status verification in LUClusterVerify

Commit b8d26c6e

 added disk status verification, but it has two
(different) bugs for not healthy nodes.

For offline nodes, we don't add at all the disk status to the
instance/node dict, with the result that the instance is not present in
the instdisk dict if all of its nodes are offline. This creates a
KeyError later when we call VerifyInstance with instdisk[instance].

For online nodes, but which don't return a valid disk status, we simply
set the status to None for each disk, but the code in _VerifyInstance
presumes and requires that each status is a valid tuple of length two.

For both these bugs, we redo the instdisk computations to always include
valid data, and we enhance the asserts to check for consistency.
Signed-off-by: default avatarIustin Pop <iustin@google.com>
Reviewed-by: default avatarGuido Trotter <ultrotter@google.com>
parent 5500ca1f
......@@ -1914,6 +1914,10 @@ class LUVerifyCluster(LogicalUnit):
@param node_image: Node objects
@type instanceinfo: dict of (name, L{objects.Instance})
@param instanceinfo: Instance objects
@rtype: {instance: {node: [(succes, payload)]}}
@return: a dictionary of per-instance dictionaries with nodes as
keys and disk information as values; the disk information is a
list of tuples (success, payload)
"""
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
......@@ -1954,28 +1958,38 @@ class LUVerifyCluster(LogicalUnit):
instdisk = {}
for (nname, nres) in result.items():
if nres.offline:
# Ignore offline node
continue
disks = node_disks[nname]
msg = nres.fail_msg
_ErrorIf(msg, self.ENODERPC, nname,
"while getting disk information: %s", nres.fail_msg)
if msg:
if nres.offline:
# No data from this node
data = len(disks) * [None]
data = len(disks) * [(False, "node offline")]
else:
data = nres.payload
msg = nres.fail_msg
_ErrorIf(msg, self.ENODERPC, nname,
"while getting disk information: %s", msg)
if msg:
# No data from this node
data = len(disks) * [(False, msg)]
else:
data = []
for idx, i in enumerate(nres.payload):
if isinstance(i, (tuple, list)) and len(i) == 2:
data.append(i)
else:
logging.warning("Invalid result from node %s, entry %d: %s",
nname, idx, i)
data.append((False, "Invalid result from the remote node"))
for ((inst, _), status) in zip(disks, data):
instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
len(nnames) <= len(instanceinfo[inst].all_nodes)
len(nnames) <= len(instanceinfo[inst].all_nodes) and
compat.all(isinstance(s, (tuple, list)) and
len(s) == 2 for s in statuses)
for inst, nnames in instdisk.items()
for nname, statuses in nnames.items())
assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
return instdisk
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment