Commit 170b02b7 authored by Michael Hanselmann's avatar Michael Hanselmann
Browse files

Fix issue when verifying cluster files

If a cluster has any non-master-candidate nodes, those don't contain all
files (e.g. config.data). With commit aef59ae7

 (March 31st, 2011)
the logic was changed and subsequently verifying a cluster with non-mc
nodes would complain.

This patch fixes this issue by changing the algorithm. It also adds an
additional check for files which shouldn't exist on a machine. A newly
added unittest is included.
Signed-off-by: default avatarMichael Hanselmann <hansmi@google.com>
Reviewed-by: default avatarIustin Pop <iustin@google.com>
parent d728ac75
......@@ -2108,26 +2108,38 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
@param all_nvinfo: RPC results
"""
node_names = frozenset(node.name for node in nodeinfo if not node.offline)
assert master_node in node_names
assert (len(files_all | files_all_opt | files_mc | files_vm) ==
sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
"Found file listed in more than one file list"
# Define functions determining which nodes to consider for a file
file2nodefn = dict([(filename, fn)
for (files, fn) in [(files_all, None),
(files_all_opt, None),
(files_mc, lambda node: (node.master_candidate or
node.name == master_node)),
(files_vm, lambda node: node.vm_capable)]
for filename in files])
files2nodefn = [
(files_all, None),
(files_all_opt, None),
(files_mc, lambda node: (node.master_candidate or
node.name == master_node)),
(files_vm, lambda node: node.vm_capable),
]
# Build mapping from filename to list of nodes which should have the file
nodefiles = {}
for (files, fn) in files2nodefn:
if fn is None:
filenodes = nodeinfo
else:
filenodes = filter(fn, nodeinfo)
nodefiles.update((filename,
frozenset(map(operator.attrgetter("name"), filenodes)))
for filename in files)
fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
fileinfo = dict((filename, {}) for filename in nodefiles)
ignore_nodes = set()
for node in nodeinfo:
if node.offline:
ignore_nodes.add(node.name)
continue
nresult = all_nvinfo[node.name]
......@@ -2141,13 +2153,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
errorif(test, cls.ENODEFILECHECK, node.name,
"Node did not return file checksum data")
if test:
ignore_nodes.add(node.name)
continue
# Build per-checksum mapping from filename to nodes having it
for (filename, checksum) in node_files.items():
# Check if the file should be considered for a node
fn = file2nodefn[filename]
if fn is None or fn(node):
fileinfo[filename].setdefault(checksum, set()).add(node.name)
assert filename in nodefiles
fileinfo[filename].setdefault(checksum, set()).add(node.name)
for (filename, checksums) in fileinfo.items():
assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
......@@ -2155,23 +2167,33 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
# Nodes having the file
with_file = frozenset(node_name
for nodes in fileinfo[filename].values()
for node_name in nodes)
for node_name in nodes) - ignore_nodes
expected_nodes = nodefiles[filename] - ignore_nodes
# Nodes missing file
missing_file = node_names - with_file
missing_file = expected_nodes - with_file
if filename in files_all_opt:
# All or no nodes
errorif(missing_file and missing_file != node_names,
errorif(missing_file and missing_file != expected_nodes,
cls.ECLUSTERFILECHECK, None,
"File %s is optional, but it must exist on all or no"
" nodes (not found on %s)",
filename, utils.CommaJoin(utils.NiceSort(missing_file)))
else:
# Non-optional files
errorif(missing_file, cls.ECLUSTERFILECHECK, None,
"File %s is missing from node(s) %s", filename,
utils.CommaJoin(utils.NiceSort(missing_file)))
# Warn if a node has a file it shouldn't
unexpected = with_file - expected_nodes
errorif(unexpected,
cls.ECLUSTERFILECHECK, None,
"File %s should not exist on node(s) %s",
filename, utils.CommaJoin(utils.NiceSort(unexpected)))
# See if there are multiple versions of the file
test = len(checksums) > 1
if test:
......
......@@ -38,6 +38,8 @@ from ganeti import utils
from ganeti import luxi
from ganeti import ht
from ganeti import objects
from ganeti import compat
from ganeti import rpc
import testutils
import mocks
......@@ -267,5 +269,99 @@ class TestClusterVerifySsh(unittest.TestCase):
})
class TestClusterVerifyFiles(unittest.TestCase):
@staticmethod
def _FakeErrorIf(errors, cond, ecode, item, msg, *args, **kwargs):
assert ((ecode == cmdlib.LUClusterVerifyGroup.ENODEFILECHECK and
ht.TNonEmptyString(item)) or
(ecode == cmdlib.LUClusterVerifyGroup.ECLUSTERFILECHECK and
item is None))
if args:
msg = msg % args
if cond:
errors.append((item, msg))
_VerifyFiles = cmdlib.LUClusterVerifyGroup._VerifyFiles
def test(self):
errors = []
master_name = "master.example.com"
nodeinfo = [
objects.Node(name=master_name, offline=False),
objects.Node(name="node2.example.com", offline=False),
objects.Node(name="node3.example.com", master_candidate=True),
objects.Node(name="node4.example.com", offline=False),
objects.Node(name="nodata.example.com"),
objects.Node(name="offline.example.com", offline=True),
]
cluster = objects.Cluster(modify_etc_hosts=True,
enabled_hypervisors=[constants.HT_XEN_HVM])
files_all = set([
constants.CLUSTER_DOMAIN_SECRET_FILE,
constants.RAPI_CERT_FILE,
])
files_all_opt = set([
constants.RAPI_USERS_FILE,
])
files_mc = set([
constants.CLUSTER_CONF_FILE,
])
files_vm = set()
nvinfo = {
master_name: rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.CLUSTER_CONF_FILE: "82314f897f38b35f9dab2f7c6b1593e0",
constants.RAPI_CERT_FILE: "babbce8f387bc082228e544a2146fee4",
constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4",
}})),
"node2.example.com": rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a",
}
})),
"node3.example.com": rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a",
constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4",
}
})),
"node4.example.com": rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a",
constants.CLUSTER_CONF_FILE: "conf-a6d4b13e407867f7a7b4f0f232a8f527",
constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4",
constants.RAPI_USERS_FILE: "rapiusers-ea3271e8d810ef3",
}
})),
"nodata.example.com": rpc.RpcResult(data=(True, {})),
"offline.example.com": rpc.RpcResult(offline=True),
}
assert set(nvinfo.keys()) == set(map(operator.attrgetter("name"), nodeinfo))
self._VerifyFiles(compat.partial(self._FakeErrorIf, errors), nodeinfo,
master_name, nvinfo,
(files_all, files_all_opt, files_mc, files_vm))
self.assertEqual(sorted(errors), sorted([
(None, ("File %s found with 2 different checksums (variant 1 on"
" node2.example.com, node3.example.com, node4.example.com;"
" variant 2 on master.example.com)" % constants.RAPI_CERT_FILE)),
(None, ("File %s is missing from node(s) node2.example.com" %
constants.CLUSTER_DOMAIN_SECRET_FILE)),
(None, ("File %s should not exist on node(s) node4.example.com" %
constants.CLUSTER_CONF_FILE)),
(None, ("File %s is missing from node(s) node3.example.com" %
constants.CLUSTER_CONF_FILE)),
(None, ("File %s found with 2 different checksums (variant 1 on"
" master.example.com; variant 2 on node4.example.com)" %
constants.CLUSTER_CONF_FILE)),
(None, ("File %s is optional, but it must exist on all or no nodes (not"
" found on master.example.com, node2.example.com,"
" node3.example.com)" % constants.RAPI_USERS_FILE)),
("nodata.example.com", "Node did not return file checksum data"),
]))
if __name__ == "__main__":
testutils.GanetiTestProgram()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment