diff --git a/NEWS b/NEWS index 2e7d18f38fa30f3c8b3a378ed8cd201c1c3b6c24..aec004633819a323f0627dfd737cc5c6fe48f16f 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,10 @@ News ==== -Version 2.5.0 beta3 -------------------- +Version 2.5.0 rc1 +----------------- -*(Released Wed, 31 Aug 2011)* +*(Released Tue, 4 Oct 2011)* Incompatible/important changes and bugfixes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -129,6 +129,14 @@ Misc - DRBD metadata volumes are overwritten with zeros during disk creation. +Version 2.5.0 beta3 +------------------- + +*(Released Wed, 31 Aug 2011)* + +This was the third beta release of the 2.5 series. + + Version 2.5.0 beta2 ------------------- diff --git a/configure.ac b/configure.ac index 04352d8acaaca5c3e17905e1d667558935f402f9..82e4a35a681ce6e1adaba9c10a39aae367a6c938 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([gnt_version_major], [2]) m4_define([gnt_version_minor], [5]) m4_define([gnt_version_revision], [0]) -m4_define([gnt_version_suffix], [~beta3]) +m4_define([gnt_version_suffix], [~rc1]) m4_define([gnt_version_full], m4_format([%d.%d.%d%s], gnt_version_major, gnt_version_minor, diff --git a/lib/backend.py b/lib/backend.py index fcb9bba11c00df52d7f78f3d8f4237c2ccf470d9..6a7efd69d2ff6a7a1a4548bc17ddd96ba14e4ba5 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -522,12 +522,25 @@ def VerifyNode(what, cluster_name): what[constants.NV_FILELIST]) if constants.NV_NODELIST in what: - result[constants.NV_NODELIST] = tmp = {} - random.shuffle(what[constants.NV_NODELIST]) - for node in what[constants.NV_NODELIST]: + (nodes, bynode) = what[constants.NV_NODELIST] + + # Add nodes from other groups (different for each node) + try: + nodes.extend(bynode[my_name]) + except KeyError: + pass + + # Use a random order + random.shuffle(nodes) + + # Try to contact all nodes + val = {} + for node in nodes: success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node) if not success: - tmp[node] = message + val[node] = message + + result[constants.NV_NODELIST] = val if constants.NV_NODENETTEST in what: result[constants.NV_NODENETTEST] = tmp = {} diff --git a/lib/cli.py b/lib/cli.py index 3f8d4979d85a87e01fb4d5f0857b2cc42fc063ca..2656d8896e00fcb0a01a52f782eda37ca845b27d 100644 --- a/lib/cli.py +++ b/lib/cli.py @@ -260,6 +260,9 @@ _PRIONAME_TO_VALUE = dict(_PRIORITY_NAMES) QR_UNKNOWN, QR_INCOMPLETE) = range(3) +#: Maximum batch size for ChooseJob +_CHOOSE_BATCH = 25 + class _Argument: def __init__(self, min=0, max=None): # pylint: disable=W0622 @@ -3073,7 +3076,8 @@ class JobExecutor(object): """ assert self.jobs, "_ChooseJob called with empty job list" - result = self.cl.QueryJobs([i[2] for i in self.jobs], ["status"]) + result = self.cl.QueryJobs([i[2] for i in self.jobs[:_CHOOSE_BATCH]], + ["status"]) assert result for job_data, status in zip(self.jobs, result): diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 53193c3158f4e3dc7c0ad1b6e9838c29bbccdb79..7d126899d04748135abae78cf4d363d2a6cbc246 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -2108,26 +2108,38 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): @param all_nvinfo: RPC results """ - node_names = frozenset(node.name for node in nodeinfo if not node.offline) - - assert master_node in node_names assert (len(files_all | files_all_opt | files_mc | files_vm) == sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \ "Found file listed in more than one file list" # Define functions determining which nodes to consider for a file - file2nodefn = dict([(filename, fn) - for (files, fn) in [(files_all, None), - (files_all_opt, None), - (files_mc, lambda node: (node.master_candidate or - node.name == master_node)), - (files_vm, lambda node: node.vm_capable)] - for filename in files]) + files2nodefn = [ + (files_all, None), + (files_all_opt, None), + (files_mc, lambda node: (node.master_candidate or + node.name == master_node)), + (files_vm, lambda node: node.vm_capable), + ] + + # Build mapping from filename to list of nodes which should have the file + nodefiles = {} + for (files, fn) in files2nodefn: + if fn is None: + filenodes = nodeinfo + else: + filenodes = filter(fn, nodeinfo) + nodefiles.update((filename, + frozenset(map(operator.attrgetter("name"), filenodes))) + for filename in files) + + assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm) - fileinfo = dict((filename, {}) for filename in file2nodefn.keys()) + fileinfo = dict((filename, {}) for filename in nodefiles) + ignore_nodes = set() for node in nodeinfo: if node.offline: + ignore_nodes.add(node.name) continue nresult = all_nvinfo[node.name] @@ -2141,13 +2153,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): errorif(test, cls.ENODEFILECHECK, node.name, "Node did not return file checksum data") if test: + ignore_nodes.add(node.name) continue + # Build per-checksum mapping from filename to nodes having it for (filename, checksum) in node_files.items(): - # Check if the file should be considered for a node - fn = file2nodefn[filename] - if fn is None or fn(node): - fileinfo[filename].setdefault(checksum, set()).add(node.name) + assert filename in nodefiles + fileinfo[filename].setdefault(checksum, set()).add(node.name) for (filename, checksums) in fileinfo.items(): assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" @@ -2155,23 +2167,33 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): # Nodes having the file with_file = frozenset(node_name for nodes in fileinfo[filename].values() - for node_name in nodes) + for node_name in nodes) - ignore_nodes + + expected_nodes = nodefiles[filename] - ignore_nodes # Nodes missing file - missing_file = node_names - with_file + missing_file = expected_nodes - with_file if filename in files_all_opt: # All or no nodes - errorif(missing_file and missing_file != node_names, + errorif(missing_file and missing_file != expected_nodes, cls.ECLUSTERFILECHECK, None, "File %s is optional, but it must exist on all or no" " nodes (not found on %s)", filename, utils.CommaJoin(utils.NiceSort(missing_file))) else: + # Non-optional files errorif(missing_file, cls.ECLUSTERFILECHECK, None, "File %s is missing from node(s) %s", filename, utils.CommaJoin(utils.NiceSort(missing_file))) + # Warn if a node has a file it shouldn't + unexpected = with_file - expected_nodes + errorif(unexpected, + cls.ECLUSTERFILECHECK, None, + "File %s should not exist on node(s) %s", + filename, utils.CommaJoin(utils.NiceSort(unexpected))) + # See if there are multiple versions of the file test = len(checksums) > 1 if test: @@ -2542,6 +2564,40 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): return instdisk + @staticmethod + def _SshNodeSelector(group_uuid, all_nodes): + """Create endless iterators for all potential SSH check hosts. + + """ + nodes = [node for node in all_nodes + if (node.group != group_uuid and + not node.offline)] + keyfunc = operator.attrgetter("group") + + return map(itertools.cycle, + [sorted(map(operator.attrgetter("name"), names)) + for _, names in itertools.groupby(sorted(nodes, key=keyfunc), + keyfunc)]) + + @classmethod + def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes): + """Choose which nodes should talk to which other nodes. + + We will make nodes contact all nodes in their group, and one node from + every other group. + + @warning: This algorithm has a known issue if one node group is much + smaller than others (e.g. just one node). In such a case all other + nodes will talk to the single node. + + """ + online_nodes = sorted(node.name for node in group_nodes if not node.offline) + sel = cls._SshNodeSelector(group_uuid, all_nodes) + + return (online_nodes, + dict((name, sorted([i.next() for i in sel])) + for name in online_nodes)) + def BuildHooksEnv(self): """Build hooks env. @@ -2605,25 +2661,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names)) - # We will make nodes contact all nodes in their group, and one node from - # every other group. - # TODO: should it be a *random* node, different every time? - online_nodes = [node.name for node in node_data_list if not node.offline] - other_group_nodes = {} - - for name in sorted(self.all_node_info): - node = self.all_node_info[name] - if (node.group not in other_group_nodes - and node.group != self.group_uuid - and not node.offline): - other_group_nodes[node.group] = node.name - node_verify_param = { constants.NV_FILELIST: utils.UniqueSequence(filename for files in filemap for filename in files), - constants.NV_NODELIST: online_nodes + other_group_nodes.values(), + constants.NV_NODELIST: + self._SelectSshCheckNodes(node_data_list, self.group_uuid, + self.all_node_info.values()), constants.NV_HYPERVISOR: hypervisors, constants.NV_HVPARAMS: _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), @@ -5061,7 +5106,7 @@ class LUNodeAdd(LogicalUnit): node_verify_list = [self.cfg.GetMasterNode()] node_verify_param = { - constants.NV_NODELIST: [node], + constants.NV_NODELIST: ([node], {}), # TODO: do a node-net-test as well? } diff --git a/lib/utils/log.py b/lib/utils/log.py index ceff2506918e51278757acccc3c45c110e7b9fac..281f59045ac8e7e7ae2505e154d55f6fdf4f1d84 100644 --- a/lib/utils/log.py +++ b/lib/utils/log.py @@ -230,7 +230,7 @@ def SetupLogging(logfile, program, debug=0, stderr_logging=False, if debug: stderr_handler.setLevel(logging.NOTSET) else: - stderr_handler.setLevel(logging.ERROR) + stderr_handler.setLevel(logging.CRITICAL) root_logger.addHandler(stderr_handler) if syslog in (constants.SYSLOG_YES, constants.SYSLOG_ONLY): diff --git a/test/ganeti.cmdlib_unittest.py b/test/ganeti.cmdlib_unittest.py index b44c5476a8e06b279e2dff6ebe7bdc4442ec986a..40eebe0aa8c734ef96720e9a1134973ab49542ee 100755 --- a/test/ganeti.cmdlib_unittest.py +++ b/test/ganeti.cmdlib_unittest.py @@ -27,6 +27,7 @@ import unittest import time import tempfile import shutil +import operator from ganeti import constants from ganeti import mcpu @@ -37,6 +38,8 @@ from ganeti import utils from ganeti import luxi from ganeti import ht from ganeti import objects +from ganeti import compat +from ganeti import rpc import testutils import mocks @@ -207,5 +210,158 @@ class TestLUGroupAssignNodes(unittest.TestCase): self.assertEqual(set(["inst3c"]), set(prev)) +class TestClusterVerifySsh(unittest.TestCase): + def testMultipleGroups(self): + fn = cmdlib.LUClusterVerifyGroup._SelectSshCheckNodes + mygroupnodes = [ + objects.Node(name="node20", group="my", offline=False), + objects.Node(name="node21", group="my", offline=False), + objects.Node(name="node22", group="my", offline=False), + objects.Node(name="node23", group="my", offline=False), + objects.Node(name="node24", group="my", offline=False), + objects.Node(name="node25", group="my", offline=False), + objects.Node(name="node26", group="my", offline=True), + ] + nodes = [ + objects.Node(name="node1", group="g1", offline=True), + objects.Node(name="node2", group="g1", offline=False), + objects.Node(name="node3", group="g1", offline=False), + objects.Node(name="node4", group="g1", offline=True), + objects.Node(name="node5", group="g1", offline=False), + objects.Node(name="node10", group="xyz", offline=False), + objects.Node(name="node11", group="xyz", offline=False), + objects.Node(name="node40", group="alloff", offline=True), + objects.Node(name="node41", group="alloff", offline=True), + objects.Node(name="node50", group="aaa", offline=False), + ] + mygroupnodes + assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes)) + + (online, perhost) = fn(mygroupnodes, "my", nodes) + self.assertEqual(online, ["node%s" % i for i in range(20, 26)]) + self.assertEqual(set(perhost.keys()), set(online)) + + self.assertEqual(perhost, { + "node20": ["node10", "node2", "node50"], + "node21": ["node11", "node3", "node50"], + "node22": ["node10", "node5", "node50"], + "node23": ["node11", "node2", "node50"], + "node24": ["node10", "node3", "node50"], + "node25": ["node11", "node5", "node50"], + }) + + def testSingleGroup(self): + fn = cmdlib.LUClusterVerifyGroup._SelectSshCheckNodes + nodes = [ + objects.Node(name="node1", group="default", offline=True), + objects.Node(name="node2", group="default", offline=False), + objects.Node(name="node3", group="default", offline=False), + objects.Node(name="node4", group="default", offline=True), + ] + assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes)) + + (online, perhost) = fn(nodes, "default", nodes) + self.assertEqual(online, ["node2", "node3"]) + self.assertEqual(set(perhost.keys()), set(online)) + + self.assertEqual(perhost, { + "node2": [], + "node3": [], + }) + + +class TestClusterVerifyFiles(unittest.TestCase): + @staticmethod + def _FakeErrorIf(errors, cond, ecode, item, msg, *args, **kwargs): + assert ((ecode == cmdlib.LUClusterVerifyGroup.ENODEFILECHECK and + ht.TNonEmptyString(item)) or + (ecode == cmdlib.LUClusterVerifyGroup.ECLUSTERFILECHECK and + item is None)) + + if args: + msg = msg % args + + if cond: + errors.append((item, msg)) + + _VerifyFiles = cmdlib.LUClusterVerifyGroup._VerifyFiles + + def test(self): + errors = [] + master_name = "master.example.com" + nodeinfo = [ + objects.Node(name=master_name, offline=False), + objects.Node(name="node2.example.com", offline=False), + objects.Node(name="node3.example.com", master_candidate=True), + objects.Node(name="node4.example.com", offline=False), + objects.Node(name="nodata.example.com"), + objects.Node(name="offline.example.com", offline=True), + ] + cluster = objects.Cluster(modify_etc_hosts=True, + enabled_hypervisors=[constants.HT_XEN_HVM]) + files_all = set([ + constants.CLUSTER_DOMAIN_SECRET_FILE, + constants.RAPI_CERT_FILE, + ]) + files_all_opt = set([ + constants.RAPI_USERS_FILE, + ]) + files_mc = set([ + constants.CLUSTER_CONF_FILE, + ]) + files_vm = set() + nvinfo = { + master_name: rpc.RpcResult(data=(True, { + constants.NV_FILELIST: { + constants.CLUSTER_CONF_FILE: "82314f897f38b35f9dab2f7c6b1593e0", + constants.RAPI_CERT_FILE: "babbce8f387bc082228e544a2146fee4", + constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4", + }})), + "node2.example.com": rpc.RpcResult(data=(True, { + constants.NV_FILELIST: { + constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a", + } + })), + "node3.example.com": rpc.RpcResult(data=(True, { + constants.NV_FILELIST: { + constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a", + constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4", + } + })), + "node4.example.com": rpc.RpcResult(data=(True, { + constants.NV_FILELIST: { + constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a", + constants.CLUSTER_CONF_FILE: "conf-a6d4b13e407867f7a7b4f0f232a8f527", + constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4", + constants.RAPI_USERS_FILE: "rapiusers-ea3271e8d810ef3", + } + })), + "nodata.example.com": rpc.RpcResult(data=(True, {})), + "offline.example.com": rpc.RpcResult(offline=True), + } + assert set(nvinfo.keys()) == set(map(operator.attrgetter("name"), nodeinfo)) + + self._VerifyFiles(compat.partial(self._FakeErrorIf, errors), nodeinfo, + master_name, nvinfo, + (files_all, files_all_opt, files_mc, files_vm)) + self.assertEqual(sorted(errors), sorted([ + (None, ("File %s found with 2 different checksums (variant 1 on" + " node2.example.com, node3.example.com, node4.example.com;" + " variant 2 on master.example.com)" % constants.RAPI_CERT_FILE)), + (None, ("File %s is missing from node(s) node2.example.com" % + constants.CLUSTER_DOMAIN_SECRET_FILE)), + (None, ("File %s should not exist on node(s) node4.example.com" % + constants.CLUSTER_CONF_FILE)), + (None, ("File %s is missing from node(s) node3.example.com" % + constants.CLUSTER_CONF_FILE)), + (None, ("File %s found with 2 different checksums (variant 1 on" + " master.example.com; variant 2 on node4.example.com)" % + constants.CLUSTER_CONF_FILE)), + (None, ("File %s is optional, but it must exist on all or no nodes (not" + " found on master.example.com, node2.example.com," + " node3.example.com)" % constants.RAPI_USERS_FILE)), + ("nodata.example.com", "Node did not return file checksum data"), + ])) + + if __name__ == "__main__": testutils.GanetiTestProgram()