diff --git a/lib/backend.py b/lib/backend.py index d02a8884155c96be66fc35685dca31b22e363b8f..3f793da862d48d97305f0de2db08779ec8c344e8 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -520,12 +520,25 @@ def VerifyNode(what, cluster_name): what[constants.NV_FILELIST]) if constants.NV_NODELIST in what: - result[constants.NV_NODELIST] = tmp = {} - random.shuffle(what[constants.NV_NODELIST]) - for node in what[constants.NV_NODELIST]: + (nodes, bynode) = what[constants.NV_NODELIST] + + # Add nodes from other groups (different for each node) + try: + nodes.extend(bynode[my_name]) + except KeyError: + pass + + # Use a random order + random.shuffle(nodes) + + # Try to contact all nodes + val = {} + for node in nodes: success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node) if not success: - tmp[node] = message + val[node] = message + + result[constants.NV_NODELIST] = val if constants.NV_NODENETTEST in what: result[constants.NV_NODENETTEST] = tmp = {} diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 2b2ea239fe37dc19c8761c899f9fb7732f6dcb82..25113bd6a2f471e2925e542b8f6a5f65be7bdca7 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -2542,6 +2542,40 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): return instdisk + @staticmethod + def _SshNodeSelector(group_uuid, all_nodes): + """Create endless iterators for all potential SSH check hosts. + + """ + nodes = [node for node in all_nodes + if (node.group != group_uuid and + not node.offline)] + keyfunc = operator.attrgetter("group") + + return map(itertools.cycle, + [sorted(map(operator.attrgetter("name"), names)) + for _, names in itertools.groupby(sorted(nodes, key=keyfunc), + keyfunc)]) + + @classmethod + def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes): + """Choose which nodes should talk to which other nodes. + + We will make nodes contact all nodes in their group, and one node from + every other group. + + @warning: This algorithm has a known issue if one node group is much + smaller than others (e.g. just one node). In such a case all other + nodes will talk to the single node. + + """ + online_nodes = sorted(node.name for node in group_nodes if not node.offline) + sel = cls._SshNodeSelector(group_uuid, all_nodes) + + return (online_nodes, + dict((name, sorted([i.next() for i in sel])) + for name in online_nodes)) + def BuildHooksEnv(self): """Build hooks env. @@ -2605,25 +2639,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names)) - # We will make nodes contact all nodes in their group, and one node from - # every other group. - # TODO: should it be a *random* node, different every time? - online_nodes = [node.name for node in node_data_list if not node.offline] - other_group_nodes = {} - - for name in sorted(self.all_node_info): - node = self.all_node_info[name] - if (node.group not in other_group_nodes - and node.group != self.group_uuid - and not node.offline): - other_group_nodes[node.group] = node.name - node_verify_param = { constants.NV_FILELIST: utils.UniqueSequence(filename for files in filemap for filename in files), - constants.NV_NODELIST: online_nodes + other_group_nodes.values(), + constants.NV_NODELIST: + self._SelectSshCheckNodes(node_data_list, self.group_uuid, + self.all_node_info.values()), constants.NV_HYPERVISOR: hypervisors, constants.NV_HVPARAMS: _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), diff --git a/test/ganeti.cmdlib_unittest.py b/test/ganeti.cmdlib_unittest.py index b44c5476a8e06b279e2dff6ebe7bdc4442ec986a..feb69b5088bc833a171d3d7575386f05d73fa3f8 100755 --- a/test/ganeti.cmdlib_unittest.py +++ b/test/ganeti.cmdlib_unittest.py @@ -27,6 +27,7 @@ import unittest import time import tempfile import shutil +import operator from ganeti import constants from ganeti import mcpu @@ -207,5 +208,64 @@ class TestLUGroupAssignNodes(unittest.TestCase): self.assertEqual(set(["inst3c"]), set(prev)) +class TestClusterVerifySsh(unittest.TestCase): + def testMultipleGroups(self): + fn = cmdlib.LUClusterVerifyGroup._SelectSshCheckNodes + mygroupnodes = [ + objects.Node(name="node20", group="my", offline=False), + objects.Node(name="node21", group="my", offline=False), + objects.Node(name="node22", group="my", offline=False), + objects.Node(name="node23", group="my", offline=False), + objects.Node(name="node24", group="my", offline=False), + objects.Node(name="node25", group="my", offline=False), + objects.Node(name="node26", group="my", offline=True), + ] + nodes = [ + objects.Node(name="node1", group="g1", offline=True), + objects.Node(name="node2", group="g1", offline=False), + objects.Node(name="node3", group="g1", offline=False), + objects.Node(name="node4", group="g1", offline=True), + objects.Node(name="node5", group="g1", offline=False), + objects.Node(name="node10", group="xyz", offline=False), + objects.Node(name="node11", group="xyz", offline=False), + objects.Node(name="node40", group="alloff", offline=True), + objects.Node(name="node41", group="alloff", offline=True), + objects.Node(name="node50", group="aaa", offline=False), + ] + mygroupnodes + assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes)) + + (online, perhost) = fn(mygroupnodes, "my", nodes) + self.assertEqual(online, ["node%s" % i for i in range(20, 26)]) + self.assertEqual(set(perhost.keys()), set(online)) + + self.assertEqual(perhost, { + "node20": ["node10", "node2", "node50"], + "node21": ["node11", "node3", "node50"], + "node22": ["node10", "node5", "node50"], + "node23": ["node11", "node2", "node50"], + "node24": ["node10", "node3", "node50"], + "node25": ["node11", "node5", "node50"], + }) + + def testSingleGroup(self): + fn = cmdlib.LUClusterVerifyGroup._SelectSshCheckNodes + nodes = [ + objects.Node(name="node1", group="default", offline=True), + objects.Node(name="node2", group="default", offline=False), + objects.Node(name="node3", group="default", offline=False), + objects.Node(name="node4", group="default", offline=True), + ] + assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes)) + + (online, perhost) = fn(nodes, "default", nodes) + self.assertEqual(online, ["node2", "node3"]) + self.assertEqual(set(perhost.keys()), set(online)) + + self.assertEqual(perhost, { + "node2": [], + "node3": [], + }) + + if __name__ == "__main__": testutils.GanetiTestProgram()