Commit ea9c753d authored by Andrea Spadaccini's avatar Andrea Spadaccini
Browse files

Merge branch 'devel-2.5'

* devel-2.5:
  cluster-merge: log an info message at node readd
  Bump version to 2.5.0~rc1
  Fix issue when verifying cluster files
  Revert "utils.log: Write error messages to stderr"
  Fix adding nodes after commit 64c7b383

  LUClusterVerifyGroup: Spread SSH checks over more nodes
  Optimise cli.JobExecutor with many pending jobs
Signed-off-by: Andrea Spadaccini <>
Reviewed-by: René Nussbaumer <>
parents 9822b1dd a080bab8
Version 2.5.0 beta3
Version 2.5.0 rc1
*(Released Wed, 31 Aug 2011)*
*(Released Tue, 4 Oct 2011)*
Incompatible/important changes and bugfixes
......@@ -129,6 +129,14 @@ Misc
- DRBD metadata volumes are overwritten with zeros during disk creation.
Version 2.5.0 beta3
*(Released Wed, 31 Aug 2011)*
This was the third beta release of the 2.5 series.
Version 2.5.0 beta2
......@@ -2,7 +2,7 @@
m4_define([gnt_version_major], [2])
m4_define([gnt_version_minor], [5])
m4_define([gnt_version_revision], [0])
m4_define([gnt_version_suffix], [~beta3])
m4_define([gnt_version_suffix], [~rc1])
gnt_version_major, gnt_version_minor,
......@@ -522,12 +522,25 @@ def VerifyNode(what, cluster_name):
if constants.NV_NODELIST in what:
result[constants.NV_NODELIST] = tmp = {}
for node in what[constants.NV_NODELIST]:
(nodes, bynode) = what[constants.NV_NODELIST]
# Add nodes from other groups (different for each node)
except KeyError:
# Use a random order
# Try to contact all nodes
val = {}
for node in nodes:
success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
if not success:
tmp[node] = message
val[node] = message
result[constants.NV_NODELIST] = val
if constants.NV_NODENETTEST in what:
result[constants.NV_NODENETTEST] = tmp = {}
......@@ -260,6 +260,9 @@ _PRIONAME_TO_VALUE = dict(_PRIORITY_NAMES)
QR_INCOMPLETE) = range(3)
#: Maximum batch size for ChooseJob
class _Argument:
def __init__(self, min=0, max=None): # pylint: disable=W0622
......@@ -3073,7 +3076,8 @@ class JobExecutor(object):
assert, "_ChooseJob called with empty job list"
result =[i[2] for i in], ["status"])
result =[i[2] for i in[:_CHOOSE_BATCH]],
assert result
for job_data, status in zip(, result):
......@@ -2108,26 +2108,38 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
@param all_nvinfo: RPC results
node_names = frozenset( for node in nodeinfo if not node.offline)
assert master_node in node_names
assert (len(files_all | files_all_opt | files_mc | files_vm) ==
sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
"Found file listed in more than one file list"
# Define functions determining which nodes to consider for a file
file2nodefn = dict([(filename, fn)
for (files, fn) in [(files_all, None),
(files_all_opt, None),
(files_mc, lambda node: (node.master_candidate or == master_node)),
(files_vm, lambda node: node.vm_capable)]
for filename in files])
files2nodefn = [
(files_all, None),
(files_all_opt, None),
(files_mc, lambda node: (node.master_candidate or == master_node)),
(files_vm, lambda node: node.vm_capable),
# Build mapping from filename to list of nodes which should have the file
nodefiles = {}
for (files, fn) in files2nodefn:
if fn is None:
filenodes = nodeinfo
filenodes = filter(fn, nodeinfo)
frozenset(map(operator.attrgetter("name"), filenodes)))
for filename in files)
assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
fileinfo = dict((filename, {}) for filename in nodefiles)
ignore_nodes = set()
for node in nodeinfo:
if node.offline:
nresult = all_nvinfo[]
......@@ -2141,13 +2153,13 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
errorif(test, cls.ENODEFILECHECK,,
"Node did not return file checksum data")
if test:
# Build per-checksum mapping from filename to nodes having it
for (filename, checksum) in node_files.items():
# Check if the file should be considered for a node
fn = file2nodefn[filename]
if fn is None or fn(node):
fileinfo[filename].setdefault(checksum, set()).add(
assert filename in nodefiles
fileinfo[filename].setdefault(checksum, set()).add(
for (filename, checksums) in fileinfo.items():
assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
......@@ -2155,23 +2167,33 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
# Nodes having the file
with_file = frozenset(node_name
for nodes in fileinfo[filename].values()
for node_name in nodes)
for node_name in nodes) - ignore_nodes
expected_nodes = nodefiles[filename] - ignore_nodes
# Nodes missing file
missing_file = node_names - with_file
missing_file = expected_nodes - with_file
if filename in files_all_opt:
# All or no nodes
errorif(missing_file and missing_file != node_names,
errorif(missing_file and missing_file != expected_nodes,
"File %s is optional, but it must exist on all or no"
" nodes (not found on %s)",
filename, utils.CommaJoin(utils.NiceSort(missing_file)))
# Non-optional files
errorif(missing_file, cls.ECLUSTERFILECHECK, None,
"File %s is missing from node(s) %s", filename,
# Warn if a node has a file it shouldn't
unexpected = with_file - expected_nodes
"File %s should not exist on node(s) %s",
filename, utils.CommaJoin(utils.NiceSort(unexpected)))
# See if there are multiple versions of the file
test = len(checksums) > 1
if test:
......@@ -2542,6 +2564,40 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
return instdisk
def _SshNodeSelector(group_uuid, all_nodes):
"""Create endless iterators for all potential SSH check hosts.
nodes = [node for node in all_nodes
if ( != group_uuid and
not node.offline)]
keyfunc = operator.attrgetter("group")
return map(itertools.cycle,
[sorted(map(operator.attrgetter("name"), names))
for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
"""Choose which nodes should talk to which other nodes.
We will make nodes contact all nodes in their group, and one node from
every other group.
@warning: This algorithm has a known issue if one node group is much
smaller than others (e.g. just one node). In such a case all other
nodes will talk to the single node.
online_nodes = sorted( for node in group_nodes if not node.offline)
sel = cls._SshNodeSelector(group_uuid, all_nodes)
return (online_nodes,
dict((name, sorted([ for i in sel]))
for name in online_nodes))
def BuildHooksEnv(self):
"""Build hooks env.
......@@ -2605,25 +2661,14 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
# We will make nodes contact all nodes in their group, and one node from
# every other group.
# TODO: should it be a *random* node, different every time?
online_nodes = [ for node in node_data_list if not node.offline]
other_group_nodes = {}
for name in sorted(self.all_node_info):
node = self.all_node_info[name]
if ( not in other_group_nodes
and != self.group_uuid
and not node.offline):
other_group_nodes[] =
node_verify_param = {
for files in filemap
for filename in files),
constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
self._SelectSshCheckNodes(node_data_list, self.group_uuid,
constants.NV_HYPERVISOR: hypervisors,
_GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
......@@ -5061,7 +5106,7 @@ class LUNodeAdd(LogicalUnit):
node_verify_list = [self.cfg.GetMasterNode()]
node_verify_param = {
constants.NV_NODELIST: [node],
constants.NV_NODELIST: ([node], {}),
# TODO: do a node-net-test as well?
......@@ -230,7 +230,7 @@ def SetupLogging(logfile, program, debug=0, stderr_logging=False,
if debug:
if syslog in (constants.SYSLOG_YES, constants.SYSLOG_ONLY):
......@@ -27,6 +27,7 @@ import unittest
import time
import tempfile
import shutil
import operator
from ganeti import constants
from ganeti import mcpu
......@@ -37,6 +38,8 @@ from ganeti import utils
from ganeti import luxi
from ganeti import ht
from ganeti import objects
from ganeti import compat
from ganeti import rpc
import testutils
import mocks
......@@ -207,5 +210,158 @@ class TestLUGroupAssignNodes(unittest.TestCase):
self.assertEqual(set(["inst3c"]), set(prev))
class TestClusterVerifySsh(unittest.TestCase):
def testMultipleGroups(self):
fn = cmdlib.LUClusterVerifyGroup._SelectSshCheckNodes
mygroupnodes = [
objects.Node(name="node20", group="my", offline=False),
objects.Node(name="node21", group="my", offline=False),
objects.Node(name="node22", group="my", offline=False),
objects.Node(name="node23", group="my", offline=False),
objects.Node(name="node24", group="my", offline=False),
objects.Node(name="node25", group="my", offline=False),
objects.Node(name="node26", group="my", offline=True),
nodes = [
objects.Node(name="node1", group="g1", offline=True),
objects.Node(name="node2", group="g1", offline=False),
objects.Node(name="node3", group="g1", offline=False),
objects.Node(name="node4", group="g1", offline=True),
objects.Node(name="node5", group="g1", offline=False),
objects.Node(name="node10", group="xyz", offline=False),
objects.Node(name="node11", group="xyz", offline=False),
objects.Node(name="node40", group="alloff", offline=True),
objects.Node(name="node41", group="alloff", offline=True),
objects.Node(name="node50", group="aaa", offline=False),
] + mygroupnodes
assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes))
(online, perhost) = fn(mygroupnodes, "my", nodes)
self.assertEqual(online, ["node%s" % i for i in range(20, 26)])
self.assertEqual(set(perhost.keys()), set(online))
self.assertEqual(perhost, {
"node20": ["node10", "node2", "node50"],
"node21": ["node11", "node3", "node50"],
"node22": ["node10", "node5", "node50"],
"node23": ["node11", "node2", "node50"],
"node24": ["node10", "node3", "node50"],
"node25": ["node11", "node5", "node50"],
def testSingleGroup(self):
fn = cmdlib.LUClusterVerifyGroup._SelectSshCheckNodes
nodes = [
objects.Node(name="node1", group="default", offline=True),
objects.Node(name="node2", group="default", offline=False),
objects.Node(name="node3", group="default", offline=False),
objects.Node(name="node4", group="default", offline=True),
assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes))
(online, perhost) = fn(nodes, "default", nodes)
self.assertEqual(online, ["node2", "node3"])
self.assertEqual(set(perhost.keys()), set(online))
self.assertEqual(perhost, {
"node2": [],
"node3": [],
class TestClusterVerifyFiles(unittest.TestCase):
def _FakeErrorIf(errors, cond, ecode, item, msg, *args, **kwargs):
assert ((ecode == cmdlib.LUClusterVerifyGroup.ENODEFILECHECK and
ht.TNonEmptyString(item)) or
(ecode == cmdlib.LUClusterVerifyGroup.ECLUSTERFILECHECK and
item is None))
if args:
msg = msg % args
if cond:
errors.append((item, msg))
_VerifyFiles = cmdlib.LUClusterVerifyGroup._VerifyFiles
def test(self):
errors = []
master_name = ""
nodeinfo = [
objects.Node(name=master_name, offline=False),
objects.Node(name="", offline=False),
objects.Node(name="", master_candidate=True),
objects.Node(name="", offline=False),
objects.Node(name="", offline=True),
cluster = objects.Cluster(modify_etc_hosts=True,
files_all = set([
files_all_opt = set([
files_mc = set([
files_vm = set()
nvinfo = {
master_name: rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.CLUSTER_CONF_FILE: "82314f897f38b35f9dab2f7c6b1593e0",
constants.RAPI_CERT_FILE: "babbce8f387bc082228e544a2146fee4",
constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4",
"": rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a",
"": rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a",
constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4",
"": rpc.RpcResult(data=(True, {
constants.NV_FILELIST: {
constants.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a",
constants.CLUSTER_CONF_FILE: "conf-a6d4b13e407867f7a7b4f0f232a8f527",
constants.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4",
constants.RAPI_USERS_FILE: "rapiusers-ea3271e8d810ef3",
"": rpc.RpcResult(data=(True, {})),
"": rpc.RpcResult(offline=True),
assert set(nvinfo.keys()) == set(map(operator.attrgetter("name"), nodeinfo))
self._VerifyFiles(compat.partial(self._FakeErrorIf, errors), nodeinfo,
master_name, nvinfo,
(files_all, files_all_opt, files_mc, files_vm))
self.assertEqual(sorted(errors), sorted([
(None, ("File %s found with 2 different checksums (variant 1 on"
" variant 2 on" % constants.RAPI_CERT_FILE)),
(None, ("File %s is missing from node(s)" %
(None, ("File %s should not exist on node(s)" %
(None, ("File %s is missing from node(s)" %
(None, ("File %s found with 2 different checksums (variant 1 on"
"; variant 2 on" %
(None, ("File %s is optional, but it must exist on all or no nodes (not"
" found on,,"
"" % constants.RAPI_USERS_FILE)),
("", "Node did not return file checksum data"),
if __name__ == "__main__":
......@@ -648,6 +648,7 @@ class Merger(object):
for data in self.merger_data:
for node in data.nodes:"Readding node %s", node)
result = utils.RunCmd(["gnt-node", "add", "--readd",
"--no-ssh-key-check", "--force-join", node])
if result.failed:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment