Commit 25361b9a authored by Iustin Pop
Browse files

Fix gnt-cluster verify w.r.t. rpc changes

This partially reorganizes the cluster verify LU:
  - introduce constants for the node verify rpc call
  - move from additional rpc calls to a single rpc call, the
    call_node_info, which gathers all data needed

Also fix a small error (self.LogWarning instead of self.Warning).

Reviewed-by: imsnah
parent 55cf7d83
...@@ -350,37 +350,38 @@ def VerifyNode(what, cluster_name): ...@@ -350,37 +350,38 @@ def VerifyNode(what, cluster_name):
""" """
result = {} result = {}
if 'hypervisor' in what: if constants.NV_HYPERVISOR in what:
result['hypervisor'] = my_dict = {} result[constants.NV_HYPERVISOR] = tmp = {}
for hv_name in what['hypervisor']: for hv_name in what[constants.NV_HYPERVISOR]:
my_dict[hv_name] = hypervisor.GetHypervisor(hv_name).Verify() tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()
if 'filelist' in what: if constants.NV_FILELIST in what:
result['filelist'] = utils.FingerprintFiles(what['filelist']) result[constants.NV_FILELIST] = utils.FingerprintFiles(
what[constants.NV_FILELIST])
if 'nodelist' in what:
result['nodelist'] = {} if constants.NV_NODELIST in what:
random.shuffle(what['nodelist']) result[constants.NV_NODELIST] = tmp = {}
for node in what['nodelist']: random.shuffle(what[constants.NV_NODELIST])
for node in what[constants.NV_NODELIST]:
success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node) success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
if not success: if not success:
result['nodelist'][node] = message tmp[node] = message
if 'node-net-test' in what:
result['node-net-test'] = {} if constants.NV_NODENETTEST in what:
result[constants.NV_NODENETTEST] = tmp = {}
my_name = utils.HostInfo().name my_name = utils.HostInfo().name
my_pip = my_sip = None my_pip = my_sip = None
for name, pip, sip in what['node-net-test']: for name, pip, sip in what[constants.NV_NODENETTEST]:
if name == my_name: if name == my_name:
my_pip = pip my_pip = pip
my_sip = sip my_sip = sip
break break
if not my_pip: if not my_pip:
result['node-net-test'][my_name] = ("Can't find my own" tmp[my_name] = ("Can't find my own primary/secondary IP"
" primary/secondary IP"
" in the node list") " in the node list")
else: else:
port = utils.GetNodeDaemonPort() port = utils.GetNodeDaemonPort()
for name, pip, sip in what['node-net-test']: for name, pip, sip in what[constants.NV_NODENETTEST]:
fail = [] fail = []
if not utils.TcpPing(pip, port, source=my_pip): if not utils.TcpPing(pip, port, source=my_pip):
fail.append("primary") fail.append("primary")
...@@ -388,10 +389,26 @@ def VerifyNode(what, cluster_name): ...@@ -388,10 +389,26 @@ def VerifyNode(what, cluster_name):
if not utils.TcpPing(sip, port, source=my_sip): if not utils.TcpPing(sip, port, source=my_sip):
fail.append("secondary") fail.append("secondary")
if fail: if fail:
result['node-net-test'][name] = ("failure using the %s" tmp[name] = ("failure using the %s interface(s)" %
" interface(s)" %
" and ".join(fail)) " and ".join(fail))
if constants.NV_LVLIST in what:
result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST])
if constants.NV_INSTANCELIST in what:
result[constants.NV_INSTANCELIST] = GetInstanceList(
what[constants.NV_INSTANCELIST])
if constants.NV_VGLIST in what:
result[constants.NV_VGLIST] = ListVolumeGroups()
if constants.NV_VERSION in what:
result[constants.NV_VERSION] = constants.PROTOCOL_VERSION
if constants.NV_HVINFO in what:
hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
result[constants.NV_HVINFO] = hyper.GetNodeInfo()
return result return result
......
...@@ -570,8 +570,8 @@ class LUVerifyCluster(LogicalUnit): ...@@ -570,8 +570,8 @@ class LUVerifyCluster(LogicalUnit):
} }
self.share_locks = dict(((i, 1) for i in locking.LEVELS)) self.share_locks = dict(((i, 1) for i in locking.LEVELS))
def _VerifyNode(self, nodeinfo, file_list, local_cksum, vglist, node_result, def _VerifyNode(self, nodeinfo, file_list, local_cksum,
remote_version, feedback_fn, master_files): node_result, feedback_fn, master_files):
"""Run multiple tests against a node. """Run multiple tests against a node.
Test list: Test list:
...@@ -585,17 +585,21 @@ class LUVerifyCluster(LogicalUnit): ...@@ -585,17 +585,21 @@ class LUVerifyCluster(LogicalUnit):
@param nodeinfo: the node to check @param nodeinfo: the node to check
@param file_list: required list of files @param file_list: required list of files
@param local_cksum: dictionary of local files and their checksums @param local_cksum: dictionary of local files and their checksums
@type vglist: dict
@param vglist: dictionary of volume group names and their size
@param node_result: the results from the node @param node_result: the results from the node
@param remote_version: the RPC version from the remote node
@param feedback_fn: function used to accumulate results @param feedback_fn: function used to accumulate results
@param master_files: list of files that only masters should have @param master_files: list of files that only masters should have
""" """
node = nodeinfo.name node = nodeinfo.name
# main result, node_result should be a non-empty dict
if not node_result or not isinstance(node_result, dict):
feedback_fn(" - ERROR: unable to verify node %s." % (node,))
return True
# compares ganeti version # compares ganeti version
local_version = constants.PROTOCOL_VERSION local_version = constants.PROTOCOL_VERSION
remote_version = node_result.get('version', None)
if not remote_version: if not remote_version:
feedback_fn(" - ERROR: connection to %s failed" % (node)) feedback_fn(" - ERROR: connection to %s failed" % (node))
return True return True
...@@ -608,6 +612,7 @@ class LUVerifyCluster(LogicalUnit): ...@@ -608,6 +612,7 @@ class LUVerifyCluster(LogicalUnit):
# checks vg existance and size > 20G # checks vg existance and size > 20G
bad = False bad = False
vglist = node_result.get(constants.NV_VGLIST, None)
if not vglist: if not vglist:
feedback_fn(" - ERROR: unable to check volume groups on node %s." % feedback_fn(" - ERROR: unable to check volume groups on node %s." %
(node,)) (node,))
...@@ -619,18 +624,13 @@ class LUVerifyCluster(LogicalUnit): ...@@ -619,18 +624,13 @@ class LUVerifyCluster(LogicalUnit):
feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node)) feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
bad = True bad = True
if not node_result:
feedback_fn(" - ERROR: unable to verify node %s." % (node,))
return True
# checks config file checksum # checks config file checksum
# checks ssh to any
if 'filelist' not in node_result: remote_cksum = node_result.get(constants.NV_FILELIST, None)
if not isinstance(remote_cksum, dict):
bad = True bad = True
feedback_fn(" - ERROR: node hasn't returned file checksum data") feedback_fn(" - ERROR: node hasn't returned file checksum data")
else: else:
remote_cksum = node_result['filelist']
for file_name in file_list: for file_name in file_list:
node_is_mc = nodeinfo.master_candidate node_is_mc = nodeinfo.master_candidate
must_have_file = file_name not in master_files must_have_file = file_name not in master_files
...@@ -653,27 +653,30 @@ class LUVerifyCluster(LogicalUnit): ...@@ -653,27 +653,30 @@ class LUVerifyCluster(LogicalUnit):
feedback_fn(" - ERROR: file '%s' should not exist on non master" feedback_fn(" - ERROR: file '%s' should not exist on non master"
" candidates" % file_name) " candidates" % file_name)
if 'nodelist' not in node_result: # checks ssh to any
if constants.NV_NODELIST not in node_result:
bad = True bad = True
feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data") feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else: else:
if node_result['nodelist']: if node_result[constants.NV_NODELIST]:
bad = True bad = True
for node in node_result['nodelist']: for node in node_result[constants.NV_NODELIST]:
feedback_fn(" - ERROR: ssh communication with node '%s': %s" % feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
(node, node_result['nodelist'][node])) (node, node_result[constants.NV_NODELIST][node]))
if 'node-net-test' not in node_result:
if constants.NV_NODENETTEST not in node_result:
bad = True bad = True
feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data") feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
else: else:
if node_result['node-net-test']: if node_result[constants.NV_NODENETTEST]:
bad = True bad = True
nlist = utils.NiceSort(node_result['node-net-test'].keys()) nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
for node in nlist: for node in nlist:
feedback_fn(" - ERROR: tcp communication with node '%s': %s" % feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
(node, node_result['node-net-test'][node])) (node, node_result[constants.NV_NODENETTEST][node]))
hyp_result = node_result.get('hypervisor', None) hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
if isinstance(hyp_result, dict): if isinstance(hyp_result, dict):
for hv_name, hv_result in hyp_result.iteritems(): for hv_name, hv_result in hyp_result.iteritems():
if hv_result is not None: if hv_result is not None:
...@@ -836,73 +839,78 @@ class LUVerifyCluster(LogicalUnit): ...@@ -836,73 +839,78 @@ class LUVerifyCluster(LogicalUnit):
local_checksums = utils.FingerprintFiles(file_names) local_checksums = utils.FingerprintFiles(file_names)
feedback_fn("* Gathering data (%d nodes)" % len(nodelist)) feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
all_volumeinfo = self.rpc.call_volume_list(nodelist, vg_name)
all_instanceinfo = self.rpc.call_instance_list(nodelist, hypervisors)
all_vglist = self.rpc.call_vg_list(nodelist)
node_verify_param = { node_verify_param = {
'filelist': file_names, constants.NV_FILELIST: file_names,
'nodelist': nodelist, constants.NV_NODELIST: nodelist,
'hypervisor': hypervisors, constants.NV_HYPERVISOR: hypervisors,
'node-net-test': [(node.name, node.primary_ip, node.secondary_ip) constants.NV_NODENETTEST: [(node.name, node.primary_ip,
for node in nodeinfo] node.secondary_ip) for node in nodeinfo],
constants.NV_LVLIST: vg_name,
constants.NV_INSTANCELIST: hypervisors,
constants.NV_VGLIST: None,
constants.NV_VERSION: None,
constants.NV_HVINFO: self.cfg.GetHypervisorType(),
} }
all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
self.cfg.GetClusterName()) self.cfg.GetClusterName())
all_rversion = self.rpc.call_version(nodelist)
all_ninfo = self.rpc.call_node_info(nodelist, self.cfg.GetVGName(),
self.cfg.GetHypervisorType())
cluster = self.cfg.GetClusterInfo() cluster = self.cfg.GetClusterInfo()
master_node = self.cfg.GetMasterNode() master_node = self.cfg.GetMasterNode()
for node_i in nodeinfo: for node_i in nodeinfo:
node = node_i.name node = node_i.name
nresult = all_nvinfo[node].data
if node == master_node: if node == master_node:
ntype="master" ntype = "master"
elif node_i.master_candidate: elif node_i.master_candidate:
ntype="master candidate" ntype = "master candidate"
else: else:
ntype="regular" ntype = "regular"
feedback_fn("* Verifying node %s (%s)" % (node, ntype)) feedback_fn("* Verifying node %s (%s)" % (node, ntype))
if all_nvinfo[node].failed or not isinstance(nresult, dict):
feedback_fn(" - ERROR: connection to %s failed" % (node,))
bad = True
continue
result = self._VerifyNode(node_i, file_names, local_checksums, result = self._VerifyNode(node_i, file_names, local_checksums,
all_vglist[node], all_nvinfo[node], nresult, feedback_fn, master_files)
all_rversion[node], feedback_fn, master_files)
bad = bad or result bad = bad or result
# node_volume lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
volumeinfo = all_volumeinfo[node] if isinstance(lvdata, basestring):
if isinstance(volumeinfo, basestring):
feedback_fn(" - ERROR: LVM problem on node %s: %s" % feedback_fn(" - ERROR: LVM problem on node %s: %s" %
(node, volumeinfo[-400:].encode('string_escape'))) (node, lvdata.encode('string_escape')))
bad = True bad = True
node_volume[node] = {} node_volume[node] = {}
elif not isinstance(volumeinfo, dict): elif not isinstance(lvdata, dict):
feedback_fn(" - ERROR: connection to %s failed" % (node,)) feedback_fn(" - ERROR: connection to %s failed (lvlist)" % (node,))
bad = True bad = True
continue continue
else: else:
node_volume[node] = volumeinfo node_volume[node] = lvdata
# node_instance # node_instance
nodeinstance = all_instanceinfo[node] idata = nresult.get(constants.NV_INSTANCELIST, None)
if type(nodeinstance) != list: if not isinstance(idata, list):
feedback_fn(" - ERROR: connection to %s failed" % (node,)) feedback_fn(" - ERROR: connection to %s failed (instancelist)" %
(node,))
bad = True bad = True
continue continue
node_instance[node] = nodeinstance node_instance[node] = idata
# node_info # node_info
nodeinfo = all_ninfo[node] nodeinfo = nresult.get(constants.NV_HVINFO, None)
if not isinstance(nodeinfo, dict): if not isinstance(nodeinfo, dict):
feedback_fn(" - ERROR: connection to %s failed" % (node,)) feedback_fn(" - ERROR: connection to %s failed (hvinfo)" % (node,))
bad = True bad = True
continue continue
try: try:
node_info[node] = { node_info[node] = {
"mfree": int(nodeinfo['memory_free']), "mfree": int(nodeinfo['memory_free']),
"dfree": int(nodeinfo['vg_free']), "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
"pinst": [], "pinst": [],
"sinst": [], "sinst": [],
# dictionary holding all instances this node is secondary for, # dictionary holding all instances this node is secondary for,
...@@ -1017,11 +1025,11 @@ class LUVerifyCluster(LogicalUnit): ...@@ -1017,11 +1025,11 @@ class LUVerifyCluster(LogicalUnit):
for node_name in hooks_results: for node_name in hooks_results:
show_node_header = True show_node_header = True
res = hooks_results[node_name] res = hooks_results[node_name]
if res is False or not isinstance(res, list): if res.failed or res.data is False or not isinstance(res.data, list):
feedback_fn(" Communication failure") feedback_fn(" Communication failure in hooks execution")
lu_result = 1 lu_result = 1
continue continue
for script, hkr, output in res: for script, hkr, output in res.data:
if hkr == constants.HKR_FAIL: if hkr == constants.HKR_FAIL:
# The node header is only shown once, if there are # The node header is only shown once, if there are
# failing hooks on that node # failing hooks on that node
...@@ -5500,7 +5508,7 @@ class LURemoveExport(NoHooksLU): ...@@ -5500,7 +5508,7 @@ class LURemoveExport(NoHooksLU):
found = False found = False
for node in exportlist: for node in exportlist:
if exportlist[node].failed: if exportlist[node].failed:
self.Warning("Failed to query node %s, continuing" % node) self.LogWarning("Failed to query node %s, continuing" % node)
continue continue
if instance_name in exportlist[node].data: if instance_name in exportlist[node].data:
found = True found = True
......
...@@ -333,6 +333,17 @@ HT_HVM_VALID_DISK_TYPES = frozenset([HT_HVM_DEV_PARAVIRTUAL, HT_HVM_DEV_IOEMU]) ...@@ -333,6 +333,17 @@ HT_HVM_VALID_DISK_TYPES = frozenset([HT_HVM_DEV_PARAVIRTUAL, HT_HVM_DEV_IOEMU])
VERIFY_NPLUSONE_MEM = 'nplusone_mem' VERIFY_NPLUSONE_MEM = 'nplusone_mem'
VERIFY_OPTIONAL_CHECKS = frozenset([VERIFY_NPLUSONE_MEM]) VERIFY_OPTIONAL_CHECKS = frozenset([VERIFY_NPLUSONE_MEM])
# Node verify constants
NV_FILELIST = "filelist"
NV_HVINFO = "hvinfo"
NV_HYPERVISOR = "hypervisor"
NV_INSTANCELIST = "instancelist"
NV_LVLIST = "lvlist"
NV_NODELIST = "nodelist"
NV_NODENETTEST = "node-net-test"
NV_VERSION = "version"
NV_VGLIST = "vglist"
# Allocator framework constants # Allocator framework constants
IALLOCATOR_DIR_IN = "in" IALLOCATOR_DIR_IN = "in"
IALLOCATOR_DIR_OUT = "out" IALLOCATOR_DIR_OUT = "out"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment