Commit 9d4bfc96 authored by Iustin Pop
Browse files

Implement node daemon connectivity tests

This patch adds checks to gnt-cluster verify for inter-node TCP
communication on the node daemon port, over both the primary and
(if defined) secondary networks.

The output looks like this (4-node cluster, one node with its secondary
interface down):
* Verifying node node1.example.com
  - ERROR: tcp communication with node 'node3.example.com': failure using the secondary interface(s)
* Verifying node node2.example.com
  - ERROR: tcp communication with node 'node3.example.com': failure using the secondary interface(s)
* Verifying node node3.example.com
  - ERROR: tcp communication with node 'node1.example.com': failure using the secondary interface(s)
  - ERROR: tcp communication with node 'node2.example.com': failure using the secondary interface(s)
  - ERROR: tcp communication with node 'node4.example.com': failure using the secondary interface(s)
* Verifying node node4.example.com
  - ERROR: tcp communication with node 'node3.example.com': failure using the secondary interface(s)

Reviewed-by: imsnah
parent 102b115b
......@@ -206,6 +206,33 @@ def VerifyNode(what):
success, message = _GetSshRunner().VerifyNodeHostname(node)
if not success:
result['nodelist'][node] = message
if 'node-net-test' in what:
result['node-net-test'] = {}
my_name = utils.HostInfo().name
my_pip = my_sip = None
for name, pip, sip in what['node-net-test']:
if name == my_name:
my_pip = pip
my_sip = sip
break
if not my_pip:
result['node-net-test'][my_name] = ("Can't find my own"
" primary/secondary IP"
" in the node list")
else:
port = ssconf.SimpleStore().GetNodeDaemonPort()
for name, pip, sip in what['node-net-test']:
fail = []
if not utils.TcpPing(pip, port, source=my_pip):
fail.append("primary")
if sip != pip:
if not utils.TcpPing(sip, port, source=my_sip):
fail.append("secondary")
if fail:
result['node-net-test'][name] = ("failure using the %s"
" interface(s)" %
" and ".join(fail))
return result
......
......@@ -673,13 +673,24 @@ class LUVerifyCluster(LogicalUnit):
if 'nodelist' not in node_result:
bad = True
feedback_fn(" - ERROR: node hasn't returned node connectivity data")
feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else:
if node_result['nodelist']:
bad = True
for node in node_result['nodelist']:
feedback_fn(" - ERROR: communication with node '%s': %s" %
feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
(node, node_result['nodelist'][node]))
if 'node-net-test' not in node_result:
bad = True
feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
else:
if node_result['node-net-test']:
bad = True
nlist = utils.NiceSort(node_result['node-net-test'].keys())
for node in nlist:
feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
(node, node_result['node-net-test'][node]))
hyp_result = node_result.get('hypervisor', None)
if hyp_result is not None:
feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
......@@ -817,6 +828,7 @@ class LUVerifyCluster(LogicalUnit):
vg_name = self.cfg.GetVGName()
nodelist = utils.NiceSort(self.cfg.GetNodeList())
nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
i_non_redundant = [] # Non redundant instances
node_volume = {}
......@@ -839,6 +851,8 @@ class LUVerifyCluster(LogicalUnit):
'filelist': file_names,
'nodelist': nodelist,
'hypervisor': None,
'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
for node in nodeinfo]
}
all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
all_rversion = rpc.call_version(nodelist)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment