From 9d4bfc96db3a46516fa14cca283dc316aa0b7e17 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Tue, 13 May 2008 14:33:12 +0000 Subject: [PATCH] Implement node daemon connectivity tests This patch adds checks to gnt-cluster verify for inter-node tcp communication on the node daemon port for both the primary and (if defined) secondary networks. The output looks like (4-node cluster, one with the secondary interface down): * Verifying node node1.example.com - ERROR: tcp communication with node 'node3.example.com': failure using the secondary interface(s) * Verifying node node2.example.com - ERROR: tcp communication with node 'node3.example.com': failure using the secondary interface(s) * Verifying node node3.example.com - ERROR: tcp communication with node 'node1.example.com': failure using the secondary interface(s) - ERROR: tcp communication with node 'node2.example.com': failure using the secondary interface(s) - ERROR: tcp communication with node 'node4.example.com': failure using the secondary interface(s) * Verifying node node4.example.com - ERROR: tcp communication with node 'node3.example.com': failure using the secondary interface(s) Reviewed-by: imsnah --- lib/backend.py | 27 +++++++++++++++++++++++++++ lib/cmdlib.py | 18 ++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/lib/backend.py b/lib/backend.py index 401710c72..8caf53f35 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -206,6 +206,33 @@ def VerifyNode(what): success, message = _GetSshRunner().VerifyNodeHostname(node) if not success: result['nodelist'][node] = message + if 'node-net-test' in what: + result['node-net-test'] = {} + my_name = utils.HostInfo().name + my_pip = my_sip = None + for name, pip, sip in what['node-net-test']: + if name == my_name: + my_pip = pip + my_sip = sip + break + if not my_pip: + result['node-net-test'][my_name] = ("Can't find my own" + " primary/secondary IP" + " in the node list") + else: + port = 
ssconf.SimpleStore().GetNodeDaemonPort() + for name, pip, sip in what['node-net-test']: + fail = [] + if not utils.TcpPing(pip, port, source=my_pip): + fail.append("primary") + if sip != pip: + if not utils.TcpPing(sip, port, source=my_sip): + fail.append("secondary") + if fail: + result['node-net-test'][name] = ("failure using the %s" + " interface(s)" % + " and ".join(fail)) + return result diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 099def196..9008862ac 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -673,13 +673,24 @@ class LUVerifyCluster(LogicalUnit): if 'nodelist' not in node_result: bad = True - feedback_fn(" - ERROR: node hasn't returned node connectivity data") + feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data") else: if node_result['nodelist']: bad = True for node in node_result['nodelist']: - feedback_fn(" - ERROR: communication with node '%s': %s" % + feedback_fn(" - ERROR: ssh communication with node '%s': %s" % (node, node_result['nodelist'][node])) + if 'node-net-test' not in node_result: + bad = True + feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data") + else: + if node_result['node-net-test']: + bad = True + nlist = utils.NiceSort(node_result['node-net-test'].keys()) + for node in nlist: + feedback_fn(" - ERROR: tcp communication with node '%s': %s" % + (node, node_result['node-net-test'][node])) + hyp_result = node_result.get('hypervisor', None) if hyp_result is not None: feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result) @@ -817,6 +828,7 @@ class LUVerifyCluster(LogicalUnit): vg_name = self.cfg.GetVGName() nodelist = utils.NiceSort(self.cfg.GetNodeList()) + nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist] instancelist = utils.NiceSort(self.cfg.GetInstanceList()) i_non_redundant = [] # Non redundant instances node_volume = {} @@ -839,6 +851,8 @@ class LUVerifyCluster(LogicalUnit): 'filelist': file_names, 'nodelist': nodelist, 'hypervisor': None, + 
'node-net-test': [(node.name, node.primary_ip, node.secondary_ip) + for node in nodeinfo] } all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param) all_rversion = rpc.call_version(nodelist) -- GitLab