From 0cf5e7f559f47e015e16c79a8c4becb705ef0db5 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Fri, 12 Mar 2010 09:34:45 +0100 Subject: [PATCH] Improve cluster verify with hypervisor errors In case the hypervisor has issues on one node, currently backend.VerifyNode will exit via an exception (two exit paths possible, one via HypervisorError from hypervisor.Verify(), and one via RPCFail from GetInstanceList). This is bad as it invalidates all other checks of that node. This patch catches these two errors and allows the rest of the VerifyNode function to run. This leads to a more complete verify cluster run; for example, only LVs that are really missing are now reported, not all of them. The cluster verify is not perfect as it will skip some tests even if it has data, but fixing this will require a more complete rewrite (see issue 90). Also, the patch fixes and improves some error messages in cmdlib. Signed-off-by: Iustin Pop <iustin@google.com> Reviewed-by: Guido Trotter <ultrotter@google.com> --- lib/backend.py | 14 +++++++++++--- lib/cmdlib.py | 5 +++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/backend.py b/lib/backend.py index 60826438c..7d7b04073 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -480,7 +480,11 @@ def VerifyNode(what, cluster_name): if constants.NV_HYPERVISOR in what: result[constants.NV_HYPERVISOR] = tmp = {} for hv_name in what[constants.NV_HYPERVISOR]: - tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify() + try: + val = hypervisor.GetHypervisor(hv_name).Verify() + except errors.HypervisorError, err: + val = "Error while checking hypervisor: %s" % str(err) + tmp[hv_name] = val if constants.NV_FILELIST in what: result[constants.NV_FILELIST] = utils.FingerprintFiles( @@ -523,8 +527,12 @@ def VerifyNode(what, cluster_name): result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST]) if constants.NV_INSTANCELIST in what: - result[constants.NV_INSTANCELIST] = GetInstanceList( - 
what[constants.NV_INSTANCELIST]) + # GetInstanceList can fail + try: + val = GetInstanceList(what[constants.NV_INSTANCELIST]) + except RPCFail, err: + val = str(err) + result[constants.NV_INSTANCELIST] = val if constants.NV_VGLIST in what: result[constants.NV_VGLIST] = utils.ListVolumeGroups() diff --git a/lib/cmdlib.py b/lib/cmdlib.py index e88f88bc3..4b6b77833 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -1439,7 +1439,8 @@ class LUVerifyCluster(LogicalUnit): idata = nresult.get(constants.NV_INSTANCELIST, None) test = not isinstance(idata, list) _ErrorIf(test, self.ENODEHV, node, - "rpc call to node failed (instancelist)") + "rpc call to node failed (instancelist): %s", + utils.SafeEncode(str(idata))) if test: continue @@ -1544,7 +1545,7 @@ class LUVerifyCluster(LogicalUnit): _ErrorIf(snode not in node_info and snode not in n_offline, self.ENODERPC, snode, "instance %s, connection to secondary node" - "failed", instance) + " failed", instance) if snode in node_info: node_info[snode]['sinst'].append(instance) -- GitLab