From 0cf5e7f559f47e015e16c79a8c4becb705ef0db5 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Fri, 12 Mar 2010 09:34:45 +0100
Subject: [PATCH] Improve cluster verify with hypervisor errors

Currently, if the hypervisor has issues on a node, backend.VerifyNode
exits via an exception (there are two possible exit paths: one via
HypervisorError from hypervisor.Verify(), and one via RPCFail from
GetInstanceList). This is bad, as it invalidates all other checks of
that node.

This patch catches these two errors and allows the rest of the
VerifyNode function to run. This leads to a more complete cluster verify
run; for example, only the LVs that are really missing are now reported,
instead of all of them.

Cluster verify is still not perfect, as it will skip some tests even
when it has the data; fixing that requires a more complete rewrite (see
issue 90).

Also, the patch fixes and improves some error messages in cmdlib.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
---
 lib/backend.py | 14 +++++++++++---
 lib/cmdlib.py  |  5 +++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/lib/backend.py b/lib/backend.py
index 60826438c..7d7b04073 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -480,7 +480,11 @@ def VerifyNode(what, cluster_name):
   if constants.NV_HYPERVISOR in what:
     result[constants.NV_HYPERVISOR] = tmp = {}
     for hv_name in what[constants.NV_HYPERVISOR]:
-      tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()
+      try:
+        val = hypervisor.GetHypervisor(hv_name).Verify()
+      except errors.HypervisorError, err:
+        val = "Error while checking hypervisor: %s" % str(err)
+      tmp[hv_name] = val
 
   if constants.NV_FILELIST in what:
     result[constants.NV_FILELIST] = utils.FingerprintFiles(
@@ -523,8 +527,12 @@ def VerifyNode(what, cluster_name):
     result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST])
 
   if constants.NV_INSTANCELIST in what:
-    result[constants.NV_INSTANCELIST] = GetInstanceList(
-      what[constants.NV_INSTANCELIST])
+    # GetInstanceList can fail
+    try:
+      val = GetInstanceList(what[constants.NV_INSTANCELIST])
+    except RPCFail, err:
+      val = str(err)
+    result[constants.NV_INSTANCELIST] = val
 
   if constants.NV_VGLIST in what:
     result[constants.NV_VGLIST] = utils.ListVolumeGroups()
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index e88f88bc3..4b6b77833 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1439,7 +1439,8 @@ class LUVerifyCluster(LogicalUnit):
       idata = nresult.get(constants.NV_INSTANCELIST, None)
       test = not isinstance(idata, list)
       _ErrorIf(test, self.ENODEHV, node,
-               "rpc call to node failed (instancelist)")
+               "rpc call to node failed (instancelist): %s",
+               utils.SafeEncode(str(idata)))
       if test:
         continue
 
@@ -1544,7 +1545,7 @@ class LUVerifyCluster(LogicalUnit):
         _ErrorIf(snode not in node_info and snode not in n_offline,
                  self.ENODERPC, snode,
                  "instance %s, connection to secondary node"
-                 "failed", instance)
+                 " failed", instance)
 
         if snode in node_info:
           node_info[snode]['sinst'].append(instance)
-- 
GitLab