From c6a9dffacbeb54230aca53da217aadf1393f1eeb Mon Sep 17 00:00:00 2001
From: Michael Hanselmann <hansmi@google.com>
Date: Wed, 3 Nov 2010 13:56:16 +0100
Subject: [PATCH] =?UTF-8?q?Fix=20disk=20checks=20in=20=E2=80=9Cgnt-cluster?=
 =?UTF-8?q?=20verify=E2=80=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests have shown that the changes in commit b8d26c6e5 don't work as
wanted. If any disk wasn't found on the node, all disks located on the
same node would show as faulty. The cause was incorrect exception
handling on the node.

This patch changes the RPC call to return a per-disk success/error
status, avoiding the problem.

Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Luca Bigliardi <shammash@google.com>
---
 lib/backend.py      | 35 ++++++++++++++++++++++++++++++++---
 lib/cmdlib.py       | 14 ++++++++------
 lib/rpc.py          | 10 +++++++---
 lib/server/noded.py | 11 +++++++++--
 4 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/lib/backend.py b/lib/backend.py
index 4bf94fb08..1e5afa026 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1539,9 +1539,7 @@ def BlockdevGetmirrorstatus(disks):
   @type disks: list of L{objects.Disk}
   @param disks: the list of disks which we should query
   @rtype: disk
-  @return:
-      a list of (mirror_done, estimated_time) tuples, which
-      are the result of L{bdev.BlockDev.CombinedSyncStatus}
+  @return: List of L{objects.BlockDevStatus}, one for each disk
   @raise errors.BlockDeviceError: if any of the disks cannot be
       found
 
@@ -1557,6 +1555,37 @@ def BlockdevGetmirrorstatus(disks):
   return stats
 
 
+def BlockdevGetmirrorstatusMulti(disks):
+  """Get the mirroring status of a list of devices.
+
+  @type disks: list of L{objects.Disk}
+  @param disks: the list of disks which we should query
+  @rtype: disk
+  @return: List of tuples, (bool, status), one for each disk; bool denotes
+    success/failure, status is L{objects.BlockDevStatus} on success, string
+    otherwise
+
+  """
+  result = []
+  for disk in disks:
+    try:
+      rbd = _RecursiveFindBD(disk)
+      if rbd is None:
+        result.append((False, "Can't find device %s" % disk))
+        continue
+
+      status = rbd.CombinedSyncStatus()
+    except errors.BlockDeviceError, err:
+      logging.exception("Error while getting disk status")
+      result.append((False, str(err)))
+    else:
+      result.append((True, status))
+
+  assert len(disks) == len(result)
+
+  return result
+
+
 def _RecursiveFindBD(disk):
   """Check if a device is activated.
 
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 20f2af7f6..c2085b9f3 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1539,15 +1539,17 @@ class LUVerifyCluster(LogicalUnit):
         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                  "instance should not run on node %s", node)
 
-    diskdata = [(nname, disk, idx)
+    diskdata = [(nname, success, status, idx)
                 for (nname, disks) in diskstatus.items()
-                for idx, disk in enumerate(disks)]
+                for idx, (success, status) in enumerate(disks)]
 
-    for nname, bdev_status, idx in diskdata:
-      _ErrorIf(not bdev_status,
+    for nname, success, bdev_status, idx in diskdata:
+      _ErrorIf(instanceconfig.admin_up and not success,
                self.EINSTANCEFAULTYDISK, instance,
-               "couldn't retrieve status for disk/%s on %s", idx, nname)
-      _ErrorIf(bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY,
+               "couldn't retrieve status for disk/%s on %s: %s",
+               idx, nname, bdev_status)
+      _ErrorIf((instanceconfig.admin_up and success and
+                bdev_status.ldisk_status == constants.LDS_FAULTY),
                self.EINSTANCEFAULTYDISK, instance,
                "disk/%s on %s is faulty", idx, nname)
 
diff --git a/lib/rpc.py b/lib/rpc.py
index 9090390fc..4ec0ed460 100644
--- a/lib/rpc.py
+++ b/lib/rpc.py
@@ -1057,9 +1057,13 @@ class RpcRunner(object):
                                  [dict((name, [dsk.ToDict() for dsk in disks])
                                        for name, disks in node_disks.items())])
     for nres in result.values():
-      if not nres.fail_msg:
-        nres.payload = [objects.BlockDevStatus.FromDict(i)
-                        for i in nres.payload]
+      if nres.fail_msg:
+        continue
+
+      for idx, (success, status) in enumerate(nres.payload):
+        if success:
+          nres.payload[idx] = (success, objects.BlockDevStatus.FromDict(status))
+
     return result
 
   @_RpcTimeout(_TMO_NORMAL)
diff --git a/lib/server/noded.py b/lib/server/noded.py
index 14d6bb5d7..7a76882a9 100644
--- a/lib/server/noded.py
+++ b/lib/server/noded.py
@@ -283,8 +283,15 @@ class NodeHttpServer(http.server.HttpServer):
     disks = [objects.Disk.FromDict(dsk_s)
              for dsk_s in node_disks.get(node_name, [])]
 
-    return [status.ToDict()
-            for status in backend.BlockdevGetmirrorstatus(disks)]
+    result = []
+
+    for (success, status) in backend.BlockdevGetmirrorstatusMulti(disks):
+      if success:
+        result.append((success, status.ToDict()))
+      else:
+        result.append((success, status))
+
+    return result
 
   @staticmethod
   def perspective_blockdev_find(params):
-- 
GitLab