From 6d2e83d5cf81ef128ab5e2ad2fdc406b980f2487 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 20 Jan 2009 14:20:15 +0000
Subject: [PATCH] Make cluster-verify check the drbd minors space

This patch adds verification of the drbd minors space to cluster
verify. It reports minors which belong to running instances and should
be online but are not, and minors which do not belong to any instance
but are in use.

The patch requires exposing some methods from bdev.DRBD8 and
config.ConfigWriter which were until now private methods.

Reviewed-by: ultrotter
---
 lib/backend.py   |  8 ++++++++
 lib/bdev.py      |  4 ++--
 lib/cmdlib.py    | 33 ++++++++++++++++++++++++++++++---
 lib/config.py    | 17 +++++++++++++++--
 lib/constants.py |  1 +
 5 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/lib/backend.py b/lib/backend.py
index 2ce138527..389d4ed35 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -432,6 +432,14 @@ def VerifyNode(what, cluster_name):
     hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
     result[constants.NV_HVINFO] = hyper.GetNodeInfo()
 
+  if constants.NV_DRBDLIST in what:
+    try:
+      used_minors = bdev.DRBD8.GetUsedDevs().keys()
+    except errors.BlockDeviceErrors:
+      logging.warning("Can't get used minors list", exc_info=True)
+      used_minors = []
+    result[constants.NV_DRBDLIST] = used_minors
+
   return result
 
 
diff --git a/lib/bdev.py b/lib/bdev.py
index 9207e2c06..08551b2d8 100644
--- a/lib/bdev.py
+++ b/lib/bdev.py
@@ -692,7 +692,7 @@ class BaseDRBD(BlockDev):
     return "/dev/drbd%d" % minor
 
   @classmethod
-  def _GetUsedDevs(cls):
+  def GetUsedDevs(cls):
     """Compute the list of used DRBD devices.
 
     """
@@ -1343,7 +1343,7 @@ class DRBD8(BaseDRBD):
     /proc).
 
     """
-    used_devs = self._GetUsedDevs()
+    used_devs = self.GetUsedDevs()
     if self._aminor in used_devs:
       minor = self._aminor
     else:
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 2614f2413..01ad1fd8d 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -613,7 +613,8 @@ class LUVerifyCluster(LogicalUnit):
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 
   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
-                  node_result, feedback_fn, master_files):
+                  node_result, feedback_fn, master_files,
+                  drbd_map):
     """Run multiple tests against a node.
 
     Test list:
@@ -630,6 +631,9 @@ class LUVerifyCluster(LogicalUnit):
     @param node_result: the results from the node
     @param feedback_fn: function used to accumulate results
     @param master_files: list of files that only masters should have
+    @param drbd_map: the used drbd minors for this node, in
+        form of minor: (instance, must_exist) which correspond to instances
+        and their running status
 
     """
     node = nodeinfo.name
@@ -724,6 +728,19 @@ class LUVerifyCluster(LogicalUnit):
         if hv_result is not None:
           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                       (hv_name, hv_result))
+
+    # check used drbd list
+    used_minors = node_result.get(constants.NV_DRBDLIST, [])
+    for minor, (iname, must_exist) in drbd_map.items():
+      if minor not in used_minors and must_exist:
+        feedback_fn("  - ERROR: drbd minor %d of instance %s is not active" %
+                    (minor, iname))
+        bad = True
+    for minor in used_minors:
+      if minor not in drbd_map:
+        feedback_fn("  - ERROR: unallocated drbd minor %d is in use" % minor)
+        bad = True
+
     return bad
 
   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
@@ -867,6 +884,8 @@ class LUVerifyCluster(LogicalUnit):
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
+    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
+                        for iname in instancelist)
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = [] # List of offline nodes
@@ -900,12 +919,15 @@ class LUVerifyCluster(LogicalUnit):
       constants.NV_VGLIST: None,
       constants.NV_VERSION: None,
       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
+      constants.NV_DRBDLIST: None,
       }
     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                            self.cfg.GetClusterName())
 
     cluster = self.cfg.GetClusterInfo()
     master_node = self.cfg.GetMasterNode()
+    all_drbd_map = self.cfg.ComputeDRBDMap()
+
     for node_i in nodeinfo:
       node = node_i.name
       nresult = all_nvinfo[node].data
@@ -928,8 +950,13 @@ class LUVerifyCluster(LogicalUnit):
         bad = True
         continue
 
+      node_drbd = {}
+      for minor, instance in all_drbd_map[node].items():
+        instance = instanceinfo[instance]
+        node_drbd[minor] = (instance.name, instance.status == "up")
       result = self._VerifyNode(node_i, file_names, local_checksums,
-                                nresult, feedback_fn, master_files)
+                                nresult, feedback_fn, master_files,
+                                node_drbd)
       bad = bad or result
 
       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
@@ -985,7 +1012,7 @@ class LUVerifyCluster(LogicalUnit):
 
     for instance in instancelist:
       feedback_fn("* Verifying instance %s" % instance)
-      inst_config = self.cfg.GetInstanceInfo(instance)
+      inst_config = instanceinfo[instance]
       result =  self._VerifyInstance(instance, inst_config, node_volume,
                                      node_instance, feedback_fn, n_offline)
       bad = bad or result
diff --git a/lib/config.py b/lib/config.py
index 29acbd16a..67b449066 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -389,7 +389,7 @@ class ConfigWriter:
     self._WriteConfig()
     return port
 
-  def _ComputeDRBDMap(self, instance):
+  def _UnlockedComputeDRBDMap(self):
     """Compute the used DRBD minor/nodes.
 
     @return: dictionary of node_name: dict of minor: instance_name;
@@ -421,6 +421,19 @@ class ConfigWriter:
         _AppendUsedPorts(instance.name, disk, my_dict)
     return my_dict
 
+  @locking.ssynchronized(_config_lock)
+  def ComputeDRBDMap(self):
+    """Compute the used DRBD minor/nodes.
+
+    This is just a wrapper over L{_UnlockedComputeDRBDMap}.
+
+    @return: dictionary of node_name: dict of minor: instance_name;
+        the returned dict will have all the nodes in it (even if with
+        an empty list).
+
+    """
+    return self._UnlockedComputeDRBDMap()
+
   @locking.ssynchronized(_config_lock)
   def AllocateDRBDMinor(self, nodes, instance):
     """Allocate a drbd minor.
@@ -431,7 +444,7 @@ class ConfigWriter:
     order as the passed nodes.
 
     """
-    d_map = self._ComputeDRBDMap(instance)
+    d_map = self._UnlockedComputeDRBDMap()
     result = []
     for nname in nodes:
       ndata = d_map[nname]
diff --git a/lib/constants.py b/lib/constants.py
index a483165fb..7250b4f78 100644
--- a/lib/constants.py
+++ b/lib/constants.py
@@ -351,6 +351,7 @@ NV_NODELIST = "nodelist"
 NV_NODENETTEST = "node-net-test"
 NV_VERSION = "version"
 NV_VGLIST = "vglist"
+NV_DRBDLIST = "drbd-list"
 
 # Allocator framework constants
 IALLOCATOR_DIR_IN = "in"
-- 
GitLab