Commit 235a6b29 authored by Thomas Thrainer

Check DRBD status on verify-disks

Enhance `gnt-cluster verify-disks` such that it checks the DRBD status
of all disks. If the status of at least one disk of an instance is
either StandAlone or Diskless, the instance disks are activated
automatically.
Signed-off-by: Thomas Thrainer <thomasth@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parent 843094ad
......@@ -3801,14 +3801,20 @@ def CleanupImportExport(name):
shutil.rmtree(status_dir, ignore_errors=True)
def _FindDisks(target_node_uuid, nodes_ip, disks):
"""Sets the physical ID on disks and returns the block devices.
def _SetPhysicalId(target_node_uuid, nodes_ip, disks):
"""Sets the correct physical ID on all passed disks.
"""
# set the correct physical ID
for cf in disks:
cf.SetPhysicalID(target_node_uuid, nodes_ip)
def _FindDisks(target_node_uuid, nodes_ip, disks):
"""Sets the physical ID on disks and returns the block devices.
"""
_SetPhysicalId(target_node_uuid, nodes_ip, disks)
bdevs = []
for cf in disks:
......@@ -3927,6 +3933,26 @@ def DrbdWaitSync(target_node_uuid, nodes_ip, disks):
return (alldone, min_resync)
def DrbdNeedsActivation(target_node_uuid, nodes_ip, disks):
  """Checks which of the passed disks needs activation and returns their UUIDs.

  A disk needs activation if its block device cannot be found at all, or
  if its DRBD status is StandAlone or Diskless.

  @param target_node_uuid: UUID of the node the disks are attached to
  @param nodes_ip: mapping of node UUIDs to their (secondary) IP addresses
  @param disks: list of DRBD disk objects to check
  @return: list of UUIDs of the disks that need activation

  """
  _SetPhysicalId(target_node_uuid, nodes_ip, disks)

  def _NeedsActivation(disk):
    # A missing block device is treated the same as a faulty DRBD state
    bdev = _RecursiveFindBD(disk)
    if bdev is None:
      return True
    status = bdev.GetProcStatus()
    return status.is_standalone or status.is_diskless

  return [disk.uuid for disk in disks if _NeedsActivation(disk)]
def GetDrbdUsermodeHelper():
"""Returns DRBD usermode helper currently configured.
......
......@@ -21,6 +21,7 @@
"""Logical units dealing with node groups."""
import itertools
import logging
from ganeti import constants
......@@ -902,22 +903,10 @@ class LUGroupVerifyDisks(NoHooksLU):
CheckInstancesNodeGroups(self.cfg, self.instances,
owned_groups, owned_node_uuids, self.group_uuid)
def Exec(self, feedback_fn):
"""Verify integrity of cluster disks.
@rtype: tuple of three items
@return: a tuple of (dict of node-to-node_error, list of instances
which need activate-disks, dict of instance: (node, volume) for
missing volumes
"""
node_errors = {}
offline_lv_instance_names = set()
missing_lvs = {}
def _VerifyInstanceLvs(self, node_errors, offline_disk_instance_names,
missing_disks):
node_lv_to_inst = MapInstanceLvsToNodes(
[inst for inst in self.instances.values() if inst.disks_active])
if node_lv_to_inst:
node_uuids = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
set(self.cfg.GetVmCapableNodeList()))
......@@ -938,11 +927,57 @@ class LUGroupVerifyDisks(NoHooksLU):
for lv_name, (_, _, lv_online) in node_res.payload.items():
inst = node_lv_to_inst.pop((node_uuid, lv_name), None)
if not lv_online and inst is not None:
offline_lv_instance_names.add(inst.name)
offline_disk_instance_names.add(inst.name)
# any leftover items in nv_dict are missing LVs, let's arrange the data
# better
for key, inst in node_lv_to_inst.iteritems():
missing_lvs.setdefault(inst.name, []).append(list(key))
missing_disks.setdefault(inst.name, []).append(list(key))
def _VerifyDrbdStates(self, node_errors, offline_disk_instance_names):
  """Checks the DRBD status of all disks of DRBD-based instances.

  Instance names whose disks need (re-)activation are added to
  C{offline_disk_instance_names}; RPC failures are recorded per node
  in C{node_errors}. Both parameters are updated in place.

  @param node_errors: dict mapping node UUIDs to error messages
  @param offline_disk_instance_names: set of instance names needing
      activate-disks

  """
  # Group the relevant instances by every node (primary and secondary)
  # that hosts one of their DRBD disks
  insts_by_node = {}
  for inst in self.instances.values():
    if not inst.disks_active or inst.disk_template != constants.DT_DRBD8:
      continue
    for node_uuid in itertools.chain([inst.primary_node],
                                     inst.secondary_nodes):
      insts_by_node.setdefault(node_uuid, []).append(inst)

  nodes_ip = dict((uuid, node.secondary_ip) for (uuid, node)
                  in self.cfg.GetMultiNodeInfo(insts_by_node.keys()))

  for (node_uuid, insts) in insts_by_node.items():
    disks_with_inst = [(inst.disks, inst) for inst in insts]
    result = self.rpc.call_drbd_needs_activation(node_uuid, nodes_ip,
                                                 disks_with_inst)
    msg = result.fail_msg
    if msg:
      # Best effort: record the failure and keep checking the other nodes
      logging.warning("Error getting DRBD status on node %s: %s",
                      self.cfg.GetNodeName(node_uuid), msg)
      node_errors[node_uuid] = msg
      continue

    faulty_disk_uuids = set(result.payload)
    # Map the faulty disk UUIDs back to instance names
    for inst in self.instances.values():
      if faulty_disk_uuids.intersection(disk.uuid for disk in inst.disks):
        offline_disk_instance_names.add(inst.name)
def Exec(self, feedback_fn):
  """Verify integrity of cluster disks.

  @rtype: tuple of three items
  @return: a tuple of (dict of node-to-node_error, list of instances
      which need activate-disks, dict of instance: (node, volume) for
      missing volumes

  """
  errors_by_node = {}
  insts_needing_activation = set()
  insts_missing_disks = {}

  # LV presence check first, then DRBD state check; both accumulate
  # into the shared result containers
  self._VerifyInstanceLvs(errors_by_node, insts_needing_activation,
                          insts_missing_disks)
  self._VerifyDrbdStates(errors_by_node, insts_needing_activation)

  return (errors_by_node, list(insts_needing_activation),
          insts_missing_disks)
......@@ -837,6 +837,7 @@ class RpcRunner(_RpcClientBase,
# Encoders annotating disk parameters
rpc_defs.ED_DISKS_DICT_DP: self._DisksDictDP,
rpc_defs.ED_MULTI_DISKS_DICT_DP: self._MultiDiskDictDP,
rpc_defs.ED_SINGLE_DISK_DICT_DP: self._SingleDiskDictDP,
# Encoders with special requirements
......@@ -935,6 +936,14 @@ class RpcRunner(_RpcClientBase,
for disk in AnnotateDiskParams(instance.disk_template,
disks, diskparams)]
def _MultiDiskDictDP(self, disks_insts):
"""Wrapper for L{AnnotateDiskParams}.
Supports a list of (disk, instance) tuples.
"""
return [disk for disk_inst in disks_insts
for disk in self._DisksDictDP(disk_inst)]
def _SingleDiskDictDP(self, (disk, instance)):
"""Wrapper for L{AnnotateDiskParams}.
......
......@@ -70,8 +70,9 @@ ACCEPT_OFFLINE_NODE = object()
ED_COMPRESS,
ED_BLOCKDEV_RENAME,
ED_DISKS_DICT_DP,
ED_MULTI_DISKS_DICT_DP,
ED_SINGLE_DISK_DICT_DP,
ED_NIC_DICT) = range(1, 15)
ED_NIC_DICT) = range(1, 16)
def _Prepare(calls):
......@@ -412,6 +413,11 @@ _BLOCKDEV_CALLS = [
("disks", ED_DISKS_DICT_DP, None),
], _DrbdCallsPreProc, None,
"Waits for the synchronization of drbd devices is complete"),
("drbd_needs_activation", SINGLE, None, constants.RPC_TMO_NORMAL, [
("nodes_ip", None, None),
("disks", ED_MULTI_DISKS_DICT_DP, None),
], _DrbdCallsPreProc, None,
"Returns the drbd disks which need activation"),
("blockdev_grow", SINGLE, None, constants.RPC_TMO_NORMAL, [
("cf_bdev", ED_SINGLE_DISK_DICT_DP, None),
("amount", None, None),
......
......@@ -444,6 +444,18 @@ class NodeRequestHandler(http.server.HttpServerHandler):
disks = [objects.Disk.FromDict(cf) for cf in disks]
return backend.DrbdWaitSync(target_node_uuid, nodes_ip, disks)
@staticmethod
def perspective_drbd_needs_activation(params):
  """Checks if the drbd devices need activation

  Note that this is only valid for drbd disks, so the members of the
  disk list must all be drbd devices.

  """
  (nodes_ip, raw_disks, target_node_uuid) = params
  # Deserialize the wire-format disks before handing them to the backend
  disks = [objects.Disk.FromDict(raw) for raw in raw_disks]
  return backend.DrbdNeedsActivation(target_node_uuid, nodes_ip, disks)
@staticmethod
def perspective_drbd_helper(params):
"""Query drbd helper.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment