Commit 3f78eef2 authored by Iustin Pop's avatar Iustin Pop
Browse files

Implement device to instance mapping cache

Currently, troubleshooting DRBD problems involves a manual process of going
backwards from the DRBD device to the instance that owns it.

This patch adds a weak (i.e. not guaranteed to be correct or up-to-date)
cache mapping devices to instances. In normal operation the cache should
hold correct information, as the only times when devices change paths
are when they are started/stopped, and the code in backend.py adds cache
updates to exactly these operations.

The only drawback of this implementation is that we don't fully update
the cache on renames of devices (we clean the old entries but we don't
add new ones). Since the rename changes the path only for LVs (and not
drbd and md), this is less of a problem as the target of this code is
debugging DRBD and MD issues.

The patch writes files named bdev_drbd<N> (or bdev_md<N>,
bdev_xenvg_...) in /var/run/ganeti (more exactly, LOCALSTATEDIR/run/ganeti).
The files start with 'bdev_' and continue with the path of the device
under /dev/ (this prefix stripped), and contain the following values,
space separated:
  - instance name
  - primary or secondary (depending on whether the device is on the
    primary or secondary node)
  - instance visible name: sda or sdb or not_visible, the latter case
    when the device is not the top-level device (i.e. remote_raid1
    templates will have sd[ab] for the md, but not_visible for drbd and
    logical volumes)

The cache is designed to not raise any errors; if there is an I/O error
it will only be logged in the node daemon log file. This is in order to
reduce the possible impact of the cache on the block device activation
and shutdown code.

Reviewed-by: imsnah
parent 79f87a76
......@@ -97,11 +97,11 @@ class ServerObject(pb.Avatar):
"""Create a block device.
"""
bdev_s, size, on_primary, info = params
bdev_s, size, owner, on_primary, info = params
bdev = objects.Disk.FromDict(bdev_s)
if bdev is None:
raise ValueError("can't unserialize data!")
return backend.CreateBlockDevice(bdev, size, on_primary, info)
return backend.CreateBlockDevice(bdev, size, owner, on_primary, info)
@staticmethod
def perspective_blockdev_remove(params):
......@@ -125,11 +125,11 @@ class ServerObject(pb.Avatar):
"""Assemble a block device.
"""
bdev_s, on_primary = params
bdev_s, owner, on_primary = params
bdev = objects.Disk.FromDict(bdev_s)
if bdev is None:
raise ValueError("can't unserialize data!")
return backend.AssembleBlockDevice(bdev, on_primary)
return backend.AssembleBlockDevice(bdev, owner, on_primary)
@staticmethod
def perspective_blockdev_shutdown(params):
......
......@@ -620,7 +620,7 @@ def RebootInstance(instance, reboot_type, extra_args):
return True
def CreateBlockDevice(disk, size, on_primary, info):
def CreateBlockDevice(disk, size, owner, on_primary, info):
"""Creates a block device for an instance.
Args:
......@@ -638,7 +638,7 @@ def CreateBlockDevice(disk, size, on_primary, info):
clist = []
if disk.children:
for child in disk.children:
crdev = _RecursiveAssembleBD(child, on_primary)
crdev = _RecursiveAssembleBD(child, owner, on_primary)
if on_primary or disk.AssembleOnSecondary():
# we need the children open in case the device itself has to
# be assembled
......@@ -664,6 +664,8 @@ def CreateBlockDevice(disk, size, on_primary, info):
device.SetSyncSpeed(constants.SYNC_SPEED)
if on_primary or disk.OpenOnSecondary():
device.Open(force=True)
DevCacheManager.UpdateCache(device.dev_path, owner,
on_primary, disk.iv_name)
device.SetInfo(info)
......@@ -686,7 +688,10 @@ def RemoveBlockDevice(disk):
logger.Info("Can't attach to device %s in remove" % disk)
rdev = None
if rdev is not None:
r_path = rdev.dev_path
result = rdev.Remove()
if result:
DevCacheManager.RemoveCache(r_path)
else:
result = True
if disk.children:
......@@ -695,7 +700,7 @@ def RemoveBlockDevice(disk):
return result
def _RecursiveAssembleBD(disk, as_primary):
def _RecursiveAssembleBD(disk, owner, as_primary):
"""Activate a block device for an instance.
This is run on the primary and secondary nodes for an instance.
......@@ -715,7 +720,7 @@ def _RecursiveAssembleBD(disk, as_primary):
children = []
if disk.children:
for chld_disk in disk.children:
children.append(_RecursiveAssembleBD(chld_disk, as_primary))
children.append(_RecursiveAssembleBD(chld_disk, owner, as_primary))
if as_primary or disk.AssembleOnSecondary():
r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
......@@ -725,12 +730,15 @@ def _RecursiveAssembleBD(disk, as_primary):
r_dev.Open()
else:
r_dev.Close()
DevCacheManager.UpdateCache(r_dev.dev_path, owner,
as_primary, disk.iv_name)
else:
result = True
return result
def AssembleBlockDevice(disk, as_primary):
def AssembleBlockDevice(disk, owner, as_primary):
"""Activate a block device for an instance.
This is a wrapper over _RecursiveAssembleBD.
......@@ -740,7 +748,7 @@ def AssembleBlockDevice(disk, as_primary):
True for secondary nodes
"""
result = _RecursiveAssembleBD(disk, as_primary)
result = _RecursiveAssembleBD(disk, owner, as_primary)
if isinstance(result, bdev.BlockDev):
result = result.dev_path
return result
......@@ -759,7 +767,10 @@ def ShutdownBlockDevice(disk):
"""
r_dev = _RecursiveFindBD(disk)
if r_dev is not None:
r_path = r_dev.dev_path
result = r_dev.Shutdown()
if result:
DevCacheManager.RemoveCache(r_path)
else:
result = True
if disk.children:
......@@ -1356,7 +1367,16 @@ def RenameBlockDevices(devlist):
result = False
continue
try:
old_rpath = dev.dev_path
dev.Rename(unique_id)
new_rpath = dev.dev_path
if old_rpath != new_rpath:
DevCacheManager.RemoveCache(old_rpath)
# FIXME: we should add the new cache information here, like:
# DevCacheManager.UpdateCache(new_rpath, owner, ...)
# but we don't have the owner here - maybe parse from existing
# cache? for now, we only lose lvm data when we rename, which
# is less critical than DRBD or MD
except errors.BlockDeviceError, err:
logger.Error("Can't rename device '%s' to '%s': %s" %
(dev, unique_id, err))
......@@ -1473,3 +1493,56 @@ class HooksRunner(object):
rr.append(("%s/%s" % (subdir, relname), rrval, output))
return rr
class DevCacheManager(object):
"""Simple class for managing a chache of block device information.
"""
_DEV_PREFIX = "/dev/"
_ROOT_DIR = constants.BDEV_CACHE_DIR
@classmethod
def _ConvertPath(cls, dev_path):
"""Converts a /dev/name path to the cache file name.
This replaces slashes with underscores and strips the /dev
prefix. It then returns the full path to the cache file
"""
if dev_path.startswith(cls._DEV_PREFIX):
dev_path = dev_path[len(cls._DEV_PREFIX):]
dev_path = dev_path.replace("/", "_")
fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
return fpath
@classmethod
def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
"""Updates the cache information for a given device.
"""
fpath = cls._ConvertPath(dev_path)
if on_primary:
state = "primary"
else:
state = "secondary"
if iv_name is None:
iv_name = "not_visible"
fdata = "%s %s %s\n" % (str(owner), state, iv_name)
try:
utils.WriteFile(fpath, data=fdata)
except EnvironmentError, err:
logger.Error("Can't update bdev cache for %s, error %s" %
(dev_path, str(err)))
@classmethod
def RemoveCache(cls, dev_path):
"""Remove data for a dev_path.
"""
fpath = cls._ConvertPath(dev_path)
try:
utils.RemoveFile(fpath)
except EnvironmentError, err:
logger.Error("Can't update bdev cache for %s, error %s" %
(dev_path, str(err)))
......@@ -1821,7 +1821,8 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
cfg.SetDiskID(node_disk, node)
is_primary = node == instance.primary_node
result = rpc.call_blockdev_assemble(node, node_disk, is_primary)
result = rpc.call_blockdev_assemble(node, node_disk,
instance.name, is_primary)
if not result:
logger.Error("could not prepare block device %s on node %s (is_pri"
"mary=%s)" % (inst_disk.iv_name, node, is_primary))
......@@ -2560,7 +2561,7 @@ class LUFailoverInstance(LogicalUnit):
(instance.name, target_node))
def _CreateBlockDevOnPrimary(cfg, node, device, info):
def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
"""Create a tree of block devices on the primary node.
This always creates all devices.
......@@ -2568,11 +2569,12 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info):
"""
if device.children:
for child in device.children:
if not _CreateBlockDevOnPrimary(cfg, node, child, info):
if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
return False
cfg.SetDiskID(device, node)
new_id = rpc.call_blockdev_create(node, device, device.size, True, info)
new_id = rpc.call_blockdev_create(node, device, device.size,
instance.name, True, info)
if not new_id:
return False
if device.physical_id is None:
......@@ -2580,7 +2582,7 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info):
return True
def _CreateBlockDevOnSecondary(cfg, node, device, force, info):
def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
"""Create a tree of block devices on a secondary node.
If this device type has to be created on secondaries, create it and
......@@ -2593,13 +2595,15 @@ def _CreateBlockDevOnSecondary(cfg, node, device, force, info):
force = True
if device.children:
for child in device.children:
if not _CreateBlockDevOnSecondary(cfg, node, child, force, info):
if not _CreateBlockDevOnSecondary(cfg, node, instance,
child, force, info):
return False
if not force:
return True
cfg.SetDiskID(device, node)
new_id = rpc.call_blockdev_create(node, device, device.size, False, info)
new_id = rpc.call_blockdev_create(node, device, device.size,
instance.name, False, info)
if not new_id:
return False
if device.physical_id is None:
......@@ -2754,13 +2758,14 @@ def _CreateDisks(cfg, instance):
(device.iv_name, instance.name))
#HARDCODE
for secondary_node in instance.secondary_nodes:
if not _CreateBlockDevOnSecondary(cfg, secondary_node, device, False,
info):
if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
device, False, info):
logger.Error("failed to create volume %s (%s) on secondary node %s!" %
(device.iv_name, device, secondary_node))
return False
#HARDCODE
if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, device, info):
if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
instance, device, info):
logger.Error("failed to create volume %s on primary!" %
device.iv_name)
return False
......@@ -3206,14 +3211,16 @@ class LUAddMDDRBDComponent(LogicalUnit):
logger.Info("adding new mirror component on secondary")
#HARDCODE
if not _CreateBlockDevOnSecondary(self.cfg, remote_node, new_drbd, False,
if not _CreateBlockDevOnSecondary(self.cfg, remote_node, instance,
new_drbd, False,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new component on secondary"
" node %s" % remote_node)
logger.Info("adding new mirror component on primary")
#HARDCODE
if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, new_drbd,
if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node,
instance, new_drbd,
_GetInstanceInfoText(instance)):
# remove secondary dev
self.cfg.SetDiskID(new_drbd, remote_node)
......@@ -3444,7 +3451,8 @@ class LUReplaceDisks(LogicalUnit):
logger.Info("adding new mirror component on secondary for %s" %
dev.iv_name)
#HARDCODE
if not _CreateBlockDevOnSecondary(cfg, remote_node, new_drbd, False,
if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
new_drbd, False,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new component on"
" secondary node %s\n"
......@@ -3453,7 +3461,8 @@ class LUReplaceDisks(LogicalUnit):
logger.Info("adding new mirror component on primary")
#HARDCODE
if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, new_drbd,
if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
instance, new_drbd,
_GetInstanceInfoText(instance)):
# remove secondary dev
cfg.SetDiskID(new_drbd, remote_node)
......@@ -3558,7 +3567,7 @@ class LUReplaceDisks(LogicalUnit):
# _Create...OnPrimary (which forces the creation), even if we
# are talking about the secondary node
for new_lv in new_lvs:
if not _CreateBlockDevOnPrimary(cfg, tgt_node, new_lv,
if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new LV named '%s' on"
" node '%s'" %
......@@ -3669,7 +3678,7 @@ class LUReplaceDisks(LogicalUnit):
# _Create...OnPrimary (which forces the creation), even if we
# are talking about the secondary node
for new_lv in dev.children:
if not _CreateBlockDevOnPrimary(cfg, new_node, new_lv,
if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new LV named '%s' on"
" node '%s'" %
......@@ -3680,7 +3689,8 @@ class LUReplaceDisks(LogicalUnit):
logical_id=(pri_node, new_node,
dev.logical_id[2]),
children=dev.children)
if not _CreateBlockDevOnSecondary(cfg, new_node, new_drbd, False,
if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
new_drbd, False,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new DRBD on"
" node '%s'" % new_node)
......
......@@ -25,7 +25,7 @@ from ganeti import _autoconf
# various versions
CONFIG_VERSION = 3
PROTOCOL_VERSION = 4
PROTOCOL_VERSION = 5
RELEASE_VERSION = _autoconf.PACKAGE_VERSION
OS_API_VERSION = 5
EXPORT_VERSION = 0
......@@ -33,6 +33,7 @@ EXPORT_VERSION = 0
# file paths
DATA_DIR = _autoconf.LOCALSTATEDIR + "/lib/ganeti"
BDEV_CACHE_DIR = _autoconf.LOCALSTATEDIR + "/run/ganeti"
CLUSTER_CONF_FILE = DATA_DIR + "/config.data"
SSL_CERT_FILE = DATA_DIR + "/server.pem"
WATCHER_STATEFILE = DATA_DIR + "/watcher.data"
......
......@@ -489,13 +489,13 @@ def call_version(node_list):
return c.getresult()
def call_blockdev_create(node, bdev, size, on_primary, info):
def call_blockdev_create(node, bdev, size, owner, on_primary, info):
"""Request creation of a given block device.
This is a single-node call.
"""
params = [bdev.ToDict(), size, on_primary, info]
params = [bdev.ToDict(), size, owner, on_primary, info]
c = Client("blockdev_create", params)
c.connect(node)
c.run()
......@@ -527,13 +527,13 @@ def call_blockdev_rename(node, devlist):
return c.getresult().get(node, False)
def call_blockdev_assemble(node, disk, on_primary):
def call_blockdev_assemble(node, disk, owner, on_primary):
"""Request assembling of a given block device.
This is a single-node call.
"""
params = [disk.ToDict(), on_primary]
params = [disk.ToDict(), owner, on_primary]
c = Client("blockdev_assemble", params)
c.connect(node)
c.run()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment