From 3f78eef21e5c4f401db51376664f68ed16a67e90 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Fri, 2 Nov 2007 13:44:29 +0000 Subject: [PATCH] Implement device to instance mapping cache Currently, troubleshooting DRBD problems involves a manual process of going backwards from the DRBD device to the instance that owns it. This patch adds a weak (i.e. not guaranteed to be correct or up-to-date) cache of device-to-instance mappings. In normal operation the cache should hold correct information, as the only times devices change paths are when they are started or stopped, and the code in backend.py adds cache updates to exactly these operations. The only drawback of this implementation is that we don't fully update the cache on renames of devices (we clean the old entries but we don't add new ones). Since the rename changes the path only for LVs (and not drbd and md), this is less of a problem as the target of this code is debugging DRBD and MD issues. The patch writes files named bdev_drbd<N> (or bdev_md<N>, bdev_xenvg_...) in /var/run/ganeti (more exactly, LOCALSTATEDIR/run/ganeti). The file names start with 'bdev_' and continue with the path of the device under /dev/ (this prefix stripped), and the files contain the following values, space separated: - instance name - primary or secondary (depending on whether the device is on the primary or the secondary node) - instance visible name: sda or sdb or not_visible, the latter case when the device is not the top-level device (e.g. remote_raid1 templates will have sd[ab] for the md, but not_visible for drbd and logical volumes) The cache is designed to not raise any errors; if there is an I/O error, it will only be logged in the node daemon log file. This is in order to reduce the possible impact of the cache on the block device activation and shutdown code. 
Reviewed-by: imsnah --- daemons/ganeti-noded | 8 ++--- lib/backend.py | 85 ++++++++++++++++++++++++++++++++++++++++---- lib/cmdlib.py | 44 ++++++++++++++--------- lib/constants.py | 3 +- lib/rpc.py | 8 ++--- 5 files changed, 116 insertions(+), 32 deletions(-) diff --git a/daemons/ganeti-noded b/daemons/ganeti-noded index 954010645..92825bb42 100755 --- a/daemons/ganeti-noded +++ b/daemons/ganeti-noded @@ -97,11 +97,11 @@ class ServerObject(pb.Avatar): """Create a block device. """ - bdev_s, size, on_primary, info = params + bdev_s, size, owner, on_primary, info = params bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") - return backend.CreateBlockDevice(bdev, size, on_primary, info) + return backend.CreateBlockDevice(bdev, size, owner, on_primary, info) @staticmethod def perspective_blockdev_remove(params): @@ -125,11 +125,11 @@ class ServerObject(pb.Avatar): """Assemble a block device. """ - bdev_s, on_primary = params + bdev_s, owner, on_primary = params bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") - return backend.AssembleBlockDevice(bdev, on_primary) + return backend.AssembleBlockDevice(bdev, owner, on_primary) @staticmethod def perspective_blockdev_shutdown(params): diff --git a/lib/backend.py b/lib/backend.py index 2719ab3c7..69d75d80a 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -620,7 +620,7 @@ def RebootInstance(instance, reboot_type, extra_args): return True -def CreateBlockDevice(disk, size, on_primary, info): +def CreateBlockDevice(disk, size, owner, on_primary, info): """Creates a block device for an instance. 
Args: @@ -638,7 +638,7 @@ def CreateBlockDevice(disk, size, on_primary, info): clist = [] if disk.children: for child in disk.children: - crdev = _RecursiveAssembleBD(child, on_primary) + crdev = _RecursiveAssembleBD(child, owner, on_primary) if on_primary or disk.AssembleOnSecondary(): # we need the children open in case the device itself has to # be assembled @@ -664,6 +664,8 @@ def CreateBlockDevice(disk, size, on_primary, info): device.SetSyncSpeed(constants.SYNC_SPEED) if on_primary or disk.OpenOnSecondary(): device.Open(force=True) + DevCacheManager.UpdateCache(device.dev_path, owner, + on_primary, disk.iv_name) device.SetInfo(info) @@ -686,7 +688,10 @@ def RemoveBlockDevice(disk): logger.Info("Can't attach to device %s in remove" % disk) rdev = None if rdev is not None: + r_path = rdev.dev_path result = rdev.Remove() + if result: + DevCacheManager.RemoveCache(r_path) else: result = True if disk.children: @@ -695,7 +700,7 @@ def RemoveBlockDevice(disk): return result -def _RecursiveAssembleBD(disk, as_primary): +def _RecursiveAssembleBD(disk, owner, as_primary): """Activate a block device for an instance. This is run on the primary and secondary nodes for an instance. @@ -715,7 +720,7 @@ def _RecursiveAssembleBD(disk, as_primary): children = [] if disk.children: for chld_disk in disk.children: - children.append(_RecursiveAssembleBD(chld_disk, as_primary)) + children.append(_RecursiveAssembleBD(chld_disk, owner, as_primary)) if as_primary or disk.AssembleOnSecondary(): r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children) @@ -725,12 +730,15 @@ def _RecursiveAssembleBD(disk, as_primary): r_dev.Open() else: r_dev.Close() + DevCacheManager.UpdateCache(r_dev.dev_path, owner, + as_primary, disk.iv_name) + else: result = True return result -def AssembleBlockDevice(disk, as_primary): +def AssembleBlockDevice(disk, owner, as_primary): """Activate a block device for an instance. This is a wrapper over _RecursiveAssembleBD. 
@@ -740,7 +748,7 @@ def AssembleBlockDevice(disk, as_primary): True for secondary nodes """ - result = _RecursiveAssembleBD(disk, as_primary) + result = _RecursiveAssembleBD(disk, owner, as_primary) if isinstance(result, bdev.BlockDev): result = result.dev_path return result @@ -759,7 +767,10 @@ def ShutdownBlockDevice(disk): """ r_dev = _RecursiveFindBD(disk) if r_dev is not None: + r_path = r_dev.dev_path result = r_dev.Shutdown() + if result: + DevCacheManager.RemoveCache(r_path) else: result = True if disk.children: @@ -1356,7 +1367,16 @@ def RenameBlockDevices(devlist): result = False continue try: + old_rpath = dev.dev_path dev.Rename(unique_id) + new_rpath = dev.dev_path + if old_rpath != new_rpath: + DevCacheManager.RemoveCache(old_rpath) + # FIXME: we should add the new cache information here, like: + # DevCacheManager.UpdateCache(new_rpath, owner, ...) + # but we don't have the owner here - maybe parse from existing + # cache? for now, we only lose lvm data when we rename, which + # is less critical than DRBD or MD except errors.BlockDeviceError, err: logger.Error("Can't rename device '%s' to '%s': %s" % (dev, unique_id, err)) @@ -1473,3 +1493,56 @@ class HooksRunner(object): rr.append(("%s/%s" % (subdir, relname), rrval, output)) return rr + + +class DevCacheManager(object): + """Simple class for managing a chache of block device information. + + """ + _DEV_PREFIX = "/dev/" + _ROOT_DIR = constants.BDEV_CACHE_DIR + + @classmethod + def _ConvertPath(cls, dev_path): + """Converts a /dev/name path to the cache file name. + + This replaces slashes with underscores and strips the /dev + prefix. 
It then returns the full path to the cache file + + """ + if dev_path.startswith(cls._DEV_PREFIX): + dev_path = dev_path[len(cls._DEV_PREFIX):] + dev_path = dev_path.replace("/", "_") + fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path) + return fpath + + @classmethod + def UpdateCache(cls, dev_path, owner, on_primary, iv_name): + """Updates the cache information for a given device. + + """ + fpath = cls._ConvertPath(dev_path) + if on_primary: + state = "primary" + else: + state = "secondary" + if iv_name is None: + iv_name = "not_visible" + fdata = "%s %s %s\n" % (str(owner), state, iv_name) + try: + utils.WriteFile(fpath, data=fdata) + except EnvironmentError, err: + logger.Error("Can't update bdev cache for %s, error %s" % + (dev_path, str(err))) + + @classmethod + def RemoveCache(cls, dev_path): + """Remove data for a dev_path. + + """ + fpath = cls._ConvertPath(dev_path) + try: + utils.RemoveFile(fpath) + except EnvironmentError, err: + logger.Error("Can't update bdev cache for %s, error %s" % + (dev_path, str(err))) diff --git a/lib/cmdlib.py b/lib/cmdlib.py index bcb29a39d..829c01913 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -1821,7 +1821,8 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): cfg.SetDiskID(node_disk, node) is_primary = node == instance.primary_node - result = rpc.call_blockdev_assemble(node, node_disk, is_primary) + result = rpc.call_blockdev_assemble(node, node_disk, + instance.name, is_primary) if not result: logger.Error("could not prepare block device %s on node %s (is_pri" "mary=%s)" % (inst_disk.iv_name, node, is_primary)) @@ -2560,7 +2561,7 @@ class LUFailoverInstance(LogicalUnit): (instance.name, target_node)) -def _CreateBlockDevOnPrimary(cfg, node, device, info): +def _CreateBlockDevOnPrimary(cfg, node, instance, device, info): """Create a tree of block devices on the primary node. This always creates all devices. 
@@ -2568,11 +2569,12 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info): """ if device.children: for child in device.children: - if not _CreateBlockDevOnPrimary(cfg, node, child, info): + if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info): return False cfg.SetDiskID(device, node) - new_id = rpc.call_blockdev_create(node, device, device.size, True, info) + new_id = rpc.call_blockdev_create(node, device, device.size, + instance.name, True, info) if not new_id: return False if device.physical_id is None: @@ -2580,7 +2582,7 @@ def _CreateBlockDevOnPrimary(cfg, node, device, info): return True -def _CreateBlockDevOnSecondary(cfg, node, device, force, info): +def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info): """Create a tree of block devices on a secondary node. If this device type has to be created on secondaries, create it and @@ -2593,13 +2595,15 @@ def _CreateBlockDevOnSecondary(cfg, node, device, force, info): force = True if device.children: for child in device.children: - if not _CreateBlockDevOnSecondary(cfg, node, child, force, info): + if not _CreateBlockDevOnSecondary(cfg, node, instance, + child, force, info): return False if not force: return True cfg.SetDiskID(device, node) - new_id = rpc.call_blockdev_create(node, device, device.size, False, info) + new_id = rpc.call_blockdev_create(node, device, device.size, + instance.name, False, info) if not new_id: return False if device.physical_id is None: @@ -2754,13 +2758,14 @@ def _CreateDisks(cfg, instance): (device.iv_name, instance.name)) #HARDCODE for secondary_node in instance.secondary_nodes: - if not _CreateBlockDevOnSecondary(cfg, secondary_node, device, False, - info): + if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance, + device, False, info): logger.Error("failed to create volume %s (%s) on secondary node %s!" 
% (device.iv_name, device, secondary_node)) return False #HARDCODE - if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, device, info): + if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, + instance, device, info): logger.Error("failed to create volume %s on primary!" % device.iv_name) return False @@ -3206,14 +3211,16 @@ class LUAddMDDRBDComponent(LogicalUnit): logger.Info("adding new mirror component on secondary") #HARDCODE - if not _CreateBlockDevOnSecondary(self.cfg, remote_node, new_drbd, False, + if not _CreateBlockDevOnSecondary(self.cfg, remote_node, instance, + new_drbd, False, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new component on secondary" " node %s" % remote_node) logger.Info("adding new mirror component on primary") #HARDCODE - if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, new_drbd, + if not _CreateBlockDevOnPrimary(self.cfg, instance.primary_node, + instance, new_drbd, _GetInstanceInfoText(instance)): # remove secondary dev self.cfg.SetDiskID(new_drbd, remote_node) @@ -3444,7 +3451,8 @@ class LUReplaceDisks(LogicalUnit): logger.Info("adding new mirror component on secondary for %s" % dev.iv_name) #HARDCODE - if not _CreateBlockDevOnSecondary(cfg, remote_node, new_drbd, False, + if not _CreateBlockDevOnSecondary(cfg, remote_node, instance, + new_drbd, False, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new component on" " secondary node %s\n" @@ -3453,7 +3461,8 @@ class LUReplaceDisks(LogicalUnit): logger.Info("adding new mirror component on primary") #HARDCODE - if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, new_drbd, + if not _CreateBlockDevOnPrimary(cfg, instance.primary_node, + instance, new_drbd, _GetInstanceInfoText(instance)): # remove secondary dev cfg.SetDiskID(new_drbd, remote_node) @@ -3558,7 +3567,7 @@ class LUReplaceDisks(LogicalUnit): # _Create...OnPrimary (which forces the creation), even if we # are talking about the 
secondary node for new_lv in new_lvs: - if not _CreateBlockDevOnPrimary(cfg, tgt_node, new_lv, + if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new LV named '%s' on" " node '%s'" % @@ -3669,7 +3678,7 @@ class LUReplaceDisks(LogicalUnit): # _Create...OnPrimary (which forces the creation), even if we # are talking about the secondary node for new_lv in dev.children: - if not _CreateBlockDevOnPrimary(cfg, new_node, new_lv, + if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new LV named '%s' on" " node '%s'" % @@ -3680,7 +3689,8 @@ class LUReplaceDisks(LogicalUnit): logical_id=(pri_node, new_node, dev.logical_id[2]), children=dev.children) - if not _CreateBlockDevOnSecondary(cfg, new_node, new_drbd, False, + if not _CreateBlockDevOnSecondary(cfg, new_node, instance, + new_drbd, False, _GetInstanceInfoText(instance)): raise errors.OpExecError("Failed to create new DRBD on" " node '%s'" % new_node) diff --git a/lib/constants.py b/lib/constants.py index 2a810b109..8ade4b680 100644 --- a/lib/constants.py +++ b/lib/constants.py @@ -25,7 +25,7 @@ from ganeti import _autoconf # various versions CONFIG_VERSION = 3 -PROTOCOL_VERSION = 4 +PROTOCOL_VERSION = 5 RELEASE_VERSION = _autoconf.PACKAGE_VERSION OS_API_VERSION = 5 EXPORT_VERSION = 0 @@ -33,6 +33,7 @@ EXPORT_VERSION = 0 # file paths DATA_DIR = _autoconf.LOCALSTATEDIR + "/lib/ganeti" +BDEV_CACHE_DIR = _autoconf.LOCALSTATEDIR + "/run/ganeti" CLUSTER_CONF_FILE = DATA_DIR + "/config.data" SSL_CERT_FILE = DATA_DIR + "/server.pem" WATCHER_STATEFILE = DATA_DIR + "/watcher.data" diff --git a/lib/rpc.py b/lib/rpc.py index 790ad5e40..d84a85e04 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -489,13 +489,13 @@ def call_version(node_list): return c.getresult() -def call_blockdev_create(node, bdev, size, on_primary, info): +def call_blockdev_create(node, 
bdev, size, owner, on_primary, info): """Request creation of a given block device. This is a single-node call. """ - params = [bdev.ToDict(), size, on_primary, info] + params = [bdev.ToDict(), size, owner, on_primary, info] c = Client("blockdev_create", params) c.connect(node) c.run() @@ -527,13 +527,13 @@ def call_blockdev_rename(node, devlist): return c.getresult().get(node, False) -def call_blockdev_assemble(node, disk, on_primary): +def call_blockdev_assemble(node, disk, owner, on_primary): """Request assembling of a given block device. This is a single-node call. """ - params = [disk.ToDict(), on_primary] + params = [disk.ToDict(), owner, on_primary] c = Client("blockdev_assemble", params) c.connect(node) c.run() -- GitLab