diff --git a/lib/bdev.py b/lib/bdev.py index 1552956c716bb4cbb076a1c4369617bef406c7cc..a6556956dd8b69a5591dc2c25e3a122a33f5f00b 100644 --- a/lib/bdev.py +++ b/lib/bdev.py @@ -122,7 +122,13 @@ class BlockDev(object): status = status and child.Assemble() if not status: break - status = status and child.Open() + + try: + child.Open() + except errors.BlockDeviceError: + for child in self._children: + child.Shutdown() + raise if not status: for child in self._children: @@ -502,7 +508,7 @@ class LogicalVolume(BlockDev): This is a no-op for the LV device type. """ - return True + pass def Close(self): """Notifies that the device will no longer be used for I/O. @@ -510,7 +516,7 @@ class LogicalVolume(BlockDev): This is a no-op for the LV device type. """ - return True + pass def Snapshot(self, size): """Create a snapshot copy of an lvm block device. @@ -954,7 +960,7 @@ class MDRaid1(BlockDev): the 2.6.18's new array_state thing. """ - return True + pass def Close(self): """Notifies that the device will no longer be used for I/O. @@ -963,7 +969,7 @@ class MDRaid1(BlockDev): `Open()`. """ - return True + pass class BaseDRBD(BlockDev): @@ -1456,9 +1462,9 @@ class DRBDev(BaseDRBD): cmd.append("--do-what-I-say") result = utils.RunCmd(cmd) if result.failed: - logger.Error("Can't make drbd device primary: %s" % result.output) - return False - return True + msg = ("Can't make drbd device primary: %s" % result.output) + logger.Error(msg) + raise errors.BlockDeviceError(msg) def Close(self): """Make the local state secondary. 
@@ -1471,8 +1477,10 @@ class DRBDev(BaseDRBD): raise errors.BlockDeviceError("Can't find device") result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"]) if result.failed: - logger.Error("Can't switch drbd device to secondary: %s" % result.output) - raise errors.BlockDeviceError("Can't switch drbd device to secondary") + msg = ("Can't switch drbd device to" + " secondary: %s" % result.output) + logger.Error(msg) + raise errors.BlockDeviceError(msg) def SetSyncSpeed(self, kbytes): """Set the speed of the DRBD syncer. @@ -2068,9 +2076,9 @@ class DRBD8(BaseDRBD): cmd.append("-o") result = utils.RunCmd(cmd) if result.failed: - logger.Error("Can't make drbd device primary: %s" % result.output) - return False - return True + msg = ("Can't make drbd device primary: %s" % result.output) + logger.Error(msg) + raise errors.BlockDeviceError(msg) def Close(self): """Make the local state secondary. @@ -2083,8 +2091,10 @@ class DRBD8(BaseDRBD): raise errors.BlockDeviceError("Can't find device") result = utils.RunCmd(["drbdsetup", self.dev_path, "secondary"]) if result.failed: - logger.Error("Can't switch drbd device to secondary: %s" % result.output) - raise errors.BlockDeviceError("Can't switch drbd device to secondary") + msg = ("Can't switch drbd device to" + " secondary: %s" % result.output) + logger.Error(msg) + raise errors.BlockDeviceError(msg) def Attach(self): """Find a DRBD device which matches our config and attach to it. 
diff --git a/lib/cmdlib.py b/lib/cmdlib.py index b45a83b94ed2f667c385bbb25df9b53274d980d3..6bddebf5e8ece2370450120d4759a6a8805217ec 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -1860,23 +1860,41 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False): """ device_info = [] disks_ok = True + iname = instance.name + # With the two passes mechanism we try to reduce the window of + # opportunity for the race condition of switching DRBD to primary + # before handshaking occurred, but we do not eliminate it + + # The proper fix would be to wait (with some limits) until the + # connection has been made and drbd transitions from WFConnection + # into any other network-connected state (Connected, SyncTarget, + # SyncSource, etc.) + + # 1st pass, assemble on all nodes in secondary mode for inst_disk in instance.disks: - master_result = None for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): cfg.SetDiskID(node_disk, node) - is_primary = node == instance.primary_node - result = rpc.call_blockdev_assemble(node, node_disk, - instance.name, is_primary) + result = rpc.call_blockdev_assemble(node, node_disk, iname, False) if not result: logger.Error("could not prepare block device %s on node %s" - " (is_primary=%s)" % - (inst_disk.iv_name, node, is_primary)) - if is_primary or not ignore_secondaries: + " (is_primary=False, pass=1)" % (inst_disk.iv_name, node)) + if not ignore_secondaries: disks_ok = False - if is_primary: - master_result = result - device_info.append((instance.primary_node, inst_disk.iv_name, - master_result)) + + # FIXME: race condition on drbd migration to primary + + # 2nd pass, do only the primary node + for inst_disk in instance.disks: + for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): + if node != instance.primary_node: + continue + cfg.SetDiskID(node_disk, node) + result = rpc.call_blockdev_assemble(node, node_disk, iname, True) + if not result: + logger.Error("could not prepare block device %s 
on node %s" + " (is_primary=True, pass=2)" % (inst_disk.iv_name, node)) + disks_ok = False + device_info.append((instance.primary_node, inst_disk.iv_name, result)) # leave the disks configured for the primary node # this is a workaround that would be fixed better by