diff --git a/lib/backend.py b/lib/backend.py
index 71d3330cfbeb1a3c9ac57280a1fda4f770a5c947..d79a6f6daa534e4ecd7b24632c3ed8b5ee90c4ac 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -778,7 +778,8 @@ def MirrorAddChildren(parent_cdev, new_cdevs):
     return False
   new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
   if new_bdevs.count(None) > 0:
-    logger.Error("Can't find new device(s) to add")
+    logger.Error("Can't find new device(s) to add: %s:%s" %
+                 (new_bdevs, new_cdevs))
     return False
   parent_bdev.AddChildren(new_bdevs)
   return True
@@ -790,9 +791,12 @@ def MirrorRemoveChildren(parent_cdev, new_cdevs):
   """
   parent_bdev = _RecursiveFindBD(parent_cdev)
   if parent_bdev is None:
+    logger.Error("Can't find parent in remove children: %s" % parent_cdev)
     return False
   new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
   if new_bdevs.count(None) > 0:
+    logger.Error("Can't find some devices while doing remove children: %s %s" %
+                 (new_cdevs, new_bdevs))
     return False
   parent_bdev.RemoveChildren(new_bdevs)
   return True
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 04612d77bb1e1500b8152f45749d23b84f3b96bc..e273ee23989553ea2b83954b709d727c702cd811 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -3336,7 +3336,7 @@ class LUReplaceDisks(LogicalUnit):
   """
   HPATH = "mirrors-replace"
   HTYPE = constants.HTYPE_INSTANCE
-  _OP_REQP = ["instance_name"]
+  _OP_REQP = ["instance_name", "mode", "disks"]

   def BuildHooksEnv(self):
     """Build hooks env.
@@ -3345,6 +3345,7 @@ class LUReplaceDisks(LogicalUnit):

     """
     env = {
+      "MODE": self.op.mode,
       "NEW_SECONDARY": self.op.remote_node,
       "OLD_SECONDARY": self.instance.secondary_nodes[0],
       }
@@ -3366,36 +3367,72 @@ class LUReplaceDisks(LogicalUnit):
                                  self.op.instance_name)
     self.instance = instance

-    if instance.disk_template != constants.DT_REMOTE_RAID1:
+    if instance.disk_template not in constants.DTS_NET_MIRROR:
       raise errors.OpPrereqError("Instance's disk layout is not"
-                                 " remote_raid1.")
+                                 " network mirrored.")

     if len(instance.secondary_nodes) != 1:
       raise errors.OpPrereqError("The instance has a strange layout,"
                                  " expected one secondary but found %d" %
                                  len(instance.secondary_nodes))

+    self.sec_node = instance.secondary_nodes[0]
+
     remote_node = getattr(self.op, "remote_node", None)
-    if remote_node is None:
-      remote_node = instance.secondary_nodes[0]
-    else:
+    if remote_node is not None:
       remote_node = self.cfg.ExpandNodeName(remote_node)
       if remote_node is None:
         raise errors.OpPrereqError("Node '%s' not known" %
                                    self.op.remote_node)
+      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
+    else:
+      self.remote_node_info = None
     if remote_node == instance.primary_node:
       raise errors.OpPrereqError("The specified node is the primary node of"
                                  " the instance.")
+    elif remote_node == self.sec_node:
+      # the user gave the current secondary, switch to
+      # 'no-replace-secondary' mode
+      remote_node = None
+    if (instance.disk_template == constants.DT_REMOTE_RAID1 and
+        self.op.mode != constants.REPLACE_DISK_ALL):
+      raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
+                                 " disks replacement, not individual ones")
+    if instance.disk_template == constants.DT_DRBD8:
+      if self.op.mode == constants.REPLACE_DISK_ALL:
+        raise errors.OpPrereqError("Template 'drbd8' only allows primary or"
+                                   " secondary disk replacement, not"
+                                   " both at once")
+      elif self.op.mode == constants.REPLACE_DISK_PRI:
+        if remote_node is not None:
+          raise errors.OpPrereqError("Template 'drbd8' does not allow changing"
+                                     " the secondary while doing a primary"
+                                     " node disk replacement")
+        self.tgt_node = instance.primary_node
+      elif self.op.mode == constants.REPLACE_DISK_SEC:
+        self.new_node = remote_node  # this can be None, in which case
+                                     # we don't change the secondary
+        self.tgt_node = instance.secondary_nodes[0]
+      else:
+        raise errors.ProgrammerError("Unhandled disk replace mode")
+
+    for name in self.op.disks:
+      if instance.FindDisk(name) is None:
+        raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
+                                   (name, instance.name))
     self.op.remote_node = remote_node

-  def Exec(self, feedback_fn):
+  def _ExecRR1(self, feedback_fn):
     """Replace the disks of an instance.

     """
     instance = self.instance
     iv_names = {}
     # start of work
-    remote_node = self.op.remote_node
+    if self.op.remote_node is None:
+      remote_node = self.sec_node
+    else:
+      remote_node = self.op.remote_node
     cfg = self.cfg
     for dev in instance.disks:
       size = dev.size
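[Annotation: the drbd8 branch of CheckPrereq above reduces to a small decision table from (mode, remote_node) to (tgt_node, new_node). A standalone restatement follows; the helper name pick_target and its tuple return value are illustrative only, not part of the patch.]

    # Condensed sketch of the drbd8 mode validation in CheckPrereq above.
    REPLACE_DISK_PRI = "replace_primary"
    REPLACE_DISK_SEC = "replace_secondary"

    def pick_target(mode, primary, secondary, remote_node):
        """Return (tgt_node, new_node) the way CheckPrereq sets them."""
        if remote_node == secondary:
            remote_node = None  # current secondary given: plain disk replace
        if mode == REPLACE_DISK_PRI:
            if remote_node is not None:
                raise ValueError("can't change the secondary in a primary replace")
            return primary, None
        if mode == REPLACE_DISK_SEC:
            return secondary, remote_node  # new_node may be None (keep secondary)
        raise ValueError("unhandled replace mode")

    assert pick_target(REPLACE_DISK_SEC, "node1", "node2", "node3") == ("node2", "node3")
    assert pick_target(REPLACE_DISK_SEC, "node1", "node2", "node2") == ("node2", None)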
@@ -3479,6 +3516,236 @@ class LUReplaceDisks(LogicalUnit):

     cfg.AddInstance(instance)

+  def _ExecD8DiskOnly(self, feedback_fn):
+    """Replace a disk on the primary or secondary for drbd8.
+
+    The algorithm for replace is quite complicated:
+      - for each disk to be replaced:
+        - create new LVs on the target node with unique names
+        - detach old LVs from the drbd device
+        - rename old LVs to name_replaced.<time_t>
+        - rename new LVs to old LVs
+        - attach the new LVs (with the old names now) to the drbd device
+      - wait for sync across all devices
+      - for each modified disk:
+        - remove old LVs (which have the name name_replaced.<time_t>)
+
+    Failures are not very well handled.
+
+    """
+    instance = self.instance
+    iv_names = {}
+    vgname = self.cfg.GetVGName()
+    # start of work
+    cfg = self.cfg
+    tgt_node = self.tgt_node
+    for dev in instance.disks:
+      if dev.iv_name not in self.op.disks:
+        continue
+      size = dev.size
+      cfg.SetDiskID(dev, tgt_node)
+      lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
+      names = _GenerateUniqueNames(cfg, lv_names)
+      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
+                             logical_id=(vgname, names[0]))
+      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
+                             logical_id=(vgname, names[1]))
+      new_lvs = [lv_data, lv_meta]
+      old_lvs = dev.children
+      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
+      logger.Info("adding new local storage on %s for %s" %
+                  (tgt_node, dev.iv_name))
+      # since we *always* want to create this LV, we use the
+      # _Create...OnPrimary (which forces the creation), even if we
+      # are talking about the secondary node
+      for new_lv in new_lvs:
+        if not _CreateBlockDevOnPrimary(cfg, tgt_node, new_lv,
+                                        _GetInstanceInfoText(instance)):
+          raise errors.OpExecError("Failed to create new LV named '%s' on"
+                                   " node '%s'" %
+                                   (new_lv.logical_id[1], tgt_node))
+
+      if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
+        raise errors.OpExecError("Can't detach drbd from local storage on node"
+                                 " %s for device %s" % (tgt_node, dev.iv_name))
+      dev.children = []
+      cfg.Update(instance)
+
+      # ok, we created the new LVs, so now we know we have the needed
+      # storage; as such, we proceed on the target node to rename
+      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
+      # using the assumption that logical_id == physical_id (which in
+      # turn is the unique_id on that node)
+      temp_suffix = int(time.time())
+      logger.Info("renaming the old LVs on the target node")
+      ren_fn = lambda d, suff: (d.physical_id[0],
+                                d.physical_id[1] + "_replaced-%s" % suff)
+      rlist = [(disk, ren_fn(disk, temp_suffix)) for disk in old_lvs]
+      if not rpc.call_blockdev_rename(tgt_node, rlist):
+        logger.Error("Can't rename old LVs on node %s" % tgt_node)
+        do_change_old = False
+      else:
+        do_change_old = True
+      # now we rename the new LVs to the old LVs
+      logger.Info("renaming the new LVs on the target node")
+      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
+      if not rpc.call_blockdev_rename(tgt_node, rlist):
+        logger.Error("Can't rename new LVs on node %s" % tgt_node)
+      else:
+        for old, new in zip(old_lvs, new_lvs):
+          new.logical_id = old.logical_id
+          cfg.SetDiskID(new, tgt_node)
+
+      if do_change_old:
+        for disk in old_lvs:
+          disk.logical_id = ren_fn(disk, temp_suffix)
+          cfg.SetDiskID(disk, tgt_node)
+
+      # now that the new lvs have the old name, we can add them to the device
+      logger.Info("adding new mirror component on %s" % tgt_node)
+      if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
+        logger.Error("Can't add local storage to drbd!")
+        for new_lv in new_lvs:
+          if not rpc.call_blockdev_remove(tgt_node, new_lv):
+            logger.Error("Can't rollback device %s" % new_lv)
+        return
+
+      dev.children = new_lvs
+      cfg.Update(instance)
+
+    # this can fail as the old devices are degraded and _WaitForSync
+    # does a combined result over all disks, so we don't check its
+    # return value
+    logger.Info("Done changing drbd configs, waiting for sync")
+    _WaitForSync(cfg, instance, unlock=True)
+
+    # so check manually all the devices
+    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
+      cfg.SetDiskID(dev, instance.primary_node)
+      is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
+      if is_degr:
+        raise errors.OpExecError("DRBD device %s is degraded!" % name)
+
+    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
+      logger.Info("remove logical volumes for %s" % name)
+      for lv in old_lvs:
+        cfg.SetDiskID(lv, tgt_node)
+        if not rpc.call_blockdev_remove(tgt_node, lv):
+          logger.Error("Can't cleanup child device, skipping. You need to"
+                       " fix manually!")
+          continue
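[Annotation: the heart of _ExecD8DiskOnly above is the rename swap: the new LVs take over the old LV names, so the drbd device re-attaches to storage under the unique_id it already knows. A minimal standalone sketch of the two rename lists follows; the plain-string LV names are a simplification, since the real code renames (vg, name) physical_id tuples.]

    import time

    def swap_lv_names(old_lvs, new_lvs):
        """Build the two rename lists: retire the old names, then reuse them."""
        suffix = int(time.time())
        retire = [(lv, "%s_replaced-%s" % (lv, suffix)) for lv in old_lvs]
        takeover = [(new, old) for old, new in zip(old_lvs, new_lvs)]
        return retire, takeover

    retire, takeover = swap_lv_names(["sda_data", "sda_meta"],
                                     ["tmp_data", "tmp_meta"])
    # retire:   [("sda_data", "sda_data_replaced-<time_t>"), ...]
    # takeover: [("tmp_data", "sda_data"), ("tmp_meta", "sda_meta")]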
logger.Error("Can't rename old LVs on node %s" % tgt_node) + do_change_old = False + else: + do_change_old = True + # now we rename the new LVs to the old LVs + logger.Info("renaming the new LVs on the target node") + rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)] + if not rpc.call_blockdev_rename(tgt_node, rlist): + logger.Error("Can't rename new LVs on node %s" % tgt_node) + else: + for old, new in zip(old_lvs, new_lvs): + new.logical_id = old.logical_id + cfg.SetDiskID(new, tgt_node) + + if do_change_old: + for disk in old_lvs: + disk.logical_id = ren_fn(disk, temp_suffix) + cfg.SetDiskID(disk, tgt_node) + + # now that the new lvs have the old name, we can add them to the device + logger.Info("adding new mirror component on %s" % tgt_node) + if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs): + logger.Error("Can't add local storage to drbd!") + for new_lv in new_lvs: + if not rpc.call_blockdev_remove(tgt_node, new_lv): + logger.Error("Can't rollback device %s") + return + + dev.children = new_lvs + cfg.Update(instance) + + + # this can fail as the old devices are degraded and _WaitForSync + # does a combined result over all disks, so we don't check its + # return value + logger.Info("Done changing drbd configs, waiting for sync") + _WaitForSync(cfg, instance, unlock=True) + + # so check manually all the devices + for name, (dev, old_lvs, new_lvs) in iv_names.iteritems(): + cfg.SetDiskID(dev, instance.primary_node) + is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5] + if is_degr: + raise errors.OpExecError("DRBD device %s is degraded!" % name) + + for name, (dev, old_lvs, new_lvs) in iv_names.iteritems(): + logger.Info("remove logical volumes for %s" % name) + for lv in old_lvs: + cfg.SetDiskID(lv, tgt_node) + if not rpc.call_blockdev_remove(tgt_node, lv): + logger.Error("Can't cleanup child device, skipping. You need to" + " fix manually!") + continue + + def _ExecD8Secondary(self, feedback_fn): + """Replace the secondary node for drbd8. + + The algorithm for replace is quite complicated: + - for all disks of the instance: + - create new LVs on the new node with same names + - shutdown the drbd device on the old secondary + - disconnect the drbd network on the primary + - create the drbd device on the new secondary + - network attach the drbd on the primary, using an artifice: + the drbd code for Attach() will connect to the network if it + finds a device which is connected to the good local disks but + not network enabled + - wait for sync across all devices + - remove all disks from the old secondary + + Failures are not very well handled. 
+ """ + instance = self.instance + iv_names = {} + vgname = self.cfg.GetVGName() + # start of work + cfg = self.cfg + old_node = self.tgt_node + new_node = self.new_node + pri_node = instance.primary_node + for dev in instance.disks: + size = dev.size + logger.Info("adding new local storage on %s for %s" % + (new_node, dev.iv_name)) + # since we *always* want to create this LV, we use the + # _Create...OnPrimary (which forces the creation), even if we + # are talking about the secondary node + for new_lv in dev.children: + if not _CreateBlockDevOnPrimary(cfg, new_node, new_lv, + _GetInstanceInfoText(instance)): + raise errors.OpExecError("Failed to create new LV named '%s' on" + " node '%s'" % + (new_lv.logical_id[1], new_node)) + + # create new devices on new_node + new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, + logical_id=(pri_node, new_node, + dev.logical_id[2]), + children=dev.children) + if not _CreateBlockDevOnSecondary(cfg, new_node, new_drbd, False, + _GetInstanceInfoText(instance)): + raise errors.OpExecError("Failed to create new DRBD on" + " node '%s'" % new_node) + + # we have new devices, shutdown the drbd on the old secondary + cfg.SetDiskID(dev, old_node) + if not rpc.call_blockdev_shutdown(old_node, dev): + raise errors.OpExecError("Failed to shutdown DRBD on old node") + + # we have new storage, we 'rename' the network on the primary + cfg.SetDiskID(dev, pri_node) + # rename to the ip of the new node + new_uid = list(dev.physical_id) + new_uid[2] = self.remote_node_info.secondary_ip + rlist = [(dev, tuple(new_uid))] + if not rpc.call_blockdev_rename(pri_node, rlist): + raise errors.OpExecError("Can't detach re-attach drbd %s on node" + " %s from %s to %s" % + (dev.iv_name, pri_node, old_node, new_node)) + dev.logical_id = (pri_node, new_node, dev.logical_id[2]) + cfg.SetDiskID(dev, pri_node) + cfg.Update(instance) + + iv_names[dev.iv_name] = (dev, dev.children) + + # this can fail as the old devices are degraded and _WaitForSync + # does a combined result over all disks, so we don't check its + # return value + logger.Info("Done changing drbd configs, waiting for sync") + _WaitForSync(cfg, instance, unlock=True) + + # so check manually all the devices + for name, (dev, old_lvs) in iv_names.iteritems(): + cfg.SetDiskID(dev, pri_node) + is_degr = rpc.call_blockdev_find(pri_node, dev)[5] + if is_degr: + raise errors.OpExecError("DRBD device %s is degraded!" % name) + + for name, (dev, old_lvs) in iv_names.iteritems(): + logger.Info("remove logical volumes for %s" % name) + for lv in old_lvs: + cfg.SetDiskID(lv, old_node) + if not rpc.call_blockdev_remove(old_node, lv): + logger.Error("Can't cleanup child device, skipping. You need to" + " fix manually!") + continue + + def Exec(self, feedback_fn): + """Execute disk replacement. + + This dispatches the disk replacement to the appropriate handler. + + """ + instance = self.instance + if instance.disk_template == constants.DT_REMOTE_RAID1: + fn = self._ExecRR1 + elif instance.disk_template == constants.DT_DRBD8: + if self.op.remote_node is None: + fn = self._ExecD8DiskOnly + else: + fn = self._ExecD8Secondary + else: + raise errors.ProgrammerError("Unhandled disk replacement case") + return fn(feedback_fn) + class LUQueryInstanceData(NoHooksLU): """Query runtime instance data. 
diff --git a/lib/constants.py b/lib/constants.py
index a6d99da8297896ab606daedea09172fe3814644d..2a810b1098e46e615e95cf6c82309ca550b0cebf 100644
--- a/lib/constants.py
+++ b/lib/constants.py
@@ -25,7 +25,7 @@ from ganeti import _autoconf

 # various versions
 CONFIG_VERSION = 3
-PROTOCOL_VERSION = 3
+PROTOCOL_VERSION = 4
 RELEASE_VERSION = _autoconf.PACKAGE_VERSION
 OS_API_VERSION = 5
 EXPORT_VERSION = 0
@@ -92,6 +92,11 @@ LD_DRBD8 = "drbd8"
 # the set of drbd-like disk types
 LDS_DRBD = frozenset([LD_DRBD7, LD_DRBD8])

+# disk replacement mode
+REPLACE_DISK_PRI = "replace_primary"
+REPLACE_DISK_SEC = "replace_secondary"
+REPLACE_DISK_ALL = "replace_all"
+
 # instance creation modes
 INSTANCE_CREATE = "create"
 INSTANCE_IMPORT = "import"
diff --git a/lib/opcodes.py b/lib/opcodes.py
index e06a37ec9349f8bdd1ee2ec5fb923f53eaa3879f..43cefaeea942cb28495fbcd3e4ffd2fdb54af5e9 100644
--- a/lib/opcodes.py
+++ b/lib/opcodes.py
@@ -193,7 +193,7 @@ class OpRemoveMDDRBDComponent(OpCode):
 class OpReplaceDisks(OpCode):
   """Replace the disks of an instance."""
   OP_ID = "OP_INSTANCE_REPLACE_DISKS"
-  __slots__ = ["instance_name", "remote_node"]
+  __slots__ = ["instance_name", "remote_node", "mode", "disks"]

 class OpFailoverInstance(OpCode):
diff --git a/scripts/gnt-instance b/scripts/gnt-instance
index e1bb5af9b4bf1329fd8018a5ec0fe61e353024b5..956b51337620667793f5f58b1b4bd0cb8ce678c9 100755
--- a/scripts/gnt-instance
+++ b/scripts/gnt-instance
@@ -431,9 +431,23 @@ def ReplaceDisks(opts, args):
   """
   instance_name = args[0]
-  new_secondary = opts.new_secondary
-  op = opcodes.OpReplaceDisks(instance_name=args[0],
-                              remote_node=opts.new_secondary)
+  new_2ndary = opts.new_secondary
+  if opts.disks is None:
+    disks = ["sda", "sdb"]
+  else:
+    disks = opts.disks.split(",")
+  if opts.on_primary == opts.on_secondary:  # no -p or -s passed, or both passed
+    mode = constants.REPLACE_DISK_ALL
+  elif opts.on_primary:  # only on primary
+    mode = constants.REPLACE_DISK_PRI
+    if new_2ndary is not None:
+      raise errors.OpPrereqError("Can't change secondary node on primary disk"
+                                 " replacement")
+  elif opts.on_secondary:  # only on secondary
+    mode = constants.REPLACE_DISK_SEC
+
+  op = opcodes.OpReplaceDisks(instance_name=args[0], disks=disks,
+                              remote_node=new_2ndary, mode=mode)
   SubmitOpCode(op)
   return 0
@@ -745,9 +759,21 @@ commands = {
   'replace-disks': (ReplaceDisks, ARGS_ONE,
                     [DEBUG_OPT,
                      make_option("-n", "--new-secondary", dest="new_secondary",
-                                 metavar="NODE",
-                                 help=("New secondary node (if you want to"
-                                       " change the secondary)"))],
+                                 help=("New secondary node (for secondary"
+                                       " node change)"), metavar="NODE"),
+                     make_option("-p", "--on-primary", dest="on_primary",
+                                 default=False, action="store_true",
+                                 help=("Replace the disk(s) on the primary"
+                                       " node (only for the drbd8 template)")),
+                     make_option("-s", "--on-secondary", dest="on_secondary",
+                                 default=False, action="store_true",
+                                 help=("Replace the disk(s) on the secondary"
+                                       " node (only for the drbd8 template)")),
+                     make_option("--disks", dest="disks", default=None,
+                                 help=("Comma-separated list of disks"
+                                       " to replace (e.g. sda) (optional,"
+                                       " defaults to all disks)")),
+                     ],
                     "[-n NODE] <instance>",
                     "Replaces all disks for the instance"),
   'modify': (SetInstanceParms, ARGS_ONE,
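[Annotation: for reference, a sketch of how a client would build the extended opcode after this change. The instance and disk names are hypothetical; the field set matches the new __slots__ and the ReplaceDisks handler above, and the script itself submits the result through SubmitOpCode(op) as shown in the hunk.]

    from ganeti import constants, opcodes

    # Equivalent of: gnt-instance replace-disks -p --disks sda instance1.example.com
    op = opcodes.OpReplaceDisks(instance_name="instance1.example.com",
                                mode=constants.REPLACE_DISK_PRI,
                                disks=["sda"],
                                remote_node=None)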