diff --git a/NEWS b/NEWS index 8011557ea6032707c98fe1d5ec7c872ac45fd181..5ec45d873ba2a5f7ea0473c4918847a9283fab8c 100644 --- a/NEWS +++ b/NEWS @@ -12,7 +12,7 @@ Version 2.2.0 Version 2.1.3 ------------- -*(Unreleased)* +*(Released Thu, 3 Jun 2010)* A medium sized development cycle. Some new features, and some fixes/small improvements/cleanups. @@ -25,7 +25,7 @@ The node deamon now tries to mlock itself into memory, unless the its logs, and falls back to console logging. This allows emergency features such as ``gnt-node powercycle`` to work even in the event of a broken node disk (tested offlining the disk hosting the node's -filesystem and dropping its memory caches. don't try this at home) +filesystem and dropping its memory caches; don't try this at home) KVM: add vhost-net acceleration support. It can be tested with a new enough version of the kernel and of qemu-kvm. @@ -48,8 +48,8 @@ this feature, before using it, by checking for its presence in the ``features`` RAPI resource. Now with ancient latin support. Try it passing the ``--roman`` option to -``gnt-instance info, gnt-cluster info or gnt-node list`` (requires the -python-roman module to be installed, in order to work). +``gnt-instance info``, ``gnt-cluster info`` or ``gnt-node list`` +(requires the python-roman module to be installed, in order to work). Other changes ~~~~~~~~~~~~~ diff --git a/configure.ac b/configure.ac index a3c5d00b05d06b19e5312cde2b9252ba99ce58a3..f7eb78bf9d12b160e1b0d44dfd41e9b29db26970 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # Configure script for Ganeti m4_define([gnt_version_major], [2]) m4_define([gnt_version_minor], [1]) -m4_define([gnt_version_revision], [2]) -m4_define([gnt_version_suffix], [.1]) +m4_define([gnt_version_revision], [3]) +m4_define([gnt_version_suffix], []) m4_define([gnt_version_full], m4_format([%d.%d.%d%s], gnt_version_major, gnt_version_minor, diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 8687ba7583a02a062a073c68ee969f43e413a56b..7af4bf1c92b9c352054f61f1eb386c71239b2641 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -2565,19 +2565,21 @@ class LURedistributeConfig(NoHooksLU): _RedistributeAncillaryFiles(self) -def _WaitForSync(lu, instance, oneshot=False): +def _WaitForSync(lu, instance, disks=None, oneshot=False): """Sleep and poll for an instance's disk to sync. """ - if not instance.disks: + if not instance.disks or disks is not None and not disks: return True + disks = _ExpandCheckDisks(instance, disks) + if not oneshot: lu.proc.LogInfo("Waiting for instance %s to sync disks." 
% instance.name) node = instance.primary_node - for dev in instance.disks: + for dev in disks: lu.cfg.SetDiskID(dev, node) # TODO: Convert to utils.Retry @@ -2588,7 +2590,7 @@ def _WaitForSync(lu, instance, oneshot=False): max_time = 0 done = True cumul_degraded = False - rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks) + rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks) msg = rstats.fail_msg if msg: lu.LogWarning("Can't get any data from node %s: %s", node, msg) @@ -2603,7 +2605,7 @@ def _WaitForSync(lu, instance, oneshot=False): for i, mstat in enumerate(rstats): if mstat is None: lu.LogWarning("Can't compute data for node %s/%s", - node, instance.disks[i].iv_name) + node, disks[i].iv_name) continue cumul_degraded = (cumul_degraded or @@ -2616,8 +2618,7 @@ def _WaitForSync(lu, instance, oneshot=False): else: rem_time = "no time estimate" lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % - (instance.disks[i].iv_name, mstat.sync_percent, - rem_time)) + (disks[i].iv_name, mstat.sync_percent, rem_time)) # if we're done but degraded, let's do a few small retries, to # make sure we see a stable and not transient situation; therefore @@ -3852,7 +3853,7 @@ class LUActivateInstanceDisks(NoHooksLU): return disks_info -def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, +def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False, ignore_size=False): """Prepare the block devices for an instance. @@ -3862,6 +3863,8 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, @param lu: the logical unit on whose behalf we execute @type instance: L{objects.Instance} @param instance: the instance for whose disks we assemble + @type disks: list of L{objects.Disk} or None + @param disks: which disks to assemble (or all, if None) @type ignore_secondaries: boolean @param ignore_secondaries: if true, errors on secondary nodes won't result in an error return from the function @@ -3877,6 +3880,8 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, device_info = [] disks_ok = True iname = instance.name + disks = _ExpandCheckDisks(instance, disks) + # With the two passes mechanism we try to reduce the window of # opportunity for the race condition of switching DRBD to primary # before handshaking occured, but we do not eliminate it @@ -3887,7 +3892,7 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, # SyncSource, etc.) 
# 1st pass, assemble on all nodes in secondary mode - for inst_disk in instance.disks: + for inst_disk in disks: for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): if ignore_size: node_disk = node_disk.Copy() @@ -3905,7 +3910,7 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, # FIXME: race condition on drbd migration to primary # 2nd pass, do only the primary node - for inst_disk in instance.disks: + for inst_disk in disks: dev_path = None for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): @@ -3930,7 +3935,7 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, # leave the disks configured for the primary node # this is a workaround that would be fixed better by # improving the logical/physical id handling - for disk in instance.disks: + for disk in disks: lu.cfg.SetDiskID(disk, instance.primary_node) return disks_ok, device_info @@ -3985,7 +3990,7 @@ class LUDeactivateInstanceDisks(NoHooksLU): _SafeShutdownInstanceDisks(self, instance) -def _SafeShutdownInstanceDisks(lu, instance): +def _SafeShutdownInstanceDisks(lu, instance, disks=None): """Shutdown block devices of an instance. This function checks if an instance is running, before calling @@ -3993,10 +3998,28 @@ def _SafeShutdownInstanceDisks(lu, instance): """ _CheckInstanceDown(lu, instance, "cannot shutdown disks") - _ShutdownInstanceDisks(lu, instance) + _ShutdownInstanceDisks(lu, instance, disks=disks) + +def _ExpandCheckDisks(instance, disks): + """Return the instance disks selected by the disks list -def _ShutdownInstanceDisks(lu, instance, ignore_primary=False): + @type disks: list of L{objects.Disk} or None + @param disks: selected disks + @rtype: list of L{objects.Disk} + @return: selected instance disks to act on + + """ + if disks is None: + return instance.disks + else: + if not set(disks).issubset(instance.disks): + raise errors.ProgrammerError("Can only act on disks belonging to the" + " target instance") + return disks + + +def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False): """Shutdown block devices of an instance. This does the shutdown on all nodes of the instance. 
@@ -4006,7 +4029,9 @@ def _ShutdownInstanceDisks(lu, instance, ignore_primary=False): """ all_result = True - for disk in instance.disks: + disks = _ExpandCheckDisks(instance, disks) + + for disk in disks: for node, top_disk in disk.ComputeNodeTree(instance.primary_node): lu.cfg.SetDiskID(top_disk, node) result = lu.rpc.call_blockdev_shutdown(node, top_disk) @@ -7990,6 +8015,11 @@ class LUGrowDisk(LogicalUnit): """ instance = self.instance disk = self.disk + + disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk]) + if not disks_ok: + raise errors.OpExecError("Cannot activate block device to grow") + for node in instance.all_nodes: self.cfg.SetDiskID(disk, node) result = self.rpc.call_blockdev_grow(node, disk, self.op.amount) @@ -8005,10 +8035,16 @@ class LUGrowDisk(LogicalUnit): disk.RecordGrow(self.op.amount) self.cfg.Update(instance, feedback_fn) if self.op.wait_for_sync: - disk_abort = not _WaitForSync(self, instance) + disk_abort = not _WaitForSync(self, instance, disks=[disk]) if disk_abort: self.proc.LogWarning("Warning: disk sync-ing has not returned a good" " status.\nPlease check the instance.") + if not instance.admin_up: + _SafeShutdownInstanceDisks(self, instance, disks=[disk]) + elif not instance.admin_up: + self.proc.LogWarning("Not shutting down the disk even if the instance is" + " not supposed to be running because no wait for" + " sync mode was requested.") class LUQueryInstanceData(NoHooksLU): diff --git a/lib/hypervisor/hv_base.py b/lib/hypervisor/hv_base.py index 1465da2401a570e4232604403e295acca1d4dc45..cd9632484217548f9e9461a65d7946d190e355f7 100644 --- a/lib/hypervisor/hv_base.py +++ b/lib/hypervisor/hv_base.py @@ -245,7 +245,7 @@ class BaseHypervisor(object): Since by default we do no preparation, we also don't have anything to do @type instance: L{objects.Instance} - @param instance: instance whose migration is being aborted + @param instance: instance whose migration is being finalized @type info: string/data (opaque) @param info: migration information, from the source node @type success: boolean diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py index 6edc5362cbb3492464b2dc5156a6d5091d7f004f..81499b58412ae9dc3101d43b591141c7babb1568 100644 --- a/lib/hypervisor/hv_kvm.py +++ b/lib/hypervisor/hv_kvm.py @@ -640,26 +640,40 @@ class KVMHypervisor(hv_base.BaseHypervisor): @param incoming: (target_host_ip, port) """ - hvp = instance.hvparams + # Small _ExecuteKVMRuntime hv parameters programming howto: + # - conf_hvp contains the parameters as configured on ganeti. they might + # have changed since the instance started; only use them if the change + # won't affect the inside of the instance (which hasn't been rebooted). + # - up_hvp contains the parameters as they were when the instance was + # started, plus any new parameter which has been added between ganeti + # versions: it is paramount that those default to a value which won't + # affect the inside of the instance as well. + conf_hvp = instance.hvparams name = instance.name self._CheckDown(name) temp_files = [] - kvm_cmd, kvm_nics, hvparams = kvm_runtime + kvm_cmd, kvm_nics, up_hvp = kvm_runtime + up_hvp = objects.FillDict(conf_hvp, up_hvp) - security_model = hvp[constants.HV_SECURITY_MODEL] + # We know it's safe to run as a different user upon migration, so we'll use + # the latest conf, from conf_hvp. 
+ security_model = conf_hvp[constants.HV_SECURITY_MODEL] if security_model == constants.HT_SM_USER: - kvm_cmd.extend(["-runas", hvp[constants.HV_SECURITY_DOMAIN]]) + kvm_cmd.extend(["-runas", conf_hvp[constants.HV_SECURITY_DOMAIN]]) + # We have reasons to believe changing something like the nic driver/type + # upon migration won't exactly fly with the instance kernel, so for nic + # related parameters we'll use up_hvp if not kvm_nics: kvm_cmd.extend(["-net", "none"]) else: tap_extra = "" - nic_type = hvparams[constants.HV_NIC_TYPE] + nic_type = up_hvp[constants.HV_NIC_TYPE] if nic_type == constants.HT_NIC_PARAVIRTUAL: nic_model = "model=virtio" - if hvparams[constants.HV_VHOST_NET]: + if up_hvp[constants.HV_VHOST_NET]: tap_extra = ",vhost=on" else: nic_model = "model=%s" % nic_type @@ -676,7 +690,10 @@ class KVMHypervisor(hv_base.BaseHypervisor): target, port = incoming kvm_cmd.extend(['-incoming', 'tcp:%s:%s' % (target, port)]) - vnc_pwd_file = hvp[constants.HV_VNC_PASSWORD_FILE] + # Changing the vnc password doesn't bother the guest that much. At most it + # will surprise people who connect to it. Whether positively or negatively + # it's debatable. + vnc_pwd_file = conf_hvp[constants.HV_VNC_PASSWORD_FILE] vnc_pwd = None if vnc_pwd_file: try: @@ -685,7 +702,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): raise errors.HypervisorError("Failed to open VNC password file %s: %s" % (vnc_pwd_file, err)) - if hvp[constants.HV_KVM_USE_CHROOT]: + if conf_hvp[constants.HV_KVM_USE_CHROOT]: utils.EnsureDirs([(self._InstanceChrootDir(name), constants.SECURE_DIR_MODE)]) @@ -821,7 +838,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): Stop the incoming mode KVM. @type instance: L{objects.Instance} - @param instance: instance whose migration is being aborted + @param instance: instance whose migration is being finalized """ if success: diff --git a/lib/hypervisor/hv_xen.py b/lib/hypervisor/hv_xen.py index 77f98b0798b4d43445ca50f17fcedd01d60ee7c9..acf5e0b2d65f1afb5ced4b9687fe386d28ed80ee 100644 --- a/lib/hypervisor/hv_xen.py +++ b/lib/hypervisor/hv_xen.py @@ -380,7 +380,7 @@ class XenHypervisor(hv_base.BaseHypervisor): We do nothing on a failure, as we did not change anything at accept time. @type instance: L{objects.Instance} - @param instance: instance whose migration is being aborted + @param instance: instance whose migration is being finalized @type info: string @param info: content of the xen config file on the source node @type success: boolean
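
The cmdlib.py hunks above all hinge on one pattern: the disk-handling helpers (``_WaitForSync``, ``_AssembleInstanceDisks``, ``_ShutdownInstanceDisks``) gain an optional ``disks`` argument defaulting to ``None`` (meaning "all instance disks"), and ``_ExpandCheckDisks`` turns that argument into a validated disk list. A minimal standalone sketch of that contract, using plain strings as stand-ins for Ganeti's ``objects.Disk`` and a local exception in place of ``errors.ProgrammerError``:

    # Sketch only: the disks=None selection pattern introduced by this patch.
    class ProgrammerError(Exception):
        pass

    def expand_check_disks(instance_disks, disks):
        """Return all disks when disks is None, else the validated subset."""
        if disks is None:
            return instance_disks
        if not set(disks).issubset(instance_disks):
            raise ProgrammerError("Can only act on disks belonging to the"
                                  " target instance")
        return disks

    all_disks = ["disk/0", "disk/1"]          # stands in for instance.disks
    # LUGrowDisk-style call: assemble, grow and sync only the grown disk.
    assert expand_check_disks(all_disks, ["disk/1"]) == ["disk/1"]
    # Passing None keeps the pre-patch behaviour of acting on every disk.
    assert expand_check_disks(all_disks, None) == all_disks

The hv_kvm.py hunk relies on ``objects.FillDict(conf_hvp, up_hvp)`` to merge the currently configured hypervisor parameters with the ones saved when the instance was started. Assuming ``FillDict`` behaves as "copy of the first dict, updated with the second", the effect on a parameter added between Ganeti versions can be sketched with plain dicts (the parameter values below are illustrative only):

    def fill_dict(defaults, custom):
        """Assumed FillDict semantics: defaults overridden by custom."""
        ret = defaults.copy()
        ret.update(custom)
        return ret

    conf_hvp = {"nic_type": "e1000", "vhost_net": False}  # current config
    up_hvp = {"nic_type": "paravirtual"}                   # saved at start-up
    merged = fill_dict(conf_hvp, up_hvp)
    # The running instance keeps the NIC type it was started with, while
    # vhost_net (absent from the saved runtime) falls back to the currently
    # configured value instead of being undefined.
    assert merged == {"nic_type": "paravirtual", "vhost_net": False}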