Commit 85530402 authored by Michele Tartara

Merge branch 'origin/stable-2.8' into stable-2.9

* stable-2.8
  Improve RAPI detection of the watcher
  Add patching QA configuration files on buildbots
  Enable a timeout for instance shutdown
  Allow KVM commands to have a timeout
  Allow xen commands to have a timeout
  Fix wrong docstring

Conflicts:
	lib/hypervisor/hv_xen.py
	test/py/ganeti.hypervisor.hv_xen_unittest.py

Resolution:
  The conflicts were due to a new mandatory parameter hvparams being added
  (in 2.9) to the same functions that received the optional timeout parameter
  (in 2.8). They were resolved by keeping the hvparams in its place, and
  enqueuing the timeout parameter after it.
Signed-off-by: Michele Tartara <mtartara@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parents fa0192b2 6177890b
...@@ -1706,7 +1706,7 @@ def InstanceShutdown(instance, timeout, reason, store_reason=True): ...@@ -1706,7 +1706,7 @@ def InstanceShutdown(instance, timeout, reason, store_reason=True):
return return
try: try:
hyper.StopInstance(instance, retry=self.tried_once) hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
if store_reason: if store_reason:
_StoreInstReasonTrail(instance.name, reason) _StoreInstReasonTrail(instance.name, reason)
except errors.HypervisorError, err: except errors.HypervisorError, err:
......
...@@ -173,7 +173,8 @@ class BaseHypervisor(object): ...@@ -173,7 +173,8 @@ class BaseHypervisor(object):
"""Start an instance.""" """Start an instance."""
raise NotImplementedError raise NotImplementedError
def StopInstance(self, instance, force=False, retry=False, name=None): def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance """Stop an instance
@type instance: L{objects.Instance} @type instance: L{objects.Instance}
...@@ -186,6 +187,10 @@ class BaseHypervisor(object): ...@@ -186,6 +187,10 @@ class BaseHypervisor(object):
@param name: if this parameter is passed, the the instance object @param name: if this parameter is passed, the the instance object
should not be used (will be passed as None), and the shutdown should not be used (will be passed as None), and the shutdown
must be done by name only must be done by name only
@type timeout: int or None
@param timeout: if the parameter is not None, a soft shutdown operation will
be killed after the specified number of seconds. A hard (forced)
shutdown cannot have a timeout
""" """
raise NotImplementedError raise NotImplementedError
......
...@@ -171,7 +171,8 @@ class ChrootManager(hv_base.BaseHypervisor): ...@@ -171,7 +171,8 @@ class ChrootManager(hv_base.BaseHypervisor):
raise HypervisorError("Can't run the chroot start script: %s" % raise HypervisorError("Can't run the chroot start script: %s" %
result.output) result.output)
def StopInstance(self, instance, force=False, retry=False, name=None): def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance. """Stop an instance.
This method has complicated cleanup tests, as we must: This method has complicated cleanup tests, as we must:
...@@ -180,6 +181,8 @@ class ChrootManager(hv_base.BaseHypervisor): ...@@ -180,6 +181,8 @@ class ChrootManager(hv_base.BaseHypervisor):
- finally unmount the instance dir - finally unmount the instance dir
""" """
assert(timeout is None or force is not None)
if name is None: if name is None:
name = instance.name name = instance.name
...@@ -187,9 +190,14 @@ class ChrootManager(hv_base.BaseHypervisor): ...@@ -187,9 +190,14 @@ class ChrootManager(hv_base.BaseHypervisor):
if not os.path.exists(root_dir) or not self._IsDirLive(root_dir): if not os.path.exists(root_dir) or not self._IsDirLive(root_dir):
return return
timeout_cmd = []
if timeout is not None:
timeout_cmd.extend(["timeout", str(timeout)])
# Run the chroot stop script only once # Run the chroot stop script only once
if not retry and not force: if not retry and not force:
result = utils.RunCmd(["chroot", root_dir, "/ganeti-chroot", "stop"]) result = utils.RunCmd(timeout_cmd.extend(["chroot", root_dir,
"/ganeti-chroot", "stop"]))
if result.failed: if result.failed:
raise HypervisorError("Can't run the chroot stop script: %s" % raise HypervisorError("Can't run the chroot stop script: %s" %
result.output) result.output)
......
...@@ -174,13 +174,16 @@ class FakeHypervisor(hv_base.BaseHypervisor): ...@@ -174,13 +174,16 @@ class FakeHypervisor(hv_base.BaseHypervisor):
raise errors.HypervisorError("Failed to start instance %s: %s" % raise errors.HypervisorError("Failed to start instance %s: %s" %
(instance.name, err)) (instance.name, err))
def StopInstance(self, instance, force=False, retry=False, name=None): def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance. """Stop an instance.
For the fake hypervisor, this just removes the file in the base For the fake hypervisor, this just removes the file in the base
dir, if it exist, otherwise we raise an exception. dir, if it exist, otherwise we raise an exception.
""" """
assert(timeout is None or force is not None)
if name is None: if name is None:
name = instance.name name = instance.name
if not self._IsAlive(name): if not self._IsAlive(name):
......
...@@ -1703,10 +1703,15 @@ class KVMHypervisor(hv_base.BaseHypervisor): ...@@ -1703,10 +1703,15 @@ class KVMHypervisor(hv_base.BaseHypervisor):
self._SaveKVMRuntime(instance, kvm_runtime) self._SaveKVMRuntime(instance, kvm_runtime)
self._ExecuteKVMRuntime(instance, kvm_runtime, kvmhelp) self._ExecuteKVMRuntime(instance, kvm_runtime, kvmhelp)
def _CallMonitorCommand(self, instance_name, command): def _CallMonitorCommand(self, instance_name, command, timeout=None):
"""Invoke a command on the instance monitor. """Invoke a command on the instance monitor.
""" """
if timeout is not None:
timeout_cmd = "timeout %s" % (timeout, )
else:
timeout_cmd = ""
# TODO: Replace monitor calls with QMP once KVM >= 0.14 is the minimum # TODO: Replace monitor calls with QMP once KVM >= 0.14 is the minimum
# version. The monitor protocol is designed for human consumption, whereas # version. The monitor protocol is designed for human consumption, whereas
# QMP is made for programmatic usage. In the worst case QMP can also # QMP is made for programmatic usage. In the worst case QMP can also
...@@ -1714,10 +1719,12 @@ class KVMHypervisor(hv_base.BaseHypervisor): ...@@ -1714,10 +1719,12 @@ class KVMHypervisor(hv_base.BaseHypervisor):
# 500ms and likely more: socat can't detect the end of the reply and waits # 500ms and likely more: socat can't detect the end of the reply and waits
# for 500ms of no data received before exiting (500 ms is the default for # for 500ms of no data received before exiting (500 ms is the default for
# the "-t" parameter). # the "-t" parameter).
socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" % socat = ("echo %s | %s %s STDIO UNIX-CONNECT:%s" %
(utils.ShellQuote(command), (utils.ShellQuote(command),
timeout_cmd,
constants.SOCAT_PATH, constants.SOCAT_PATH,
utils.ShellQuote(self._InstanceMonitor(instance_name)))) utils.ShellQuote(self._InstanceMonitor(instance_name))))
result = utils.RunCmd(socat) result = utils.RunCmd(socat)
if result.failed: if result.failed:
msg = ("Failed to send command '%s' to instance '%s', reason '%s'," msg = ("Failed to send command '%s' to instance '%s', reason '%s',"
...@@ -1794,10 +1801,13 @@ class KVMHypervisor(hv_base.BaseHypervisor): ...@@ -1794,10 +1801,13 @@ class KVMHypervisor(hv_base.BaseHypervisor):
else: else:
return "pc" return "pc"
def StopInstance(self, instance, force=False, retry=False, name=None): def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance. """Stop an instance.
""" """
assert(timeout is None or force is not None)
if name is not None and not force: if name is not None and not force:
raise errors.HypervisorError("Cannot shutdown cleanly by name only") raise errors.HypervisorError("Cannot shutdown cleanly by name only")
if name is None: if name is None:
...@@ -1810,7 +1820,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): ...@@ -1810,7 +1820,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
if force or not acpi: if force or not acpi:
utils.KillProcess(pid) utils.KillProcess(pid)
else: else:
self._CallMonitorCommand(name, "system_powerdown") self._CallMonitorCommand(name, "system_powerdown", timeout)
def CleanupInstance(self, instance_name): def CleanupInstance(self, instance_name):
"""Cleanup after a stopped instance """Cleanup after a stopped instance
......
...@@ -329,7 +329,8 @@ class LXCHypervisor(hv_base.BaseHypervisor): ...@@ -329,7 +329,8 @@ class LXCHypervisor(hv_base.BaseHypervisor):
raise HypervisorError("Running the lxc-start script failed: %s" % raise HypervisorError("Running the lxc-start script failed: %s" %
result.output) result.output)
def StopInstance(self, instance, force=False, retry=False, name=None): def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance. """Stop an instance.
This method has complicated cleanup tests, as we must: This method has complicated cleanup tests, as we must:
...@@ -338,9 +339,15 @@ class LXCHypervisor(hv_base.BaseHypervisor): ...@@ -338,9 +339,15 @@ class LXCHypervisor(hv_base.BaseHypervisor):
- finally unmount the instance dir - finally unmount the instance dir
""" """
assert(timeout is None or force is not None)
if name is None: if name is None:
name = instance.name name = instance.name
timeout_cmd = []
if timeout is not None:
timeout_cmd.extend(["timeout", str(timeout)])
root_dir = self._InstanceDir(name) root_dir = self._InstanceDir(name)
if not os.path.exists(root_dir): if not os.path.exists(root_dir):
return return
...@@ -353,7 +360,7 @@ class LXCHypervisor(hv_base.BaseHypervisor): ...@@ -353,7 +360,7 @@ class LXCHypervisor(hv_base.BaseHypervisor):
raise HypervisorError("Running 'poweroff' on the instance" raise HypervisorError("Running 'poweroff' on the instance"
" failed: %s" % result.output) " failed: %s" % result.output)
time.sleep(2) time.sleep(2)
result = utils.RunCmd(["lxc-stop", "-n", name]) result = utils.RunCmd(timeout_cmd.extend(["lxc-stop", "-n", name]))
if result.failed: if result.failed:
logging.warning("Error while doing lxc-stop for %s: %s", name, logging.warning("Error while doing lxc-stop for %s: %s", name,
result.output) result.output)
...@@ -362,12 +369,12 @@ class LXCHypervisor(hv_base.BaseHypervisor): ...@@ -362,12 +369,12 @@ class LXCHypervisor(hv_base.BaseHypervisor):
return return
for mpath in self._GetMountSubdirs(root_dir): for mpath in self._GetMountSubdirs(root_dir):
result = utils.RunCmd(["umount", mpath]) result = utils.RunCmd(timeout_cmd.extend(["umount", mpath]))
if result.failed: if result.failed:
logging.warning("Error while umounting subpath %s for instance %s: %s", logging.warning("Error while umounting subpath %s for instance %s: %s",
mpath, name, result.output) mpath, name, result.output)
result = utils.RunCmd(["umount", root_dir]) result = utils.RunCmd(timeout_cmd.extend(["umount", root_dir]))
if result.failed and force: if result.failed and force:
msg = ("Processes still alive in the chroot: %s" % msg = ("Processes still alive in the chroot: %s" %
utils.RunCmd("fuser -vm %s" % root_dir).output) utils.RunCmd("fuser -vm %s" % root_dir).output)
......
...@@ -383,15 +383,23 @@ class XenHypervisor(hv_base.BaseHypervisor): ...@@ -383,15 +383,23 @@ class XenHypervisor(hv_base.BaseHypervisor):
return cmd return cmd
def _RunXen(self, args, hvparams): def _RunXen(self, args, hvparams, timeout=None):
"""Wrapper around L{utils.process.RunCmd} to run Xen command. """Wrapper around L{utils.process.RunCmd} to run Xen command.
@type hvparams: dict of strings @type hvparams: dict of strings
@param hvparams: dictionary of hypervisor params @param hvparams: dictionary of hypervisor params
@type timeout: int or None
@param timeout: if a timeout (in seconds) is specified, the command will be
terminated after that number of seconds.
@see: L{utils.process.RunCmd} @see: L{utils.process.RunCmd}
""" """
cmd = [self._GetCommand(hvparams)] cmd = []
if timeout is not None:
cmd.extend(["timeout", str(timeout)])
cmd.extend([self._GetCommand(hvparams)])
cmd.extend(args) cmd.extend(args)
return self._run_cmd_fn(cmd) return self._run_cmd_fn(cmd)
...@@ -604,26 +612,34 @@ class XenHypervisor(hv_base.BaseHypervisor): ...@@ -604,26 +612,34 @@ class XenHypervisor(hv_base.BaseHypervisor):
(instance.name, result.fail_reason, (instance.name, result.fail_reason,
result.output, stashed_config)) result.output, stashed_config))
def StopInstance(self, instance, force=False, retry=False, name=None): def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance. """Stop an instance.
A soft shutdown can be interrupted. A hard shutdown tries forever.
""" """
assert(timeout is None or force is not None)
if name is None: if name is None:
name = instance.name name = instance.name
return self._StopInstance(name, force, instance.hvparams) return self._StopInstance(name, force, instance.hvparams, timeout)
def _ShutdownInstance(self, name, hvparams): def _ShutdownInstance(self, name, hvparams):
"""Shutdown an instance if the instance is running. """Shutdown an instance if the instance is running.
The '-w' flag waits for shutdown to complete which avoids the need
to poll in the case where we want to destroy the domain
immediately after shutdown.
@type name: string @type name: string
@param name: name of the instance to stop @param name: name of the instance to stop
@type hvparams: dict of string @type hvparams: dict of string
@param hvparams: hypervisor parameters of the instance @param hvparams: hypervisor parameters of the instance
@type timeout: int or None
The '-w' flag waits for shutdown to complete which avoids the need @param timeout: a timeout after which the shutdown command should be killed,
to poll in the case where we want to destroy the domain or None for no timeout
immediately after shutdown.
""" """
instance_info = self.GetInstanceInfo(name, hvparams=hvparams) instance_info = self.GetInstanceInfo(name, hvparams=hvparams)
...@@ -632,7 +648,7 @@ class XenHypervisor(hv_base.BaseHypervisor): ...@@ -632,7 +648,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
logging.info("Failed to shutdown instance %s, not running", name) logging.info("Failed to shutdown instance %s, not running", name)
return None return None
return self._RunXen(["shutdown", "-w", name], hvparams) return self._RunXen(["shutdown", "-w", name], hvparams, timeout)
def _DestroyInstance(self, name, hvparams): def _DestroyInstance(self, name, hvparams):
"""Destroy an instance if the instance if the instance exists. """Destroy an instance if the instance if the instance exists.
...@@ -651,7 +667,7 @@ class XenHypervisor(hv_base.BaseHypervisor): ...@@ -651,7 +667,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
return self._RunXen(["destroy", name], hvparams) return self._RunXen(["destroy", name], hvparams)
def _StopInstance(self, name, force, hvparams): def _StopInstance(self, name, force, hvparams, timeout):
"""Stop an instance. """Stop an instance.
@type name: string @type name: string
...@@ -663,11 +679,15 @@ class XenHypervisor(hv_base.BaseHypervisor): ...@@ -663,11 +679,15 @@ class XenHypervisor(hv_base.BaseHypervisor):
@type hvparams: dict of string @type hvparams: dict of string
@param hvparams: hypervisor parameters of the instance @param hvparams: hypervisor parameters of the instance
@type timeout: int or None
@param timeout: a timeout after which the shutdown command should be killed,
or None for no timeout
""" """
if force: if force:
result = self._DestroyInstance(name, hvparams) result = self._DestroyInstance(name, hvparams)
else: else:
self._ShutdownInstance(name, hvparams) self._ShutdownInstance(name, hvparams, timeout)
result = self._DestroyInstance(name, hvparams) result = self._DestroyInstance(name, hvparams)
if result is not None and result.failed and \ if result is not None and result.failed and \
......
...@@ -329,6 +329,10 @@ def IsRapiResponding(hostname): ...@@ -329,6 +329,10 @@ def IsRapiResponding(hostname):
Connects to RAPI port of hostname and does a simple test. At this time, the Connects to RAPI port of hostname and does a simple test. At this time, the
test is GetVersion. test is GetVersion.
If RAPI responds with error code "401 Unauthorized", the test is successful,
because the aim of this function is to assess whether RAPI is responding, not
if it is accessible.
@type hostname: string @type hostname: string
@param hostname: hostname of the node to connect to. @param hostname: hostname of the node to connect to.
@rtype: bool @rtype: bool
...@@ -344,8 +348,12 @@ def IsRapiResponding(hostname): ...@@ -344,8 +348,12 @@ def IsRapiResponding(hostname):
logging.warning("RAPI certificate error: %s", err) logging.warning("RAPI certificate error: %s", err)
return False return False
except rapi.client.GanetiApiError, err: except rapi.client.GanetiApiError, err:
logging.warning("RAPI error: %s", err) if err.code == 401:
return False # Unauthorized, but RAPI is alive and responding
return True
else:
logging.warning("RAPI error: %s", err)
return False
else: else:
logging.debug("Reported RAPI version %s", master_version) logging.debug("Reported RAPI version %s", master_version)
return master_version == constants.RAPI_VERSION return master_version == constants.RAPI_VERSION
......
...@@ -40,6 +40,10 @@ _VCLUSTER_MASTER_KEY = "vcluster-master" ...@@ -40,6 +40,10 @@ _VCLUSTER_MASTER_KEY = "vcluster-master"
_VCLUSTER_BASEDIR_KEY = "vcluster-basedir" _VCLUSTER_BASEDIR_KEY = "vcluster-basedir"
_ENABLED_DISK_TEMPLATES_KEY = "enabled-disk-templates" _ENABLED_DISK_TEMPLATES_KEY = "enabled-disk-templates"
# The path of an optional JSON Patch file (as per RFC6902) that modifies QA's
# configuration.
_PATCH_JSON = os.path.join(os.path.dirname(__file__), "qa-patch.json")
#: QA configuration (L{_QaConfig}) #: QA configuration (L{_QaConfig})
_config = None _config = None
...@@ -261,6 +265,20 @@ class _QaConfig(object): ...@@ -261,6 +265,20 @@ class _QaConfig(object):
""" """
data = serializer.LoadJson(utils.ReadFile(filename)) data = serializer.LoadJson(utils.ReadFile(filename))
# Patch the document using JSON Patch (RFC6902) in file _PATCH_JSON, if
# available
try:
patch = serializer.LoadJson(utils.ReadFile(_PATCH_JSON))
if patch:
mod = __import__("jsonpatch", fromlist=[])
data = mod.apply_patch(data, patch)
except IOError:
pass
except ImportError:
raise qa_error.Error("If you want to use the QA JSON patching feature,"
" you need to install Python modules"
" 'jsonpatch' and 'jsonpointer'.")
result = cls(dict(map(_ConvertResources, result = cls(dict(map(_ConvertResources,
data.items()))) # pylint: disable=E1103 data.items()))) # pylint: disable=E1103
result.Validate() result.Validate()
......
...@@ -730,7 +730,14 @@ class _TestXenHypervisor(object): ...@@ -730,7 +730,14 @@ class _TestXenHypervisor(object):
extra = inst.hvparams[constants.HV_KERNEL_ARGS] extra = inst.hvparams[constants.HV_KERNEL_ARGS]
self.assertTrue(("extra = '%s'" % extra) in lines) self.assertTrue(("extra = '%s'" % extra) in lines)
def _StopInstanceCommand(self, instance_name, force, fail, cmd): def _StopInstanceCommand(self, instance_name, force, fail, full_cmd):
# Remove the timeout (and its number of seconds) if it's there
if full_cmd[:1][0] == "timeout":
cmd = full_cmd[2:]
else:
cmd = full_cmd
# Test the actual command
if (cmd == [self.CMD, "list"]): if (cmd == [self.CMD, "list"]):
output = "Name ID Mem VCPUs State Time(s)\n" \ output = "Name ID Mem VCPUs State Time(s)\n" \
"Domain-0 0 1023 1 r----- 142691.0\n" \ "Domain-0 0 1023 1 r----- 142691.0\n" \
...@@ -767,7 +774,8 @@ class _TestXenHypervisor(object): ...@@ -767,7 +774,8 @@ class _TestXenHypervisor(object):
if fail: if fail:
try: try:
hv._StopInstance(name, force, None) hv._StopInstance(name, force, None,
constants.DEFAULT_SHUTDOWN_TIMEOUT)
except errors.HypervisorError, err: except errors.HypervisorError, err:
self.assertTrue(str(err).startswith("listing instances failed"), self.assertTrue(str(err).startswith("listing instances failed"),
msg=str(err)) msg=str(err))
...@@ -777,7 +785,8 @@ class _TestXenHypervisor(object): ...@@ -777,7 +785,8 @@ class _TestXenHypervisor(object):
msg=("Configuration was removed when stopping" msg=("Configuration was removed when stopping"
" instance failed")) " instance failed"))
else: else:
hv._StopInstance(name, force, None) hv._StopInstance(name, force, None,
constants.DEFAULT_SHUTDOWN_TIMEOUT)
self.assertFalse(os.path.exists(cfgfile)) self.assertFalse(os.path.exists(cfgfile))
def _MigrateNonRunningInstCmd(self, cmd): def _MigrateNonRunningInstCmd(self, cmd):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment