Commit 85530402 authored by Michele Tartara

Merge branch 'origin/stable-2.8' into stable-2.9

* stable-2.8
  Improve RAPI detection of the watcher
  Add patching QA configuration files on buildbots
  Enable a timeout for instance shutdown
  Allow KVM commands to have a timeout
  Allow xen commands to have a timeout
  Fix wrong docstring

Conflicts:
	lib/hypervisor/hv_xen.py
	test/py/ganeti.hypervisor.hv_xen_unittest.py

Resolution:
  The conflicts were due to a new mandatory parameter, hvparams, being added
  (in 2.9) to the same functions that received the optional timeout parameter
  (in 2.8). They were resolved by keeping hvparams in its place and
  appending the timeout parameter after it.
Signed-off-by: Michele Tartara <mtartara@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parents fa0192b2 6177890b
......@@ -1706,7 +1706,7 @@ def InstanceShutdown(instance, timeout, reason, store_reason=True):
return
try:
hyper.StopInstance(instance, retry=self.tried_once)
hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
if store_reason:
_StoreInstReasonTrail(instance.name, reason)
except errors.HypervisorError, err:
......
......@@ -173,7 +173,8 @@ class BaseHypervisor(object):
"""Start an instance."""
raise NotImplementedError
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance
@type instance: L{objects.Instance}
......@@ -186,6 +187,10 @@ class BaseHypervisor(object):
@param name: if this parameter is passed, the instance object
should not be used (will be passed as None), and the shutdown
must be done by name only
@type timeout: int or None
@param timeout: if the parameter is not None, a soft shutdown operation will
be killed after the specified number of seconds. A hard (forced)
shutdown cannot have a timeout
"""
raise NotImplementedError
......
......@@ -171,7 +171,8 @@ class ChrootManager(hv_base.BaseHypervisor):
raise HypervisorError("Can't run the chroot start script: %s" %
result.output)
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
This method has complicated cleanup tests, as we must:
......@@ -180,6 +181,8 @@ class ChrootManager(hv_base.BaseHypervisor):
- finally unmount the instance dir
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
......@@ -187,9 +190,14 @@ class ChrootManager(hv_base.BaseHypervisor):
if not os.path.exists(root_dir) or not self._IsDirLive(root_dir):
return
timeout_cmd = []
if timeout is not None:
timeout_cmd.extend(["timeout", str(timeout)])
# Run the chroot stop script only once
if not retry and not force:
result = utils.RunCmd(["chroot", root_dir, "/ganeti-chroot", "stop"])
result = utils.RunCmd(timeout_cmd.extend(["chroot", root_dir,
"/ganeti-chroot", "stop"]))
if result.failed:
raise HypervisorError("Can't run the chroot stop script: %s" %
result.output)
......
......@@ -174,13 +174,16 @@ class FakeHypervisor(hv_base.BaseHypervisor):
raise errors.HypervisorError("Failed to start instance %s: %s" %
(instance.name, err))
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
For the fake hypervisor, this just removes the file in the base
dir, if it exists, otherwise we raise an exception.
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
if not self._IsAlive(name):
......
......@@ -1703,10 +1703,15 @@ class KVMHypervisor(hv_base.BaseHypervisor):
self._SaveKVMRuntime(instance, kvm_runtime)
self._ExecuteKVMRuntime(instance, kvm_runtime, kvmhelp)
def _CallMonitorCommand(self, instance_name, command):
def _CallMonitorCommand(self, instance_name, command, timeout=None):
"""Invoke a command on the instance monitor.
"""
if timeout is not None:
timeout_cmd = "timeout %s" % (timeout, )
else:
timeout_cmd = ""
# TODO: Replace monitor calls with QMP once KVM >= 0.14 is the minimum
# version. The monitor protocol is designed for human consumption, whereas
# QMP is made for programmatic usage. In the worst case QMP can also
......@@ -1714,10 +1719,12 @@ class KVMHypervisor(hv_base.BaseHypervisor):
# 500ms and likely more: socat can't detect the end of the reply and waits
# for 500ms of no data received before exiting (500 ms is the default for
# the "-t" parameter).
socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
socat = ("echo %s | %s %s STDIO UNIX-CONNECT:%s" %
(utils.ShellQuote(command),
timeout_cmd,
constants.SOCAT_PATH,
utils.ShellQuote(self._InstanceMonitor(instance_name))))
result = utils.RunCmd(socat)
if result.failed:
msg = ("Failed to send command '%s' to instance '%s', reason '%s',"
......@@ -1794,10 +1801,13 @@ class KVMHypervisor(hv_base.BaseHypervisor):
else:
return "pc"
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
"""
assert(timeout is None or force is not None)
if name is not None and not force:
raise errors.HypervisorError("Cannot shutdown cleanly by name only")
if name is None:
......@@ -1810,7 +1820,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
if force or not acpi:
utils.KillProcess(pid)
else:
self._CallMonitorCommand(name, "system_powerdown")
self._CallMonitorCommand(name, "system_powerdown", timeout)
def CleanupInstance(self, instance_name):
"""Cleanup after a stopped instance
......
......@@ -329,7 +329,8 @@ class LXCHypervisor(hv_base.BaseHypervisor):
raise HypervisorError("Running the lxc-start script failed: %s" %
result.output)
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
This method has complicated cleanup tests, as we must:
......@@ -338,9 +339,15 @@ class LXCHypervisor(hv_base.BaseHypervisor):
- finally unmount the instance dir
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
timeout_cmd = []
if timeout is not None:
timeout_cmd.extend(["timeout", str(timeout)])
root_dir = self._InstanceDir(name)
if not os.path.exists(root_dir):
return
......@@ -353,7 +360,7 @@ class LXCHypervisor(hv_base.BaseHypervisor):
raise HypervisorError("Running 'poweroff' on the instance"
" failed: %s" % result.output)
time.sleep(2)
result = utils.RunCmd(["lxc-stop", "-n", name])
result = utils.RunCmd(timeout_cmd.extend(["lxc-stop", "-n", name]))
if result.failed:
logging.warning("Error while doing lxc-stop for %s: %s", name,
result.output)
......@@ -362,12 +369,12 @@ class LXCHypervisor(hv_base.BaseHypervisor):
return
for mpath in self._GetMountSubdirs(root_dir):
result = utils.RunCmd(["umount", mpath])
result = utils.RunCmd(timeout_cmd.extend(["umount", mpath]))
if result.failed:
logging.warning("Error while umounting subpath %s for instance %s: %s",
mpath, name, result.output)
result = utils.RunCmd(["umount", root_dir])
result = utils.RunCmd(timeout_cmd.extend(["umount", root_dir]))
if result.failed and force:
msg = ("Processes still alive in the chroot: %s" %
utils.RunCmd("fuser -vm %s" % root_dir).output)
......
......@@ -383,15 +383,23 @@ class XenHypervisor(hv_base.BaseHypervisor):
return cmd
def _RunXen(self, args, hvparams):
def _RunXen(self, args, hvparams, timeout=None):
"""Wrapper around L{utils.process.RunCmd} to run Xen command.
@type hvparams: dict of strings
@param hvparams: dictionary of hypervisor params
@type timeout: int or None
@param timeout: if a timeout (in seconds) is specified, the command will be
terminated after that number of seconds.
@see: L{utils.process.RunCmd}
"""
cmd = [self._GetCommand(hvparams)]
cmd = []
if timeout is not None:
cmd.extend(["timeout", str(timeout)])
cmd.extend([self._GetCommand(hvparams)])
cmd.extend(args)
return self._run_cmd_fn(cmd)
......@@ -604,26 +612,34 @@ class XenHypervisor(hv_base.BaseHypervisor):
(instance.name, result.fail_reason,
result.output, stashed_config))
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
A soft shutdown can be interrupted. A hard shutdown tries forever.
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
return self._StopInstance(name, force, instance.hvparams)
return self._StopInstance(name, force, instance.hvparams, timeout)
def _ShutdownInstance(self, name, hvparams):
"""Shutdown an instance if the instance is running.
The '-w' flag waits for shutdown to complete which avoids the need
to poll in the case where we want to destroy the domain
immediately after shutdown.
@type name: string
@param name: name of the instance to stop
@type hvparams: dict of string
@param hvparams: hypervisor parameters of the instance
The '-w' flag waits for shutdown to complete which avoids the need
to poll in the case where we want to destroy the domain
immediately after shutdown.
@type timeout: int or None
@param timeout: a timeout after which the shutdown command should be killed,
or None for no timeout
"""
instance_info = self.GetInstanceInfo(name, hvparams=hvparams)
......@@ -632,7 +648,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
logging.info("Failed to shutdown instance %s, not running", name)
return None
return self._RunXen(["shutdown", "-w", name], hvparams)
return self._RunXen(["shutdown", "-w", name], hvparams, timeout)
def _DestroyInstance(self, name, hvparams):
"""Destroy an instance if the instance exists.
......@@ -651,7 +667,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
return self._RunXen(["destroy", name], hvparams)
def _StopInstance(self, name, force, hvparams):
def _StopInstance(self, name, force, hvparams, timeout):
"""Stop an instance.
@type name: string
......@@ -663,11 +679,15 @@ class XenHypervisor(hv_base.BaseHypervisor):
@type hvparams: dict of string
@param hvparams: hypervisor parameters of the instance
@type timeout: int or None
@param timeout: a timeout after which the shutdown command should be killed,
or None for no timeout
"""
if force:
result = self._DestroyInstance(name, hvparams)
else:
self._ShutdownInstance(name, hvparams)
self._ShutdownInstance(name, hvparams, timeout)
result = self._DestroyInstance(name, hvparams)
if result is not None and result.failed and \
......
......@@ -329,6 +329,10 @@ def IsRapiResponding(hostname):
Connects to RAPI port of hostname and does a simple test. At this time, the
test is GetVersion.
If RAPI responds with error code "401 Unauthorized", the test is successful,
because the aim of this function is to assess whether RAPI is responding, not
if it is accessible.
@type hostname: string
@param hostname: hostname of the node to connect to.
@rtype: bool
......@@ -344,8 +348,12 @@ def IsRapiResponding(hostname):
logging.warning("RAPI certificate error: %s", err)
return False
except rapi.client.GanetiApiError, err:
logging.warning("RAPI error: %s", err)
return False
if err.code == 401:
# Unauthorized, but RAPI is alive and responding
return True
else:
logging.warning("RAPI error: %s", err)
return False
else:
logging.debug("Reported RAPI version %s", master_version)
return master_version == constants.RAPI_VERSION
......
......@@ -40,6 +40,10 @@ _VCLUSTER_MASTER_KEY = "vcluster-master"
_VCLUSTER_BASEDIR_KEY = "vcluster-basedir"
_ENABLED_DISK_TEMPLATES_KEY = "enabled-disk-templates"
# The path of an optional JSON Patch file (as per RFC6902) that modifies QA's
# configuration.
_PATCH_JSON = os.path.join(os.path.dirname(__file__), "qa-patch.json")
#: QA configuration (L{_QaConfig})
_config = None
......@@ -261,6 +265,20 @@ class _QaConfig(object):
"""
data = serializer.LoadJson(utils.ReadFile(filename))
# Patch the document using JSON Patch (RFC6902) in file _PATCH_JSON, if
# available
try:
patch = serializer.LoadJson(utils.ReadFile(_PATCH_JSON))
if patch:
mod = __import__("jsonpatch", fromlist=[])
data = mod.apply_patch(data, patch)
except IOError:
pass
except ImportError:
raise qa_error.Error("If you want to use the QA JSON patching feature,"
" you need to install Python modules"
" 'jsonpatch' and 'jsonpointer'.")
result = cls(dict(map(_ConvertResources,
data.items()))) # pylint: disable=E1103
result.Validate()
......
......@@ -730,7 +730,14 @@ class _TestXenHypervisor(object):
extra = inst.hvparams[constants.HV_KERNEL_ARGS]
self.assertTrue(("extra = '%s'" % extra) in lines)
def _StopInstanceCommand(self, instance_name, force, fail, cmd):
def _StopInstanceCommand(self, instance_name, force, fail, full_cmd):
# Remove the timeout (and its number of seconds) if it's there
if full_cmd[:1][0] == "timeout":
cmd = full_cmd[2:]
else:
cmd = full_cmd
# Test the actual command
if (cmd == [self.CMD, "list"]):
output = "Name ID Mem VCPUs State Time(s)\n" \
"Domain-0 0 1023 1 r----- 142691.0\n" \
......@@ -767,7 +774,8 @@ class _TestXenHypervisor(object):
if fail:
try:
hv._StopInstance(name, force, None)
hv._StopInstance(name, force, None,
constants.DEFAULT_SHUTDOWN_TIMEOUT)
except errors.HypervisorError, err:
self.assertTrue(str(err).startswith("listing instances failed"),
msg=str(err))
......@@ -777,7 +785,8 @@ class _TestXenHypervisor(object):
msg=("Configuration was removed when stopping"
" instance failed"))
else:
hv._StopInstance(name, force, None)
hv._StopInstance(name, force, None,
constants.DEFAULT_SHUTDOWN_TIMEOUT)
self.assertFalse(os.path.exists(cfgfile))
def _MigrateNonRunningInstCmd(self, cmd):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment