Commit 874f6148 authored by Michele Tartara's avatar Michele Tartara

Enable a timeout for instance shutdown

Add the timeout parameter to the StopInstance function of the hypervisor base
class and to all its implementations.

Also, change the tests as required by this change.
Signed-off-by: default avatarMichele Tartara <mtartara@google.com>
Reviewed-by: default avatarKlaus Aehlig <aehlig@google.com>
parent 2a2d087a
......@@ -1394,7 +1394,7 @@ def InstanceShutdown(instance, timeout, reason, store_reason=True):
return
try:
hyper.StopInstance(instance, retry=self.tried_once)
hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
if store_reason:
_StoreInstReasonTrail(instance.name, reason)
except errors.HypervisorError, err:
......
......@@ -173,7 +173,8 @@ class BaseHypervisor(object):
"""Start an instance."""
raise NotImplementedError
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance
@type instance: L{objects.Instance}
......@@ -186,6 +187,10 @@ class BaseHypervisor(object):
@param name: if this parameter is passed, the the instance object
should not be used (will be passed as None), and the shutdown
must be done by name only
@type timeout: int or None
@param timeout: if the parameter is not None, a soft shutdown operation will
be killed after the specified number of seconds. A hard (forced)
shutdown cannot have a timeout
"""
raise NotImplementedError
......
......@@ -167,7 +167,8 @@ class ChrootManager(hv_base.BaseHypervisor):
raise HypervisorError("Can't run the chroot start script: %s" %
result.output)
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
This method has complicated cleanup tests, as we must:
......@@ -176,6 +177,8 @@ class ChrootManager(hv_base.BaseHypervisor):
- finally unmount the instance dir
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
......@@ -183,9 +186,14 @@ class ChrootManager(hv_base.BaseHypervisor):
if not os.path.exists(root_dir) or not self._IsDirLive(root_dir):
return
timeout_cmd = []
if timeout is not None:
timeout_cmd.extend(["timeout", str(timeout)])
# Run the chroot stop script only once
if not retry and not force:
result = utils.RunCmd(["chroot", root_dir, "/ganeti-chroot", "stop"])
result = utils.RunCmd(timeout_cmd.extend(["chroot", root_dir,
"/ganeti-chroot", "stop"]))
if result.failed:
raise HypervisorError("Can't run the chroot stop script: %s" %
result.output)
......
......@@ -165,13 +165,16 @@ class FakeHypervisor(hv_base.BaseHypervisor):
raise errors.HypervisorError("Failed to start instance %s: %s" %
(instance.name, err))
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
For the fake hypervisor, this just removes the file in the base
dir, if it exist, otherwise we raise an exception.
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
if not self._IsAlive(name):
......
......@@ -1696,10 +1696,12 @@ class KVMHypervisor(hv_base.BaseHypervisor):
# 500ms and likely more: socat can't detect the end of the reply and waits
# for 500ms of no data received before exiting (500 ms is the default for
# the "-t" parameter).
socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
socat = ("echo %s | %s %s STDIO UNIX-CONNECT:%s" %
(utils.ShellQuote(command),
timeout_cmd,
constants.SOCAT_PATH,
utils.ShellQuote(self._InstanceMonitor(instance_name))))
result = utils.RunCmd(socat)
if result.failed:
msg = ("Failed to send command '%s' to instance '%s', reason '%s',"
......@@ -1776,10 +1778,13 @@ class KVMHypervisor(hv_base.BaseHypervisor):
else:
return "pc"
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
"""
assert(timeout is None or force is not None)
if name is not None and not force:
raise errors.HypervisorError("Cannot shutdown cleanly by name only")
if name is None:
......@@ -1792,7 +1797,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
if force or not acpi:
utils.KillProcess(pid)
else:
self._CallMonitorCommand(name, "system_powerdown")
self._CallMonitorCommand(name, "system_powerdown", timeout)
def CleanupInstance(self, instance_name):
"""Cleanup after a stopped instance
......
......@@ -325,7 +325,8 @@ class LXCHypervisor(hv_base.BaseHypervisor):
raise HypervisorError("Running the lxc-start script failed: %s" %
result.output)
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
This method has complicated cleanup tests, as we must:
......@@ -334,9 +335,15 @@ class LXCHypervisor(hv_base.BaseHypervisor):
- finally unmount the instance dir
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
timeout_cmd = []
if timeout is not None:
timeout_cmd.extend(["timeout", str(timeout)])
root_dir = self._InstanceDir(name)
if not os.path.exists(root_dir):
return
......@@ -349,7 +356,7 @@ class LXCHypervisor(hv_base.BaseHypervisor):
raise HypervisorError("Running 'poweroff' on the instance"
" failed: %s" % result.output)
time.sleep(2)
result = utils.RunCmd(["lxc-stop", "-n", name])
result = utils.RunCmd(timeout_cmd.extend(["lxc-stop", "-n", name]))
if result.failed:
logging.warning("Error while doing lxc-stop for %s: %s", name,
result.output)
......@@ -358,12 +365,12 @@ class LXCHypervisor(hv_base.BaseHypervisor):
return
for mpath in self._GetMountSubdirs(root_dir):
result = utils.RunCmd(["umount", mpath])
result = utils.RunCmd(timeout_cmd.extend(["umount", mpath]))
if result.failed:
logging.warning("Error while umounting subpath %s for instance %s: %s",
mpath, name, result.output)
result = utils.RunCmd(["umount", root_dir])
result = utils.RunCmd(timeout_cmd.extend(["umount", root_dir]))
if result.failed and force:
msg = ("Processes still alive in the chroot: %s" %
utils.RunCmd("fuser -vm %s" % root_dir).output)
......
......@@ -519,16 +519,21 @@ class XenHypervisor(hv_base.BaseHypervisor):
(instance.name, result.fail_reason,
result.output, stashed_config))
def StopInstance(self, instance, force=False, retry=False, name=None):
def StopInstance(self, instance, force=False, retry=False, name=None,
timeout=None):
"""Stop an instance.
A soft shutdown can be interrupted. A hard shutdown tries forever.
"""
assert(timeout is None or force is not None)
if name is None:
name = instance.name
return self._StopInstance(name, force)
return self._StopInstance(name, force, timeout)
def _ShutdownInstance(self, name):
def _ShutdownInstance(self, name, timeout):
"""Shutdown an instance if the instance is running.
The '-w' flag waits for shutdown to complete which avoids the need
......@@ -537,6 +542,9 @@ class XenHypervisor(hv_base.BaseHypervisor):
@type name: string
@param name: name of the instance to stop
@type timeout: int or None
@param timeout: a timeout after which the shutdown command should be killed,
or None for no timeout
"""
instance_info = self.GetInstanceInfo(name)
......@@ -545,7 +553,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
logging.info("Failed to shutdown instance %s, not running", name)
return None
return self._RunXen(["shutdown", "-w", name])
return self._RunXen(["shutdown", "-w", name], timeout)
def _DestroyInstance(self, name):
"""Destroy an instance if the instance if the instance exists.
......@@ -562,7 +570,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
return self._RunXen(["destroy", name])
def _StopInstance(self, name, force):
def _StopInstance(self, name, force, timeout):
"""Stop an instance.
@type name: string
......@@ -571,11 +579,15 @@ class XenHypervisor(hv_base.BaseHypervisor):
@type force: boolean
@param force: whether to do a "hard" stop (destroy)
@type timeout: int or None
@param timeout: a timeout after which the shutdown command should be killed,
or None for no timeout
"""
if force:
result = self._DestroyInstance(name)
else:
self._ShutdownInstance(name)
self._ShutdownInstance(name, timeout)
result = self._DestroyInstance(name)
if result is not None and result.failed and \
......
......@@ -555,7 +555,14 @@ class _TestXenHypervisor(object):
extra = inst.hvparams[constants.HV_KERNEL_ARGS]
self.assertTrue(("extra = '%s'" % extra) in lines)
def _StopInstanceCommand(self, instance_name, force, fail, cmd):
def _StopInstanceCommand(self, instance_name, force, fail, full_cmd):
# Remove the timeout (and its number of seconds) if it's there
if full_cmd[:1][0] == "timeout":
cmd = full_cmd[2:]
else:
cmd = full_cmd
# Test the actual command
if (cmd == [self.CMD, "list"]):
output = "Name ID Mem VCPUs State Time(s)\n" \
"Domain-0 0 1023 1 r----- 142691.0\n" \
......@@ -592,7 +599,7 @@ class _TestXenHypervisor(object):
if fail:
try:
hv._StopInstance(name, force)
hv._StopInstance(name, force, constants.DEFAULT_SHUTDOWN_TIMEOUT)
except errors.HypervisorError, err:
self.assertTrue(str(err).startswith("xm list failed"),
msg=str(err))
......@@ -602,7 +609,7 @@ class _TestXenHypervisor(object):
msg=("Configuration was removed when stopping"
" instance failed"))
else:
hv._StopInstance(name, force)
hv._StopInstance(name, force, constants.DEFAULT_SHUTDOWN_TIMEOUT)
self.assertFalse(os.path.exists(cfgfile))
def _MigrateNonRunningInstCmd(self, cmd):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment