Commit 4993ee12 authored by Aaron Karper

Retry thrice if a qa times out

Since the QA opens a lot of connections, it might happen that one of
them times out for reasons unrelated to Ganeti. This patch allows the QA
to retry the command in that case.
Signed-off-by: Aaron Karper <akarper@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parent 75ad2861
......@@ -230,3 +230,23 @@ def SimpleRetry(expected, fn, delay, timeout, args=None, wait_fn=time.sleep,
assert "result" in rdict
result = rdict["result"]
return result
def CountRetry(expected, fn, count, args=None):
  """Run C{fn} through L{SimpleRetry} with a counter standing in for the clock.

  L{SimpleRetry} is called with a delay of 1 and C{count} as its timeout.
  Instead of real time it is handed two local helpers: the wait function adds
  the requested delay to an internal counter, and the time function returns
  that counter. No actual sleeping is done by this wrapper, and the retry
  budget is expressed in steps rather than seconds. How many calls of C{fn}
  a given C{count} results in is decided by L{Retry}'s loop, which is not
  part of this function.

  @param expected: passed unchanged to L{SimpleRetry}
  @param fn: passed unchanged to L{SimpleRetry}
  @param count: passed to L{SimpleRetry} as its timeout, measured in counter
      steps
  @param args: passed unchanged to L{SimpleRetry} as C{args}
  @return: whatever L{SimpleRetry} returns
  @see: L{Retry}

  """
  # A dict is used as a mutable cell so the nested helpers can update the
  # counter without needing "nonlocal".
  counter = {"tries": 0}

  def _ReadCounter():
    """Stand-in for the clock: report the steps accumulated so far."""
    return counter["tries"]

  def _AdvanceCounter(step):
    """Stand-in for sleeping: account for the delay without waiting."""
    counter["tries"] += step

  return SimpleRetry(expected, fn, 1, count, args=args,
                     wait_fn=_AdvanceCounter, _time_fn=_ReadCounter)
......@@ -72,6 +72,9 @@ _RUN_UUID = utils.NewUUID()
# Value returned by pathutils.GetLogFilename for "qa-output"; presumably the
# path of the QA output log -- confirm against pathutils.
_QA_OUTPUT = pathutils.GetLogFilename("qa-output")
# Count handed to utils.CountRetry by GetCommandOutput when re-running a
# command whose result looks like a timeout.
_RETRIES = 3
# Two distinct integer markers (500 and 501); presumably instance states --
# confirm against their users.
(INST_DOWN,
INST_UP) = range(500, 502)
......@@ -344,6 +347,13 @@ def _GetCommandStdout(proc):
return out
def _NoTimeout(state):
  """Tell whether a command result does not look like a timeout.

  @param state: pair of C{(rcode, out)}: the command's return code and its
      captured output
  @return: C{True} if the return code is 0, or if the output contains neither
      C{TimeoutError} nor C{timed out}; C{False} otherwise

  """
  rcode, out = state
  # A successful command is never treated as a timeout, whatever it printed.
  if rcode == 0:
    return True
  return 'TimeoutError' not in out and 'timed out' not in out
def GetCommandOutput(node, cmd, tty=False, use_multiplexer=True, log_cmd=True,
fail=False):
"""Returns the output of a command executed on the given node.
......@@ -363,11 +373,17 @@ def GetCommandOutput(node, cmd, tty=False, use_multiplexer=True, log_cmd=True,
@param fail: whether the command is expected to fail
"""
assert cmd
p = StartLocalCommand(GetSSHCommand(node, cmd, tty=tty,
use_multiplexer=use_multiplexer),
stdout=subprocess.PIPE, log_cmd=log_cmd)
rcode = p.wait()
out = _GetCommandStdout(p)
def CallCommand():
command = GetSSHCommand(node, cmd, tty=tty,
use_multiplexer=use_multiplexer)
p = StartLocalCommand(command, stdout=subprocess.PIPE, log_cmd=log_cmd)
rcode = p.wait()
out = _GetCommandStdout(p)
return rcode, out
# TODO: make retries configurable
rcode, out = utils.CountRetry(_NoTimeout, CallCommand, _RETRIES)
_AssertRetCode(rcode, fail, cmd, node)
return out
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment