Skip to content
Snippets Groups Projects
Commit 28a6fbc8 authored by Michael Hanselmann's avatar Michael Hanselmann
Browse files

Run ganeti-watcher script from QA code instead of cron.

This makes the tests much more reliable because it avoids race conditions.
It also helps to speed them up a lot.

Reviewed-by: iustinp
parent 7b195d9b
No related branches found
No related tags found
No related merge requests found
...@@ -55,18 +55,14 @@ tests: ...@@ -55,18 +55,14 @@ tests:
instance-import: True instance-import: True
instance-reinstall: True instance-reinstall: True
instance-shutdown: True instance-shutdown: True
instance-automatic-restart: False
instance-consecutive-failures: False
# Make sure not to include the disk(s) required for Dom0 to be included in # Make sure not to include the disk(s) required for Dom0 to be included in
# the volume group used for instances. Otherwise the whole system may stop # the volume group used for instances. Otherwise the whole system may stop
# working until restarted. # working until restarted.
instance-disk-failure: False instance-disk-failure: False
# This test takes up to 6 minutes to complete
instance-automatic-restart: False
# This test takes at least 35 minutes to complete
instance-consecutive-failures: False
# Other settings # Other settings
options: options:
burnin-instances: 2 burnin-instances: 2
......
...@@ -67,47 +67,55 @@ def _XmShutdownInstance(node, name): ...@@ -67,47 +67,55 @@ def _XmShutdownInstance(node, name):
raise qa_error.Error("xm shutdown failed") raise qa_error.Error("xm shutdown failed")
def _ResetWatcherDaemon(node): def _ResetWatcherDaemon():
"""Removes the watcher daemon's state file. """Removes the watcher daemon's state file.
Args: Args:
node: Node to be reset node: Node to be reset
""" """
master = qa_config.GetMasterNode()
cmd = ['rm', '-f', constants.WATCHER_STATEFILE] cmd = ['rm', '-f', constants.WATCHER_STATEFILE]
AssertEqual(StartSSH(node['primary'], AssertEqual(StartSSH(master['primary'],
utils.ShellQuoteArgs(cmd)).wait(), 0)
def _RunWatcherDaemon():
"""Runs the ganeti-watcher daemon on the master node.
"""
master = qa_config.GetMasterNode()
cmd = ['ganeti-watcher', '-d']
AssertEqual(StartSSH(master['primary'],
utils.ShellQuoteArgs(cmd)).wait(), 0) utils.ShellQuoteArgs(cmd)).wait(), 0)
def PrintCronWarning(): def PrintCronWarning():
"""Shows a warning about the required cron job. """Shows a warning about the cron job.
""" """
msg = ("For the following tests it's recommended to turn off the "
"ganeti-watcher cronjob.")
print print
print qa_utils.FormatWarning("The following tests require the cron script " print qa_utils.FormatWarning(msg)
"for ganeti-watcher to be set up.")
def TestInstanceAutomaticRestart(node, instance): def TestInstanceAutomaticRestart(node, instance):
"""Test automatic restart of instance by ganeti-watcher. """Test automatic restart of instance by ganeti-watcher.
Note: takes up to 6 minutes to complete.
""" """
master = qa_config.GetMasterNode() master = qa_config.GetMasterNode()
inst_name = qa_utils.ResolveInstanceName(instance) inst_name = qa_utils.ResolveInstanceName(instance)
_ResetWatcherDaemon(node) _ResetWatcherDaemon()
_XmShutdownInstance(node, inst_name) _XmShutdownInstance(node, inst_name)
# Give it a bit more than five minutes to start again _RunWatcherDaemon()
restart_at = time.time() + 330 time.sleep(5)
# Wait until it's running again if not _InstanceRunning(node, inst_name):
while time.time() <= restart_at: raise qa_error.Error("Daemon didn't restart instance")
if _InstanceRunning(node, inst_name):
break
time.sleep(15)
else:
raise qa_error.Error("Daemon didn't restart instance in time")
cmd = ['gnt-instance', 'info', inst_name] cmd = ['gnt-instance', 'info', inst_name]
AssertEqual(StartSSH(master['primary'], AssertEqual(StartSSH(master['primary'],
...@@ -117,28 +125,23 @@ def TestInstanceAutomaticRestart(node, instance): ...@@ -117,28 +125,23 @@ def TestInstanceAutomaticRestart(node, instance):
def TestInstanceConsecutiveFailures(node, instance): def TestInstanceConsecutiveFailures(node, instance):
"""Test five consecutive instance failures. """Test five consecutive instance failures.
Note: takes at least 35 minutes to complete.
""" """
master = qa_config.GetMasterNode() master = qa_config.GetMasterNode()
inst_name = qa_utils.ResolveInstanceName(instance) inst_name = qa_utils.ResolveInstanceName(instance)
_ResetWatcherDaemon(node) _ResetWatcherDaemon()
_XmShutdownInstance(node, inst_name)
# Do shutdowns for 30 minutes
finished_at = time.time() + (35 * 60)
while time.time() <= finished_at: for should_start in ([True] * 5) + [False]:
if _InstanceRunning(node, inst_name): _XmShutdownInstance(node, inst_name)
_XmShutdownInstance(node, inst_name) _RunWatcherDaemon()
time.sleep(30) time.sleep(5)
# Check for some time whether the instance doesn't start again if bool(_InstanceRunning(node, inst_name)) != should_start:
check_until = time.time() + 330 if should_start:
while time.time() <= check_until: msg = "Instance not started when it should"
if _InstanceRunning(node, inst_name): else:
raise qa_error.Error("Instance started when it shouldn't") msg = "Instance started when it shouldn't"
time.sleep(30) raise qa_error.Error(msg)
cmd = ['gnt-instance', 'info', inst_name] cmd = ['gnt-instance', 'info', inst_name]
AssertEqual(StartSSH(master['primary'], AssertEqual(StartSSH(master['primary'],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment