From 28a6fbc8470deae498b7b17435a09c1bae563a22 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann <hansmi@google.com> Date: Tue, 13 Nov 2007 19:31:54 +0000 Subject: [PATCH] Run ganeti-watcher script from QA code instead of cron. This makes the tests much more reliable because it avoids race conditions. It also helps to speed them up a lot. Reviewed-by: iustinp --- qa/qa-sample.yaml | 8 ++---- qa/qa_daemon.py | 67 +++++++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/qa/qa-sample.yaml b/qa/qa-sample.yaml index 75bbd3840..0285d18ba 100644 --- a/qa/qa-sample.yaml +++ b/qa/qa-sample.yaml @@ -55,18 +55,14 @@ tests: instance-import: True instance-reinstall: True instance-shutdown: True + instance-automatic-restart: False + instance-consecutive-failures: False # Make sure not to include the disk(s) required for Dom0 to be included in # the volume group used for instances. Otherwise the whole system may stop # working until restarted. instance-disk-failure: False - # This test takes up to 6 minutes to complete - instance-automatic-restart: False - - # This test takes at least 35 minutes to complete - instance-consecutive-failures: False - # Other settings options: burnin-instances: 2 diff --git a/qa/qa_daemon.py b/qa/qa_daemon.py index 413e5d798..6d0d70676 100644 --- a/qa/qa_daemon.py +++ b/qa/qa_daemon.py @@ -67,47 +67,55 @@ def _XmShutdownInstance(node, name): raise qa_error.Error("xm shutdown failed") -def _ResetWatcherDaemon(node): +def _ResetWatcherDaemon(): """Removes the watcher daemon's state file. Args: node: Node to be reset """ + master = qa_config.GetMasterNode() + cmd = ['rm', '-f', constants.WATCHER_STATEFILE] - AssertEqual(StartSSH(node['primary'], + AssertEqual(StartSSH(master['primary'], + utils.ShellQuoteArgs(cmd)).wait(), 0) + + +def _RunWatcherDaemon(): + """Runs the ganeti-watcher daemon on the master node. 
+ + """ + master = qa_config.GetMasterNode() + + cmd = ['ganeti-watcher', '-d'] + AssertEqual(StartSSH(master['primary'], utils.ShellQuoteArgs(cmd)).wait(), 0) def PrintCronWarning(): - """Shows a warning about the required cron job. + """Shows a warning about the cron job. """ + msg = ("For the following tests it's recommended to turn off the " + "ganeti-watcher cronjob.") print - print qa_utils.FormatWarning("The following tests require the cron script " - "for ganeti-watcher to be set up.") + print qa_utils.FormatWarning(msg) def TestInstanceAutomaticRestart(node, instance): """Test automatic restart of instance by ganeti-watcher. - Note: takes up to 6 minutes to complete. """ master = qa_config.GetMasterNode() inst_name = qa_utils.ResolveInstanceName(instance) - _ResetWatcherDaemon(node) + _ResetWatcherDaemon() _XmShutdownInstance(node, inst_name) - # Give it a bit more than five minutes to start again - restart_at = time.time() + 330 + _RunWatcherDaemon() + time.sleep(5) - # Wait until it's running again - while time.time() <= restart_at: - if _InstanceRunning(node, inst_name): - break - time.sleep(15) - else: - raise qa_error.Error("Daemon didn't restart instance in time") + if not _InstanceRunning(node, inst_name): + raise qa_error.Error("Daemon didn't restart instance") cmd = ['gnt-instance', 'info', inst_name] AssertEqual(StartSSH(master['primary'], @@ -117,28 +125,23 @@ def TestInstanceAutomaticRestart(node, instance): def TestInstanceConsecutiveFailures(node, instance): """Test five consecutive instance failures. - Note: takes at least 35 minutes to complete. 
""" master = qa_config.GetMasterNode() inst_name = qa_utils.ResolveInstanceName(instance) - _ResetWatcherDaemon(node) - _XmShutdownInstance(node, inst_name) - - # Do shutdowns for 30 minutes - finished_at = time.time() + (35 * 60) + _ResetWatcherDaemon() - while time.time() <= finished_at: - if _InstanceRunning(node, inst_name): - _XmShutdownInstance(node, inst_name) - time.sleep(30) + for should_start in ([True] * 5) + [False]: + _XmShutdownInstance(node, inst_name) + _RunWatcherDaemon() + time.sleep(5) - # Check for some time whether the instance doesn't start again - check_until = time.time() + 330 - while time.time() <= check_until: - if _InstanceRunning(node, inst_name): - raise qa_error.Error("Instance started when it shouldn't") - time.sleep(30) + if bool(_InstanceRunning(node, inst_name)) != should_start: + if should_start: + msg = "Instance not started when it should" + else: + msg = "Instance started when it shouldn't" + raise qa_error.Error(msg) cmd = ['gnt-instance', 'info', inst_name] AssertEqual(StartSSH(master['primary'], -- GitLab