From b1ffe1eb18d93f0145c36afeec21cb9715311505 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann <hansmi@google.com> Date: Thu, 1 Nov 2007 13:54:42 +0000 Subject: [PATCH] Cleanup QA scripts. - Split main() function into several small ones. - Current work on disk failure tests. This is not yet finished. - Fix small typo in qa_node.py. Reviewed-by: schreiberal --- qa/ganeti-qa.py | 268 ++++++++++++++++++++++++++-------------------- qa/qa_instance.py | 41 ++++--- qa/qa_node.py | 12 +-- qa/qa_other.py | 2 +- 4 files changed, 185 insertions(+), 138 deletions(-) diff --git a/qa/ganeti-qa.py b/qa/ganeti-qa.py index 3c85242a2..306eccb42 100755 --- a/qa/ganeti-qa.py +++ b/qa/ganeti-qa.py @@ -60,44 +60,32 @@ def RunTest(fn, *args): return fn(*args) -def main(): - """Main program. +def RunEnvTests(): + """Run several environment tests. """ - parser = OptionParser(usage="%prog [options] <config-file> " - "<known-hosts-file>") - parser.add_option('--dry-run', dest='dry_run', - action="store_true", - help="Show what would be done") - parser.add_option('--yes-do-it', dest='yes_do_it', - action="store_true", - help="Really execute the tests") - (qa_config.options, args) = parser.parse_args() + if not qa_config.TestEnabled('env'): + return - if len(args) == 2: - (config_file, known_hosts_file) = args - else: - parser.error("Not enough arguments.") - - if not qa_config.options.yes_do_it: - print ("Executing this script irreversibly destroys any Ganeti\n" - "configuration on all nodes involved. If you really want\n" - "to start testing, supply the --yes-do-it option.") - sys.exit(1) - - qa_config.Load(config_file) + RunTest(qa_env.TestSshConnection) + RunTest(qa_env.TestIcmpPing) + RunTest(qa_env.TestGanetiCommands) - RunTest(qa_other.TestUploadKnownHostsFile, known_hosts_file) - if qa_config.TestEnabled('env'): - RunTest(qa_env.TestSshConnection) - RunTest(qa_env.TestIcmpPing) - RunTest(qa_env.TestGanetiCommands) +def SetupCluster(): + """Initializes the cluster. 
+ """ RunTest(qa_cluster.TestClusterInit) - RunTest(qa_node.TestNodeAddAll) + if qa_config.TestEnabled('node-info'): + RunTest(qa_node.TestNodeInfo) + + +def RunClusterTests(): + """Runs tests related to gnt-cluster. + """ if qa_config.TestEnabled('cluster-verify'): RunTest(qa_cluster.TestClusterVerify) @@ -113,134 +101,178 @@ def main(): if qa_config.TestEnabled('cluster-copyfile'): RunTest(qa_cluster.TestClusterCopyfile) - if qa_config.TestEnabled('node-info'): - RunTest(qa_node.TestNodeInfo) - if qa_config.TestEnabled('cluster-burnin'): RunTest(qa_cluster.TestClusterBurnin) if qa_config.TestEnabled('cluster-master-failover'): RunTest(qa_cluster.TestClusterMasterFailover) - if qa_config.TestEnabled('os'): - RunTest(qa_os.TestOsList) - RunTest(qa_os.TestOsDiagnose) - RunTest(qa_os.TestOsValid) - RunTest(qa_os.TestOsInvalid) - RunTest(qa_os.TestOsPartiallyValid) - node = qa_config.AcquireNode() - try: - if qa_config.TestEnabled('instance-add-plain-disk'): - instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, node) +def RunOsTests(): + """Runs all tests related to gnt-os. - if qa_config.TestEnabled('instance-shutdown'): - RunTest(qa_instance.TestInstanceShutdown, instance) - RunTest(qa_instance.TestInstanceStartup, instance) + """ + if not qa_config.TestEnabled('os'): + return + + RunTest(qa_os.TestOsList) + RunTest(qa_os.TestOsDiagnose) + RunTest(qa_os.TestOsValid) + RunTest(qa_os.TestOsInvalid) + RunTest(qa_os.TestOsPartiallyValid) + + +def RunCommonInstanceTests(instance): + """Runs a few tests that are common to all disk types. 
+ + """ + if qa_config.TestEnabled('instance-shutdown'): + RunTest(qa_instance.TestInstanceShutdown, instance) + RunTest(qa_instance.TestInstanceStartup, instance) - if qa_config.TestEnabled('instance-list'): - RunTest(qa_instance.TestInstanceList) + if qa_config.TestEnabled('instance-list'): + RunTest(qa_instance.TestInstanceList) - if qa_config.TestEnabled('instance-info'): - RunTest(qa_instance.TestInstanceInfo, instance) + if qa_config.TestEnabled('instance-info'): + RunTest(qa_instance.TestInstanceInfo, instance) - automatic_restart = \ - qa_config.TestEnabled('instance-automatic-restart') - consecutive_failures = \ - qa_config.TestEnabled('instance-consecutive-failures') + if qa_config.TestEnabled('instance-reinstall'): + RunTest(qa_instance.TestInstanceShutdown, instance) + RunTest(qa_instance.TestInstanceReinstall, instance) + RunTest(qa_instance.TestInstanceStartup, instance) - if automatic_restart or consecutive_failures: - qa_daemon.PrintCronWarning() + if qa_config.TestEnabled('node-volumes'): + RunTest(qa_node.TestNodeVolumes) - if automatic_restart: - RunTest(qa_daemon.TestInstanceAutomaticRestart, node, instance) - if consecutive_failures: - RunTest(qa_daemon.TestInstanceConsecutiveFailures, node, instance) +def RunExportImportTests(instance, pnode): + """Tries to export and import the instance. 
- if qa_config.TestEnabled('instance-export'): - expnode = qa_config.AcquireNode(exclude=node) + """ + if qa_config.TestEnabled('instance-export'): + expnode = qa_config.AcquireNode(exclude=pnode) + try: + name = RunTest(qa_instance.TestInstanceExport, instance, expnode) + + RunTest(qa_instance.TestBackupList, expnode) + + if qa_config.TestEnabled('instance-import'): + newinst = qa_config.AcquireInstance() try: - name = RunTest(qa_instance.TestInstanceExport, instance, expnode) - - RunTest(qa_instance.TestBackupList, expnode) - - if qa_config.TestEnabled('instance-import'): - newinst = qa_config.AcquireInstance() - try: - RunTest(qa_instance.TestInstanceImport, node, newinst, - expnode, name) - RunTest(qa_instance.TestInstanceRemove, newinst) - finally: - qa_config.ReleaseInstance(newinst) + RunTest(qa_instance.TestInstanceImport, pnode, newinst, + expnode, name) + RunTest(qa_instance.TestInstanceRemove, newinst) finally: - qa_config.ReleaseNode(expnode) + qa_config.ReleaseInstance(newinst) + finally: + qa_config.ReleaseNode(expnode) - if qa_config.TestEnabled('instance-reinstall'): - RunTest(qa_instance.TestInstanceShutdown, instance) - RunTest(qa_instance.TestInstanceReinstall, instance) - RunTest(qa_instance.TestInstanceStartup, instance) - if qa_config.TestEnabled('node-volumes'): - RunTest(qa_node.TestNodeVolumes) +def RunDaemonTests(instance, pnode): + """Test the ganeti-watcher script. 
- RunTest(qa_instance.TestInstanceRemove, instance) - del instance + """ + automatic_restart = \ + qa_config.TestEnabled('instance-automatic-restart') + consecutive_failures = \ + qa_config.TestEnabled('instance-consecutive-failures') - if qa_config.TestEnabled('instance-add-local-mirror-disk'): - instance = RunTest(qa_instance.TestInstanceAddWithLocalMirrorDisk, node) + if automatic_restart or consecutive_failures: + qa_daemon.PrintCronWarning() - if qa_config.TestEnabled('instance-shutdown'): - RunTest(qa_instance.TestInstanceShutdown, instance) - RunTest(qa_instance.TestInstanceStartup, instance) + if automatic_restart: + RunTest(qa_daemon.TestInstanceAutomaticRestart, pnode, instance) - if qa_config.TestEnabled('instance-info'): - RunTest(qa_instance.TestInstanceInfo, instance) + if consecutive_failures: + RunTest(qa_daemon.TestInstanceConsecutiveFailures, pnode, instance) - if qa_config.TestEnabled('node-volumes'): - RunTest(qa_node.TestNodeVolumes) - RunTest(qa_instance.TestInstanceRemove, instance) - del instance +def RunHardwareFailureTests(instance, pnode, snode): + """Test cluster internal hardware failure recovery. - if qa_config.TestEnabled('instance-add-remote-raid-disk'): - node2 = qa_config.AcquireNode(exclude=node) - try: - instance = RunTest(qa_instance.TestInstanceAddWithRemoteRaidDisk, - node, node2) + """ + if qa_config.TestEnabled('instance-failover'): + RunTest(qa_instance.TestInstanceFailover, instance) + + if qa_config.TestEnabled('node-evacuate'): + RunTest(qa_node.TestNodeEvacuate, pnode, snode) + + if qa_config.TestEnabled('node-failover'): + RunTest(qa_node.TestNodeFailover, pnode, snode) + + if qa_config.TestEnabled('instance-disk-failure'): + RunTest(qa_instance.TestInstanceMasterDiskFailure, + instance, pnode, snode) + RunTest(qa_instance.TestInstanceSecondaryDiskFailure, + instance, pnode, snode) + + +def main(): + """Main program. 
+ + """ + parser = OptionParser(usage="%prog [options] <config-file> " + "<known-hosts-file>") + parser.add_option('--dry-run', dest='dry_run', + action="store_true", + help="Show what would be done") + parser.add_option('--yes-do-it', dest='yes_do_it', + action="store_true", + help="Really execute the tests") + (qa_config.options, args) = parser.parse_args() - if qa_config.TestEnabled('instance-shutdown'): - RunTest(qa_instance.TestInstanceShutdown, instance) - RunTest(qa_instance.TestInstanceStartup, instance) + if len(args) == 2: + (config_file, known_hosts_file) = args + else: + parser.error("Not enough arguments.") - if qa_config.TestEnabled('instance-info'): - RunTest(qa_instance.TestInstanceInfo, instance) + if not qa_config.options.yes_do_it: + print ("Executing this script irreversibly destroys any Ganeti\n" + "configuration on all nodes involved. If you really want\n" + "to start testing, supply the --yes-do-it option.") + sys.exit(1) - if qa_config.TestEnabled('instance-failover'): - RunTest(qa_instance.TestInstanceFailover, instance) + qa_config.Load(config_file) - if qa_config.TestEnabled('node-evacuate'): - RunTest(qa_node.TestNodeEvacuate, node, node2) + RunTest(qa_other.UploadKnownHostsFile, known_hosts_file) - if qa_config.TestEnabled('node-failover'): - RunTest(qa_node.TestNodeFailover, node, node2) + RunEnvTests() + SetupCluster() + RunClusterTests() + RunOsTests() - if qa_config.TestEnabled('node-volumes'): - RunTest(qa_node.TestNodeVolumes) + pnode = qa_config.AcquireNode() + try: + if qa_config.TestEnabled('instance-add-plain-disk'): + instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, pnode) + RunCommonInstanceTests(instance) + RunExportImportTests(instance, pnode) + RunDaemonTests(instance, pnode) + RunTest(qa_instance.TestInstanceRemove, instance) + del instance - if qa_config.TestEnabled('instance-disk-failure'): - RunTest(qa_instance.TestInstanceMasterDiskFailure, - instance, node, node2) - 
RunTest(qa_instance.TestInstanceSecondaryDiskFailure, - instance, node, node2) + if qa_config.TestEnabled('instance-add-local-mirror-disk'): + instance = RunTest(qa_instance.TestInstanceAddWithLocalMirrorDisk, pnode) + RunCommonInstanceTests(instance) + RunExportImportTests(instance, pnode) + RunTest(qa_instance.TestInstanceRemove, instance) + del instance + if qa_config.TestEnabled('instance-add-remote-raid-disk'): + snode = qa_config.AcquireNode(exclude=pnode) + try: + instance = RunTest(qa_instance.TestInstanceAddWithRemoteRaidDisk, + pnode, snode) + RunCommonInstanceTests(instance) + RunExportImportTests(instance, pnode) + RunHardwareFailureTests(instance, pnode, snode) RunTest(qa_instance.TestInstanceRemove, instance) del instance finally: - qa_config.ReleaseNode(node2) + qa_config.ReleaseNode(snode) finally: - qa_config.ReleaseNode(node) + qa_config.ReleaseNode(pnode) RunTest(qa_node.TestNodeRemoveAll) diff --git a/qa/qa_instance.py b/qa/qa_instance.py index ec17d703a..39a5e4ff4 100644 --- a/qa/qa_instance.py +++ b/qa/qa_instance.py @@ -239,15 +239,16 @@ def _TestInstanceDiskFailure(instance, node, node2, onmaster): r'\s+primary:\s+(/dev/drbd\d+)\s+') drbddevs = re.findall(pattern, output, re.M) - # Deactivate disks on secondary node halted_disks = [] - cmds = [] - for name in node2disk[[node2_full, node_full][int(onmaster)]]: - halted_disks.append(name) - cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name)) - AssertEqual(StartSSH([node2, node][int(onmaster)]['primary'], - '; '.join(cmds)).wait(), 0) try: + # Deactivate disks + cmds = [] + for name in node2disk[[node2_full, node_full][int(onmaster)]]: + halted_disks.append(name) + cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name)) + AssertEqual(StartSSH([node2, node][int(onmaster)]['primary'], + ' && '.join(cmds)).wait(), 0) + # Write something to the disks and give some time to notice the problem cmds = [] for disk in devpath: @@ -257,6 +258,10 @@ def 
_TestInstanceDiskFailure(instance, node, node2, onmaster): AssertEqual(StartSSH(node['primary'], ' && '.join(cmds)).wait(), 0) time.sleep(3) + for name in drbddevs: + cmd = ['drbdsetup', name, 'show'] + AssertEqual(StartSSH(node['primary'], sq(cmd)).wait(), 0) + # For manual checks cmd = ['gnt-instance', 'info', instance['name']] AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0) @@ -269,15 +274,25 @@ def _TestInstanceDiskFailure(instance, node, node2, onmaster): AssertEqual(StartSSH([node2, node][int(onmaster)]['primary'], '; '.join(cmds)).wait(), 0) + if onmaster: + for name in drbddevs: + cmd = ['drbdsetup', name, 'detach'] + AssertEqual(StartSSH(node['primary'], sq(cmd)).wait(), 0) + else: + for name in drbddevs: + cmd = ['drbdsetup', name, 'disconnect'] + AssertEqual(StartSSH(node2['primary'], sq(cmd)).wait(), 0) + + # Make sure disks are up again + #cmd = ['gnt-instance', 'activate-disks', instance['name']] + #AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0) + # Restart instance cmd = ['gnt-instance', 'shutdown', instance['name']] AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0) - cmd = ['gnt-instance', 'startup', '--force', instance['name']] - AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0) - - # Make sure disks are up again - cmd = ['gnt-instance', 'activate-disks', instance['name']] + #cmd = ['gnt-instance', 'startup', '--force', instance['name']] + cmd = ['gnt-instance', 'startup', instance['name']] AssertEqual(StartSSH(master['primary'], sq(cmd)).wait(), 0) cmd = ['gnt-cluster', 'verify'] @@ -289,7 +304,7 @@ def TestInstanceMasterDiskFailure(instance, node, node2): qa_utils.PrintError("Disk failure on primary node cannot be " "tested due to potential crashes.") # The following can cause crashes, thus it's disabled until fixed - #return _TestInstanceDiskFailure(instance, node, node2, True) + return _TestInstanceDiskFailure(instance, node, node2, True) def TestInstanceSecondaryDiskFailure(instance, node, node2): 
diff --git a/qa/qa_node.py b/qa/qa_node.py index 968b7f458..56915116a 100644 --- a/qa/qa_node.py +++ b/qa/qa_node.py @@ -89,9 +89,9 @@ def TestNodeFailover(node, node2): master = qa_config.GetMasterNode() if qa_utils.GetNodeInstances(node2, secondaries=False): - raise qa_errors.UnusableNodeError("Secondary node has at least one " - "primary instance. This test requires " - "it to have no primary instances.") + raise qa_error.UnusableNodeError("Secondary node has at least one " + "primary instance. This test requires " + "it to have no primary instances.") # Fail over to secondary node cmd = ['gnt-node', 'failover', '-f', node['primary']] @@ -111,9 +111,9 @@ def TestNodeEvacuate(node, node2): node3 = qa_config.AcquireNode(exclude=[node, node2]) try: if qa_utils.GetNodeInstances(node3, secondaries=True): - raise qa_errors.UnusableNodeError("Evacuation node has at least one " - "secondary instance. This test requires " - "it to have no secondary instances.") + raise qa_error.UnusableNodeError("Evacuation node has at least one " + "secondary instance. This test requires " + "it to have no secondary instances.") # Evacuate all secondary instances cmd = ['gnt-node', 'evacuate', '-f', node2['primary'], node3['primary']] diff --git a/qa/qa_other.py b/qa/qa_other.py index 6882254be..d349a066c 100644 --- a/qa/qa_other.py +++ b/qa/qa_other.py @@ -25,7 +25,7 @@ import qa_utils from qa_utils import AssertEqual, StartSSH -def TestUploadKnownHostsFile(localpath): +def UploadKnownHostsFile(localpath): """Uploading known_hosts file. """ -- GitLab