diff --git a/qa/ganeti-qa.py b/qa/ganeti-qa.py index e7ad3b81f7813d2a94d35f882d3c869632b4aadf..2d764c15036f76f66e35cc0f6a4469fd0b212521 100755 --- a/qa/ganeti-qa.py +++ b/qa/ganeti-qa.py @@ -34,6 +34,7 @@ import qa_cluster import qa_config import qa_daemon import qa_env +import qa_error import qa_group import qa_instance import qa_node @@ -391,6 +392,19 @@ def RunDaemonTests(instance): RunTest(qa_daemon.TestResumeWatcher) +def RunSingleHomedHardwareFailureTests(instance, pnode): + """Test hardware failure recovery for single-homed instances. + + """ + if qa_config.TestEnabled("instance-recreate-disks"): + othernode = qa_config.AcquireNode(exclude=[pnode]) + try: + RunTest(qa_instance.TestRecreateDisks, + instance, pnode, None, [othernode]) + finally: + qa_config.ReleaseNode(othernode) + + def RunHardwareFailureTests(instance, pnode, snode): """Test cluster internal hardware failure recovery. @@ -412,6 +426,21 @@ def RunHardwareFailureTests(instance, pnode, snode): finally: qa_config.ReleaseNode(othernode) + if qa_config.TestEnabled("instance-recreate-disks"): + othernode1 = qa_config.AcquireNode(exclude=[pnode, snode]) + try: + othernode2 = qa_config.AcquireNode(exclude=[pnode, snode, othernode1]) + except qa_error.OutOfNodesError: + # Let's reuse one of the nodes if the cluster is not big enough + othernode2 = pnode + try: + RunTest(qa_instance.TestRecreateDisks, + instance, pnode, snode, [othernode1, othernode2]) + finally: + qa_config.ReleaseNode(othernode1) + if othernode2 != pnode: + qa_config.ReleaseNode(othernode2) + RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, pnode, snode) RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode) @@ -477,6 +506,7 @@ def RunQa(): RunExportImportTests(instance, pnode, None) RunDaemonTests(instance) RunRepairDiskSizes() + RunSingleHomedHardwareFailureTests(instance, pnode) RunTest(qa_instance.TestInstanceRemove, instance) del instance diff --git a/qa/qa-sample.json b/qa/qa-sample.json index 
0769d0848e2a0e47470927f3f2c2db37378d9c5c..c4550f31a98d8a3669fe69d1856ac91cd57f47e5 100644 --- a/qa/qa-sample.json +++ b/qa/qa-sample.json @@ -140,8 +140,9 @@ "# on whether they support the `gnt-instance console' command.": null, "instance-console": false, - "# Disabled by default because it takes rather long": null, + "# Disabled by default because they take rather long": null, "instance-replace-disks": false, + "instance-recreate-disks": false, "# Whether to test the tools/move-instance utility": null, "inter-cluster-instance-move": false, diff --git a/qa/qa_instance.py b/qa/qa_instance.py index d16033bb95ed068105e5131055a4fc91a2281c6d..b2a38a36ee98f7b8738487bc3d20d750a8b9155b 100644 --- a/qa/qa_instance.py +++ b/qa/qa_instance.py @@ -433,6 +433,56 @@ def TestReplaceDisks(instance, pnode, snode, othernode): AssertCommand(["gnt-instance", "start", instance["name"]]) +def _AssertRecreateDisks(cmdargs, instance, fail=False, check=True, + destroy=True): + """Execute gnt-instance recreate-disks and check the result + + @param cmdargs: Arguments (instance name excluded) + @param instance: Instance to operate on + @param fail: True if the command is expected to fail + @param check: If True and fail is False, check that the disks work + @param destroy: If True, destroy the old disks first + + """ + if destroy: + _DestroyInstanceVolumes(instance) + AssertCommand((["gnt-instance", "recreate-disks"] + cmdargs + + [instance["name"]]), fail) + if not fail and check: + # Quick check that the disks are there + AssertCommand(["gnt-instance", "activate-disks", instance["name"]]) + AssertCommand(["gnt-instance", "deactivate-disks", instance["name"]]) + +@InstanceCheck(INST_UP, INST_UP, FIRST_ARG) +def TestRecreateDisks(instance, pnode, snode, othernodes): + """gnt-instance recreate-disks + + @param instance: Instance to work on + @param pnode: Primary node + @param snode: Secondary node, or None for single-homed instances + @param othernodes: list/tuple of nodes where to temporarily
recreate disks + + """ + other_seq = ":".join([n["primary"] for n in othernodes]) + orig_seq = pnode["primary"] + if snode: + orig_seq = orig_seq + ":" + snode["primary"] + # This fails because the instance is running + _AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False) + AssertCommand(["gnt-instance", "stop", instance["name"]]) + # Disks exist: this should fail + _AssertRecreateDisks([], instance, fail=True, destroy=False) + # Recreate disks in place + _AssertRecreateDisks([], instance) + # Move disks away + _AssertRecreateDisks(["-n", other_seq], instance) + # Move disks back + _AssertRecreateDisks(["-n", orig_seq], instance, check=False) + # This and InstanceCheck decoration check that the disks are working + AssertCommand(["gnt-instance", "reinstall", "-f", instance["name"]]) + AssertCommand(["gnt-instance", "start", instance["name"]]) + + @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceExport(instance, node): """gnt-backup export -n ..."""