Commit e4c346a5 authored by Bernardo Dal Seno
Browse files

QA cleanup: Removed instance-disk-failure test



The test was broken, out of sync with the rest of the code, and prone to
crashes. Until someone does a better job, it's better to remove the
test. This simplifies further refactoring.
Signed-off-by: Bernardo Dal Seno <bdalseno@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
parent deaa347b
......@@ -450,12 +450,6 @@ def RunHardwareFailureTests(instance, pnode, snode):
RunTestIf("node-failover", qa_node.TestNodeFailover, pnode, snode)
RunTestIf("instance-disk-failure", qa_instance.TestInstanceMasterDiskFailure,
instance, pnode, snode)
RunTestIf("instance-disk-failure",
qa_instance.TestInstanceSecondaryDiskFailure, instance,
pnode, snode)
def RunExclusiveStorageTests():
"""Test exclusive storage."""
......
......@@ -169,12 +169,7 @@
"instance-recreate-disks": false,
"# Whether to test the tools/move-instance utility": null,
"inter-cluster-instance-move": false,
"# Make sure not to include the disk(s) required for Dom0 to be up": null,
"# in the volume group used for instances. Otherwise the whole": null,
"# system may stop working until restarted.": null,
"instance-disk-failure": false
"inter-cluster-instance-move": false
},
"options": {
......
......@@ -24,7 +24,6 @@
"""
import re
import time
from ganeti import utils
from ganeti import constants
......@@ -654,131 +653,3 @@ def TestBackupList(expnode):
def TestBackupListFields():
  """Run the generic query-fields test for "gnt-backup list-fields".

  The set of field names handed to the generic test is taken from the keys
  of C{query.EXPORT_FIELDS}.

  """
  export_field_names = query.EXPORT_FIELDS.keys()
  qa_utils.GenericQueryFieldsTest("gnt-backup", export_field_names)
def _TestInstanceDiskFailure(instance, node, node2, onmaster):
  """Simulate a physical disk failure under an instance and recover from it.

  The physical disks backing the instance are switched to "offline" on one
  of the two nodes, some I/O is pushed through the instance's devices, the
  disks are switched back to "running", and finally the instance disks are
  replaced and the instance restarted.

  @param instance: mapping describing the instance; only its C{"name"} key
      is read here
  @param node: mapping describing the first node; its C{"primary"} key is
      read, and the object itself is passed to C{ResolveNodeName} and as
      the C{node} argument of C{AssertCommand}
  @param node2: mapping describing the second node, used like C{node}
  @param onmaster: if true the disks are taken offline on C{node}, otherwise
      on C{node2}; it is converted with C{int()} and used as a list index,
      so it must be a boolean (or 0/1)
  @raise qa_error.Error: if a physical volume name does not look like
      C{/dev/<letters><digits>}, or if no physical disk of the instance was
      found on the node selected by C{onmaster}

  """
  master = qa_config.GetMasterNode()
  sq = utils.ShellQuoteArgs

  instance_full = qa_utils.ResolveInstanceName(instance["name"])
  node_full = qa_utils.ResolveNodeName(node)
  node2_full = qa_utils.ResolveNodeName(node2)

  print qa_utils.FormatInfo("Getting physical disk names")
  # One output line per volume, formatted as "node|phys|instance"
  cmd = ["gnt-node", "volumes", "--separator=|", "--no-headers",
         "--output=node,phys,instance",
         node["primary"], node2["primary"]]
  output = qa_utils.GetCommandOutput(master["primary"], sq(cmd))

  # Build a map from node name to the list of whole-disk names (e.g. "sda"
  # for "/dev/sda1") holding volumes of this instance; the regular
  # expression drops the "/dev/" prefix and the trailing partition number
  re_disk = re.compile(r"^/dev/([a-z]+)\d+$")
  node2disk = {}
  for line in output.splitlines():
    (node_name, phys, inst) = line.split("|")
    if inst == instance_full:
      if node_name not in node2disk:
        node2disk[node_name] = []

      m = re_disk.match(phys)
      if not m:
        raise qa_error.Error("Unknown disk name format: %s" % phys)

      name = m.group(1)
      # Several volumes can live on the same disk; record each disk once
      if name not in node2disk[node_name]:
        node2disk[node_name].append(name)

  # int(onmaster) selects node_full when onmaster is true, node2_full
  # otherwise. NOTE(review): the message says "master" although the node
  # checked is simply the "node" argument -- confirm the intended wording
  if [node2_full, node_full][int(onmaster)] not in node2disk:
    raise qa_error.Error("Couldn't find physical disks used on"
                         " %s node" % ["secondary", "master"][int(onmaster)])

  print qa_utils.FormatInfo("Checking whether nodes have ability to stop"
                            " disks")
  # Each disk must have a regular file at its state path ("test -f");
  # presumably this is the kernel's per-device state file -- the path is
  # produced by _GetDiskStatePath, which is defined elsewhere
  for node_name, disks in node2disk.iteritems():
    cmds = []
    for disk in disks:
      cmds.append(sq(["test", "-f", _GetDiskStatePath(disk)]))
    AssertCommand(" && ".join(cmds), node=node_name)

  print qa_utils.FormatInfo("Getting device paths")
  cmd = ["gnt-instance", "activate-disks", instance["name"]]
  output = qa_utils.GetCommandOutput(master["primary"], sq(cmd))
  devpath = []
  # Every line must consist of exactly three colon-separated fields; only
  # the last one (the device path) is kept
  for line in output.splitlines():
    (_, _, tmpdevpath) = line.split(":")
    devpath.append(tmpdevpath)
  print devpath

  print qa_utils.FormatInfo("Getting drbd device paths")
  cmd = ["gnt-instance", "info", instance["name"]]
  output = qa_utils.GetCommandOutput(master["primary"], sq(cmd))
  # Multi-line match: a "- sdX, type: drbd[8], ..." disk line followed by
  # a "primary: /dev/drbdN" line; the DRBD device path is captured
  pattern = (r"\s+-\s+sd[a-z]+,\s+type:\s+drbd8?,\s+.*$"
             r"\s+primary:\s+(/dev/drbd\d+)\s+")
  drbddevs = re.findall(pattern, output, re.M)
  print drbddevs

  # Disks are recorded here before the "offline" command is issued, so the
  # "finally" clause below tries to re-enable every disk that might have
  # been stopped, even if the command fails half-way
  halted_disks = []
  try:
    print qa_utils.FormatInfo("Deactivating disks")
    cmds = []
    for name in node2disk[[node2_full, node_full][int(onmaster)]]:
      halted_disks.append(name)
      cmds.append(sq(["echo", "offline"]) + " >%s" % _GetDiskStatePath(name))
    AssertCommand(" && ".join(cmds), node=[node2, node][int(onmaster)])

    print qa_utils.FormatInfo("Write to disks and give some time to notice"
                              " the problem")
    # Read the first 512-byte block of each device and write it back to
    # the same device, which generates write I/O without changing data
    cmds = []
    for disk in devpath:
      cmds.append(sq(["dd", "count=1", "bs=512", "conv=notrunc",
                      "if=%s" % disk, "of=%s" % disk]))
    # Three rounds of I/O, three seconds apart
    for _ in (0, 1, 2):
      AssertCommand(" && ".join(cmds), node=node)
      time.sleep(3)

    print qa_utils.FormatInfo("Debugging info")
    for name in drbddevs:
      AssertCommand(["drbdsetup", name, "show"], node=node)

    AssertCommand(["gnt-instance", "info", instance["name"]])

  finally:
    print qa_utils.FormatInfo("Activating disks again")
    cmds = []
    for name in halted_disks:
      cmds.append(sq(["echo", "running"]) + " >%s" % _GetDiskStatePath(name))
    # Joined with ";" rather than "&&" so that every disk is attempted even
    # if re-enabling an earlier one fails
    AssertCommand("; ".join(cmds), node=[node2, node][int(onmaster)])

  # Detach the DRBD devices on "node", or disconnect them on "node2",
  # depending on where the failure was simulated
  if onmaster:
    for name in drbddevs:
      AssertCommand(["drbdsetup", name, "detach"], node=node)
  else:
    for name in drbddevs:
      AssertCommand(["drbdsetup", name, "disconnect"], node=node2)

  # TODO: the "vgs" check on the affected node below is disabled
  #AssertCommand(["vgs"], [node2, node][int(onmaster)])

  print qa_utils.FormatInfo("Making sure disks are up again")
  AssertCommand(["gnt-instance", "replace-disks", instance["name"]])

  print qa_utils.FormatInfo("Restarting instance")
  AssertCommand(["gnt-instance", "shutdown", instance["name"]])
  AssertCommand(["gnt-instance", "startup", instance["name"]])

  AssertCommand(["gnt-cluster", "verify"])
def TestInstanceMasterDiskFailure(instance, node, node2):
  """Placeholder for the disk failure test on the first ("master") node.

  The real test is disabled; this function only prints a notice and
  returns C{None}. All three arguments are accepted but not used.

  """
  # pylint: disable=W0613
  # due to unused args
  notice = ("Disk failure on primary node cannot be"
            " tested due to potential crashes.")
  print(qa_utils.FormatError(notice))
  # The following can cause crashes, thus it's disabled until fixed
  #return _TestInstanceDiskFailure(instance, node, node2, True)
def TestInstanceSecondaryDiskFailure(instance, node, node2):
  """Run the disk failure test with the failure simulated on C{node2}.

  @return: whatever C{_TestInstanceDiskFailure} returns

  """
  return _TestInstanceDiskFailure(instance, node, node2, onmaster=False)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment