# # # Copyright (C) 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Cluster related QA tests. """ import tempfile import os.path from ganeti import constants from ganeti import compat from ganeti import utils import qa_config import qa_utils import qa_error from qa_utils import AssertEqual, AssertCommand, GetCommandOutput #: cluster verify command _CLUSTER_VERIFY = ["gnt-cluster", "verify"] def _RemoveFileFromAllNodes(filename): """Removes a file from all nodes. """ for node in qa_config.get("nodes"): AssertCommand(["rm", "-f", filename], node=node) def _CheckFileOnAllNodes(filename, content): """Verifies the content of the given file on all nodes. """ cmd = utils.ShellQuoteArgs(["cat", filename]) for node in qa_config.get("nodes"): AssertEqual(qa_utils.GetCommandOutput(node["primary"], cmd), content) # data for testing failures due to bad keys/values for disk parameters _FAIL_PARAMS = ["nonexistent:resync-rate=1", "drbd:nonexistent=1", "drbd:resync-rate=invalid", ] def TestClusterInitDisk(): """gnt-cluster init -D""" name = qa_config.get("name") for param in _FAIL_PARAMS: AssertCommand(["gnt-cluster", "init", "-D", param, name], fail=True) def TestClusterInit(rapi_user, rapi_secret): """gnt-cluster init""" master = qa_config.GetMasterNode() rapi_dir = os.path.dirname(constants.RAPI_USERS_FILE) # First create the RAPI credentials fh = tempfile.NamedTemporaryFile() try: fh.write("%s %s write\n" % (rapi_user, rapi_secret)) fh.flush() tmpru = qa_utils.UploadFile(master["primary"], fh.name) try: AssertCommand(["mkdir", "-p", rapi_dir]) AssertCommand(["mv", tmpru, constants.RAPI_USERS_FILE]) finally: AssertCommand(["rm", "-f", tmpru]) finally: fh.close() # Initialize cluster cmd = ["gnt-cluster", "init"] cmd.append("--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4)) for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count", "nic-count"): for spec_val in ("min", "max", "std"): spec = qa_config.get("ispec_%s_%s" % (spec_type.replace('-', '_'), spec_val), None) if spec: cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec)) if master.get("secondary", None): cmd.append("--secondary-ip=%s" % master["secondary"]) bridge = qa_config.get("bridge", None) if bridge: cmd.append("--bridge=%s" % bridge) cmd.append("--master-netdev=%s" % bridge) htype = qa_config.get("enabled-hypervisors", None) if htype: cmd.append("--enabled-hypervisors=%s" % htype) cmd.append(qa_config.get("name")) AssertCommand(cmd) cmd = ["gnt-cluster", "modify"] # hypervisor parameter modifications hvp = qa_config.get("hypervisor-parameters", {}) for k, v in hvp.items(): cmd.extend(["-H", "%s:%s" % (k, v)]) # backend parameter modifications bep = qa_config.get("backend-parameters", "") if bep: cmd.extend(["-B", bep]) if len(cmd) > 2: AssertCommand(cmd) # OS parameters osp = qa_config.get("os-parameters", {}) for k, v in osp.items(): AssertCommand(["gnt-os", "modify", "-O", v, k]) # OS hypervisor parameters os_hvp = qa_config.get("os-hvp", {}) for os_name in os_hvp: for hv, hvp in os_hvp[os_name].items(): AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hvp), os_name]) def TestClusterRename(): """gnt-cluster rename""" cmd = ["gnt-cluster", "rename", "-f"] original_name = qa_config.get("name") rename_target = qa_config.get("rename", None) if rename_target is None: print qa_utils.FormatError('"rename" entry is missing') return for data in [ cmd + [rename_target], _CLUSTER_VERIFY, cmd + [original_name], _CLUSTER_VERIFY, ]: AssertCommand(data) def TestClusterOob(): """out-of-band framework""" oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID() AssertCommand(_CLUSTER_VERIFY) AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID()]) AssertCommand(_CLUSTER_VERIFY, fail=True) AssertCommand(["touch", oob_path_exists]) AssertCommand(["chmod", "0400", oob_path_exists]) AssertCommand(["gnt-cluster", "copyfile", oob_path_exists]) try: AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program=%s" % oob_path_exists]) AssertCommand(_CLUSTER_VERIFY, fail=True) AssertCommand(["chmod", "0500", oob_path_exists]) AssertCommand(["gnt-cluster", "copyfile", oob_path_exists]) AssertCommand(_CLUSTER_VERIFY) finally: AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists]) AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program="]) def TestClusterEpo(): """gnt-cluster epo""" master = qa_config.GetMasterNode() # Assert that OOB is unavailable for all nodes result_output = GetCommandOutput(master["primary"], "gnt-node list --verbose --no-headers -o" " powered") AssertEqual(compat.all(powered == "(unavail)" for powered in result_output.splitlines()), True) # Conflicting AssertCommand(["gnt-cluster", "epo", "--groups", "--all"], fail=True) # --all doesn't expect arguments AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True) # Unless --all is given master is not allowed to be in the list AssertCommand(["gnt-cluster", "epo", "-f", master["primary"]], fail=True) # This shouldn't fail AssertCommand(["gnt-cluster", "epo", "-f", "--all"]) # All instances should have been stopped now result_output = GetCommandOutput(master["primary"], "gnt-instance list --no-headers -o status") # ERROR_down because the instance is stopped but not recorded as such AssertEqual(compat.all(status == "ERROR_down" for status in result_output.splitlines()), True) # Now start everything again AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"]) # All instances should have been started now result_output = GetCommandOutput(master["primary"], "gnt-instance list --no-headers -o status") AssertEqual(compat.all(status == "running" for status in result_output.splitlines()), True) def TestClusterVerify(): """gnt-cluster verify""" AssertCommand(_CLUSTER_VERIFY) AssertCommand(["gnt-cluster", "verify-disks"]) def TestJobqueue(): """gnt-debug test-jobqueue""" AssertCommand(["gnt-debug", "test-jobqueue"]) def TestDelay(node): """gnt-debug delay""" AssertCommand(["gnt-debug", "delay", "1"]) AssertCommand(["gnt-debug", "delay", "--no-master", "1"]) AssertCommand(["gnt-debug", "delay", "--no-master", "-n", node["primary"], "1"]) def TestClusterReservedLvs(): """gnt-cluster reserved lvs""" for fail, cmd in [ (False, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]), (False, ["lvcreate", "-L1G", "-nqa-test", "xenvg"]), (True, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", "xenvg/qa-test,.*/other-test"]), (False, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", ".*/qa-.*"]), (False, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]), (True, _CLUSTER_VERIFY), (False, ["lvremove", "-f", "xenvg/qa-test"]), (False, _CLUSTER_VERIFY), ]: AssertCommand(cmd, fail=fail) def TestClusterModifyEmpty(): """gnt-cluster modify""" AssertCommand(["gnt-cluster", "modify"], fail=True) def TestClusterModifyDisk(): """gnt-cluster modify -D""" for param in _FAIL_PARAMS: AssertCommand(["gnt-cluster", "modify", "-D", param], fail=True) def TestClusterModifyBe(): """gnt-cluster modify -B""" for fail, cmd in [ # max/min mem (False, ["gnt-cluster", "modify", "-B", "maxmem=256"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]), (False, ["gnt-cluster", "modify", "-B", "minmem=256"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]), (True, ["gnt-cluster", "modify", "-B", "maxmem=a"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]), (True, ["gnt-cluster", "modify", "-B", "minmem=a"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]), (False, ["gnt-cluster", "modify", "-B", "maxmem=128,minmem=128"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 128$'"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 128$'"]), # vcpus (False, ["gnt-cluster", "modify", "-B", "vcpus=4"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 4$'"]), (True, ["gnt-cluster", "modify", "-B", "vcpus=a"]), (False, ["gnt-cluster", "modify", "-B", "vcpus=1"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 1$'"]), # auto_balance (False, ["gnt-cluster", "modify", "-B", "auto_balance=False"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: False$'"]), (True, ["gnt-cluster", "modify", "-B", "auto_balance=1"]), (False, ["gnt-cluster", "modify", "-B", "auto_balance=True"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: True$'"]), ]: AssertCommand(cmd, fail=fail) # redo the original-requested BE parameters, if any bep = qa_config.get("backend-parameters", "") if bep: AssertCommand(["gnt-cluster", "modify", "-B", bep]) def TestClusterInfo(): """gnt-cluster info""" AssertCommand(["gnt-cluster", "info"]) def TestClusterRedistConf(): """gnt-cluster redist-conf""" AssertCommand(["gnt-cluster", "redist-conf"]) def TestClusterGetmaster(): """gnt-cluster getmaster""" AssertCommand(["gnt-cluster", "getmaster"]) def TestClusterVersion(): """gnt-cluster version""" AssertCommand(["gnt-cluster", "version"]) def TestClusterRenewCrypto(): """gnt-cluster renew-crypto""" master = qa_config.GetMasterNode() # Conflicting options cmd = ["gnt-cluster", "renew-crypto", "--force", "--new-cluster-certificate", "--new-confd-hmac-key"] conflicting = [ ["--new-rapi-certificate", "--rapi-certificate=/dev/null"], ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"], ] for i in conflicting: AssertCommand(cmd + i, fail=True) # Invalid RAPI certificate cmd = ["gnt-cluster", "renew-crypto", "--force", "--rapi-certificate=/dev/null"] AssertCommand(cmd, fail=True) rapi_cert_backup = qa_utils.BackupFile(master["primary"], constants.RAPI_CERT_FILE) try: # Custom RAPI certificate fh = tempfile.NamedTemporaryFile() # Ensure certificate doesn't cause "gnt-cluster verify" to complain validity = constants.SSL_CERT_EXPIRATION_WARN * 3 utils.GenerateSelfSignedSslCert(fh.name, validity=validity) tmpcert = qa_utils.UploadFile(master["primary"], fh.name) try: AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--rapi-certificate=%s" % tmpcert]) finally: AssertCommand(["rm", "-f", tmpcert]) # Custom cluster domain secret cds_fh = tempfile.NamedTemporaryFile() cds_fh.write(utils.GenerateSecret()) cds_fh.write("\n") cds_fh.flush() tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name) try: AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--cluster-domain-secret=%s" % tmpcds]) finally: AssertCommand(["rm", "-f", tmpcds]) # Normal case AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--new-cluster-certificate", "--new-confd-hmac-key", "--new-rapi-certificate", "--new-cluster-domain-secret"]) # Restore RAPI certificate AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--rapi-certificate=%s" % rapi_cert_backup]) finally: AssertCommand(["rm", "-f", rapi_cert_backup]) def TestClusterBurnin(): """Burnin""" master = qa_config.GetMasterNode() options = qa_config.get("options", {}) disk_template = options.get("burnin-disk-template", "drbd") parallel = options.get("burnin-in-parallel", False) check_inst = options.get("burnin-check-instances", False) do_rename = options.get("burnin-rename", "") do_reboot = options.get("burnin-reboot", True) reboot_types = options.get("reboot-types", constants.REBOOT_TYPES) # Get as many instances as we need instances = [] try: try: num = qa_config.get("options", {}).get("burnin-instances", 1) for _ in range(0, num): instances.append(qa_config.AcquireInstance()) except qa_error.OutOfInstancesError: print "Not enough instances, continuing anyway." if len(instances) < 1: raise qa_error.Error("Burnin needs at least one instance") script = qa_utils.UploadFile(master["primary"], "../tools/burnin") try: # Run burnin cmd = [script, "--os=%s" % qa_config.get("os"), "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM), "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM), "--disk-size=%s" % ",".join(qa_config.get("disk")), "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")), "--disk-template=%s" % disk_template] if parallel: cmd.append("--parallel") cmd.append("--early-release") if check_inst: cmd.append("--http-check") if do_rename: cmd.append("--rename=%s" % do_rename) if not do_reboot: cmd.append("--no-reboot") else: cmd.append("--reboot-types=%s" % ",".join(reboot_types)) cmd += [inst["name"] for inst in instances] AssertCommand(cmd) finally: AssertCommand(["rm", "-f", script]) finally: for inst in instances: qa_config.ReleaseInstance(inst) def TestClusterMasterFailover(): """gnt-cluster master-failover""" master = qa_config.GetMasterNode() failovermaster = qa_config.AcquireNode(exclude=master) cmd = ["gnt-cluster", "master-failover"] try: AssertCommand(cmd, node=failovermaster) # Back to original master node AssertCommand(cmd, node=master) finally: qa_config.ReleaseNode(failovermaster) def TestClusterMasterFailoverWithDrainedQueue(): """gnt-cluster master-failover with drained queue""" drain_check = ["test", "-f", constants.JOB_QUEUE_DRAIN_FILE] master = qa_config.GetMasterNode() failovermaster = qa_config.AcquireNode(exclude=master) # Ensure queue is not drained for node in [master, failovermaster]: AssertCommand(drain_check, node=node, fail=True) # Drain queue on failover master AssertCommand(["touch", constants.JOB_QUEUE_DRAIN_FILE], node=failovermaster) cmd = ["gnt-cluster", "master-failover"] try: AssertCommand(drain_check, node=failovermaster) AssertCommand(cmd, node=failovermaster) AssertCommand(drain_check, fail=True) AssertCommand(drain_check, node=failovermaster, fail=True) # Back to original master node AssertCommand(cmd, node=master) finally: qa_config.ReleaseNode(failovermaster) AssertCommand(drain_check, fail=True) AssertCommand(drain_check, node=failovermaster, fail=True) def TestClusterCopyfile(): """gnt-cluster copyfile""" master = qa_config.GetMasterNode() uniqueid = utils.NewUUID() # Create temporary file f = tempfile.NamedTemporaryFile() f.write(uniqueid) f.flush() f.seek(0) # Upload file to master node testname = qa_utils.UploadFile(master["primary"], f.name) try: # Copy file to all nodes AssertCommand(["gnt-cluster", "copyfile", testname]) _CheckFileOnAllNodes(testname, uniqueid) finally: _RemoveFileFromAllNodes(testname) def TestClusterCommand(): """gnt-cluster command""" uniqueid = utils.NewUUID() rfile = "/tmp/gnt%s" % utils.NewUUID() rcmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid]) cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", "%s >%s" % (rcmd, rfile)]) try: AssertCommand(cmd) _CheckFileOnAllNodes(rfile, uniqueid) finally: _RemoveFileFromAllNodes(rfile) def TestClusterDestroy(): """gnt-cluster destroy""" AssertCommand(["gnt-cluster", "destroy", "--yes-do-it"]) def TestClusterRepairDiskSizes(): """gnt-cluster repair-disk-sizes""" AssertCommand(["gnt-cluster", "repair-disk-sizes"])