diff --git a/NEWS b/NEWS
index 782faedaa215a220376fa82de433c24f644a2dec..240b46e39199c3fed5361158609a4abcd647e080 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,17 @@ Version 2.5.0 beta1
 since Ganeti 2.1.3 and :doc:`documented <rapi>`, instead.
 
 
+Version 2.4.1
+-------------
+
+*(Released Wed, 09 Mar 2011)*
+
+Emergency bug-fix release. ``tools/cfgupgrade`` was broken and overwrote
+the RAPI users file if run twice (even with ``--dry-run``).
+
+The release fixes that bug (nothing else changed).
+
+
 Version 2.4.0
 -------------
diff --git a/configure.ac b/configure.ac
index 98263bdd8e876a6d73d6d430728131f76d778288..a34033d2fb159cdb5851080b2a165cd7320fbc00 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # Configure script for Ganeti
 m4_define([gnt_version_major], [2])
 m4_define([gnt_version_minor], [4])
-m4_define([gnt_version_revision], [0])
+m4_define([gnt_version_revision], [1])
 m4_define([gnt_version_suffix], [])
 m4_define([gnt_version_full],
           m4_format([%d.%d.%d%s],
diff --git a/doc/rapi.rst b/doc/rapi.rst
index fe54dc75d85909f9a7b2b6e4332cb897af687a4b..d278b2975b028cc34cb5aaf40a87f2d704f59949 100644
--- a/doc/rapi.rst
+++ b/doc/rapi.rst
@@ -70,6 +70,10 @@ HTTP Basic authentication as per :rfc:`2617` is supported.
 .. _JSON: http://www.json.org/
 .. _REST: http://en.wikipedia.org/wiki/Representational_State_Transfer
 
+HTTP requests with a body (e.g. ``PUT`` or ``POST``) require the request
+header ``Content-type`` be set to ``application/json`` (see :rfc:`2616`
+(HTTP/1.1), section 7.2.1).
+
 A note on JSON as used by RAPI
 ++++++++++++++++++++++++++++++
diff --git a/lib/cli.py b/lib/cli.py
index c1a7287ff1073faac4b02a8aa202e865c0e87bc4..c54e00ba28334685ab384d96d3c5e772b74c4883 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -905,8 +905,7 @@ NOSSH_KEYCHECK_OPT = cli_option("--no-ssh-key-check", dest="ssh_key_check",
 
 NODE_FORCE_JOIN_OPT = cli_option("--force-join", dest="force_join",
                                  default=False, action="store_true",
-                                 help="Force the joining of a node,"
-                                 " needed when merging clusters")
+                                 help="Force the joining of a node")
 
 MC_OPT = cli_option("-C", "--master-candidate", dest="master_candidate",
                     type="bool", default=None, metavar=_YORNO,
diff --git a/lib/client/gnt_job.py b/lib/client/gnt_job.py
index 99519280f798b52236bafe040a201ab2a86a9e3e..1049760545a2ace297266a4d92e3f5c36f820b9b 100644
--- a/lib/client/gnt_job.py
+++ b/lib/client/gnt_job.py
@@ -317,6 +317,7 @@ def ShowJobs(opts, args):
           if not result:
             format_msg(3, "Result: empty dictionary")
           else:
+            format_msg(3, "Result:")
             for key, val in result.iteritems():
               format_msg(4, "%s: %s" % (key, result_helper(val)))
         else:
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 95468dacb9ab811371393fcfae5dda5c69c4ed06..84e1fe712c468f0ea33ac4db20f8bc160b6d4e0b 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1695,7 +1695,8 @@ class LUClusterVerify(LogicalUnit):
       test = n_img.mfree < needed_mem
       self._ErrorIf(test, self.ENODEN1, node,
                     "not enough memory to accomodate instance failovers"
-                    " should node %s fail", prinode)
+                    " should node %s fail (%dMiB needed, %dMiB available)",
+                    prinode, needed_mem, n_img.mfree)
 
   @classmethod
   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
@@ -2912,6 +2913,12 @@ class LUClusterSetParams(LogicalUnit):
       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
 
+      # TODO: we need a more general way to handle resetting
+      # cluster-level parameters to default values
+      if self.new_ndparams["oob_program"] == "":
+        self.new_ndparams["oob_program"] = \
+            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
+
     if self.op.nicparams:
       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
@@ -4124,10 +4131,16 @@ class _InstanceQuery(_QueryBase):
         bad_nodes.append(name)
       elif result.payload:
         for inst in result.payload:
-          if all_info[inst].primary_node == name:
-            live_data.update(result.payload)
+          if inst in all_info:
+            if all_info[inst].primary_node == name:
+              live_data.update(result.payload)
+            else:
+              wrongnode_inst.add(inst)
           else:
-            wrongnode_inst.add(inst)
+            # orphan instance; we don't list it here as we don't
+            # handle this case yet in the output of instance listing
+            logging.warning("Orphan instance '%s' found on node %s",
+                            inst, name)
       # else no instance is alive
     else:
       live_data = {}
@@ -9423,23 +9436,33 @@ class LUInstanceQueryData(NoHooksLU):
 
   def ExpandNames(self):
     self.needed_locks = {}
-    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
 
-    if self.op.instances:
-      self.wanted_names = []
-      for name in self.op.instances:
-        full_name = _ExpandInstanceName(self.cfg, name)
-        self.wanted_names.append(full_name)
-      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
+    # Use locking if requested or when non-static information is wanted
+    if not (self.op.static or self.op.use_locking):
+      self.LogWarning("Non-static data requested, locks need to be acquired")
+      self.op.use_locking = True
+
+    if self.op.instances or not self.op.use_locking:
+      # Expand instance names right here
+      self.wanted_names = _GetWantedInstances(self, self.op.instances)
     else:
+      # Will use acquired locks
      self.wanted_names = None
-      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
-
-    self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+    if self.op.use_locking:
+      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+
+      if self.wanted_names is None:
+        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
+      else:
+        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
+
+      self.needed_locks[locking.LEVEL_NODE] = []
+      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
   def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE:
+    if self.op.use_locking and level == locking.LEVEL_NODE:
       self._LockInstancesNodes()
 
   def CheckPrereq(self):
@@ -9449,10 +9472,11 @@ class LUInstanceQueryData(NoHooksLU):
 
     """
     if self.wanted_names is None:
+      assert self.op.use_locking, "Locking was not used"
       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
 
-    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
-                             in self.wanted_names]
+    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
+                             for name in self.wanted_names]
 
   def _ComputeBlockdevStatus(self, node, instance_name, dev):
     """Returns the status of a block device
@@ -9498,7 +9522,7 @@ class LUInstanceQueryData(NoHooksLU):
     else:
       dev_children = []
 
-    data = {
+    return {
       "iv_name": dev.iv_name,
       "dev_type": dev.dev_type,
       "logical_id": dev.logical_id,
@@ -9510,8 +9534,6 @@ class LUInstanceQueryData(NoHooksLU):
       "size": dev.size,
       }
 
-    return data
-
   def Exec(self, feedback_fn):
     """Gather and return data"""
     result = {}
@@ -9539,7 +9561,7 @@ class LUInstanceQueryData(NoHooksLU):
       disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]
 
-      idict = {
+      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
@@ -9564,8 +9586,6 @@ class LUInstanceQueryData(NoHooksLU):
        "uuid": instance.uuid,
        }
 
-      result[instance.name] = idict
-
     return result
diff --git a/lib/constants.py b/lib/constants.py
index 71338762f4df0cd33c0a809cb619ee9f850c37f7..55e81bdd9535a5c137934feba93e5bfe6e6e5854 100644
--- a/lib/constants.py
+++ b/lib/constants.py
@@ -432,10 +432,18 @@ EXPORT_MODES = frozenset([
   EXPORT_MODE_REMOTE,
   ])
 
-# lock recalculate mode
+# Lock recalculate mode
 LOCKS_REPLACE = 'replace'
 LOCKS_APPEND = 'append'
 
+# Lock timeout (sum) before we should go into blocking acquire (still
+# can be reset by priority change); computed as max time (10 hours)
+# before we should actually go into blocking acquire given that we
+# start from default priority level; in seconds
+LOCK_ATTEMPTS_TIMEOUT = 10 * 3600 / 20.0
+LOCK_ATTEMPTS_MAXWAIT = 15.0
+LOCK_ATTEMPTS_MINWAIT = 1.0
+
 # instance creation modes
 INSTANCE_CREATE = "create"
 INSTANCE_IMPORT = "import"
diff --git a/lib/daemon.py b/lib/daemon.py
index 12acc6421ce2a0daf68379990ef01816e7864399..6d6bd743ddd9acde2e16c905a323be3dd8635911 100644
--- a/lib/daemon.py
+++ b/lib/daemon.py
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007, 2008, 2010 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2010, 2011 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -682,11 +682,11 @@ def GenericMain(daemon_name, optionparser,
   utils.WritePidFile(utils.DaemonPidFileName(daemon_name))
   try:
     try:
+      logging.info("%s daemon startup", daemon_name)
       if callable(prepare_fn):
         prep_results = prepare_fn(options, args)
       else:
         prep_results = None
-      logging.info("%s daemon startup", daemon_name)
     except Exception, err:
       utils.WriteErrorToFD(wpipe, _BeautifyError(err))
       raise
diff --git a/lib/locking.py b/lib/locking.py
index 35750851e169ac51594598ae75359688b78c8519..57d260047d6f12a8b340b17494d532f653689143 100644
--- a/lib/locking.py
+++ b/lib/locking.py
@@ -33,6 +33,7 @@ import weakref
 import logging
 import heapq
 import operator
+import itertools
 
 from ganeti import errors
 from ganeti import utils
@@ -1514,6 +1515,17 @@ class GanetiLockManager:
     return self.__keyring[level].remove(names)
 
 
+def _MonitorSortKey((num, item)):
+  """Sorting key function.
+
+  Sort by name, then by incoming order.
+
+  """
+  (name, _, _, _) = item
+
+  return (utils.NiceSortKey(name), num)
+
+
 class LockMonitor(object):
   _LOCK_ATTR = "_lock"
 
@@ -1523,6 +1535,9 @@ class LockMonitor(object):
     """
     self._lock = SharedLock("LockMonitor")
 
+    # Counter for stable sorting
+    self._counter = itertools.count(0)
+
     # Tracked locks. Weak references are used to avoid issues with circular
     # references and deletion.
     self._locks = weakref.WeakKeyDictionary()
@@ -1534,16 +1549,21 @@ class LockMonitor(object):
 
     """
     logging.debug("Registering lock %s", lock.name)
     assert lock not in self._locks, "Duplicate lock registration"
-    assert not compat.any(lock.name == i.name for i in self._locks.keys()), \
-      "Found duplicate lock name"
 
-    self._locks[lock] = None
+    # There used to be a check for duplicate names here. As it turned out, when
+    # a lock is re-created with the same name in a very short timeframe, the
+    # previous instance might not yet be removed from the weakref dictionary.
+    # By keeping track of the order of incoming registrations, a stable sort
+    # ordering can still be guaranteed.
+
+    self._locks[lock] = self._counter.next()
 
   @ssynchronized(_LOCK_ATTR)
   def _GetLockInfo(self, requested):
     """Get information from all locks while the monitor lock is held.
 
     """
-    return [lock.GetInfo(requested) for lock in self._locks.keys()]
+    return [(num, lock.GetInfo(requested)) for lock, num in self._locks.items()]
@@ -1554,11 +1574,13 @@ class LockMonitor(object):
     """
     qobj = query.Query(query.LOCK_FIELDS, fields)
 
-    # Get all data and sort by name
-    lockinfo = utils.NiceSort(self._GetLockInfo(qobj.RequestedData()),
-                              key=operator.itemgetter(0))
+    # Get all data with internal lock held and then sort by name and incoming
+    # order
+    lockinfo = sorted(self._GetLockInfo(qobj.RequestedData()),
+                      key=_MonitorSortKey)
 
-    return (qobj, query.LockQueryData(lockinfo))
+    # Extract lock information and build query data
+    return (qobj, query.LockQueryData(map(operator.itemgetter(1), lockinfo)))
 
   def QueryLocks(self, fields):
     """Queries information from all locks.
diff --git a/lib/mcpu.py b/lib/mcpu.py
index 37588e1bdd5761d123bb208f5abddf4615734143..5728c3b64552404d97a570a6d6efd8e0538f5a81 100644
--- a/lib/mcpu.py
+++ b/lib/mcpu.py
@@ -56,23 +56,23 @@ def _CalculateLockAttemptTimeouts():
   """Calculate timeouts for lock attempts.
 
   """
-  result = [1.0]
+  result = [constants.LOCK_ATTEMPTS_MINWAIT]
+  running_sum = result[0]
 
-  # Wait for a total of at least 150s before doing a blocking acquire
-  while sum(result) < 150.0:
+  # Wait for a total of at least LOCK_ATTEMPTS_TIMEOUT before doing a
+  # blocking acquire
+  while running_sum < constants.LOCK_ATTEMPTS_TIMEOUT:
     timeout = (result[-1] * 1.05) ** 1.25
 
-    # Cap timeout at 10 seconds. This gives other jobs a chance to run
-    # even if we're still trying to get our locks, before finally moving
-    # to a blocking acquire.
-    if timeout > 10.0:
-      timeout = 10.0
-
-    elif timeout < 0.1:
-      # Lower boundary for safety
-      timeout = 0.1
+    # Cap max timeout. This gives other jobs a chance to run even if
+    # we're still trying to get our locks, before finally moving to a
+    # blocking acquire.
+    timeout = min(timeout, constants.LOCK_ATTEMPTS_MAXWAIT)
+    # And also cap the lower boundary for safety
+    timeout = max(timeout, constants.LOCK_ATTEMPTS_MINWAIT)
 
     result.append(timeout)
+    running_sum += timeout
 
   return result
diff --git a/lib/opcodes.py b/lib/opcodes.py
index 374e61f113e6ff3a469c8e78c5b4b0ba4ac02d92..dbcf8d73f40caef809ea8213ce1d150fcb3000c8 100644
--- a/lib/opcodes.py
+++ b/lib/opcodes.py
@@ -1119,8 +1119,12 @@ class OpInstanceQuery(OpCode):
 class OpInstanceQueryData(OpCode):
   """Compute the run-time status of instances."""
   OP_PARAMS = [
-    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), None),
-    ("static", False, ht.TBool, None),
+    _PUseLocking,
+    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString),
+     "Instance names"),
+    ("static", False, ht.TBool,
+     "Whether to only return configuration data without querying"
+     " nodes"),
     ]
diff --git a/lib/ssh.py b/lib/ssh.py
index ee8aba6eee51826d1f1f5063bd81b2bb1edf157b..1a3c101ae574fad6e11e9f1f41b37fea2d3064f1 100644
--- a/lib/ssh.py
+++ b/lib/ssh.py
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007, 2010 Google Inc.
+# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -254,7 +254,7 @@ class SshRunner:
       - detail: string with details
 
     """
-    retval = self.Run(node, 'root', 'hostname --fqdn')
+    retval = self.Run(node, "root", "hostname --fqdn", quiet=False)
 
     if retval.failed:
       msg = "ssh problem"
diff --git a/lib/utils/log.py b/lib/utils/log.py
index ee178b0ca822ce58bfc2af0c605595c2cd639a08..2ddd0f92da7c76255b019c802e717d1e422c66ba 100644
--- a/lib/utils/log.py
+++ b/lib/utils/log.py
@@ -246,9 +246,18 @@ def SetupLogging(logfile, program, debug=0, stderr_logging=False,
   # exception since otherwise we could run but without any logs at all
   try:
     if console_logging:
-      logfile_handler = _LogHandler(open(constants.DEV_CONSOLE, "a"), logfile)
+      logfile_handler = _LogHandler(open(constants.DEV_CONSOLE, "a"),
+                                    logfile)
     else:
       logfile_handler = _ReopenableLogHandler(logfile)
+
+    logfile_handler.setFormatter(formatter)
+    if debug:
+      logfile_handler.setLevel(logging.DEBUG)
+    else:
+      logfile_handler.setLevel(logging.INFO)
+    root_logger.addHandler(logfile_handler)
+    reopen_handlers.append(logfile_handler)
   except EnvironmentError:
     if stderr_logging or syslog == constants.SYSLOG_YES:
       logging.exception("Failed to enable logging to file '%s'", logfile)
@@ -256,13 +265,4 @@ def SetupLogging(logfile, program, debug=0, stderr_logging=False,
       # we need to re-raise the exception
       raise
 
-  logfile_handler.setFormatter(formatter)
-  if debug:
-    logfile_handler.setLevel(logging.DEBUG)
-  else:
-    logfile_handler.setLevel(logging.INFO)
-  root_logger.addHandler(logfile_handler)
-
-  reopen_handlers.append(logfile_handler)
-
   return compat.partial(_ReopenLogFiles, reopen_handlers)
diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py
index 7f18af4aa4c1ad148d3afb18345ade976c190316..9c7a448274f2840adc7c74238da27570c8bb7e10 100644
--- a/lib/watcher/__init__.py
+++ b/lib/watcher/__init__.py
@@ -694,14 +694,18 @@ def ParseOptions():
                         constants.RELEASE_VERSION)
   parser.add_option(cli.DEBUG_OPT)
-  parser.add_option("-A", "--job-age", dest="job_age",
+  parser.add_option("-A", "--job-age", dest="job_age", default=6 * 3600,
                     help="Autoarchive jobs older than this age (default"
-                    " 6 hours)", default=6*3600)
+                    " 6 hours)")
   parser.add_option("--ignore-pause", dest="ignore_pause", default=False,
                     action="store_true", help="Ignore cluster pause setting")
 
   options, args = parser.parse_args()
   options.job_age = cli.ParseTimespec(options.job_age)
-  return options, args
+
+  if args:
+    parser.error("No arguments expected")
+
+  return (options, args)
 
 
 @rapi.client.UsesRapiClient
@@ -711,11 +715,7 @@ def Main():
   """Main function.
 
   """
   global client # pylint: disable-msg=W0603
 
-  options, args = ParseOptions()
-
-  if args: # watcher doesn't take any arguments
-    print >> sys.stderr, ("Usage: %s [-f] " % sys.argv[0])
-    return constants.EXIT_FAILURE
+  (options, _) = ParseOptions()
 
   utils.SetupLogging(constants.LOG_WATCHER, sys.argv[0],
                      debug=options.debug, stderr_logging=options.debug)
diff --git a/test/cfgupgrade_unittest.py b/test/cfgupgrade_unittest.py
index 0819dfa56144886d623107f02f9eafac6509bdd6..359a298dcc0fef07316aa50ca6d956d744448f4a 100755
--- a/test/cfgupgrade_unittest.py
+++ b/test/cfgupgrade_unittest.py
@@ -161,12 +161,16 @@ class TestCfgupgrade(unittest.TestCase):
   def testRapiUsers(self):
     self.assertFalse(os.path.exists(self.rapi_users_path))
     self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
+    self.assertFalse(os.path.exists(os.path.dirname(self.rapi_users_path)))
 
     utils.WriteFile(self.rapi_users_path_pre24, data="some user\n")
     self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), False)
 
+    self.assertTrue(os.path.isdir(os.path.dirname(self.rapi_users_path)))
     self.assert_(os.path.islink(self.rapi_users_path_pre24))
     self.assert_(os.path.isfile(self.rapi_users_path))
+    self.assertEqual(os.readlink(self.rapi_users_path_pre24),
+                     self.rapi_users_path)
     for path in [self.rapi_users_path, self.rapi_users_path_pre24]:
       self.assertEqual(utils.ReadFile(path), "some user\n")
 
@@ -180,6 +184,8 @@ class TestCfgupgrade(unittest.TestCase):
 
     self.assert_(os.path.islink(self.rapi_users_path_pre24))
     self.assert_(os.path.isfile(self.rapi_users_path))
+    self.assertEqual(os.readlink(self.rapi_users_path_pre24),
+                     self.rapi_users_path)
     for path in [self.rapi_users_path, self.rapi_users_path_pre24]:
       self.assertEqual(utils.ReadFile(path), "other user\n")
 
@@ -187,13 +193,77 @@ class TestCfgupgrade(unittest.TestCase):
     self.assertFalse(os.path.exists(self.rapi_users_path))
     self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
 
+    os.mkdir(os.path.dirname(self.rapi_users_path))
     os.symlink(self.rapi_users_path, self.rapi_users_path_pre24)
-    utils.WriteFile(self.rapi_users_path_pre24, data="hello world\n")
+    utils.WriteFile(self.rapi_users_path, data="hello world\n")
 
     self._TestSimpleUpgrade(constants.BuildVersion(2, 2, 0), False)
 
-    self.assert_(os.path.isfile(self.rapi_users_path))
+    self.assert_(os.path.isfile(self.rapi_users_path) and
+                 not os.path.islink(self.rapi_users_path))
     self.assert_(os.path.islink(self.rapi_users_path_pre24))
+    self.assertEqual(os.readlink(self.rapi_users_path_pre24),
+                     self.rapi_users_path)
     for path in [self.rapi_users_path, self.rapi_users_path_pre24]:
       self.assertEqual(utils.ReadFile(path), "hello world\n")
 
+  def testRapiUsersExistingTarget(self):
+    self.assertFalse(os.path.exists(self.rapi_users_path))
+    self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
+
+    os.mkdir(os.path.dirname(self.rapi_users_path))
+    utils.WriteFile(self.rapi_users_path, data="other user\n")
+    utils.WriteFile(self.rapi_users_path_pre24, data="hello world\n")
+
+    self.assertRaises(Exception, self._TestSimpleUpgrade,
+                      constants.BuildVersion(2, 2, 0), False)
+
+    for path in [self.rapi_users_path, self.rapi_users_path_pre24]:
+      self.assert_(os.path.isfile(path) and not os.path.islink(path))
+    self.assertEqual(utils.ReadFile(self.rapi_users_path), "other user\n")
+    self.assertEqual(utils.ReadFile(self.rapi_users_path_pre24),
+                     "hello world\n")
+
+  def testRapiUsersDryRun(self):
+    self.assertFalse(os.path.exists(self.rapi_users_path))
+    self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
+
+    utils.WriteFile(self.rapi_users_path_pre24, data="some user\n")
+    self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), True)
+
+    self.assertFalse(os.path.isdir(os.path.dirname(self.rapi_users_path)))
+    self.assertTrue(os.path.isfile(self.rapi_users_path_pre24) and
+                    not os.path.islink(self.rapi_users_path_pre24))
+    self.assertFalse(os.path.exists(self.rapi_users_path))
+
+  def testRapiUsers24AndAboveDryRun(self):
+    self.assertFalse(os.path.exists(self.rapi_users_path))
+    self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
+
+    os.mkdir(os.path.dirname(self.rapi_users_path))
+    utils.WriteFile(self.rapi_users_path, data="other user\n")
+    self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), True)
+
+    self.assertTrue(os.path.isfile(self.rapi_users_path) and
+                    not os.path.islink(self.rapi_users_path))
+    self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
+    self.assertEqual(utils.ReadFile(self.rapi_users_path), "other user\n")
+
+  def testRapiUsersExistingSymlinkDryRun(self):
+    self.assertFalse(os.path.exists(self.rapi_users_path))
+    self.assertFalse(os.path.exists(self.rapi_users_path_pre24))
+
+    os.mkdir(os.path.dirname(self.rapi_users_path))
+    os.symlink(self.rapi_users_path, self.rapi_users_path_pre24)
+    utils.WriteFile(self.rapi_users_path, data="hello world\n")
+
+    self._TestSimpleUpgrade(constants.BuildVersion(2, 2, 0), True)
+
+    self.assertTrue(os.path.islink(self.rapi_users_path_pre24))
+    self.assertTrue(os.path.isfile(self.rapi_users_path) and
+                    not os.path.islink(self.rapi_users_path))
+    self.assertEqual(os.readlink(self.rapi_users_path_pre24),
+                     self.rapi_users_path)
     for path in [self.rapi_users_path, self.rapi_users_path_pre24]:
       self.assertEqual(utils.ReadFile(path), "hello world\n")
diff --git a/test/ganeti.locking_unittest.py b/test/ganeti.locking_unittest.py
index 811bfd6fac6e8289dcba47b03f75675e424a45fb..b17e7e8e85906c7d8b9cecedbdd4ee3de2e47928 100755
--- a/test/ganeti.locking_unittest.py
+++ b/test/ganeti.locking_unittest.py
@@ -28,6 +28,7 @@ import time
 import Queue
 import threading
 import random
+import gc
 import itertools
 
 from ganeti import constants
@@ -1914,6 +1915,80 @@ class TestLockMonitor(_ThreadedTestCase):
 
     self.assertEqual(len(self.lm._locks), 1)
 
+  def testDeleteAndRecreate(self):
+    lname = "TestLock101923193"
+
+    # Create some locks with the same name and keep all references
+    locks = [locking.SharedLock(lname, monitor=self.lm)
+             for _ in range(5)]
+
+    self.assertEqual(len(self.lm._locks), len(locks))
+
+    result = self.lm.QueryLocks(["name", "mode", "owner"])
+    self.assertEqual(objects.QueryResponse.FromDict(result).data,
+                     [[(constants.RS_NORMAL, lname),
+                       (constants.RS_NORMAL, None),
+                       (constants.RS_NORMAL, None)]] * 5)
+
+    locks[2].delete()
+
+    # Check information order
+    result = self.lm.QueryLocks(["name", "mode", "owner"])
+    self.assertEqual(objects.QueryResponse.FromDict(result).data,
+                     [[(constants.RS_NORMAL, lname),
+                       (constants.RS_NORMAL, None),
+                       (constants.RS_NORMAL, None)]] * 2 +
+                     [[(constants.RS_NORMAL, lname),
+                       (constants.RS_NORMAL, "deleted"),
+                       (constants.RS_NORMAL, None)]] +
+                     [[(constants.RS_NORMAL, lname),
+                       (constants.RS_NORMAL, None),
+                       (constants.RS_NORMAL, None)]] * 2)
+
+    locks[1].acquire(shared=0)
+
+    last_status = [
+      [(constants.RS_NORMAL, lname),
+       (constants.RS_NORMAL, None),
+       (constants.RS_NORMAL, None)],
+      [(constants.RS_NORMAL, lname),
+       (constants.RS_NORMAL, "exclusive"),
+       (constants.RS_NORMAL, [threading.currentThread().getName()])],
+      [(constants.RS_NORMAL, lname),
+       (constants.RS_NORMAL, "deleted"),
+       (constants.RS_NORMAL, None)],
+      [(constants.RS_NORMAL, lname),
+       (constants.RS_NORMAL, None),
+       (constants.RS_NORMAL, None)],
+      [(constants.RS_NORMAL, lname),
+       (constants.RS_NORMAL, None),
+       (constants.RS_NORMAL, None)],
+      ]
+
+    # Check information order
+    result = self.lm.QueryLocks(["name", "mode", "owner"])
+    self.assertEqual(objects.QueryResponse.FromDict(result).data, last_status)
+
+    self.assertEqual(len(set(self.lm._locks.values())), len(locks))
+    self.assertEqual(len(self.lm._locks), len(locks))
+
+    # Check lock deletion
+    for idx in range(len(locks)):
+      del locks[0]
+      assert gc.isenabled()
+      gc.collect()
+      self.assertEqual(len(self.lm._locks), len(locks))
+      result = self.lm.QueryLocks(["name", "mode", "owner"])
+      self.assertEqual(objects.QueryResponse.FromDict(result).data,
+                       last_status[idx + 1:])
+
+    # All locks should have been deleted
+    assert not locks
+    self.assertFalse(self.lm._locks)
+
+    result = self.lm.QueryLocks(["name", "mode", "owner"])
+    self.assertEqual(objects.QueryResponse.FromDict(result).data, [])
+
 
 if __name__ == '__main__':
   testutils.GanetiTestProgram()
diff --git a/test/ganeti.mcpu_unittest.py b/test/ganeti.mcpu_unittest.py
index aeca84d053c57d8171ea6b79fe0eafe4c1c2985a..20707623ca3a5532ae20e704372e457fef8090b2 100755
--- a/test/ganeti.mcpu_unittest.py
+++ b/test/ganeti.mcpu_unittest.py
@@ -26,6 +26,10 @@ import unittest
 
 from ganeti import mcpu
 from ganeti import opcodes
+from ganeti.constants import \
+    LOCK_ATTEMPTS_TIMEOUT, \
+    LOCK_ATTEMPTS_MAXWAIT, \
+    LOCK_ATTEMPTS_MINWAIT
 
 import testutils
 
@@ -33,8 +37,8 @@ import testutils
 class TestLockAttemptTimeoutStrategy(unittest.TestCase):
   def testConstants(self):
     tpa = mcpu.LockAttemptTimeoutStrategy._TIMEOUT_PER_ATTEMPT
-    self.assert_(len(tpa) > 10)
-    self.assert_(sum(tpa) >= 150.0)
+    self.assert_(len(tpa) > LOCK_ATTEMPTS_TIMEOUT / LOCK_ATTEMPTS_MAXWAIT)
+    self.assert_(sum(tpa) >= LOCK_ATTEMPTS_TIMEOUT)
 
   def testSimple(self):
     strat = mcpu.LockAttemptTimeoutStrategy(_random_fn=lambda: 0.5,
@@ -45,8 +49,8 @@ class TestLockAttemptTimeoutStrategy(unittest.TestCase):
       timeout = strat.NextAttempt()
       self.assert_(timeout is not None)
 
-      self.assert_(timeout <= 10.0)
-      self.assert_(timeout >= 0.0)
+      self.assert_(timeout <= LOCK_ATTEMPTS_MAXWAIT)
+      self.assert_(timeout >= LOCK_ATTEMPTS_MINWAIT)
       self.assert_(prev is None or timeout >= prev)
 
       prev = timeout
diff --git a/tools/cfgupgrade b/tools/cfgupgrade
index 320f229cabc5a64f8042be04be756791bc9098b4..0d341c35cc4d78ccca043485523033d9733b559a 100755
--- a/tools/cfgupgrade
+++ b/tools/cfgupgrade
@@ -185,17 +185,26 @@ def main():
     raise Error("Configuration version %d.%d.%d not supported by this tool" %
                 (config_major, config_minor, config_revision))
 
-  if os.path.isfile(options.RAPI_USERS_FILE_PRE24):
+  if (os.path.isfile(options.RAPI_USERS_FILE_PRE24) and
+      not os.path.islink(options.RAPI_USERS_FILE_PRE24)):
+    if os.path.exists(options.RAPI_USERS_FILE):
+      raise Error("Found pre-2.4 RAPI users file at %s, but another file"
+                  " already exists at %s" %
+                  (options.RAPI_USERS_FILE_PRE24, options.RAPI_USERS_FILE))
     logging.info("Found pre-2.4 RAPI users file at %s, renaming to %s",
                  options.RAPI_USERS_FILE_PRE24, options.RAPI_USERS_FILE)
-    utils.RenameFile(options.RAPI_USERS_FILE_PRE24, options.RAPI_USERS_FILE,
-                     mkdir=True, mkdir_mode=0750)
+    if not options.dry_run:
+      utils.RenameFile(options.RAPI_USERS_FILE_PRE24, options.RAPI_USERS_FILE,
+                       mkdir=True, mkdir_mode=0750)
 
   # Create a symlink for RAPI users file
-  if not os.path.islink(options.RAPI_USERS_FILE_PRE24):
+  if (not (os.path.islink(options.RAPI_USERS_FILE_PRE24) or
+           os.path.isfile(options.RAPI_USERS_FILE_PRE24)) and
+      os.path.isfile(options.RAPI_USERS_FILE)):
     logging.info("Creating symlink from %s to %s",
                  options.RAPI_USERS_FILE_PRE24, options.RAPI_USERS_FILE)
-    os.symlink(options.RAPI_USERS_FILE, options.RAPI_USERS_FILE_PRE24)
+    if not options.dry_run:
+      os.symlink(options.RAPI_USERS_FILE, options.RAPI_USERS_FILE_PRE24)
 
   try:
     logging.info("Writing configuration file to %s", options.CONFIG_DATA_PATH)
diff --git a/tools/cluster-merge b/tools/cluster-merge
index 21750f9b183cd373e491178f8b9face479b0c137..2de09bc60cf56e4b79bd7258db8e13b7947e2205 100755
--- a/tools/cluster-merge
+++ b/tools/cluster-merge
@@ -85,7 +85,7 @@ class MergerData(object):
 
     @param cluster: The name of the cluster
     @param key_path: Path to the ssh private key used for authentication
-    @param nodes: List of nodes in the merging cluster
+    @param nodes: List of online nodes in the merging cluster
     @param instances: List of instances running on merging cluster
     @param config_path: Path to the merging cluster config
 
@@ -144,13 +144,15 @@ class Merger(object):
       key_path = utils.PathJoin(self.work_dir, cluster)
       utils.WriteFile(key_path, mode=0600, data=result.stdout)
 
-      result = self._RunCmd(cluster, "gnt-node list -o name --no-header",
-                            private_key=key_path)
+      result = self._RunCmd(cluster, "gnt-node list -o name,offline"
+                            " --no-header --separator=,", private_key=key_path)
       if result.failed:
         raise errors.RemoteError("Unable to retrieve list of nodes from %s."
                                  " Fail reason: %s; output: %s" %
                                  (cluster, result.fail_reason, result.output))
-      nodes = result.stdout.splitlines()
+      nodes_statuses = [line.split(',') for line in result.stdout.splitlines()]
+      nodes = [node_status[0] for node_status in nodes_statuses
+               if node_status[1] == "N"]
 
       result = self._RunCmd(cluster, "gnt-instance list -o name --no-header",
                             private_key=key_path)
@@ -280,6 +282,7 @@ class Merger(object):
     for data in self.merger_data:
       other_config = config.ConfigWriter(data.config_path, accept_foreign=True)
 
+      self._MergeClusterConfigs(my_config, other_config)
      self._MergeNodeGroups(my_config, other_config)
 
       for node in other_config.GetNodeList():
@@ -308,6 +311,145 @@ class Merger(object):
                                         _CLUSTERMERGE_ECID + str(fake_ec_id))
         fake_ec_id += 1
 
+  # R0201: Method could be a function
+  def _MergeClusterConfigs(self, my_config, other_config):
+    """Checks that all relevant cluster parameters are compatible
+
+    """
+    # pylint: disable-msg=R0201
+    my_cluster = my_config.GetClusterInfo()
+    other_cluster = other_config.GetClusterInfo()
+    err_count = 0
+
+    #
+    # Generic checks
+    #
+    check_params = (
+      "beparams",
+      "default_iallocator",
+      "drbd_usermode_helper",
+      "file_storage_dir",
+      "hidden_os",
+      "maintain_node_health",
+      "master_netdev",
+      "ndparams",
+      "nicparams",
+      "primary_ip_family",
+      "tags",
+      "uid_pool",
+      "volume_group_name",
+      )
+    for param_name in check_params:
+      my_param = getattr(my_cluster, param_name)
+      other_param = getattr(other_cluster, param_name)
+      if my_param != other_param:
+        logging.error("The value (%s) of the cluster parameter %s on %s"
+                      " differs to this cluster's value (%s)",
+                      other_param, param_name, other_cluster.cluster_name,
+                      my_param)
+        err_count += 1
+
+    #
+    # Custom checks
+    #
+
+    # Check default hypervisor
+    my_defhyp = my_cluster.enabled_hypervisors[0]
+    other_defhyp = other_cluster.enabled_hypervisors[0]
+    if my_defhyp != other_defhyp:
+      logging.warning("The default hypervisor (%s) differs on %s, new"
+                      " instances will be created with this cluster's"
+                      " default hypervisor (%s)", other_defhyp,
+                      other_cluster.cluster_name, my_defhyp)
+
+    if (set(my_cluster.enabled_hypervisors) !=
+        set(other_cluster.enabled_hypervisors)):
+      logging.error("The set of enabled hypervisors (%s) on %s differs to"
+                    " this cluster's set (%s)",
+                    other_cluster.enabled_hypervisors,
+                    other_cluster.cluster_name, my_cluster.enabled_hypervisors)
+      err_count += 1
+
+    # Check hypervisor params for hypervisors we care about
+    # TODO: we probably don't care about all params for a given hypervisor
+    for hyp in my_cluster.enabled_hypervisors:
+      for param in my_cluster.hvparams[hyp]:
+        my_value = my_cluster.hvparams[hyp][param]
+        other_value = other_cluster.hvparams[hyp][param]
+        if my_value != other_value:
+          logging.error("The value (%s) of the %s parameter of the %s"
+                        " hypervisor on %s differs to this cluster's parameter"
+                        " (%s)",
+                        other_value, param, hyp, other_cluster.cluster_name,
+                        my_value)
+          err_count += 1
+
+    # Check os hypervisor params for hypervisors we care about
+    for os_name in set(my_cluster.os_hvp.keys() + other_cluster.os_hvp.keys()):
+      for hyp in my_cluster.enabled_hypervisors:
+        my_os_hvp = self._GetOsHypervisor(my_cluster, os_name, hyp)
+        other_os_hvp = self._GetOsHypervisor(other_cluster, os_name, hyp)
+        if my_os_hvp != other_os_hvp:
+          logging.error("The OS parameters (%s) for the %s OS for the %s"
+                        " hypervisor on %s differs to this cluster's parameters"
+                        " (%s)",
+                        other_os_hvp, os_name, hyp, other_cluster.cluster_name,
+                        my_os_hvp)
+          err_count += 1
+
+    #
+    # Warnings
+    #
+    if my_cluster.modify_etc_hosts != other_cluster.modify_etc_hosts:
+      logging.warning("The modify_etc_hosts value (%s) differs on %s,"
+                      " this cluster's value (%s) will take precedence",
+                      other_cluster.modify_etc_hosts,
+                      other_cluster.cluster_name,
+                      my_cluster.modify_etc_hosts)
+
+    if my_cluster.modify_ssh_setup != other_cluster.modify_ssh_setup:
+      logging.warning("The modify_ssh_setup value (%s) differs on %s,"
+                      " this cluster's value (%s) will take precedence",
+                      other_cluster.modify_ssh_setup,
+                      other_cluster.cluster_name,
+                      my_cluster.modify_ssh_setup)
+
+    #
+    # Actual merging
+    #
+    my_cluster.reserved_lvs = list(set(my_cluster.reserved_lvs +
+                                       other_cluster.reserved_lvs))
+
+    if my_cluster.prealloc_wipe_disks != other_cluster.prealloc_wipe_disks:
+      logging.warning("The prealloc_wipe_disks value (%s) on %s differs to this"
+                      " cluster's value (%s). The least permissive value (%s)"
+                      " will be used", other_cluster.prealloc_wipe_disks,
+                      other_cluster.cluster_name,
+                      my_cluster.prealloc_wipe_disks, True)
+      my_cluster.prealloc_wipe_disks = True
+
+    for os_, osparams in other_cluster.osparams.items():
+      if os_ not in my_cluster.osparams:
+        my_cluster.osparams[os_] = osparams
+      elif my_cluster.osparams[os_] != osparams:
+        logging.error("The OS parameters (%s) for the %s OS on %s differs to"
+                      " this cluster's parameters (%s)",
+                      osparams, os_, other_cluster.cluster_name,
+                      my_cluster.osparams[os_])
+        err_count += 1
+
+    if err_count:
+      raise errors.ConfigurationError("Cluster config for %s has incompatible"
+                                      " values, please fix and re-run" %
+                                      other_cluster.cluster_name)
+
+  # R0201: Method could be a function
+  def _GetOsHypervisor(self, cluster, os_name, hyp): # pylint: disable-msg=R0201
+    if os_name in cluster.os_hvp:
+      return cluster.os_hvp[os_name].get(hyp, None)
+    else:
+      return None
+
   # R0201: Method could be a function
   def _MergeNodeGroups(self, my_config, other_config):
     """Adds foreign node groups
@@ -534,7 +676,7 @@ def SetupLogging(options):
   elif options.verbose:
     stderr_handler.setLevel(logging.INFO)
   else:
-    stderr_handler.setLevel(logging.ERROR)
+    stderr_handler.setLevel(logging.WARNING)
 
   root_logger = logging.getLogger("")
   root_logger.setLevel(logging.NOTSET)
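

Notes on selected changes
-------------------------

The ``tools/cfgupgrade`` hunk is the heart of the 2.4.1 fix: the decision
logic and logging still run on every invocation, but the two filesystem
mutations are now gated on ``--dry-run``, and the rename refuses to clobber
an existing target. A minimal sketch of that pattern (``migrate_rapi_users``
and its arguments are illustrative names, not the tool's real interface)::

  import logging
  import os

  def migrate_rapi_users(pre24_path, new_path, dry_run):
    # Only migrate a real file; a symlink means a previous run already did
    if os.path.isfile(pre24_path) and not os.path.islink(pre24_path):
      if os.path.exists(new_path):
        # Refusing to overwrite is what the old, buggy version failed to do
        raise RuntimeError("%s already exists, not overwriting" % new_path)
      logging.info("Renaming %s to %s", pre24_path, new_path)
      if not dry_run:
        os.rename(pre24_path, new_path)
    # Leave a compatibility symlink, but only if the target really exists
    if (not (os.path.islink(pre24_path) or os.path.isfile(pre24_path)) and
        os.path.isfile(new_path)):
      logging.info("Symlinking %s to %s", pre24_path, new_path)
      if not dry_run:
        os.symlink(new_path, pre24_path)

Because only the mutating calls are skipped, a dry run logs the same actions
a real run would perform.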
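
The new paragraph in ``doc/rapi.rst`` implies that any body-carrying request
must send ``Content-type: application/json``. A hedged client-side
illustration in Python 2 (the host, port and resource below are placeholders,
not taken from this patch)::

  import json
  import urllib2

  req = urllib2.Request("https://cluster.example.com:5080/2/tags?tag=demo",
                        data=json.dumps(None))
  # Without this header the server may reject or misparse the body
  req.add_header("Content-type", "application/json")
  req.get_method = lambda: "PUT"  # urllib2 defaults to POST when data is set
  # urllib2.urlopen(req) would submit the request to a live cluster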
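
In ``lib/mcpu.py`` the hard-coded backoff numbers (1.0s initial wait, 10s
cap, 150s total) give way to the named ``LOCK_ATTEMPTS_*`` constants, and the
repeatedly recomputed ``sum(result)`` in the loop condition becomes a running
sum. A standalone sketch of the patched computation, runnable outside the
module::

  LOCK_ATTEMPTS_TIMEOUT = 10 * 3600 / 20.0  # 1800s before blocking acquire
  LOCK_ATTEMPTS_MAXWAIT = 15.0
  LOCK_ATTEMPTS_MINWAIT = 1.0

  def calc_lock_attempt_timeouts():
    result = [LOCK_ATTEMPTS_MINWAIT]
    running_sum = result[0]
    while running_sum < LOCK_ATTEMPTS_TIMEOUT:
      # Grow slightly faster than linearly ...
      timeout = (result[-1] * 1.05) ** 1.25
      # ... but clamp every attempt into [MINWAIT, MAXWAIT]
      timeout = min(timeout, LOCK_ATTEMPTS_MAXWAIT)
      timeout = max(timeout, LOCK_ATTEMPTS_MINWAIT)
      result.append(timeout)
      running_sum += timeout
    return result

  timeouts = calc_lock_attempt_timeouts()
  # The same properties the updated unittest asserts:
  assert sum(timeouts) >= LOCK_ATTEMPTS_TIMEOUT
  assert len(timeouts) > LOCK_ATTEMPTS_TIMEOUT / LOCK_ATTEMPTS_MAXWAIT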
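
The ``lib/locking.py`` change removes the duplicate-name assertion because it
races: a freshly deleted lock can linger in the weakref dictionary while a
new lock with the same name registers. Each registration therefore gets a
number from ``itertools.count``, making ``(name, registration order)`` a
stable sort key. A toy model of the idea (``MiniMonitor`` and ``_Lock`` are
illustration-only names; the real code sorts names with
``utils.NiceSortKey``)::

  import itertools
  import weakref

  class _Lock(object):  # stand-in for locking.SharedLock
    def __init__(self, name):
      self.name = name

  class MiniMonitor(object):
    def __init__(self):
      self._counter = itertools.count(0)
      # Weak keys: an entry vanishes once its lock is garbage-collected
      self._locks = weakref.WeakKeyDictionary()

    def RegisterLock(self, lock):
      # No uniqueness check; the counter disambiguates same-named locks
      self._locks[lock] = self._counter.next()

    def QueryNames(self):
      entries = [(num, lock.name) for lock, num in self._locks.items()]
      entries.sort(key=lambda (num, name): (name, num))
      return [name for (num, name) in entries]

  mon = MiniMonitor()
  locks = [_Lock("config") for _ in range(3)]
  for lock in locks:
    mon.RegisterLock(lock)
  # Same-named locks stay in registration order
  assert mon.QueryNames() == ["config", "config", "config"]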
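
Lastly, ``tools/cluster-merge`` now asks ``gnt-node list`` for
``name,offline`` pairs and keeps only online nodes (offline flag ``N``),
matching the updated ``@param nodes`` docstring. The parsing step in
isolation, on sample output::

  stdout = "node1.example.com,N\nnode2.example.com,Y\nnode3.example.com,N\n"
  nodes_statuses = [line.split(",") for line in stdout.splitlines()]
  nodes = [name for (name, offline) in nodes_statuses if offline == "N"]
  assert nodes == ["node1.example.com", "node3.example.com"]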