diff --git a/lib/client/gnt_node.py b/lib/client/gnt_node.py
index 7ac8a3968d60cbad37373c5394c74496f933c04b..66df3543b6b4497f64ecd0f941b1c71a9ba4105a 100644
--- a/lib/client/gnt_node.py
+++ b/lib/client/gnt_node.py
@@ -279,11 +279,11 @@ def EvacuateNode(opts, args):
                                " --secondary-only options can be passed",
                                errors.ECODE_INVAL)
   elif opts.primary_only:
-    mode = constants.IALLOCATOR_NEVAC_PRI
+    mode = constants.NODE_EVAC_PRI
   elif opts.secondary_only:
-    mode = constants.IALLOCATOR_NEVAC_SEC
+    mode = constants.NODE_EVAC_SEC
   else:
-    mode = constants.IALLOCATOR_NEVAC_ALL
+    mode = constants.NODE_EVAC_ALL
 
   # Determine affected instances
   fields = []
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index ebc34f4e2a6706007b51a201cb0b60f162a1604b..2f32b80e5cb988b39bb5cd76028eae21db4b19bf 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -4512,7 +4512,7 @@ class LUNodeRemove(LogicalUnit):
       raise errors.OpPrereqError("Node is the master node, failover to another"
                                  " node is required", errors.ECODE_INVAL)
 
-    for instance_name, instance in self.cfg.GetAllInstancesInfo():
+    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
       if node.name in instance.all_nodes:
         raise errors.OpPrereqError("Instance %s is still running on the node,"
                                    " please remove first" % instance_name,
@@ -10493,6 +10493,15 @@ class LUNodeEvacuate(NoHooksLU):
   """
   REQ_BGL = False
 
+  _MODE2IALLOCATOR = {
+    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
+    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
+    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
+    }
+  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
+  assert (frozenset(_MODE2IALLOCATOR.values()) ==
+          constants.IALLOCATOR_NEVAC_MODES)
+
   def CheckArguments(self):
     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
 
@@ -10507,7 +10516,7 @@ class LUNodeEvacuate(NoHooksLU):
         raise errors.OpPrereqError("Can not use evacuated node as a new"
                                    " secondary node", errors.ECODE_INVAL)
 
-      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
+      if self.op.mode != constants.NODE_EVAC_SEC:
         raise errors.OpPrereqError("Without the use of an iallocator only"
                                    " secondary instances can be evacuated",
                                    errors.ECODE_INVAL)
@@ -10520,6 +10529,14 @@ class LUNodeEvacuate(NoHooksLU):
       locking.LEVEL_NODE: [],
       }
 
+    # Determine nodes (via group) optimistically, needs verification once locks
+    # have been acquired
+    self.lock_nodes = self._DetermineNodes()
+
+  def _DetermineNodes(self):
+    """Gets the list of nodes to operate on.
+
+    """
     if self.op.remote_node is None:
       # Iallocator will choose any node(s) in the same group
       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
@@ -10527,26 +10544,34 @@ class LUNodeEvacuate(NoHooksLU):
       group_nodes = frozenset([self.op.remote_node])
 
     # Determine nodes to be locked
-    self.lock_nodes = set([self.op.node_name]) | group_nodes
+    return set([self.op.node_name]) | group_nodes
 
   def _DetermineInstances(self):
     """Builds list of instances to operate on.
 
     """
-    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
+    assert self.op.mode in constants.NODE_EVAC_MODES
 
-    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
+    if self.op.mode == constants.NODE_EVAC_PRI:
       # Primary instances only
       inst_fn = _GetNodePrimaryInstances
       assert self.op.remote_node is None, \
         "Evacuating primary instances requires iallocator"
-    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
+    elif self.op.mode == constants.NODE_EVAC_SEC:
       # Secondary instances only
       inst_fn = _GetNodeSecondaryInstances
     else:
       # All instances
-      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
+      assert self.op.mode == constants.NODE_EVAC_ALL
       inst_fn = _GetNodeInstances
+      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
+      # per instance
+      raise errors.OpPrereqError("Due to an issue with the iallocator"
+                                 " interface it is not possible to evacuate"
+                                 " all instances at once; specify explicitly"
+                                 " whether to evacuate primary or secondary"
+                                 " instances",
+                                 errors.ECODE_INVAL)
 
     return inst_fn(self.cfg, self.op.node_name)
 
@@ -10558,8 +10583,8 @@ class LUNodeEvacuate(NoHooksLU):
         set(i.name for i in self._DetermineInstances())
 
     elif level == locking.LEVEL_NODEGROUP:
-      # Lock node groups optimistically, needs verification once nodes have
-      # been acquired
+      # Lock node groups for all potential target nodes optimistically, needs
+      # verification once nodes have been acquired
       self.needed_locks[locking.LEVEL_NODEGROUP] = \
         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
 
@@ -10572,12 +10597,23 @@ class LUNodeEvacuate(NoHooksLU):
     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
 
-    assert owned_nodes == self.lock_nodes
+    need_nodes = self._DetermineNodes()
+
+    if not owned_nodes.issuperset(need_nodes):
+      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
+                                 " locks were acquired, current nodes are"
+                                 " '%s', used to be '%s'; retry the"
+                                 " operation" %
+                                 (self.op.node_name,
+                                  utils.CommaJoin(need_nodes),
+                                  utils.CommaJoin(owned_nodes)),
+                                 errors.ECODE_STATE)
 
     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
     if owned_groups != wanted_groups:
       raise errors.OpExecError("Node groups changed since locks were acquired,"
-                               " current groups are '%s', used to be '%s'" %
+                               " current groups are '%s', used to be '%s';"
+                               " retry the operation" %
                                (utils.CommaJoin(wanted_groups),
                                 utils.CommaJoin(owned_groups)))
 
@@ -10588,7 +10624,7 @@ class LUNodeEvacuate(NoHooksLU):
     if set(self.instance_names) != owned_instances:
       raise errors.OpExecError("Instances on node '%s' changed since locks"
                                " were acquired, current instances are '%s',"
-                               " used to be '%s'" %
+                               " used to be '%s'; retry the operation" %
                                (self.op.node_name,
                                 utils.CommaJoin(self.instance_names),
                                 utils.CommaJoin(owned_instances)))
@@ -10620,7 +10656,7 @@ class LUNodeEvacuate(NoHooksLU):
     elif self.op.iallocator is not None:
       # TODO: Implement relocation to other group
       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
-                       evac_mode=self.op.mode,
+                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                        instances=list(self.instance_names))
 
       ial.Run(self.op.iallocator)
@@ -10634,7 +10670,7 @@ class LUNodeEvacuate(NoHooksLU):
       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
 
     elif self.op.remote_node is not None:
-      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
+      assert self.op.mode == constants.NODE_EVAC_SEC
       jobs = [
         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                         remote_node=self.op.remote_node,
@@ -12508,13 +12544,9 @@ class LUGroupAssignNodes(NoHooksLU):
     """Assign nodes to a new group.
 
     """
-    for node in self.op.nodes:
-      self.node_data[node].group = self.group_uuid
-
-    # FIXME: Depends on side-effects of modifying the result of
-    # C{cfg.GetAllNodesInfo}
+    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
 
-    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
+    self.cfg.AssignGroupNodes(mods)
 
   @staticmethod
   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
diff --git a/lib/config.py b/lib/config.py
index d5ca4196ba74753bc999f3d4f3e9808e5fdba85e..c1c857b4834ff70f98c9be0df5c757e156fdf6ae 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -38,6 +38,7 @@ import os
 import random
 import logging
 import time
+import itertools
 
 from ganeti import errors
 from ganeti import locking
@@ -1676,6 +1677,79 @@ class ConfigWriter:
     else:
       nodegroup_obj.members.remove(node.name)
 
+  @locking.ssynchronized(_config_lock)
+  def AssignGroupNodes(self, mods):
+    """Changes the group of a number of nodes.
+
+    @type mods: list of tuples; (node name, new group UUID)
+    @param mods: Node membership modifications
+
+    """
+    groups = self._config_data.nodegroups
+    nodes = self._config_data.nodes
+
+    resmod = []
+
+    # Try to resolve names/UUIDs first
+    for (node_name, new_group_uuid) in mods:
+      try:
+        node = nodes[node_name]
+      except KeyError:
+        raise errors.ConfigurationError("Unable to find node '%s'" % node_name)
+
+      if node.group == new_group_uuid:
+        # Node is being assigned to its current group
+        logging.debug("Node '%s' was assigned to its current group (%s)",
+                      node_name, node.group)
+        continue
+
+      # Try to find current group of node
+      try:
+        old_group = groups[node.group]
+      except KeyError:
+        raise errors.ConfigurationError("Unable to find old group '%s'" %
+                                        node.group)
+
+      # Try to find new group for node
+      try:
+        new_group = groups[new_group_uuid]
+      except KeyError:
+        raise errors.ConfigurationError("Unable to find new group '%s'" %
+                                        new_group_uuid)
+
+      assert node.name in old_group.members, \
+        ("Inconsistent configuration: node '%s' not listed in members for its"
+         " old group '%s'" % (node.name, old_group.uuid))
+      assert node.name not in new_group.members, \
+        ("Inconsistent configuration: node '%s' already listed in members for"
+         " its new group '%s'" % (node.name, new_group.uuid))
+
+      resmod.append((node, old_group, new_group))
+
+    # Apply changes
+    for (node, old_group, new_group) in resmod:
+      assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
+        "Assigning to current group is not possible"
+
+      node.group = new_group.uuid
+
+      # Update members of involved groups
+      if node.name in old_group.members:
+        old_group.members.remove(node.name)
+      if node.name not in new_group.members:
+        new_group.members.append(node.name)
+
+    # Update timestamps and serials (only once per node/group object)
+    now = time.time()
+    for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
+      obj.serial_no += 1
+      obj.mtime = now
+
+    # Force ssconf update
+    self._config_data.cluster.serial_no += 1
+
+    self._WriteConfig()
+
   def _BumpSerialNo(self):
     """Bump up the serial number of the config.
 
diff --git a/lib/constants.py b/lib/constants.py
index 385d3cd76853ea343e19de01ca182a07ab0b5d68..47d2a0dfb62f49f37b7a5a6190a65d1e80c2b9f6 100644
--- a/lib/constants.py
+++ b/lib/constants.py
@@ -1340,6 +1340,16 @@ IALLOCATOR_NEVAC_MODES = frozenset([
   IALLOCATOR_NEVAC_ALL,
   ])
 
+# Node evacuation
+NODE_EVAC_PRI = "primary-only"
+NODE_EVAC_SEC = "secondary-only"
+NODE_EVAC_ALL = "all"
+NODE_EVAC_MODES = frozenset([
+  NODE_EVAC_PRI,
+  NODE_EVAC_SEC,
+  NODE_EVAC_ALL,
+  ])
+
 # Job queue
 JOB_QUEUE_VERSION = 1
 JOB_QUEUE_LOCK_FILE = QUEUE_DIR + "/lock"
diff --git a/lib/opcodes.py b/lib/opcodes.py
index 454a3780def6026b7765256d641c608873dafefd..64850ecf5638ba88505bfa0717d2f72e656d0cbd 100644
--- a/lib/opcodes.py
+++ b/lib/opcodes.py
@@ -1036,7 +1036,7 @@ class OpNodeEvacuate(OpCode):
     _PNodeName,
     ("remote_node", None, ht.TMaybeString, "New secondary node"),
     ("iallocator", None, ht.TMaybeString, "Iallocator for computing solution"),
-    ("mode", ht.NoDefault, ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES),
+    ("mode", ht.NoDefault, ht.TElemOf(constants.NODE_EVAC_MODES),
     "Node evacuation mode"),
     ]
   OP_RESULT = TJobIdListOnly
diff --git a/test/ganeti.config_unittest.py b/test/ganeti.config_unittest.py
index a21bfa6da85568676b4d728d9d15cc4feb88af4b..e82872c6b0e1ff40fc5302924d65970dd8e83b7b 100755
--- a/test/ganeti.config_unittest.py
+++ b/test/ganeti.config_unittest.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 #
 
-# Copyright (C) 2006, 2007, 2010 Google Inc.
+# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -28,6 +28,8 @@ import time
 import tempfile
 import os.path
 import socket
+import operator
+import itertools
 
 from ganeti import bootstrap
 from ganeti import config
@@ -36,6 +38,7 @@ from ganeti import errors
 from ganeti import objects
 from ganeti import utils
 from ganeti import netutils
+from ganeti import compat
 
 from ganeti.config import TemporaryReservationManager
 
@@ -239,6 +242,127 @@ class TestConfigRunner(unittest.TestCase):
     cfg.AddNodeGroup(group, "my-job", check_uuid=False) # Does not raise.
     self.assertEqual(uuid, group.uuid)
 
+  def testAssignGroupNodes(self):
+    me = netutils.Hostname()
+    cfg = self._get_object()
+
+    # Create two groups
+    grp1 = objects.NodeGroup(name="grp1", members=[],
+                             uuid="2f2fadf7-2a70-4a23-9ab5-2568c252032c")
+    grp1_serial = 1
+    cfg.AddNodeGroup(grp1, "job")
+
+    grp2 = objects.NodeGroup(name="grp2", members=[],
+                             uuid="798d0de3-680f-4a0e-b29a-0f54f693b3f1")
+    grp2_serial = 1
+    cfg.AddNodeGroup(grp2, "job")
+    self.assertEqual(set(map(operator.attrgetter("name"),
+                             cfg.GetAllNodeGroupsInfo().values())),
+                     set(["grp1", "grp2", constants.INITIAL_NODE_GROUP_NAME]))
+
+    # No-op
+    cluster_serial = cfg.GetClusterInfo().serial_no
+    cfg.AssignGroupNodes([])
+    cluster_serial += 1
+
+    # Create two nodes
+    node1 = objects.Node(name="node1", group=grp1.uuid, ndparams={})
+    node1_serial = 1
+    node2 = objects.Node(name="node2", group=grp2.uuid, ndparams={})
+    node2_serial = 1
+    cfg.AddNode(node1, "job")
+    cfg.AddNode(node2, "job")
+    cluster_serial += 2
+    self.assertEqual(set(cfg.GetNodeList()), set(["node1", "node2", me.name]))
+
+    def _VerifySerials():
+      self.assertEqual(cfg.GetClusterInfo().serial_no, cluster_serial)
+      self.assertEqual(node1.serial_no, node1_serial)
+      self.assertEqual(node2.serial_no, node2_serial)
+      self.assertEqual(grp1.serial_no, grp1_serial)
+      self.assertEqual(grp2.serial_no, grp2_serial)
+
+    _VerifySerials()
+
+    self.assertEqual(set(grp1.members), set(["node1"]))
+    self.assertEqual(set(grp2.members), set(["node2"]))
+
+    # Check invalid nodes and groups
+    self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [
+      ("unknown.node.example.com", grp2.uuid),
+      ])
+    self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [
+      (node1.name, "unknown-uuid"),
+      ])
+
+    self.assertEqual(node1.group, grp1.uuid)
+    self.assertEqual(node2.group, grp2.uuid)
+    self.assertEqual(set(grp1.members), set(["node1"]))
+    self.assertEqual(set(grp2.members), set(["node2"]))
+
+    # Another no-op
+    cfg.AssignGroupNodes([])
+    cluster_serial += 1
+    _VerifySerials()
+
+    # Assign to the same group (should be a no-op)
+    self.assertEqual(node2.group, grp2.uuid)
+    cfg.AssignGroupNodes([
+      (node2.name, grp2.uuid),
+      ])
+    cluster_serial += 1
+    self.assertEqual(node2.group, grp2.uuid)
+    _VerifySerials()
+    self.assertEqual(set(grp1.members), set(["node1"]))
+    self.assertEqual(set(grp2.members), set(["node2"]))
+
+    # Assign node 2 to group 1
+    self.assertEqual(node2.group, grp2.uuid)
+    cfg.AssignGroupNodes([
+      (node2.name, grp1.uuid),
+      ])
+    cluster_serial += 1
+    node2_serial += 1
+    grp1_serial += 1
+    grp2_serial += 1
+    self.assertEqual(node2.group, grp1.uuid)
+    _VerifySerials()
+    self.assertEqual(set(grp1.members), set(["node1", "node2"]))
+    self.assertFalse(grp2.members)
+
+    # And assign both nodes to group 2
+    self.assertEqual(node1.group, grp1.uuid)
+    self.assertEqual(node2.group, grp1.uuid)
+    self.assertNotEqual(grp1.uuid, grp2.uuid)
+    cfg.AssignGroupNodes([
+      (node1.name, grp2.uuid),
+      (node2.name, grp2.uuid),
+      ])
+    cluster_serial += 1
+    node1_serial += 1
+    node2_serial += 1
+    grp1_serial += 1
+    grp2_serial += 1
+    self.assertEqual(node1.group, grp2.uuid)
+    self.assertEqual(node2.group, grp2.uuid)
+    _VerifySerials()
+    self.assertFalse(grp1.members)
+    self.assertEqual(set(grp2.members), set(["node1", "node2"]))
+
+    # Destructive tests
+    orig_group = node2.group
+    try:
+      other_uuid = "68b3d087-6ea5-491c-b81f-0a47d90228c5"
+      assert compat.all(node.group != other_uuid
+                        for node in cfg.GetAllNodesInfo().values())
+      node2.group = "68b3d087-6ea5-491c-b81f-0a47d90228c5"
+      self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [
+        ("node2", grp2.uuid),
+        ])
+      _VerifySerials()
+    finally:
+      node2.group = orig_group
+
 
 class TestTRM(unittest.TestCase):
   EC_ID = 1
diff --git a/test/ganeti.rapi.client_unittest.py b/test/ganeti.rapi.client_unittest.py
index 849207e7e001b0bb39980133af82f2b3847e082d..bc07614622869a0bebcfb30630265249c3f08120 100755
--- a/test/ganeti.rapi.client_unittest.py
+++ b/test/ganeti.rapi.client_unittest.py
@@ -166,9 +166,9 @@ class TestConstants(unittest.TestCase):
     self.assertEqual(client.JOB_STATUS_ALL, constants.JOB_STATUS_ALL)
 
     # Node evacuation
-    self.assertEqual(client.NODE_EVAC_PRI, constants.IALLOCATOR_NEVAC_PRI)
-    self.assertEqual(client.NODE_EVAC_SEC, constants.IALLOCATOR_NEVAC_SEC)
-    self.assertEqual(client.NODE_EVAC_ALL, constants.IALLOCATOR_NEVAC_ALL)
+    self.assertEqual(client.NODE_EVAC_PRI, constants.NODE_EVAC_PRI)
+    self.assertEqual(client.NODE_EVAC_SEC, constants.NODE_EVAC_SEC)
+    self.assertEqual(client.NODE_EVAC_ALL, constants.NODE_EVAC_ALL)
 
     # Legacy name
     self.assertEqual(client.JOB_STATUS_WAITLOCK, constants.JOB_STATUS_WAITING)
@@ -866,7 +866,7 @@ class GanetiRapiClientTests(testutils.GanetiTestCase):
     self.rapi.AddResponse(serializer.DumpJson([rlib2._NODE_EVAC_RES1]))
     self.rapi.AddResponse("8888")
     job_id = self.client.EvacuateNode("node-3", iallocator="hail", dry_run=True,
                                       mode=constants.NODE_EVAC_ALL,
                                       early_release=True)
     self.assertEqual(8888, job_id)
    self.assertItems(["node-3"])