Commit 75191077 authored by Michael Hanselmann

Merge branch 'devel-2.5'



* devel-2.5:
  LUGroupAssignNodes: Fix node membership corruption
  Fix pylint warning on unreachable code
  LUNodeEvacuate: Disallow migrating all instances at once
  Separate OpNodeEvacuate.mode from iallocator
  LUNodeEvacuate: Locking fixes
  Fix error when removing node
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: René Nussbaumer <rn@google.com>
parents 90bb3fe1 a0d39c8f
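
The central change in this merge is that the mode carried by OpNodeEvacuate is no longer the iallocator constant itself: clients and opcodes use the new NODE_EVAC_* values, and LUNodeEvacuate translates them before talking to the iallocator. A minimal sketch of that translation, using the names introduced in the diffs below (illustrative, not a verbatim excerpt):

from ganeti import constants

# Opcode-level evacuation mode -> iallocator-level mode, mirroring the
# _MODE2IALLOCATOR dictionary added to LUNodeEvacuate in this merge.
_MODE2IALLOCATOR = {
  constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
  constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
  constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
  }

# Both sides have to stay in sync with their respective mode sets.
assert frozenset(_MODE2IALLOCATOR) == constants.NODE_EVAC_MODES
assert frozenset(_MODE2IALLOCATOR.values()) == constants.IALLOCATOR_NEVAC_MODES
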
@@ -279,11 +279,11 @@ def EvacuateNode(opts, args):
" --secondary-only options can be passed",
errors.ECODE_INVAL)
elif opts.primary_only:
mode = constants.IALLOCATOR_NEVAC_PRI
mode = constants.NODE_EVAC_PRI
elif opts.secondary_only:
mode = constants.IALLOCATOR_NEVAC_SEC
mode = constants.NODE_EVAC_SEC
else:
mode = constants.IALLOCATOR_NEVAC_ALL
mode = constants.NODE_EVAC_ALL
# Determine affected instances
fields = []
......
@@ -4512,7 +4512,7 @@ class LUNodeRemove(LogicalUnit):
raise errors.OpPrereqError("Node is the master node, failover to another"
" node is required", errors.ECODE_INVAL)
for instance_name, instance in self.cfg.GetAllInstancesInfo():
for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
if node.name in instance.all_nodes:
raise errors.OpPrereqError("Instance %s is still running on the node,"
" please remove first" % instance_name,
@@ -10493,6 +10493,15 @@ class LUNodeEvacuate(NoHooksLU):
"""
REQ_BGL = False
_MODE2IALLOCATOR = {
constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
}
assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
assert (frozenset(_MODE2IALLOCATOR.values()) ==
constants.IALLOCATOR_NEVAC_MODES)
def CheckArguments(self):
_CheckIAllocatorOrNode(self, "iallocator", "remote_node")
@@ -10507,7 +10516,7 @@ class LUNodeEvacuate(NoHooksLU):
raise errors.OpPrereqError("Can not use evacuated node as a new"
" secondary node", errors.ECODE_INVAL)
if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
if self.op.mode != constants.NODE_EVAC_SEC:
raise errors.OpPrereqError("Without the use of an iallocator only"
" secondary instances can be evacuated",
errors.ECODE_INVAL)
@@ -10520,6 +10529,14 @@ class LUNodeEvacuate(NoHooksLU):
locking.LEVEL_NODE: [],
}
# Determine nodes (via group) optimistically, needs verification once locks
# have been acquired
self.lock_nodes = self._DetermineNodes()
def _DetermineNodes(self):
"""Gets the list of nodes to operate on.
"""
if self.op.remote_node is None:
# Iallocator will choose any node(s) in the same group
group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
@@ -10527,26 +10544,34 @@ class LUNodeEvacuate(NoHooksLU):
group_nodes = frozenset([self.op.remote_node])
# Determine nodes to be locked
self.lock_nodes = set([self.op.node_name]) | group_nodes
return set([self.op.node_name]) | group_nodes
def _DetermineInstances(self):
"""Builds list of instances to operate on.
"""
assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
assert self.op.mode in constants.NODE_EVAC_MODES
if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
if self.op.mode == constants.NODE_EVAC_PRI:
# Primary instances only
inst_fn = _GetNodePrimaryInstances
assert self.op.remote_node is None, \
"Evacuating primary instances requires iallocator"
elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
elif self.op.mode == constants.NODE_EVAC_SEC:
# Secondary instances only
inst_fn = _GetNodeSecondaryInstances
else:
# All instances
assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
assert self.op.mode == constants.NODE_EVAC_ALL
inst_fn = _GetNodeInstances
# TODO: In 2.6, change the iallocator interface to take an evacuation mode
# per instance
raise errors.OpPrereqError("Due to an issue with the iallocator"
" interface it is not possible to evacuate"
" all instances at once; specify explicitly"
" whether to evacuate primary or secondary"
" instances",
errors.ECODE_INVAL)
return inst_fn(self.cfg, self.op.node_name)
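
Since evacuating everything in one opcode is now rejected, a caller wanting to empty a node has to submit two operations, one per mode. A rough sketch using the RAPI client whose test is updated further down (host, node name and allocator are placeholders; WaitForJobCompletion is assumed to be available as in the client module):

from ganeti import constants
from ganeti.rapi import client as rapi_client

cl = rapi_client.GanetiRapiClient("cluster.example.com")  # placeholder host

# First move the primary instances away, then replace the secondaries.
for mode in (constants.NODE_EVAC_PRI, constants.NODE_EVAC_SEC):
  job_id = cl.EvacuateNode("node-3", iallocator="hail", mode=mode)
  cl.WaitForJobCompletion(job_id)  # wait before starting the next step
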
@@ -10558,8 +10583,8 @@ class LUNodeEvacuate(NoHooksLU):
set(i.name for i in self._DetermineInstances())
elif level == locking.LEVEL_NODEGROUP:
# Lock node groups optimistically, needs verification once nodes have
# been acquired
# Lock node groups for all potential target nodes optimistically, needs
# verification once nodes have been acquired
self.needed_locks[locking.LEVEL_NODEGROUP] = \
self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
@@ -10572,12 +10597,23 @@ class LUNodeEvacuate(NoHooksLU):
owned_nodes = self.owned_locks(locking.LEVEL_NODE)
owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
assert owned_nodes == self.lock_nodes
need_nodes = self._DetermineNodes()
if not owned_nodes.issuperset(need_nodes):
raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
" locks were acquired, current nodes are"
" are '%s', used to be '%s'; retry the"
" operation" %
(self.op.node_name,
utils.CommaJoin(need_nodes),
utils.CommaJoin(owned_nodes)),
errors.ECODE_STATE)
wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
if owned_groups != wanted_groups:
raise errors.OpExecError("Node groups changed since locks were acquired,"
" current groups are '%s', used to be '%s'" %
" current groups are '%s', used to be '%s';"
" retry the operation" %
(utils.CommaJoin(wanted_groups),
utils.CommaJoin(owned_groups)))
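
The two checks above follow the optimistic-locking pattern used elsewhere in cmdlib: compute the node set cheaply before locking, then recompute it once the locks are held and ask the caller to retry if it no longer matches. A generic sketch of that cycle (illustrative only, not part of the commit):

def locked_node_set(determine_nodes, acquire_locks):
  # determine_nodes corresponds to LUNodeEvacuate._DetermineNodes, called
  # once in ExpandNames and again in CheckPrereq.
  wanted = determine_nodes()
  acquire_locks(wanted)
  owned = determine_nodes()  # recomputed under the lock
  if not set(wanted).issuperset(owned):
    # Equivalent of the OpPrereqError with ECODE_STATE above.
    raise RuntimeError("Node set changed while acquiring locks; retry")
  return owned
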
@@ -10588,7 +10624,7 @@ class LUNodeEvacuate(NoHooksLU):
if set(self.instance_names) != owned_instances:
raise errors.OpExecError("Instances on node '%s' changed since locks"
" were acquired, current instances are '%s',"
" used to be '%s'" %
" used to be '%s'; retry the operation" %
(self.op.node_name,
utils.CommaJoin(self.instance_names),
utils.CommaJoin(owned_instances)))
@@ -10620,7 +10656,7 @@ class LUNodeEvacuate(NoHooksLU):
elif self.op.iallocator is not None:
# TODO: Implement relocation to other group
ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
evac_mode=self.op.mode,
evac_mode=self._MODE2IALLOCATOR[self.op.mode],
instances=list(self.instance_names))
ial.Run(self.op.iallocator)
@@ -10634,7 +10670,7 @@ class LUNodeEvacuate(NoHooksLU):
jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
elif self.op.remote_node is not None:
assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
[opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
remote_node=self.op.remote_node,
@@ -12508,13 +12544,9 @@ class LUGroupAssignNodes(NoHooksLU):
"""Assign nodes to a new group.
"""
for node in self.op.nodes:
self.node_data[node].group = self.group_uuid
# FIXME: Depends on side-effects of modifying the result of
# C{cfg.GetAllNodesInfo}
mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
self.cfg.AssignGroupNodes(mods)
@staticmethod
def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
......
@@ -38,6 +38,7 @@ import os
import random
import logging
import time
import itertools
from ganeti import errors
from ganeti import locking
@@ -1676,6 +1677,79 @@ class ConfigWriter:
else:
nodegroup_obj.members.remove(node.name)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
"""Changes the group of a number of nodes.
@type mods: list of tuples; (node name, new group UUID)
@param mods: Node membership modifications
"""
groups = self._config_data.nodegroups
nodes = self._config_data.nodes
resmod = []
# Try to resolve names/UUIDs first
for (node_name, new_group_uuid) in mods:
try:
node = nodes[node_name]
except KeyError:
raise errors.ConfigurationError("Unable to find node '%s'" % node_name)
if node.group == new_group_uuid:
# Node is being assigned to its current group
logging.debug("Node '%s' was assigned to its current group (%s)",
node_name, node.group)
continue
# Try to find current group of node
try:
old_group = groups[node.group]
except KeyError:
raise errors.ConfigurationError("Unable to find old group '%s'" %
node.group)
# Try to find new group for node
try:
new_group = groups[new_group_uuid]
except KeyError:
raise errors.ConfigurationError("Unable to find new group '%s'" %
new_group_uuid)
assert node.name in old_group.members, \
("Inconsistent configuration: node '%s' not listed in members for its"
" old group '%s'" % (node.name, old_group.uuid))
assert node.name not in new_group.members, \
("Inconsistent configuration: node '%s' already listed in members for"
" its new group '%s'" % (node.name, new_group.uuid))
resmod.append((node, old_group, new_group))
# Apply changes
for (node, old_group, new_group) in resmod:
assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
"Assigning to current group is not possible"
node.group = new_group.uuid
# Update members of involved groups
if node.name in old_group.members:
old_group.members.remove(node.name)
if node.name not in new_group.members:
new_group.members.append(node.name)
# Update timestamps and serials (only once per node/group object)
now = time.time()
for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
obj.serial_no += 1
obj.mtime = now
# Force ssconf update
self._config_data.cluster.serial_no += 1
self._WriteConfig()
def _BumpSerialNo(self):
"""Bump up the serial number of the config.
......
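
For reference, the new ConfigWriter.AssignGroupNodes takes a list of (node name, target group UUID) tuples, which is exactly what the rewritten LUGroupAssignNodes.Exec builds above. A minimal usage sketch (cfg is an existing ConfigWriter instance; node names and the UUID are placeholders):

mods = [
  ("node1.example.com", "2f2fadf7-2a70-4a23-9ab5-2568c252032c"),
  ("node2.example.com", "2f2fadf7-2a70-4a23-9ab5-2568c252032c"),
  ]
cfg.AssignGroupNodes(mods)  # updates node.group and both groups' member lists
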
@@ -1340,6 +1340,16 @@ IALLOCATOR_NEVAC_MODES = frozenset([
IALLOCATOR_NEVAC_ALL,
])
# Node evacuation
NODE_EVAC_PRI = "primary-only"
NODE_EVAC_SEC = "secondary-only"
NODE_EVAC_ALL = "all"
NODE_EVAC_MODES = frozenset([
NODE_EVAC_PRI,
NODE_EVAC_SEC,
NODE_EVAC_ALL,
])
# Job queue
JOB_QUEUE_VERSION = 1
JOB_QUEUE_LOCK_FILE = QUEUE_DIR + "/lock"
......
@@ -1036,7 +1036,7 @@ class OpNodeEvacuate(OpCode):
_PNodeName,
("remote_node", None, ht.TMaybeString, "New secondary node"),
("iallocator", None, ht.TMaybeString, "Iallocator for computing solution"),
("mode", ht.NoDefault, ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES),
("mode", ht.NoDefault, ht.TElemOf(constants.NODE_EVAC_MODES),
"Node evacuation mode"),
]
OP_RESULT = TJobIdListOnly
......
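
With the mode now validated against NODE_EVAC_MODES, replacing all secondaries onto a fixed node goes through mode=NODE_EVAC_SEC plus remote_node, matching the assertion in LUNodeEvacuate above. A hedged sketch of submitting such an opcode over the local LUXI socket (node names are placeholders):

from ganeti import constants, luxi, opcodes

op = opcodes.OpNodeEvacuate(node_name="node-3",          # node being emptied
                            remote_node="node-4",        # fixed new secondary
                            mode=constants.NODE_EVAC_SEC)

cl = luxi.Client()        # connects to the master daemon socket
job_id = cl.SubmitJob([op])
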
#!/usr/bin/python
#
# Copyright (C) 2006, 2007, 2010 Google Inc.
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -28,6 +28,8 @@ import time
import tempfile
import os.path
import socket
import operator
import itertools
from ganeti import bootstrap
from ganeti import config
@@ -36,6 +38,7 @@ from ganeti import errors
from ganeti import objects
from ganeti import utils
from ganeti import netutils
from ganeti import compat
from ganeti.config import TemporaryReservationManager
@@ -239,6 +242,127 @@ class TestConfigRunner(unittest.TestCase):
cfg.AddNodeGroup(group, "my-job", check_uuid=False) # Does not raise.
self.assertEqual(uuid, group.uuid)
def testAssignGroupNodes(self):
me = netutils.Hostname()
cfg = self._get_object()
# Create two groups
grp1 = objects.NodeGroup(name="grp1", members=[],
uuid="2f2fadf7-2a70-4a23-9ab5-2568c252032c")
grp1_serial = 1
cfg.AddNodeGroup(grp1, "job")
grp2 = objects.NodeGroup(name="grp2", members=[],
uuid="798d0de3-680f-4a0e-b29a-0f54f693b3f1")
grp2_serial = 1
cfg.AddNodeGroup(grp2, "job")
self.assertEqual(set(map(operator.attrgetter("name"),
cfg.GetAllNodeGroupsInfo().values())),
set(["grp1", "grp2", constants.INITIAL_NODE_GROUP_NAME]))
# No-op
cluster_serial = cfg.GetClusterInfo().serial_no
cfg.AssignGroupNodes([])
cluster_serial += 1
# Create two nodes
node1 = objects.Node(name="node1", group=grp1.uuid, ndparams={})
node1_serial = 1
node2 = objects.Node(name="node2", group=grp2.uuid, ndparams={})
node2_serial = 1
cfg.AddNode(node1, "job")
cfg.AddNode(node2, "job")
cluster_serial += 2
self.assertEqual(set(cfg.GetNodeList()), set(["node1", "node2", me.name]))
def _VerifySerials():
self.assertEqual(cfg.GetClusterInfo().serial_no, cluster_serial)
self.assertEqual(node1.serial_no, node1_serial)
self.assertEqual(node2.serial_no, node2_serial)
self.assertEqual(grp1.serial_no, grp1_serial)
self.assertEqual(grp2.serial_no, grp2_serial)
_VerifySerials()
self.assertEqual(set(grp1.members), set(["node1"]))
self.assertEqual(set(grp2.members), set(["node2"]))
# Check invalid nodes and groups
self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [
("unknown.node.example.com", grp2.uuid),
])
self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [
(node1.name, "unknown-uuid"),
])
self.assertEqual(node1.group, grp1.uuid)
self.assertEqual(node2.group, grp2.uuid)
self.assertEqual(set(grp1.members), set(["node1"]))
self.assertEqual(set(grp2.members), set(["node2"]))
# Another no-op
cfg.AssignGroupNodes([])
cluster_serial += 1
_VerifySerials()
# Assign to the same group (should be a no-op)
self.assertEqual(node2.group, grp2.uuid)
cfg.AssignGroupNodes([
(node2.name, grp2.uuid),
])
cluster_serial += 1
self.assertEqual(node2.group, grp2.uuid)
_VerifySerials()
self.assertEqual(set(grp1.members), set(["node1"]))
self.assertEqual(set(grp2.members), set(["node2"]))
# Assign node 2 to group 1
self.assertEqual(node2.group, grp2.uuid)
cfg.AssignGroupNodes([
(node2.name, grp1.uuid),
])
cluster_serial += 1
node2_serial += 1
grp1_serial += 1
grp2_serial += 1
self.assertEqual(node2.group, grp1.uuid)
_VerifySerials()
self.assertEqual(set(grp1.members), set(["node1", "node2"]))
self.assertFalse(grp2.members)
# And assign both nodes to group 2
self.assertEqual(node1.group, grp1.uuid)
self.assertEqual(node2.group, grp1.uuid)
self.assertNotEqual(grp1.uuid, grp2.uuid)
cfg.AssignGroupNodes([
(node1.name, grp2.uuid),
(node2.name, grp2.uuid),
])
cluster_serial += 1
node1_serial += 1
node2_serial += 1
grp1_serial += 1
grp2_serial += 1
self.assertEqual(node1.group, grp2.uuid)
self.assertEqual(node2.group, grp2.uuid)
_VerifySerials()
self.assertFalse(grp1.members)
self.assertEqual(set(grp2.members), set(["node1", "node2"]))
# Destructive tests
orig_group = node2.group
try:
other_uuid = "68b3d087-6ea5-491c-b81f-0a47d90228c5"
assert compat.all(node.group != other_uuid
for node in cfg.GetAllNodesInfo().values())
node2.group = "68b3d087-6ea5-491c-b81f-0a47d90228c5"
self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [
("node2", grp2.uuid),
])
_VerifySerials()
finally:
node2.group = orig_group
class TestTRM(unittest.TestCase):
EC_ID = 1
......
@@ -166,9 +166,9 @@ class TestConstants(unittest.TestCase):
self.assertEqual(client.JOB_STATUS_ALL, constants.JOB_STATUS_ALL)
# Node evacuation
self.assertEqual(client.NODE_EVAC_PRI, constants.IALLOCATOR_NEVAC_PRI)
self.assertEqual(client.NODE_EVAC_SEC, constants.IALLOCATOR_NEVAC_SEC)
self.assertEqual(client.NODE_EVAC_ALL, constants.IALLOCATOR_NEVAC_ALL)
self.assertEqual(client.NODE_EVAC_PRI, constants.NODE_EVAC_PRI)
self.assertEqual(client.NODE_EVAC_SEC, constants.NODE_EVAC_SEC)
self.assertEqual(client.NODE_EVAC_ALL, constants.NODE_EVAC_ALL)
# Legacy name
self.assertEqual(client.JOB_STATUS_WAITLOCK, constants.JOB_STATUS_WAITING)
@@ -866,7 +866,7 @@ class GanetiRapiClientTests(testutils.GanetiTestCase):
self.rapi.AddResponse(serializer.DumpJson([rlib2._NODE_EVAC_RES1]))
self.rapi.AddResponse("8888")
job_id = self.client.EvacuateNode("node-3", iallocator="hail", dry_run=True,
mode=constants.IALLOCATOR_NEVAC_ALL,
mode=constants.NODE_EVAC_ALL,
early_release=True)
self.assertEqual(8888, job_id)
self.assertItems(["node-3"])
......