Commit 342a9b3b authored by Bernardo Dal Seno

Merge branch 'stable-2.8' into master



* stable-2.8:
  Document users-setup tool
  Adjusting permissions after confd start
  Ensure the queue socket has the right permissions
  Update IAllocator interface documentation
  Add NEWS entry for hail honoring networks
  Add tests for network-aware allocation
  Honor network connections in hail
  Parse NIC data from allocation request in hail
  Support group networks in Text backend
  Parse node group networks
  Update IAllocator interface documentation
  Export connected networks to IAllocator
  Re-activate previously active disks in watcher
  Make the disks_active flag queryable
  Consider disks_active in UpgradeConfig
  Add disks_active to configuration
  Fix release of instances in QA

Conflicts:
	tools/cfgupgrade

tools/cfgupgrade was modified in stable-2.8 to remove a new parameter when
downgrading to 2.7, but this shouldn't go into the 2.9 version of
cfgupgrade.
Signed-off-by: Bernardo Dal Seno <bdalseno@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
parents 33ee45c2 e41d5a22
......@@ -563,6 +563,7 @@ HS_LIB_SRCS = \
src/Ganeti/HTools/Group.hs \
src/Ganeti/HTools/Instance.hs \
src/Ganeti/HTools/Loader.hs \
src/Ganeti/HTools/Nic.hs \
src/Ganeti/HTools/Node.hs \
src/Ganeti/HTools/PeerMap.hs \
src/Ganeti/HTools/Program/Hail.hs \
......@@ -1040,7 +1041,9 @@ TEST_FILES = \
test/data/htools/common-suffix.data \
test/data/htools/empty-cluster.data \
test/data/htools/hail-alloc-drbd.json \
test/data/htools/hail-alloc-invalid-network.json \
test/data/htools/hail-alloc-invalid-twodisks.json \
test/data/htools/hail-alloc-restricted-network.json \
test/data/htools/hail-alloc-twodisks.json \
test/data/htools/hail-change-group.json \
test/data/htools/hail-invalid-reloc.json \
......
......@@ -41,6 +41,9 @@ Version 2.8.0 beta1
creation.
- ``cfgupgrade`` now supports a ``--downgrade`` option to bring the
configuration back to the previous stable version.
- hail now honors network restrictions when allocating nodes. This led to an
update of the IAllocator protocol. See the IAllocator documentation for
details.
Version 2.7.0 rc2
......
......@@ -252,6 +252,12 @@ start() {
--user ${usergroup%:*} \
$daemonexec $args "$@"
fi
# FIXME: This is a workaround for issue 477. Remove this once confd does not
# mess up the permissions anymore.
if [[ "$name" == ganeti-confd ]]; then
@PKGLIBDIR@/ensure-dirs;
fi
}
# Stops a daemon
......
......@@ -1661,6 +1661,22 @@ move-instance
See :doc:`separate documentation for move-instance <move-instance>`.
users-setup
+++++++++++
Ganeti can either be run entirely as root, or with every daemon running as
its own specific user (if the parameters ``--with-user-prefix`` and/or
``--with-group-prefix`` have been specified at ``./configure``-time).
If split users are activated, they are required to exist on the system,
and they need to belong to the proper groups so that access permissions
on files and programs are correct.
The ``users-setup`` tool takes care of setting up these users and groups.
The tool does not accept any parameters and requires root permissions to run.
.. TODO: document cluster-merge tool
......
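
For illustration, a minimal Python sketch of the kind of check a split-user
setup implies. This is not the actual ``users-setup`` implementation, and the
user and group names below are hypothetical, assuming the conventional
``gnt-`` prefixes chosen at ``./configure`` time::

  # Illustrative sketch only -- not the real users-setup tool.
  # The names below are assumptions based on the default "gnt-" prefixes.
  import grp
  import pwd

  ASSUMED_USERS = ["gnt-masterd", "gnt-confd", "gnt-rapi", "gnt-noded"]
  ASSUMED_GROUPS = ["gnt-daemons", "gnt-admin"]


  def FindMissingEntities():
    """Return a list describing which expected users/groups are missing."""
    missing = []
    for user in ASSUMED_USERS:
      try:
        pwd.getpwnam(user)
      except KeyError:
        missing.append("user %s" % user)
    for group in ASSUMED_GROUPS:
      try:
        grp.getgrnam(group)
      except KeyError:
        missing.append("group %s" % group)
    return missing


  if __name__ == "__main__":
    problems = FindMissingEntities()
    if problems:
      raise SystemExit("Missing: %s" % ", ".join(problems))
    print("All expected users and groups are present")
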
......@@ -109,8 +109,12 @@ nodegroups
alloc_policy
the allocation policy of the node group (consult the semantics of
this attribute in the :manpage:`gnt-group(8)` manpage)
networks
the list of network UUIDs this node group is connected to
ipolicy
the instance policy of the node group
tags
the list of node group tags
instances
a dictionary with the data for the current existing instance on the
......@@ -377,7 +381,42 @@ time, but not included in further examples below)::
"nodegroups": {
"f4e06e0d-528a-4963-a5ad-10f3e114232d": {
"name": "default",
"alloc_policy": "preferred"
"alloc_policy": "preferred",
"networks": ["net-uuid-1", "net-uuid-2"],
"ipolicy": {
"disk-templates": ["drbd", "plain"],
"minmax": [
{
"max": {
"cpu-count": 2,
"disk-count": 8,
"disk-size": 2048,
"memory-size": 12800,
"nic-count": 8,
"spindle-use": 8
},
"min": {
"cpu-count": 1,
"disk-count": 1,
"disk-size": 1024,
"memory-size": 128,
"nic-count": 1,
"spindle-use": 1
}
}
],
"spindle-ratio": 32.0,
"std": {
"cpu-count": 1,
"disk-count": 1,
"disk-size": 1024,
"memory-size": 128,
"nic-count": 1,
"spindle-use": 1
},
"vcpu-ratio": 4.0
},
"tags": ["ng-tag-1", "ng-tag-2"]
}
},
"instances": {
......
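
As a hedged illustration of how an IAllocator plugin can consume the new
per-group fields shown above: the field names follow the documented request
format, the plugin receives the JSON request as a file path argument per the
IAllocator protocol, and the helper itself is made up for this sketch::

  # Illustrative IAllocator-side sketch; GroupsConnectedTo is hypothetical.
  import json
  import sys


  def GroupsConnectedTo(request, net_uuid):
    """Return the UUIDs of node groups connected to the given network."""
    return [guuid for (guuid, gdata) in request["nodegroups"].items()
            if net_uuid in gdata.get("networks", [])]


  if __name__ == "__main__":
    # The request is passed as a JSON file whose path is the first argument
    with open(sys.argv[1]) as fh:
      req = json.load(fh)
    print(GroupsConnectedTo(req, "net-uuid-1"))
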
......@@ -398,7 +398,7 @@ class LUBackupExport(LogicalUnit):
for disk in instance.disks:
self.cfg.SetDiskID(disk, src_node)
activate_disks = (instance.admin_state != constants.ADMINST_UP)
activate_disks = not instance.disks_active
if activate_disks:
# Activate the instance disks if we're exporting a stopped instance
......
......@@ -1880,12 +1880,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
# node here
snode = node_image[nname]
bad_snode = snode.ghost or snode.offline
_ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
_ErrorIf(inst_config.disks_active and
not success and not bad_snode,
constants.CV_EINSTANCEFAULTYDISK, instance,
"couldn't retrieve status for disk/%s on %s: %s",
idx, nname, bdev_status)
_ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
_ErrorIf((inst_config.disks_active and
success and bdev_status.ldisk_status == constants.LDS_FAULTY),
constants.CV_EINSTANCEFAULTYDISK, instance,
"disk/%s on %s is faulty", idx, nname)
......@@ -2172,8 +2172,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
node_drbd[minor] = (instance, False)
else:
instance = instanceinfo[instance]
node_drbd[minor] = (instance.name,
instance.admin_state == constants.ADMINST_UP)
node_drbd[minor] = (instance.name, instance.disks_active)
# and now check them
used_minors = nresult.get(constants.NV_DRBDLIST, [])
......
......@@ -910,8 +910,7 @@ class LUGroupVerifyDisks(NoHooksLU):
res_missing = {}
nv_dict = MapInstanceDisksToNodes(
[inst for inst in self.instances.values()
if inst.admin_state == constants.ADMINST_UP])
[inst for inst in self.instances.values() if inst.disks_active])
if nv_dict:
nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
......
......@@ -1203,6 +1203,7 @@ class LUInstanceCreate(LogicalUnit):
primary_node=pnode_name,
nics=self.nics, disks=disks,
disk_template=self.op.disk_template,
disks_active=False,
admin_state=constants.ADMINST_DOWN,
network_port=network_port,
beparams=self.op.beparams,
......@@ -1282,6 +1283,9 @@ class LUInstanceCreate(LogicalUnit):
raise errors.OpExecError("There are some degraded disks for"
" this instance")
# instance disks are now active
iobj.disks_active = True
# Release all node resource locks
ReleaseLocks(self, locking.LEVEL_NODE_RES)
......
......@@ -832,7 +832,7 @@ class TLMigrateInstance(Tasklet):
source_node = instance.primary_node
target_node = self.target_node
if instance.admin_state == constants.ADMINST_UP:
if instance.disks_active:
self.feedback_fn("* checking disk consistency between source and target")
for (idx, dev) in enumerate(instance.disks):
# for drbd, these are drbd over lvm
......
......@@ -1182,6 +1182,7 @@ def ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
ignored.
"""
lu.cfg.MarkInstanceDisksInactive(instance.name)
all_result = True
disks = ExpandCheckDisks(instance, disks)
......@@ -1248,6 +1249,10 @@ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
# into any other network-connected state (Connected, SyncTarget,
# SyncSource, etc.)
# mark instance disks as active before doing actual work, so watcher does
# not try to shut them down erroneously
lu.cfg.MarkInstanceDisksActive(iname)
# 1st pass, assemble on all nodes in secondary mode
for idx, inst_disk in enumerate(disks):
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
......@@ -1299,6 +1304,9 @@ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
for disk in disks:
lu.cfg.SetDiskID(disk, instance.primary_node)
if not disks_ok:
lu.cfg.MarkInstanceDisksInactive(iname)
return disks_ok, device_info
......@@ -1505,9 +1513,9 @@ class LUInstanceGrowDisk(LogicalUnit):
if disk_abort:
self.LogWarning("Disk syncing has not returned a good status; check"
" the instance")
if instance.admin_state != constants.ADMINST_UP:
if not instance.disks_active:
_SafeShutdownInstanceDisks(self, instance, disks=[disk])
elif instance.admin_state != constants.ADMINST_UP:
elif not instance.disks_active:
self.LogWarning("Not shutting down the disk even if the instance is"
" not supposed to be running because no wait for"
" sync mode was requested")
......@@ -1695,6 +1703,7 @@ class LUInstanceActivateDisks(NoHooksLU):
if self.op.wait_for_sync:
if not WaitForSync(self, self.instance):
self.cfg.MarkInstanceDisksInactive(self.instance.name)
raise errors.OpExecError("Some disks of the instance are degraded!")
return disks_info
......@@ -2080,7 +2089,7 @@ class TLReplaceDisks(Tasklet):
feedback_fn("Current seconary node: %s" %
utils.CommaJoin(self.instance.secondary_nodes))
activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
activate_disks = not self.instance.disks_active
# Activate the instance disks if we're replacing them on a down instance
if activate_disks:
......
......@@ -1552,7 +1552,7 @@ class LURepairNodeStorage(NoHooksLU):
"""
# Check whether any instance on this node has faulty disks
for inst in _GetNodeInstances(self.cfg, self.op.node_name):
if inst.admin_state != constants.ADMINST_UP:
if not inst.disks_active:
continue
check_nodes = set(inst.all_nodes)
check_nodes.discard(self.op.node_name)
......
......@@ -1436,19 +1436,27 @@ class ConfigWriter:
raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
" in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, instance_name, status):
def _SetInstanceStatus(self, instance_name, status, disks_active):
"""Set the instance's status to a given value.
"""
assert status in constants.ADMINST_ALL, \
"Invalid status '%s' passed to SetInstanceStatus" % (status,)
if instance_name not in self._config_data.instances:
raise errors.ConfigurationError("Unknown instance '%s'" %
instance_name)
instance = self._config_data.instances[instance_name]
if instance.admin_state != status:
if status is None:
status = instance.admin_state
if disks_active is None:
disks_active = instance.disks_active
assert status in constants.ADMINST_ALL, \
"Invalid status '%s' passed to SetInstanceStatus" % (status,)
if instance.admin_state != status or \
instance.disks_active != disks_active:
instance.admin_state = status
instance.disks_active = disks_active
instance.serial_no += 1
instance.mtime = time.time()
self._WriteConfig()
......@@ -1457,15 +1465,19 @@ class ConfigWriter:
def MarkInstanceUp(self, instance_name):
"""Mark the instance status to up in the config.
This also sets the instance disks active flag.
"""
self._SetInstanceStatus(instance_name, constants.ADMINST_UP)
self._SetInstanceStatus(instance_name, constants.ADMINST_UP, True)
@locking.ssynchronized(_config_lock)
def MarkInstanceOffline(self, instance_name):
"""Mark the instance status to down in the config.
This also clears the instance disks active flag.
"""
self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE)
self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE, False)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
......@@ -1531,8 +1543,25 @@ class ConfigWriter:
def MarkInstanceDown(self, instance_name):
"""Mark the status of an instance to down in the configuration.
This does not touch the instance disks active flag, as shut down instances
can still have active disks.
"""
self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN, None)
@locking.ssynchronized(_config_lock)
def MarkInstanceDisksActive(self, instance_name):
"""Mark the status of instance disks active.
"""
self._SetInstanceStatus(instance_name, None, True)
@locking.ssynchronized(_config_lock)
def MarkInstanceDisksInactive(self, instance_name):
"""Mark the status of instance disks inactive.
"""
self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN)
self._SetInstanceStatus(instance_name, None, False)
def _UnlockedGetInstanceList(self):
"""Get the list of instances.
......
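
As a reading aid for the hunk above (not code from the tree), the
``(status, disks_active)`` pair each ``MarkInstance*`` helper now passes to
``_SetInstanceStatus``, where ``None`` means the currently stored value is
kept::

  # Reading aid only: arguments passed to
  # _SetInstanceStatus(instance_name, status, disks_active).
  # None means "keep the value currently stored in the configuration".
  from ganeti import constants

  MARK_HELPER_ARGS = {
    "MarkInstanceUp": (constants.ADMINST_UP, True),
    "MarkInstanceOffline": (constants.ADMINST_OFFLINE, False),
    "MarkInstanceDown": (constants.ADMINST_DOWN, None),
    "MarkInstanceDisksActive": (None, True),
    "MarkInstanceDisksInactive": (None, False),
  }
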
......@@ -465,6 +465,7 @@ class IAllocator(object):
ng = dict((guuid, {
"name": gdata.name,
"alloc_policy": gdata.alloc_policy,
"networks": [net_uuid for net_uuid, _ in gdata.networks.items()],
"ipolicy": gmi.CalculateGroupIPolicy(cluster, gdata),
"tags": list(gdata.GetTags()),
})
......@@ -605,6 +606,7 @@ class IAllocator(object):
constants.IDISK_MODE: dsk.mode}
for dsk in iinfo.disks],
"disk_template": iinfo.disk_template,
"disks_active": iinfo.disks_active,
"hypervisor": iinfo.hypervisor,
}
pir["disk_space_total"] = gmi.ComputeDiskSize(iinfo.disk_template,
......
......@@ -1062,6 +1062,7 @@ class Instance(TaggableObject):
"nics",
"disks",
"disk_template",
"disks_active",
"network_port",
"serial_no",
] + _TIMESTAMPS + _UUID
......@@ -1227,6 +1228,8 @@ class Instance(TaggableObject):
if self.osparams is None:
self.osparams = {}
UpgradeBeParams(self.beparams)
if self.disks_active is None:
self.disks_active = self.admin_state == constants.ADMINST_UP
class OS(ConfigObject):
......
......@@ -2075,6 +2075,9 @@ def _BuildInstanceFields():
(_MakeField("admin_up", "Autostart", QFT_BOOL,
"Desired state of instance"),
IQ_CONFIG, 0, lambda ctx, inst: inst.admin_state == constants.ADMINST_UP),
(_MakeField("disks_active", "DisksActive", QFT_BOOL,
"Desired state of instance disks"),
IQ_CONFIG, 0, _GetItemAttr("disks_active")),
(_MakeField("tags", "Tags", QFT_OTHER, "Tags"), IQ_CONFIG, 0,
lambda ctx, inst: list(inst.GetTags())),
(_MakeField("console", "Console", QFT_OTHER,
......
......@@ -179,6 +179,8 @@ def GetPaths():
(pathutils.SOCKET_DIR, DIR, 0770, getent.masterd_uid, getent.daemons_gid),
(pathutils.MASTER_SOCKET, FILE, 0660,
getent.masterd_uid, getent.daemons_gid, False),
(pathutils.QUERY_SOCKET, FILE, 0660,
getent.confd_uid, getent.daemons_gid, False),
(pathutils.BDEV_CACHE_DIR, DIR, 0755,
getent.noded_uid, getent.masterd_gid),
(pathutils.UIDPOOL_LOCKDIR, DIR, 0750,
......
......@@ -137,10 +137,10 @@ class Instance(object):
"""Abstraction for a Virtual Machine instance.
"""
def __init__(self, name, status, autostart, snodes):
def __init__(self, name, status, disks_active, snodes):
self.name = name
self.status = status
self.autostart = autostart
self.disks_active = disks_active
self.snodes = snodes
def Restart(self, cl):
......@@ -246,9 +246,9 @@ def _CheckDisks(cl, notepad, nodes, instances, started):
instance_name)
continue
if not inst.autostart:
logging.info("Skipping disk activation for non-autostart"
" instance '%s'", inst.name)
if not inst.disks_active:
logging.info("Skipping disk activation for instance with not"
" activated disks '%s'", inst.name)
continue
if inst.name in started:
......@@ -626,7 +626,7 @@ def _GetGroupData(cl, uuid):
job = [
# Get all primary instances in group
opcodes.OpQuery(what=constants.QR_INSTANCE,
fields=["name", "status", "admin_state", "snodes",
fields=["name", "status", "disks_active", "snodes",
"pnode.group.uuid", "snodes.group.uuid"],
qfilter=[qlang.OP_EQUAL, "pnode.group.uuid", uuid],
use_locking=True),
......@@ -657,14 +657,14 @@ def _GetGroupData(cl, uuid):
instances = []
# Load all instances
for (name, status, autostart, snodes, pnode_group_uuid,
for (name, status, disks_active, snodes, pnode_group_uuid,
snodes_group_uuid) in raw_instances:
if snodes and set([pnode_group_uuid]) != set(snodes_group_uuid):
logging.error("Ignoring split instance '%s', primary group %s, secondary"
" groups %s", name, pnode_group_uuid,
utils.CommaJoin(snodes_group_uuid))
else:
instances.append(Instance(name, status, autostart, snodes))
instances.append(Instance(name, status, disks_active, snodes))
for node in snodes:
secondaries.setdefault(node, set()).add(name)
......
......@@ -163,6 +163,7 @@ support all options. Some common options are:
- group uuid
- allocation policy
- tags (separated by comma)
- networks (UUIDs, separated by comma)
The second sections contains node data, with the following columns:
......
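
For illustration, a hedged sketch of consuming the new networks column from a
group line of the Text backend. The ``|`` separator is the backend's usual
convention, the sample values are invented, and any columns preceding the
group uuid (such as the group name) are omitted here::

  # Illustrative only: unpacking a group line according to the columns listed
  # above; sample values are invented, leading columns are omitted.
  sample = ("f4e06e0d-528a-4963-a5ad-10f3e114232d|preferred|"
            "tagA,tagB|net-uuid-1,net-uuid-2")
  guuid, alloc_policy, tags_field, networks_field = sample.split("|")
  tags = tags_field.split(",") if tags_field else []
  networks = networks_field.split(",") if networks_field else []
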
......@@ -1006,7 +1006,7 @@ def TestRemoveInstanceOfflineNode(instance, snode, set_offline, set_online):
def TestInstanceCreationRestrictedByDiskTemplates():
"""Test adding instances for disbled disk templates."""
"""Test adding instances for disabled disk templates."""
enabled_disk_templates = qa_config.GetEnabledDiskTemplates()
nodes = qa_config.AcquireManyNodes(2)
......@@ -1019,14 +1019,15 @@ def TestInstanceCreationRestrictedByDiskTemplates():
# Test instance creation for enabled disk templates
for disk_template in enabled_disk_templates:
instance = CreateInstanceByDiskTemplate(nodes, disk_template, False)
instance = CreateInstanceByDiskTemplate(nodes, disk_template, fail=False)
TestInstanceRemove(instance)
instance.Release()
# Test that instance creation fails for disabled disk templates
disabled_disk_templates = list(constants.DISK_TEMPLATES
- set(enabled_disk_templates))
for disk_template in disabled_disk_templates:
instance = CreateInstanceByDiskTemplate(nodes, disk_template, True)
instance = CreateInstanceByDiskTemplate(nodes, disk_template, fail=True)
# Test instance creation after disabling enabled disk templates
if (len(enabled_disk_templates) > 1):
......@@ -1043,7 +1044,7 @@ def TestInstanceCreationRestrictedByDiskTemplates():
",".join(enabled)],
fail=False)
for disk_template in disabled:
CreateInstanceByDiskTemplate(nodes, disk_template, True)
CreateInstanceByDiskTemplate(nodes, disk_template, fail=True)
elif (len(enabled_disk_templates) == 1):
# If only one disk template is enabled in the QA config, we have to enable
# some of the disabled disk templates in order to test if the disabling the
......@@ -1052,7 +1053,7 @@ def TestInstanceCreationRestrictedByDiskTemplates():
"--enabled-disk-template=%s" %
",".join(disabled_disk_templates)],
fail=False)
CreateInstanceByDiskTemplate(nodes, enabled_disk_templates[0], True)
CreateInstanceByDiskTemplate(nodes, enabled_disk_templates[0], fail=True)
else:
raise qa_error.Error("Please enable at least one disk template"
" in your QA setup.")
......