Commit 1d4a4b26 authored by Thomas Thrainer

Add disks_active to configuration



This flag tracks whether the disks of an instance are supposed to be active.
That's the case when an instance is running or when its disks were
activated explicitly (and in a couple of other cases).
It will be used by the watcher to re-activate disks after a node reboot.
Signed-off-by: Thomas Thrainer <thomasth@google.com>
Reviewed-by: Helga Velroyen <helgav@google.com>
parent e75f80b9
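
The flag's intended consumer is ganeti-watcher: after a node reboot, instances whose disks were active before the crash should get their disks re-assembled. A minimal sketch of that pattern, using simplified stand-in types rather than the real watcher code:

from dataclasses import dataclass

@dataclass
class Inst:
  # simplified stand-in for Ganeti's objects.Instance
  name: str
  disks_active: bool

def reactivate_after_reboot(instances, activate_fn):
  """Re-activate disks of every instance whose flag says they should be up."""
  for inst in instances:
    if inst.disks_active:  # the flag added by this commit
      activate_fn(inst.name)  # stand-in for submitting an activate-disks opcode

# Example: only web1 gets its disks re-activated.
reactivate_after_reboot([Inst("web1", True), Inst("db1", False)],
                        lambda name: print("activating disks of", name))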
@@ -398,7 +398,7 @@ class LUBackupExport(LogicalUnit):
     for disk in instance.disks:
       self.cfg.SetDiskID(disk, src_node)
 
-    activate_disks = (instance.admin_state != constants.ADMINST_UP)
+    activate_disks = not instance.disks_active
 
     if activate_disks:
       # Activate the instance disks if we're exporting a stopped instance
@@ -1779,12 +1779,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         # node here
         snode = node_image[nname]
         bad_snode = snode.ghost or snode.offline
-        _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+        _ErrorIf(inst_config.disks_active and
                  not success and not bad_snode,
                  constants.CV_EINSTANCEFAULTYDISK, instance,
                  "couldn't retrieve status for disk/%s on %s: %s",
                  idx, nname, bdev_status)
-        _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
+        _ErrorIf((inst_config.disks_active and
                   success and bdev_status.ldisk_status == constants.LDS_FAULTY),
                  constants.CV_EINSTANCEFAULTYDISK, instance,
                  "disk/%s on %s is faulty", idx, nname)
@@ -2062,8 +2062,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
           node_drbd[minor] = (instance, False)
         else:
           instance = instanceinfo[instance]
-          node_drbd[minor] = (instance.name,
-                              instance.admin_state == constants.ADMINST_UP)
+          node_drbd[minor] = (instance.name, instance.disks_active)
 
       # and now check them
       used_minors = nresult.get(constants.NV_DRBDLIST, [])
@@ -910,8 +910,7 @@ class LUGroupVerifyDisks(NoHooksLU):
     res_missing = {}
 
     nv_dict = MapInstanceDisksToNodes(
-      [inst for inst in self.instances.values()
-       if inst.admin_state == constants.ADMINST_UP])
+      [inst for inst in self.instances.values() if inst.disks_active])
 
     if nv_dict:
       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
@@ -1197,6 +1197,7 @@ class LUInstanceCreate(LogicalUnit):
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
+                            disks_active=False,
                             admin_state=constants.ADMINST_DOWN,
                             network_port=network_port,
                             beparams=self.op.beparams,
@@ -1276,6 +1277,9 @@ class LUInstanceCreate(LogicalUnit):
         raise errors.OpExecError("There are some degraded disks for"
                                  " this instance")
 
+    # instance disks are now active
+    iobj.disks_active = True
+
     # Release all node resource locks
     ReleaseLocks(self, locking.LEVEL_NODE_RES)
@@ -832,7 +832,7 @@ class TLMigrateInstance(Tasklet):
     source_node = instance.primary_node
     target_node = self.target_node
 
-    if instance.admin_state == constants.ADMINST_UP:
+    if instance.disks_active:
       self.feedback_fn("* checking disk consistency between source and target")
       for (idx, dev) in enumerate(instance.disks):
         # for drbd, these are drbd over lvm
@@ -1139,6 +1139,7 @@ def ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
   ignored.
 
   """
+  lu.cfg.MarkInstanceDisksInactive(instance.name)
   all_result = True
   disks = ExpandCheckDisks(instance, disks)
@@ -1205,6 +1206,10 @@ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   # into any other network-connected state (Connected, SyncTarget,
   # SyncSource, etc.)
 
+  # mark instance disks as active before doing actual work, so watcher does
+  # not try to shut them down erroneously
+  lu.cfg.MarkInstanceDisksActive(iname)
+
   # 1st pass, assemble on all nodes in secondary mode
   for idx, inst_disk in enumerate(disks):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
@@ -1256,6 +1261,9 @@ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   for disk in disks:
     lu.cfg.SetDiskID(disk, instance.primary_node)
 
+  if not disks_ok:
+    lu.cfg.MarkInstanceDisksInactive(iname)
+
   return disks_ok, device_info
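
The ordering in AssembleInstanceDisks is deliberate: the flag is set before any disk is touched, so a watcher that runs during assembly does not conclude the disks are unexpectedly active and shut them down; on failure the flag is rolled back. A minimal sketch of that pattern (the cfg methods here are simplified stand-ins for MarkInstanceDisksActive/MarkInstanceDisksInactive):

def assemble_with_flag(cfg, iname, assemble_fn):
  # Set the flag before touching any disk, so a watcher run that overlaps
  # with assembly does not shut the half-assembled disks down again.
  cfg.mark_disks_active(iname)      # stand-in for MarkInstanceDisksActive
  disks_ok = assemble_fn()
  if not disks_ok:
    # Roll back on failure, mirroring the end of AssembleInstanceDisks.
    cfg.mark_disks_inactive(iname)  # stand-in for MarkInstanceDisksInactive
  return disks_ok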
@@ -1460,9 +1468,9 @@ class LUInstanceGrowDisk(LogicalUnit):
     if disk_abort:
       self.LogWarning("Disk syncing has not returned a good status; check"
                       " the instance")
-      if instance.admin_state != constants.ADMINST_UP:
+      if not instance.disks_active:
         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
-    elif instance.admin_state != constants.ADMINST_UP:
+    elif not instance.disks_active:
       self.LogWarning("Not shutting down the disk even if the instance is"
                       " not supposed to be running because no wait for"
                       " sync mode was requested")
@@ -1650,6 +1658,7 @@ class LUInstanceActivateDisks(NoHooksLU):
     if self.op.wait_for_sync:
       if not WaitForSync(self, self.instance):
+        self.cfg.MarkInstanceDisksInactive(self.instance.name)
         raise errors.OpExecError("Some disks of the instance are degraded!")
 
     return disks_info
@@ -2035,7 +2044,7 @@ class TLReplaceDisks(Tasklet):
       feedback_fn("Current secondary node: %s" %
                   utils.CommaJoin(self.instance.secondary_nodes))
 
-    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
+    activate_disks = not self.instance.disks_active
 
     # Activate the instance disks if we're replacing them on a down instance
     if activate_disks:
@@ -1549,7 +1549,7 @@ class LURepairNodeStorage(NoHooksLU):
     """
     # Check whether any instance on this node has faulty disks
     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
-      if inst.admin_state != constants.ADMINST_UP:
+      if not inst.disks_active:
         continue
       check_nodes = set(inst.all_nodes)
       check_nodes.discard(self.op.node_name)
@@ -1436,19 +1436,27 @@ class ConfigWriter:
       raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                       " in use" % (item.name, item.uuid))
 
-  def _SetInstanceStatus(self, instance_name, status):
+  def _SetInstanceStatus(self, instance_name, status, disks_active):
     """Set the instance's status to a given value.
 
     """
-    assert status in constants.ADMINST_ALL, \
-           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
-
     if instance_name not in self._config_data.instances:
       raise errors.ConfigurationError("Unknown instance '%s'" %
                                       instance_name)
     instance = self._config_data.instances[instance_name]
-    if instance.admin_state != status:
+
+    if status is None:
+      status = instance.admin_state
+    if disks_active is None:
+      disks_active = instance.disks_active
+
+    assert status in constants.ADMINST_ALL, \
+           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
+
+    if instance.admin_state != status or \
+       instance.disks_active != disks_active:
       instance.admin_state = status
+      instance.disks_active = disks_active
       instance.serial_no += 1
       instance.mtime = time.time()
       self._WriteConfig()
@@ -1457,15 +1465,19 @@ class ConfigWriter:
   def MarkInstanceUp(self, instance_name):
     """Mark the instance status to up in the config.
 
+    This also sets the instance disks active flag.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_UP)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_UP, True)
 
   @locking.ssynchronized(_config_lock)
   def MarkInstanceOffline(self, instance_name):
     """Mark the instance status to down in the config.
 
+    This also clears the instance disks active flag.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE, False)
 
   @locking.ssynchronized(_config_lock)
   def RemoveInstance(self, instance_name):
@@ -1531,8 +1543,25 @@ class ConfigWriter:
   def MarkInstanceDown(self, instance_name):
     """Mark the status of an instance to down in the configuration.
 
+    This does not touch the instance disks active flag, as shut down instances
+    can still have active disks.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN, None)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceDisksActive(self, instance_name):
+    """Mark the status of instance disks active.
+
+    """
+    self._SetInstanceStatus(instance_name, None, True)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceDisksInactive(self, instance_name):
+    """Mark the status of instance disks inactive.
+
+    """
+    self._SetInstanceStatus(instance_name, None, False)
 
   def _UnlockedGetInstanceList(self):
     """Get the list of instances.
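
With the reworked _SetInstanceStatus, passing None for either argument means "keep the current value"; that is what lets MarkInstanceDown leave the disks flag untouched while the two new Mark* methods leave admin_state alone. A condensed, runnable sketch of that semantics (simplified objects, not the real ConfigWriter):

class FakeInstance:
  def __init__(self):
    self.admin_state = "up"
    self.disks_active = True

def set_status(inst, status, disks_active):
  # None means "keep the current value", as in _SetInstanceStatus above.
  if status is None:
    status = inst.admin_state
  if disks_active is None:
    disks_active = inst.disks_active
  inst.admin_state = status
  inst.disks_active = disks_active

inst = FakeInstance()
set_status(inst, "down", None)   # like MarkInstanceDown: flag untouched
assert inst.disks_active         # still True: disks may outlive a shutdown
set_status(inst, None, False)    # like MarkInstanceDisksInactive
assert inst.admin_state == "down"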
@@ -605,6 +605,7 @@ class IAllocator(object):
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
         "disk_template": iinfo.disk_template,
+        "disks_active": iinfo.disks_active,
         "hypervisor": iinfo.hypervisor,
         }
       pir["disk_space_total"] = gmi.ComputeDiskSize(iinfo.disk_template,
@@ -1057,6 +1057,7 @@ class Instance(TaggableObject):
     "nics",
     "disks",
     "disk_template",
+    "disks_active",
     "network_port",
     "serial_no",
     ] + _TIMESTAMPS + _UUID
@@ -472,6 +472,7 @@ $(buildObject "Instance" "inst" $
   , simpleField "nics" [t| [PartialNic] |]
   , simpleField "disks" [t| [Disk] |]
   , simpleField "disk_template" [t| DiskTemplate |]
+  , simpleField "disks_active" [t| Bool |]
   , optionalField $ simpleField "network_port" [t| Int |]
   ]
   ++ timeStampFields
@@ -131,6 +131,8 @@ instance Arbitrary Instance where
       <*> vectorOf 5 genDisk
       -- disk template
       <*> arbitrary
+      -- disks active
+      <*> arbitrary
       -- network port
       <*> arbitrary
       -- ts