Commit 60975797 authored by Iustin Pop's avatar Iustin Pop
Browse files

Implement gnt-cluster check-disk-sizes



This patch adds a new opcode and lu for checking disk sizes. Currently
it does only top-level disk verification, and also doesn't check
primary/secondary node size mismatches (these two are added as TODOs in
the Exec() function of the LU).
Signed-off-by: default avatarIustin Pop <iustin@google.com>
Reviewed-by: default avatarGuido Trotter <ultrotter@google.com>
parent 968a7623
......@@ -90,7 +90,7 @@ _gnt_cluster()
if [[ -e "@LOCALSTATEDIR@/lib/ganeti/ssconf_cluster_name" ]]; then
cmds="add-tags command copyfile destroy getmaster info list-tags \
masterfailover modify queue redist-conf remove-tags rename \
search-tags verify verify-disks version"
repair-disk-sizes search-tags verify verify-disks version"
else
cmds="init"
fi
......
......@@ -1329,6 +1329,100 @@ class LUVerifyDisks(NoHooksLU):
return result
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  For each selected instance, the size of every top-level disk is queried
  from the instance's primary node and compared with the size recorded in
  the configuration; mismatching records are overwritten with the reported
  value.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the 'instances' parameter and declare the needed locks.

    @raise errors.OpPrereqError: if 'instances' is not a list, or if one
        of the given names cannot be expanded to a known instance

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      # The lock declaration is built in one go; a previous item
      # assignment into self.needed_locks right before this statement was
      # immediately overwritten (and would fail if needed_locks were not
      # yet a dict), so it has been dropped.
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      # The node list starts empty and is filled in by DeclareLocks
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # No instances given: operate on everything
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    # NOTE(review): every level is requested with share flag 1 although
    # Exec() updates the instance configuration - confirm this is intended
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def DeclareLocks(self, level):
    """Compute the node locks when an explicit instance list was given.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.
    When no instance list was given, the names of the acquired instance
    locks are used instead.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: feedback function (not used by this LU)
    @rtype: list
    @return: a list of (instance name, disk index, new size) tuples, one
        for each disk whose recorded size has been changed

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # Group the disks by primary node, so that one RPC per node is enough
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.failed:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # The answer is matched to the request by position, so a length
      # mismatch makes the whole node result unusable
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # Scale the reported value down by 2**20 before comparing it with
        # the recorded one (presumably bytes -> MiB; confirm against the
        # node daemon)
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed
class LURenameCluster(LogicalUnit):
"""Rename the cluster.
......
......@@ -50,6 +50,7 @@ class Processor(object):
opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig,
opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes,
# node lu
opcodes.OpAddNode: cmdlib.LUAddNode,
opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
......
......@@ -229,6 +229,26 @@ class OpVerifyDisks(OpCode):
__slots__ = []
class OpRepairDiskSizes(OpCode):
  """Verify the disk sizes of the instances and fix configuration
  mismatches.

  Parameters: an optional list of instances, used to restrict the checks
  to a subset of the instances.

  Result: a list of (instance, disk, new-size) tuples, one for each
  changed configuration entry. In normal operation, the list should be
  empty.

  @type instances: list
  @ivar instances: the list of instances to check, or empty for all
      instances

  """
  OP_ID = "OP_CLUSTER_REPAIR_DISK_SIZES"
  __slots__ = ["instances"]
class OpQueryConfigValues(OpCode):
"""Query cluster configuration values."""
OP_ID = "OP_CLUSTER_CONFIG_QUERY"
......
......@@ -610,6 +610,38 @@
</para>
</refsect2>
<refsect2>
<title>REPAIR-DISK-SIZES</title>
<cmdsynopsis>
<command>repair-disk-sizes</command>
<arg rep="repeat">instance</arg>
</cmdsynopsis>
<para>
This command checks that the recorded size of the given
instance's disks matches the actual size and updates any
mismatches found. This is needed if the Ganeti configuration
is no longer consistent with reality, as it will impact some
disk operations. If no arguments are given, all instances will
be checked.
</para>
<para>
Note that only active disks can be checked by this command; in
case a disk cannot be activated it's advised to use
<command>gnt-instance activate-disks --ignore-size
...</command> to force activation without regard to the
current size.
</para>
<para>
When all the disk sizes are consistent, the command will
return no output. Otherwise it will log details about the
inconsistencies in the configuration.
</para>
</refsect2>
<refsect2>
<title>SEARCH-TAGS</title>
......
......@@ -409,6 +409,20 @@ def VerifyDisks(opts, args):
return retcode
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  Submits an OpRepairDiskSizes opcode restricted to the given instances;
  the opcode result is not inspected here.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpRepairDiskSizes(instances=args)
  SubmitOpCode(op)
  # Return an explicit success code, as promised by the docstring, instead
  # of implicitly returning None
  return 0
@UsesRPC
def MasterFailover(opts, args):
"""Failover the master node.
......@@ -621,6 +635,8 @@ commands = {
"", "Does a check on the cluster configuration"),
'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT],
"", "Does a check on the cluster disk status"),
'repair-disk-sizes': (RepairDiskSizes, ARGS_ANY, [DEBUG_OPT],
"", "Updates mismatches in recorded disk sizes"),
'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT,
make_option("--no-voting", dest="no_voting",
help="Skip node agreement check (dangerous)",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment