Commit b142ef15 authored by Iustin Pop

Merge commit 'origin/next' into branch-2.1

* commit 'origin/next': (28 commits)
  Fix a typo in InitCluster
  Ignore results from drained nodes in iallocator
  Ship the ethers hook
  Ethers hook, compatibility with old lockfile
  Remove a few unused imports from noded/masterd
  Move HVM's device_model to a hypervisor parameter
  Implement the KERNEL_PATH parameter for xen-hvm
  Upgrade be/hv params with default values
  Add cluster-init --no-etc-hosts parameter
  objects: add configuration upgrade system
  Update NEWS and version for 2.0.3 release
  example ethers hook: use lockfile-progs
  ethers hook lock: use logger not echo
  ethers hook: reduce the probability of data loss
  devel/upload: revert rsync -p
  export: add meaningful exit code
  Fix detecting of errors in export
  Implement gnt-cluster check-disk-sizes
  rpc: add rpc call for getting disk size
  bdev: Add function for reading actual disk size

	daemons/ganeti-masterd   - trivial, kept 2.1 version
	lib/         - trivial, kept 2.1 version
	lib/            - integrated the 2.0.3 changes
	lib/         - trivial
	lib/hypervisor/ - trivial, kept 2.1 version
	lib/           - trivial, kept 2.1 version
	lib/           - integrated the 2.0.3 changes
Signed-off-by: Iustin Pop <>
parents 05d47e33 bec0522b
......@@ -26,6 +26,7 @@ DIRS = \
devel \
doc \
doc/examples \
doc/examples/hooks \
lib \
lib/http \
lib/hypervisor \
......@@ -182,6 +183,7 @@ EXTRA_DIST = \
doc/examples/ \
doc/examples/ \
doc/examples/dumb-allocator \
doc/examples/hooks/ethers \
doc/locking.txt \
test/ \
test/ \
......@@ -341,6 +343,7 @@ $(REPLACE_VARS_SED): Makefile stamp-directories
echo 's#@PKGLIBDIR@#$(pkglibdir)#g'; \
} > $@
# We need to create symlinks because "make distcheck" will not install Python
Version 2.0.3
- Added “--ignore-size” to the “gnt-instance activate-disks” command
to allow using the pre-2.0.2 behaviour in activation, if any
existing instances have mismatched disk sizes in the configuration
- Added “gnt-cluster repair-disk-sizes” command to check and update
any configuration mismatches for disk sizes
- Added “gnt-cluster masterfailover --no-voting” to allow master
failover to work on two-node clusters
- Fixed the ‘--net’ option of “gnt-backup import”, which was unusable
- Fixed detection of OS script errors in “gnt-backup export”
- Fixed exit code of “gnt-backup export”
Version 2.0.2
- Added experimental support for striped logical volumes; this should
enhance performance but comes with a higher complexity in the block
# Configure script for Ganeti
m4_define([gnt_version_major], [2])
m4_define([gnt_version_minor], [0])
m4_define([gnt_version_revision], [2])
m4_define([gnt_version_revision], [3])
m4_define([gnt_version_suffix], [])
......@@ -28,13 +28,10 @@ inheritance from parent classes requires it.
import os
import errno
import sys
import SocketServer
import time
import collections
import Queue
import random
import signal
import logging
......@@ -26,9 +26,7 @@
import os
import sys
import traceback
import SocketServer
import errno
import logging
import signal
......@@ -250,6 +248,14 @@ class NodeHttpServer(http.server.HttpServer):
disks = [objects.Disk.FromDict(cf) for cf in params[1]]
return backend.BlockdevClose(params[0], disks)
def perspective_blockdev_getsize(params):
"""Compute the sizes of the given block devices.
disks = [objects.Disk.FromDict(cf) for cf in params[0]]
return backend.BlockdevGetsize(disks)
# blockdev/drbd specific methods ----------
......@@ -27,14 +27,22 @@
set -e
while [ "$#" -gt 0 ]; do
case "$opt" in
echo "Usage: $0 [--no-restart] hosts..."
exit 0
......@@ -42,10 +50,10 @@ while [ "$#" -gt 0 ]; do
echo "Unknown option: $opt" >&2
exit 1
hosts="$hosts $opt"
......@@ -58,39 +66,36 @@ trap 'rm -rf $TXD' EXIT
# install ganeti as a real tree
make install DESTDIR="$TXD"
# copy additional needed files
install -D --mode=0755 doc/examples/ganeti.initd \
install -D --mode=0644 doc/examples/bash_completion \
if [ -z "$NO_CRON" ]; then
install -D --mode=0644 doc/examples/ganeti.cron \
install -D --mode=0755 doc/examples/dumb-allocator \
echo ---
( cd "$TXD" && find; )
echo ---
# and now put it under $prefix on the target node(s)
for host; do
echo Uploading code to ${host}...
rsync -v -rlDc --exclude="*.py[oc]" --exclude="*.pdf" --exclude="*.html" \
root@${host}:$PREFIX/ &
install --mode=0755 doc/examples/ganeti.initd $INIT_SCRIPT
for host; do
echo Uploading init script to ${host}...
scp $INIT_SCRIPT root@${host}:/etc/init.d/ganeti &
"$TXD/" \
root@${host}:/ &
if [ -f ganeti-master-cron ]; then
for host; do
echo Uploading cron files to ${host}...
scp ganeti-master-cron root@${host}:/etc/ganeti/master-cron &
if test -z "${NO_RESTART}"; then
for host; do
echo Restarting ganeti-noded on ${host}...
......@@ -90,7 +90,7 @@ _gnt_cluster()
if [[ -e "@LOCALSTATEDIR@/lib/ganeti/ssconf_cluster_name" ]]; then
cmds="add-tags command copyfile destroy getmaster info list-tags \
masterfailover modify queue redist-conf remove-tags rename \
search-tags verify verify-disks version"
repair-disk-sizes search-tags verify verify-disks version"
......@@ -35,7 +35,13 @@
# bridge to /etc/ethers.
# In order to handle concurrent execution of this hook, we use the $LOCKFILE.
# LOCKFILE_CREATE and LOCKFILE_REMOVE are the path names for the lockfile-progs
# programs which we use as helpers.
[ -n "$hooks_path" ] || exit 1
......@@ -44,11 +50,8 @@ instance=$GANETI_INSTANCE_NAME
acquire_lockfile() {
if ! ( set -o noclobber; echo "$$" > $LOCKFILE) 2> /dev/null; then
logger -s "Cannot acquire lockfile for ethers update"
exit 1
trap "rm -f $LOCKFILE" EXIT
update_ethers_from_new() {
......@@ -176,7 +176,7 @@ instances
dictionary with the data for the nodes in the cluster, indexed by
the node name; the dict contains:
the node name; the dict contains [*]_ :
the total disk size of this node (mebibytes)
......@@ -225,9 +225,13 @@ nodes
or ``offline`` flags set. More details about these node status
flags are available in the manpage :manpage:`ganeti(7)`.
.. [*] Note that no run-time data is present for offline or drained nodes;
this means the tags total_memory, reserved_memory, free_memory, total_disk,
free_disk, total_cpus, i_pri_memory and i_pri_up_memory will be absent.
Respone message
Response message
The response message is much simpler than the input one. It is
also a dict having three keys:
......@@ -1433,6 +1433,32 @@ def BlockdevFind(disk):
return rbd.GetSyncStatus()
def BlockdevGetsize(disks):
"""Computes the size of the given disks.
If a disk is not found, returns None instead.
@type disks: list of L{objects.Disk}
@param disks: the list of disks to compute the size for
@rtype: list
@return: list with elements None if the disk cannot be found,
otherwise the size
result = []
for cf in disks:
rbd = _RecursiveFindBD(cf)
except errors.BlockDeviceError, err:
if rbd is None:
return result
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
"""Write a file to the filesystem.
......@@ -1820,8 +1846,8 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
# the target command is built out of three individual commands,
# which are joined by pipes; we check each individual command for
# valid parameters
expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
export_script, logfile)
expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
inst_os.path, export_script, logfile)
comprcmd = "gzip"
......@@ -1834,7 +1860,7 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
# all commands have been checked, so we're safe to combine them
command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
result = utils.RunCmd(command, env=export_env)
result = utils.RunCmd(["bash", "-c", command], env=export_env)
if result.failed:
_Fail("OS snapshot export command '%s' returned error: %s"
......@@ -304,6 +304,23 @@ class BlockDev(object):
raise NotImplementedError
def GetActualSize(self):
"""Return the actual disk size.
@note: the device needs to be active when this is called
assert self.attached, "BlockDevice not attached in GetActualSize()"
result = utils.RunCmd(["blockdev", "--getsize64", self.dev_path])
if result.failed:
_ThrowError("blockdev failed (%s): %s",
result.fail_reason, result.output)
sz = int(result.output.strip())
except (ValueError, TypeError), err:
_ThrowError("Failed to parse blockdev output: %s", str(err))
return sz
def __repr__(self):
return ("<%s: unique_id: %s, children: %s, %s:%s, %s>" %
(self.__class__, self.unique_id, self._children,
......@@ -1166,9 +1183,10 @@ class DRBD8(BaseDRBD):
args = ["drbdsetup", cls._DevPath(minor), "disk",
backend, meta, "0",
"-d", "%sm" % size,
"-e", "detach",
if size:
args.extend(["-d", "%sm" % size])
result = utils.RunCmd(args)
if result.failed:
_ThrowError("drbd%d: can't attach local disk: %s", minor, result.output)
......@@ -1777,6 +1795,19 @@ class FileStorage(BlockDev):
self.attached = os.path.exists(self.dev_path)
return self.attached
def GetActualSize(self):
"""Return the actual disk size.
@note: the device needs to be active when this is called
assert self.attached, "BlockDevice not attached in GetActualSize()"
st = os.stat(self.dev_path)
return st.st_size
except OSError, err:
_ThrowError("Can't stat %s: %s", self.dev_path, err)
def Create(cls, unique_id, children, size):
"""Create a new file.
......@@ -1500,6 +1500,100 @@ class LUVerifyDisks(NoHooksLU):
return result
class LURepairDiskSizes(NoHooksLU):
"""Verifies the cluster disks sizes.
_OP_REQP = ["instances"]
REQ_BGL = False
def ExpandNames(self):
if not isinstance(self.op.instances, list):
raise errors.OpPrereqError("Invalid argument type 'instances'")
if self.op.instances:
self.wanted_names = []
for name in self.op.instances:
full_name = self.cfg.ExpandInstanceName(name)
if full_name is None:
raise errors.OpPrereqError("Instance '%s' not known" % name)
self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
self.needed_locks = {
locking.LEVEL_NODE: [],
locking.LEVEL_INSTANCE: self.wanted_names,
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
self.wanted_names = None
self.needed_locks = {
locking.LEVEL_NODE: locking.ALL_SET,
locking.LEVEL_INSTANCE: locking.ALL_SET,
self.share_locks = dict(((i, 1) for i in locking.LEVELS))
def DeclareLocks(self, level):
if level == locking.LEVEL_NODE and self.wanted_names is not None:
def CheckPrereq(self):
"""Check prerequisites.
This only checks the optional instance list against the existing names.
if self.wanted_names is None:
self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
in self.wanted_names]
def Exec(self, feedback_fn):
"""Verify the size of cluster disks.
# TODO: check child disks too
# TODO: check differences in size between primary/secondary nodes
per_node_disks = {}
for instance in self.wanted_instances:
pnode = instance.primary_node
if pnode not in per_node_disks:
per_node_disks[pnode] = []
for idx, disk in enumerate(instance.disks):
per_node_disks[pnode].append((instance, idx, disk))
changed = []
for node, dskl in per_node_disks.items():
result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
if result.failed:
self.LogWarning("Failure in blockdev_getsizes call to node"
" %s, ignoring", node)
if len( != len(dskl):
self.LogWarning("Invalid result from node %s, ignoring node results",
for ((instance, idx, disk), size) in zip(dskl,
if size is None:
self.LogWarning("Disk %d of instance %s did not return size"
" information, ignoring", idx,
if not isinstance(size, (int, long)):
self.LogWarning("Disk %d of instance %s did not return valid"
" size information, ignoring", idx,
size = size >> 20
if size != disk.size:
self.LogInfo("Disk %d of instance %s has mismatched size,"
" correcting: recorded %d, actual %d", idx,, disk.size, size)
disk.size = size
changed.append((, idx, size))
return changed
class LURenameCluster(LogicalUnit):
"""Rename the cluster.
......@@ -3000,19 +3094,24 @@ class LUActivateInstanceDisks(NoHooksLU):
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
_CheckNodeOnline(self, self.instance.primary_node)
if not hasattr(self.op, "ignore_size"):
self.op.ignore_size = False
def Exec(self, feedback_fn):
"""Activate the disks.
disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
disks_ok, disks_info = \
_AssembleInstanceDisks(self, self.instance,
if not disks_ok:
raise errors.OpExecError("Cannot activate block devices")
return disks_info
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
"""Prepare the block devices for an instance.
This sets up the block devices on all nodes.
......@@ -3024,6 +3123,10 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
@type ignore_secondaries: boolean
@param ignore_secondaries: if true, errors on secondary nodes
won't result in an error return from the function
@type ignore_size: boolean
@param ignore_size: if true, the current known size of the disk
will not be used during the disk activation, useful for cases
when the size is wrong
@return: False if the operation failed, otherwise a list of
(host, instance_visible_name, node_visible_name)
with the mapping from node devices to instance devices
......@@ -3044,6 +3147,9 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
# 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if ignore_size:
node_disk = node_disk.Copy()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
msg = result.fail_msg
......@@ -3061,6 +3167,9 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if node != instance.primary_node:
if ignore_size:
node_disk = node_disk.Copy()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
msg = result.fail_msg
......@@ -7122,6 +7231,8 @@ class LUExportInstance(LogicalUnit):
for disk in instance.disks:
self.cfg.SetDiskID(disk, src_node)
# per-disk results
dresults = []
for idx, disk in enumerate(instance.disks):
# result.payload will be a snapshot of an lvm leaf of the one we passed
......@@ -7157,16 +7268,23 @@ class LUExportInstance(LogicalUnit):
if msg:
self.LogWarning("Could not export disk/%s from node %s to"
" node %s: %s", idx, src_node,, msg)
msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
if msg:
self.LogWarning("Could not remove snapshot for disk/%d from node"
" %s: %s", idx, src_node, msg)
result = self.rpc.call_finalize_export(, instance, snap_disks)
fin_resu = True
msg = result.fail_msg
if msg:
self.LogWarning("Could not finalize export for instance %s"
" on node %s: %s",,, msg)
fin_resu = False
nodelist = self.cfg.GetNodeList()
......@@ -7185,6 +7303,7 @@ class LUExportInstance(LogicalUnit):
if msg:
self.LogWarning("Could not remove older export for instance %s"
" on node %s: %s", iname, node, msg)
return fin_resu, dresults
class LURemoveExport(NoHooksLU):
......@@ -7548,11 +7667,12 @@ class IAllocator(object):
"master_candidate": ninfo.master_candidate,
if not ninfo.offline:
if not (ninfo.offline or ninfo.drained):
nresult.Raise("Can't get data for node %s" % nname)
node_iinfo[nname].Raise("Can't get node instance info from node %s" %
remote_info = nresult.payload
for attr in ['memory_total', 'memory_free', 'memory_dom0',
'vg_size', 'vg_free', 'cpu_total']:
if attr not in remote_info:
......@@ -661,4 +661,3 @@ CONFD_ERROR_INTERNAL = 2
# This constant decides how many seconds of skew to accept.
# TODO: make this a default and allow the value to be more configurable
......@@ -51,6 +51,7 @@ class Processor(object):
opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig,
opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes,
# node lu
opcodes.OpAddNode: cmdlib.LUAddNode,
opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
......@@ -187,6 +187,14 @@ class ConfigObject(object):
" _ContainerFromDicts" % c_type)
return ret
def Copy(self):
"""Makes a deep copy of the current object and its children.
dict_form = self.ToDict()
clone_obj = self.__class__.FromDict(dict_form)
return clone_obj
def __repr__(self):
"""Implement __repr__ for ConfigObjects."""
return repr(self.ToDict())
......@@ -463,6 +471,15 @@ class Disk(ConfigObject):
raise errors.ProgrammerError("Disk.RecordGrow called for unsupported"
" disk type %s" % self.dev_type)
def UnsetSize(self):
"""Sets recursively the size to zero for the disk and its children.
if self.children:
for child in self.children:
self.size = 0
def SetPhysicalID(self, target_node, nodes_ip):
"""Convert the logical ID to the physical ID.
......@@ -237,6 +237,26 @@ class OpVerifyDisks(OpCode):
__slots__ = OpCode.__slots__ + []
class OpRepairDiskSizes(OpCode):
"""Verify the disk sizes of the instances and fixes configuration
Parameters: optional instances list, in case we want to restrict the
checks to only a subset of the instances.
Result: a list of tuples, (instance, disk, new-size) for changed