Commit 8486ffc0 authored by Guido Trotter's avatar Guido Trotter
Browse files

Merge branch 'master' into next



* master:
  Update NEWS and version for 2.0.3 release
  devel/upload: revert rsync -p
  export: add meaningful exit code
  Fix detecting of errors in export
  Implement gnt-cluster check-disk-sizes
  rpc: add rpc call for getting disk size
  bdev: Add function for reading actual disk size
  Implement --ignore-size in activate-disks
  Add ignore size support in _AssembleInstanceDisks
  Add a objects.Disk.UnsetSize() method
  bdev: allow ignoring of size in Assemble()
  Fix instance import net option
  Simplify the devel/upload script
  Add a Copy method to object.ConfigObject
  Extend call_node_start_master rpc with no_voting

Conflicts:

  daemons/ganeti-masterd
    s/SimpleConfigReader/SimpleStore/ VS start-master no-voting
    (kept both)
Signed-off-by: default avatarGuido Trotter <ultrotter@google.com>
parents e24999ab 9d725d41
......@@ -330,6 +330,7 @@ $(REPLACE_VARS_SED): Makefile stamp-directories
echo 's#@CUSTOM_XEN_KERNEL@#$(XEN_KERNEL)#g'; \
echo 's#@CUSTOM_XEN_INITRD@#$(XEN_INITRD)#g'; \
echo 's#@RPL_FILE_STORAGE_DIR@#$(FILE_STORAGE_DIR)#g'; \
echo 's#@PKGLIBDIR@#$(pkglibdir)#g'; \
} > $@
# We need to create symlinks because "make distcheck" will not install Python
......
Version 2.0.3
- Added “--ignore-size” to the “gnt-instance activate-disks” command
to allow using the pre-2.0.2 behaviour in activation, if any
existing instances have mismatched disk sizes in the configuration
- Added “gnt-cluster repair-disk-sizes” command to check and update
any configuration mismatches for disk sizes
- Added “gnt-cluster masterfailover --no-voting” to allow master
failover to work on two-node clusters
- Fixed the ‘--net’ option of “gnt-backup import”, which was unusable
- Fixed detection of OS script errors in “gnt-backup export”
- Fixed exit code of “gnt-backup export”
Version 2.0.2
- Added experimental support for stripped logical volumes; this should
enhance performance but comes with a higher complexity in the block
......
# Configure script for Ganeti
m4_define([gnt_version_major], [2])
m4_define([gnt_version_minor], [0])
m4_define([gnt_version_revision], [2])
m4_define([gnt_version_revision], [3])
m4_define([gnt_version_suffix], [])
m4_define([gnt_version_full],
m4_format([%d.%d.%d%s],
......
......@@ -524,7 +524,7 @@ def main():
try:
# activate ip
master_node = ssconf.SimpleStore().GetMasterNode()
if not rpc.RpcRunner.call_node_start_master(master_node, False):
if not rpc.RpcRunner.call_node_start_master(master_node, False, False):
logging.error("Can't activate master IP address")
master.setup_queue()
......
......@@ -232,6 +232,14 @@ class NodeHttpServer(http.server.HttpServer):
disks = [objects.Disk.FromDict(cf) for cf in params[1]]
return backend.BlockdevClose(params[0], disks)
@staticmethod
def perspective_blockdev_getsize(params):
  """Compute the sizes of the given block devices.

  @param params: single-element list; params[0] is a list of
      serialized (dict-form) disk objects to measure
  @return: the result of L{backend.BlockdevGetsize} for the
      deserialized disks

  """
  disk_dicts = params[0]
  disk_objects = [objects.Disk.FromDict(dd) for dd in disk_dicts]
  return backend.BlockdevGetsize(disk_objects)
# blockdev/drbd specific methods ----------
@staticmethod
......@@ -520,7 +528,7 @@ class NodeHttpServer(http.server.HttpServer):
"""Promote this node to master status.
"""
return backend.StartMaster(params[0])
return backend.StartMaster(params[0], params[1])
@staticmethod
def perspective_node_stop_master(params):
......
......@@ -27,14 +27,22 @@
set -e
PREFIX='@PREFIX@'
SYSCONFDIR='@SYSCONFDIR@'
PKGLIBDIR='@PKGLIBDIR@'
NO_RESTART=
NO_CRON=
hosts=
while [ "$#" -gt 0 ]; do
opt="$1"
case "$opt" in
--no-restart)
NO_RESTART=1
;;
;;
--no-cron)
NO_CRON=1
;;
-h|--help)
echo "Usage: $0 [--no-restart] hosts..."
exit 0
......@@ -42,10 +50,10 @@ while [ "$#" -gt 0 ]; do
-*)
echo "Unknown option: $opt" >&2
exit 1
;;
;;
*)
hosts="$hosts $opt"
;;
;;
esac
shift
done
......@@ -58,39 +66,36 @@ trap 'rm -rf $TXD' EXIT
# install ganeti as a real tree
make install DESTDIR="$TXD"
# copy additional needed files
install -D --mode=0755 doc/examples/ganeti.initd \
"$TXD/$SYSCONFDIR/init.d/ganeti"
install -D --mode=0644 doc/examples/bash_completion \
"$TXD/$SYSCONFDIR/bash_completion.d/ganeti"
if [ -z "$NO_CRON" ]; then
install -D --mode=0644 doc/examples/ganeti.cron \
"$TXD/$SYSCONFDIR/cron.d/ganeti"
fi
install -D --mode=0755 doc/examples/dumb-allocator \
"$TXD/$PKGLIBDIR/iallocators/dumb"
echo ---
( cd "$TXD" && find; )
echo ---
PREFIX='@PREFIX@'
# and now put it under $prefix on the target node(s)
for host; do
echo Uploading code to ${host}...
rsync -v -rlDc --exclude="*.py[oc]" --exclude="*.pdf" --exclude="*.html" \
"$TXD/$PREFIX/" \
root@${host}:$PREFIX/ &
done
wait
INIT_SCRIPT="$TXD/ganeti.initd"
install --mode=0755 doc/examples/ganeti.initd $INIT_SCRIPT
for host; do
echo Uploading init script to ${host}...
scp $INIT_SCRIPT root@${host}:/etc/init.d/ganeti &
"$TXD/" \
root@${host}:/ &
done
wait
if [ -f ganeti-master-cron ]; then
for host; do
echo Uploading cron files to ${host}...
scp ganeti-master-cron root@${host}:/etc/ganeti/master-cron &
done
fi
wait
if test -z "${NO_RESTART}"; then
for host; do
echo Restarting ganeti-noded on ${host}...
......
......@@ -90,7 +90,7 @@ _gnt_cluster()
if [[ -e "@LOCALSTATEDIR@/lib/ganeti/ssconf_cluster_name" ]]; then
cmds="add-tags command copyfile destroy getmaster info list-tags \
masterfailover modify queue redist-conf remove-tags rename \
search-tags verify verify-disks version"
repair-disk-sizes search-tags verify verify-disks version"
else
cmds="init"
fi
......
......@@ -169,7 +169,7 @@ def GetMasterInfo():
return (master_netdev, master_ip, master_node)
def StartMaster(start_daemons):
def StartMaster(start_daemons, no_voting):
"""Activate local node as master node.
The function will always try activate the IP address of the master
......@@ -179,6 +179,9 @@ def StartMaster(start_daemons):
@type start_daemons: boolean
@param start_daemons: whether to also start the master
daemons (ganeti-masterd and ganeti-rapi)
@type no_voting: boolean
@param no_voting: whether to start ganeti-masterd without a node vote
(if start_daemons is True), but still non-interactively
@rtype: None
"""
......@@ -208,8 +211,17 @@ def StartMaster(start_daemons):
# and now start the master and rapi daemons
if start_daemons:
for daemon in 'ganeti-masterd', 'ganeti-rapi':
result = utils.RunCmd([daemon])
daemons_params = {
'ganeti-masterd': [],
'ganeti-rapi': [],
}
if no_voting:
daemons_params['ganeti-masterd'].append('--no-voting')
daemons_params['ganeti-masterd'].append('--yes-do-it')
for daemon in daemons_params:
cmd = [daemon]
cmd.extend(daemons_params[daemon])
result = utils.RunCmd(cmd)
if result.failed:
logging.error("Can't start daemon %s: %s", daemon, result.output)
ok = False
......@@ -1452,6 +1464,32 @@ def BlockdevFind(disk):
return (True, (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus())
def BlockdevGetsize(disks):
  """Compute the actual size of the given disks.

  If a disk cannot be found (or looking it up fails), None is
  returned for that entry instead of a size.

  @type disks: list of L{objects.Disk}
  @param disks: the disks to compute the size for
  @rtype: list
  @return: one entry per input disk, in the same order: the actual
      size, or None if the device cannot be found

  """
  def _SizeOrNone(disk):
    # Both a lookup error and a missing device map to None.
    try:
      dev = _RecursiveFindBD(disk)
    except errors.BlockDeviceError:
      return None
    if dev is None:
      return None
    return dev.GetActualSize()

  return [_SizeOrNone(disk) for disk in disks]
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
"""Write a file to the filesystem.
......@@ -1815,8 +1853,8 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
# the target command is built out of three individual commands,
# which are joined by pipes; we check each individual command for
# valid parameters
expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
export_script, logfile)
expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
inst_os.path, export_script, logfile)
comprcmd = "gzip"
......@@ -1829,7 +1867,7 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
# all commands have been checked, so we're safe to combine them
command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
result = utils.RunCmd(command, env=export_env)
result = utils.RunCmd(["bash", "-c", command], env=export_env)
if result.failed:
logging.error("os snapshot export command '%s' returned error: %s"
......
......@@ -277,6 +277,23 @@ class BlockDev(object):
"""
raise NotImplementedError
def GetActualSize(self):
  """Return the actual disk size in bytes.

  The size is queried from the kernel via "blockdev --getsize64".

  @note: the device needs to be active when this is called
  @rtype: int
  @return: the device size in bytes

  """
  assert self.attached, "BlockDevice not attached in GetActualSize()"

  cmd_result = utils.RunCmd(["blockdev", "--getsize64", self.dev_path])
  if cmd_result.failed:
    _ThrowError("blockdev failed (%s): %s",
                cmd_result.fail_reason, cmd_result.output)

  raw_output = cmd_result.output.strip()
  try:
    return int(raw_output)
  except (ValueError, TypeError) as err:
    _ThrowError("Failed to parse blockdev output: %s", str(err))
def __repr__(self):
return ("<%s: unique_id: %s, children: %s, %s:%s, %s>" %
(self.__class__, self.unique_id, self._children,
......@@ -1129,9 +1146,10 @@ class DRBD8(BaseDRBD):
"""
args = ["drbdsetup", cls._DevPath(minor), "disk",
backend, meta, "0",
"-d", "%sm" % size,
"-e", "detach",
"--create-device"]
if size:
args.extend(["-d", "%sm" % size])
result = utils.RunCmd(args)
if result.failed:
_ThrowError("drbd%d: can't attach local disk: %s", minor, result.output)
......@@ -1727,6 +1745,19 @@ class FileStorage(BlockDev):
self.attached = os.path.exists(self.dev_path)
return self.attached
def GetActualSize(self):
  """Return the actual size of the backing file, in bytes.

  @note: the device needs to be active when this is called

  """
  assert self.attached, "BlockDevice not attached in GetActualSize()"
  try:
    return os.stat(self.dev_path).st_size
  except OSError as err:
    _ThrowError("Can't stat %s: %s", self.dev_path, err)
@classmethod
def Create(cls, unique_id, children, size):
"""Create a new file.
......
......@@ -275,7 +275,7 @@ def InitCluster(cluster_name, mac_prefix, def_bridge,
# start the master ip
# TODO: Review rpc call from bootstrap
rpc.RpcRunner.call_node_start_master(hostname.name, True)
rpc.RpcRunner.call_node_start_master(hostname.name, True, False)
def InitConfig(version, cluster_config, master_node_config,
......@@ -453,8 +453,7 @@ def MasterFailover(no_voting=False):
# cluster info
cfg.Update(cluster_info)
# 2.0.X: Don't start the master if no_voting is true
result = rpc.RpcRunner.call_node_start_master(new_master, not no_voting)
result = rpc.RpcRunner.call_node_start_master(new_master, True, no_voting)
if result.failed or not result.data:
logging.error("Could not start the master role on the new master"
" %s, please check", new_master)
......
......@@ -1325,6 +1325,100 @@ class LUVerifyDisks(NoHooksLU):
return result
class LURepairDiskSizes(NoHooksLU):
"""Repairs recorded disk sizes that no longer match the actual devices.

Queries each primary node for the actual size of the instances' disks
and updates the configuration where a mismatch is found.

"""
_OP_REQP = ["instances"]
REQ_BGL = False
def ExpandNames(self):
# A non-list "instances" argument is a caller bug, so fail loudly.
if not isinstance(self.op.instances, list):
raise errors.OpPrereqError("Invalid argument type 'instances'")
if self.op.instances:
# Restrict the check to the named instances; expand each name to its
# canonical form and fail early on unknown names.
self.wanted_names = []
for name in self.op.instances:
full_name = self.cfg.ExpandInstanceName(name)
if full_name is None:
raise errors.OpPrereqError("Instance '%s' not known" % name)
self.wanted_names.append(full_name)
# NOTE(review): the next assignment is immediately overwritten by the
# dict literal that follows (dead store) -- this looks like merge/diff
# residue; confirm against upstream history.
self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
self.needed_locks = {
locking.LEVEL_NODE: [],
locking.LEVEL_INSTANCE: self.wanted_names,
}
# Node locks are computed later from the instance locks (DeclareLocks).
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
else:
# No instance list given: check every instance, locking everything.
self.wanted_names = None
self.needed_locks = {
locking.LEVEL_NODE: locking.ALL_SET,
locking.LEVEL_INSTANCE: locking.ALL_SET,
}
# Shared locks at every level; only disk sizes get updated.
self.share_locks = dict(((i, 1) for i in locking.LEVELS))
def DeclareLocks(self, level):
# Only the primary nodes are queried, so only those need to be locked.
if level == locking.LEVEL_NODE and self.wanted_names is not None:
self._LockInstancesNodes(primary_only=True)
def CheckPrereq(self):
"""Check prerequisites.

This only checks the optional instance list against the existing names.

"""
if self.wanted_names is None:
self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
in self.wanted_names]
def Exec(self, feedback_fn):
"""Verify the size of cluster disks.

@return: list of (instance-name, disk-index, new-size) tuples for
    the disks whose recorded size was corrected

"""
# TODO: check child disks too
# TODO: check differences in size between primary/secondary nodes
# Group disks by primary node so each node is queried only once.
per_node_disks = {}
for instance in self.wanted_instances:
pnode = instance.primary_node
if pnode not in per_node_disks:
per_node_disks[pnode] = []
for idx, disk in enumerate(instance.disks):
per_node_disks[pnode].append((instance, idx, disk))
changed = []
for node, dskl in per_node_disks.items():
result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
if result.failed:
# Best-effort: a failing node is logged and skipped, not fatal.
self.LogWarning("Failure in blockdev_getsizes call to node"
" %s, ignoring", node)
continue
if len(result.data) != len(dskl):
self.LogWarning("Invalid result from node %s, ignoring node results",
node)
continue
for ((instance, idx, disk), size) in zip(dskl, result.data):
if size is None:
self.LogWarning("Disk %d of instance %s did not return size"
" information, ignoring", idx, instance.name)
continue
if not isinstance(size, (int, long)):
self.LogWarning("Disk %d of instance %s did not return valid"
" size information, ignoring", idx, instance.name)
continue
# The RPC reports bytes (blockdev --getsize64); the configuration
# stores mebibytes, hence the shift before comparing.
size = size >> 20
if size != disk.size:
self.LogInfo("Disk %d of instance %s has mismatched size,"
" correcting: recorded %d, actual %d", idx,
instance.name, disk.size, size)
disk.size = size
self.cfg.Update(instance)
changed.append((instance.name, idx, size))
return changed
class LURenameCluster(LogicalUnit):
"""Rename the cluster.
......@@ -1399,7 +1493,7 @@ class LURenameCluster(LogicalUnit):
constants.SSH_KNOWN_HOSTS_FILE, to_node)
finally:
result = self.rpc.call_node_start_master(master, False)
result = self.rpc.call_node_start_master(master, False, False)
if result.failed or not result.data:
self.LogWarning("Could not re-enable the master role on"
" the master, please restart manually.")
......@@ -2595,19 +2689,24 @@ class LUActivateInstanceDisks(NoHooksLU):
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
_CheckNodeOnline(self, self.instance.primary_node)
if not hasattr(self.op, "ignore_size"):
self.op.ignore_size = False
def Exec(self, feedback_fn):
"""Activate the disks.
"""
disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
disks_ok, disks_info = \
_AssembleInstanceDisks(self, self.instance,
ignore_size=self.op.ignore_size)
if not disks_ok:
raise errors.OpExecError("Cannot activate block devices")
return disks_info
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
ignore_size=False):
"""Prepare the block devices for an instance.
This sets up the block devices on all nodes.
......@@ -2619,6 +2718,10 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
@type ignore_secondaries: boolean
@param ignore_secondaries: if true, errors on secondary nodes
won't result in an error return from the function
@type ignore_size: boolean
@param ignore_size: if true, the current known size of the disk
will not be used during the disk activation, useful for cases
when the size is wrong
@return: False if the operation failed, otherwise a list of
(host, instance_visible_name, node_visible_name)
with the mapping from node devices to instance devices
......@@ -2639,6 +2742,9 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
# 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if ignore_size:
node_disk = node_disk.Copy()
node_disk.UnsetSize()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
msg = result.RemoteFailMsg()
......@@ -2656,6 +2762,9 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if node != instance.primary_node:
continue
if ignore_size:
node_disk = node_disk.Copy()
node_disk.UnsetSize()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
msg = result.RemoteFailMsg()
......@@ -6365,6 +6474,8 @@ class LUExportInstance(LogicalUnit):
for disk in instance.disks:
self.cfg.SetDiskID(disk, src_node)
# per-disk results
dresults = []
try:
for idx, disk in enumerate(instance.disks):
# new_dev_name will be a snapshot of an lvm leaf of the one we passed
......@@ -6398,15 +6509,22 @@ class LUExportInstance(LogicalUnit):
if result.failed or not result.data:
self.LogWarning("Could not export disk/%d from node %s to"
" node %s", idx, src_node, dst_node.name)
dresults.append(False)
else:
dresults.append(True)
msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
if msg:
self.LogWarning("Could not remove snapshot for disk/%d from node"
" %s: %s", idx, src_node, msg)
else:
dresults.append(False)
result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
fin_resu = True
if result.failed or not result.data:
self.LogWarning("Could not finalize export for instance %s on node %s",
instance.name, dst_node.name)
fin_resu = False
nodelist = self.cfg.GetNodeList()
nodelist.remove(dst_node.name)
......@@ -6423,6 +6541,7 @@ class LUExportInstance(LogicalUnit):
if not self.rpc.call_export_remove(node, instance.name):
self.LogWarning("Could not remove older export for instance %s"
" on node %s", instance.name, node)
return fin_resu, dresults
class LURemoveExport(NoHooksLU):
......
......@@ -50,6 +50,7 @@ class Processor(object):
opcodes.OpVerifyDisks: cmdlib.LUVerifyDisks,
opcodes.OpSetClusterParams: cmdlib.LUSetClusterParams,
opcodes.OpRedistributeConfig: cmdlib.LURedistributeConfig,
opcodes.OpRepairDiskSizes: cmdlib.LURepairDiskSizes,
# node lu
opcodes.OpAddNode: cmdlib.LUAddNode,
opcodes.OpQueryNodes: cmdlib.LUQueryNodes,
......
......@@ -153,6 +153,14 @@ class ConfigObject(object):
" _ContainerFromDicts" % c_type)
return ret
def Copy(self):
  """Return a deep copy of this object and all of its children.

  The clone is produced by round-tripping through the dict
  serialization (L{ToDict}/L{FromDict}), so only serializable state
  is carried over.

  """
  return self.__class__.FromDict(self.ToDict())
def __repr__(self):
"""Implement __repr__ for ConfigObjects."""
return repr(self.ToDict())
......@@ -388,6 +396,15 @@ class Disk(ConfigObject):
raise errors.ProgrammerError("Disk.RecordGrow called for unsupported"
" disk type %s" % self.dev_type)
def UnsetSize(self):
  """Recursively set the size of this disk and all its children to zero.

  Used where the recorded size must be ignored (see the ignore_size
  handling in disk activation).

  """
  for child in self.children or []:
    child.UnsetSize()
  self.size = 0
def SetPhysicalID(self, target_node, nodes_ip):
"""Convert the logical ID to the physical ID.
......
......@@ -224,6 +224,26 @@ class OpVerifyDisks(OpCode):
__slots__ = []
class OpRepairDiskSizes(OpCode):
"""Verify the disk sizes of the instances and fix configuration mismatches.

Parameters: optional instances list, in case we want to restrict the
checks to only a subset of the instances.

Result: a list of tuples, (instance, disk, new-size) for changed
configurations.

In normal operation, the list should be empty.

@type instances: list
@ivar instances: the list of instances to check, or empty for all instances

"""
# Opcode identifier; dispatched to LURepairDiskSizes by the processor
OP_ID = "OP_CLUSTER_REPAIR_DISK_SIZES"
__slots__ = ["instances"]
class OpQueryConfigValues(OpCode):
"""Query cluster configuration values."""
OP_ID = "OP_CLUSTER_CONFIG_QUERY"
......@@ -433,7 +453,7 @@ class OpActivateInstanceDisks(OpCode):
"""Activate an instance's disks."""
OP_ID = "OP_INSTANCE_ACTIVATE_DISKS"
OP_DSC_FIELD = "instance_name"
__slots__ = ["instance_name"]
__slots__ = ["instance_name", "ignore_size"]
class OpDeactivateInstanceDisks(OpCode):
......
......@@ -681,14 +681,14 @@ class RpcRunner(object):
[checkdict, cluster_name])
@classmethod