Commit 775c6d3e authored by Guido Trotter

Merge branch 'next' into branch-2.1

* next:
  lvmstrap: Change diskinfo to use GenerateTable
  Get rid of constants.RAPI_ENABLE
  Remove references to utils.debug
  ganeti-rapi, replace hardcoded exit value
  Add the bind-address option to ganeti-rapi
  noded: Abstract hard-coded sys.exit value
  Add an example "ethers" hook
  burnin: move batch init/commit into a decorator
  burnin: move instance alive checks to a decorator
  burnin: Implement retryable operations
  Ignore vim swap files
  burnin: fix removal errors hiding real errors
parents 464243a7 e194129a
......@@ -6,6 +6,7 @@
# global ignores
*.py[co]
*.swp
# /
/Makefile
......
......@@ -473,7 +473,6 @@ def main():
"""Main function"""
options, args = ParseOptions()
utils.debug = options.debug
utils.no_fork = True
if options.fork:
......
......@@ -764,7 +764,6 @@ def main():
global queue_lock
options, args = ParseOptions()
utils.debug = options.debug
if options.fork:
utils.CloseFDs()
......@@ -772,13 +771,9 @@ def main():
for fname in (constants.SSL_CERT_FILE,):
if not os.path.isfile(fname):
print "config %s not there, will not run." % fname
sys.exit(5)
sys.exit(constants.EXIT_NOTCLUSTER)
try:
port = utils.GetNodeDaemonPort()
except errors.ConfigurationError, err:
print "Cluster configuration incomplete: '%s'" % str(err)
sys.exit(5)
port = utils.GetNodeDaemonPort()
dirs = [(val, constants.RUN_DIRS_MODE) for val in constants.SUB_RUN_DIRS]
dirs.append((constants.LOG_OS_DIR, 0750))
......
......@@ -206,17 +206,20 @@ def ParseOptions():
parser.add_option("-f", "--foreground", dest="fork",
help="Don't detach from the current terminal",
default=True, action="store_false")
parser.add_option("-b", "--bind", dest="bind_address",
help="Bind address",
default="", metavar="ADDRESS")
options, args = parser.parse_args()
if len(args) != 0:
print >> sys.stderr, "Usage: %s [-d] [-p port]" % sys.argv[0]
sys.exit(1)
sys.exit(constants.EXIT_FAILURE)
if options.ssl and not (options.ssl_cert and options.ssl_key):
print >> sys.stderr, ("For secure mode please provide "
"--ssl-key and --ssl-cert arguments")
sys.exit(1)
"--ssl-key and --ssl-cert arguments")
sys.exit(constants.EXIT_FAILURE)
return options, args
......@@ -237,7 +240,7 @@ def main():
ssl_cert_path=options.ssl_cert)
except Exception, err:
sys.stderr.write("Can't load the SSL certificate/key: %s\n" % (err,))
sys.exit(1)
sys.exit(constants.EXIT_FAILURE)
else:
ssl_params = None
......@@ -252,7 +255,7 @@ def main():
utils.WritePidFile(constants.RAPI_PID)
try:
mainloop = daemon.Mainloop()
server = RemoteApiHttpServer(mainloop, "", options.port,
server = RemoteApiHttpServer(mainloop, options.bind_address, options.port,
ssl_params=ssl_params, ssl_verify_peer=False,
request_executor_class=
JsonErrorRequestExecutor)
......
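Note: with the new -b/--bind option the REST daemon no longer has to listen on all interfaces (the previously hardcoded "" wildcard); for example, `ganeti-rapi -f -b 127.0.0.1` (a hypothetical invocation, default port) would keep the API reachable only from the local host.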
#!/bin/bash
# Copyright (C) 2009 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
# This is an example ganeti hook that writes the instance mac addresses in the
# node's /etc/ethers file. It will pick up the first nic connected to the
# TARGET_BRIDGE bridge, and write it down with the syntax "MAC INSTANCE_NAME".
# The hook will also send a HUP signal to the daemon whose PID is in
# DAEMON_PID_FILE, so that it can load the new /etc/ethers file and use it.
# This has been tested in conjunction with dnsmasq's dhcp implementation.
# It will also remove any other occurrences of the same instance in the
# aforementioned file. This hook supports the "instance-add", "instance-modify",
# "instance-remove", and "instance-mirror-replace" ganeti post hook paths. To
# install it, add a symlink from those hooks' directories to where this file is
# installed (with a mode which permits execution).
# TARGET_BRIDGE: We'll only add the first nic which gets connected to this
# bridge to /etc/ethers.
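# An illustrative install (the exact paths are assumptions and depend on
# where Ganeti keeps its hooks on your system):
#   ln -s /usr/local/share/doc/ganeti/examples/hooks/ethers \
#     /etc/ganeti/hooks/instance-add-post.d/ethers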
TARGET_BRIDGE="br0"
DAEMON_PID_FILE="/var/run/dnsmasq.pid"
LOCKFILE="/var/lock/ganeti_ethers.lock"
hooks_path=$GANETI_HOOKS_PATH
[ -n "$hooks_path" ] || exit 1
instance=$GANETI_INSTANCE_NAME
[ -n "$instance" ] || exit 1
nic_count=$GANETI_INSTANCE_NIC_COUNT
acquire_lockfile() {
if ! ( set -o noclobber; echo "$$" > $LOCKFILE) 2> /dev/null; then
logger -s "Cannot acquire lockfile for ethers update"
exit 1
fi
trap "rm -f $LOCKFILE" EXIT
}
update_ethers_from_new() {
chmod 644 /etc/ethers.new
mv /etc/ethers.new /etc/ethers
[ -f "$DAEMON_PID_FILE" ] && kill -HUP $(< $DAEMON_PID_FILE)
}
if [ "$hooks_path" = "instance-add" -o \
"$hooks_path" = "instance-modify" -o \
"$hooks_path" = "instance-mirror-replace" ]
then
for i in $(seq 0 $((nic_count - 1)) ); do
bridge_var="GANETI_INSTANCE_NIC${i}_BRIDGE"
bridge=${!bridge_var}
if [ -n "$bridge" -a "$bridge" = "$TARGET_BRIDGE" ]; then
mac_var="GANETI_INSTANCE_NIC${i}_MAC"
mac=${!mac_var}
acquire_lockfile
cat /etc/ethers | awk -- "! /^([[:xdigit:]:]*)[[:blank:]]+$instance\>/;
END {print \"$mac\t$instance\"}" > /etc/ethers.new
update_ethers_from_new
break
fi
done
fi
if [ "$hooks_path" = "instance-remove" -o \
\( "$hooks_path" = "instance-modify" -a "$nic_count" -eq 0 \) ]; then
acquire_lockfile
cat /etc/ethers | awk -- "! /^([[:xdigit:]:]*)[[:blank:]]+$instance\>/" \
> /etc/ethers.new
update_ethers_from_new
fi
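For reference, the awk invocations above emit tab-separated entries, so a resulting /etc/ethers line looks like this (illustrative MAC and instance name):

    aa:00:00:fa:3a:3f	instance1.example.com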
......@@ -766,8 +766,6 @@ def GenericMain(commands, override=None, aliases=None):
utils.SetupLogging(constants.LOG_COMMANDS, debug=options.debug,
stderr_logging=True, program=binary)
utils.debug = options.debug
if old_cmdline:
logging.info("run with arguments '%s'", old_cmdline)
else:
......
......@@ -228,6 +228,7 @@ DDM_REMOVE = 'remove'
# common exit codes
EXIT_SUCCESS = 0
EXIT_FAILURE = 1
EXIT_NOTCLUSTER = 5
EXIT_NOTMASTER = 11
EXIT_NODESETUP_ERROR = 12
EXIT_CONFIRMATION = 13 # need user confirmation
......@@ -487,8 +488,6 @@ OPS_FINALIZED = frozenset([OP_STATUS_CANCELED,
ELOG_MESSAGE = "message"
ELOG_PROGRESS = "progress"
# Temporary RAPI constants until we have cluster parameters
RAPI_ENABLE = True
RAPI_PORT = 5080
# max dynamic devices
......
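The new EXIT_NOTCLUSTER constant gives a name to the magic exit value 5 that ganeti-noded used; a minimal sketch of the intended call site, mirroring the noded hunk above:

    import os
    import sys

    from ganeti import constants

    if not os.path.isfile(constants.SSL_CERT_FILE):
      sys.exit(constants.EXIT_NOTCLUSTER)  # formerly the bare literal 5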
......@@ -58,19 +58,7 @@ def Enabled():
"""Return whether remote API tests should be run.
"""
return constants.RAPI_ENABLE and qa_config.TestEnabled('rapi')
def PrintRemoteAPIWarning():
"""Print warning if remote API is not enabled.
"""
if constants.RAPI_ENABLE or not qa_config.TestEnabled('rapi'):
return
msg = ("Remote API is not enabled in this Ganeti build. Please run"
" `configure [...] --enable-rapi'.")
print
print qa_utils.FormatWarning(msg)
return qa_config.TestEnabled('rapi')
def _DoTests(uris):
......
......@@ -41,11 +41,16 @@ from ganeti import utils
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
MAX_RETRIES = 3
class InstanceDown(Exception):
"""The checked instance was not up"""
class BurninFailure(Exception):
"""Failure detected during burning"""
def Usage():
"""Shows program usage information and exits the program."""
......@@ -106,6 +111,9 @@ class Burner(object):
self.to_rem = []
self.queued_ops = []
self.opts = None
self.queue_retry = False
self.disk_count = self.disk_growth = self.disk_size = None
self.hvp = self.bep = None
self.ParseOptions()
self.cl = cli.GetClient()
self.GetState()
......@@ -125,7 +133,39 @@ class Burner(object):
if self.opts.verbose:
Log(msg, indent=3)
def ExecOp(self, *ops):
def MaybeRetry(self, retry_count, msg, fn, *args):
"""Possibly retry a given function execution.
@type retry_count: int
@param retry_count: retry counter:
- 0: non-retryable action
- 1: last retry for a retryable action
- MAX_RETRIES: original try for a retryable action
@type msg: str
@param msg: the kind of the operation
@type fn: callable
@param fn: the function to be called
"""
try:
val = fn(*args)
if retry_count > 0 and retry_count < MAX_RETRIES:
Log("Idempotent %s succeeded after %d retries" %
(msg, MAX_RETRIES - retry_count))
return val
except Exception, err:
if retry_count == 0:
Log("Non-idempotent %s failed, aborting" % (msg, ))
raise
elif retry_count == 1:
Log("Idempotent %s repeated failure, aborting" % (msg, ))
raise
else:
Log("Idempotent %s failed, retry #%d/%d: %s" %
(msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
return self.MaybeRetry(retry_count - 1, msg, fn, *args)
def _ExecOp(self, *ops):
"""Execute one or more opcodes and manage the exec buffer.
@result: if only one opcode has been passed, we return its result;
......@@ -139,20 +179,48 @@ class Burner(object):
else:
return results
def ExecOp(self, retry, *ops):
"""Execute one or more opcodes and manage the exec buffer.
@result: if only one opcode has been passed, we return its result;
otherwise we return the list of results
"""
if retry:
rval = MAX_RETRIES
else:
rval = 0
return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)
def ExecOrQueue(self, name, *ops):
"""Execute an opcode and manage the exec buffer."""
if self.opts.parallel:
self.queued_ops.append((ops, name))
else:
return self.ExecOp(*ops)
return self.ExecOp(self.queue_retry, *ops)
def StartBatch(self, retry):
"""Start a new batch of jobs.
@param retry: whether this is a retryable batch
"""
self.queued_ops = []
self.queue_retry = retry
def CommitQueue(self):
"""Execute all submitted opcodes in case of parallel burnin"""
if not self.opts.parallel:
return
if self.queue_retry:
rval = MAX_RETRIES
else:
rval = 0
try:
results = self.ExecJobSet(self.queued_ops)
results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
self.queued_ops)
finally:
self.queued_ops = []
return results
......@@ -171,10 +239,45 @@ class Burner(object):
results = []
for jid, (_, iname) in zip(job_ids, jobs):
Log("waiting for job %s for %s" % (jid, iname), indent=2)
results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
try:
results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
except Exception, err:
Log("Job for %s failed: %s" % (iname, err))
if len(results) != len(jobs):
raise BurninFailure()
return results
def _DoCheckInstances(fn):
"""Decorator for checking instances.
"""
def wrapper(self, *args, **kwargs):
val = fn(self, *args, **kwargs)
for instance in self.instances:
self._CheckInstanceAlive(instance)
return val
return wrapper
def _DoBatch(retry):
"""Decorator for possible batch operations.
Must come after the _DoCheckInstances decorator (if any).
@param retry: whether this is a retryable batch, will be
passed to StartBatch
"""
def wrap(fn):
def batched(self, *args, **kwargs):
self.StartBatch(retry)
val = fn(self, *args, **kwargs)
self.CommitQueue()
return val
return batched
return wrap
def ParseOptions(self):
"""Parses the command line options.
......@@ -325,14 +428,14 @@ class Burner(object):
try:
op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
names=names, use_locking=True)
result = self.ExecOp(op)
result = self.ExecOp(True, op)
except errors.GenericError, err:
err_code, msg = cli.FormatError(err)
Err(msg, exit_code=err_code)
self.nodes = [data[0] for data in result if not (data[1] or data[2])]
result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
names=[]))
op_diagos = opcodes.OpDiagnoseOS(output_fields=["name", "valid"], names=[])
result = self.ExecOp(True, op_diagos)
if not result:
Err("Can't get the OS list")
......@@ -343,6 +446,8 @@ class Burner(object):
if self.opts.os not in os_set:
Err("OS '%s' not found" % self.opts.os)
@_DoCheckInstances
@_DoBatch(False)
def BurnCreateInstances(self):
"""Create the given instances.
......@@ -388,11 +493,7 @@ class Burner(object):
self.ExecOrQueue(instance, op)
self.to_rem.append(instance)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoBatch(False)
def BurnGrowDisks(self):
"""Grow both the os and the swap disks by the requested amount, if any."""
Log("Growing disks")
......@@ -404,8 +505,8 @@ class Burner(object):
amount=growth, wait_for_sync=True)
Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
self.ExecOrQueue(instance, op)
self.CommitQueue()
@_DoBatch(True)
def BurnReplaceDisks1D8(self):
"""Replace disks on primary and secondary for drbd8."""
Log("Replacing disks on the same nodes")
......@@ -419,8 +520,8 @@ class Burner(object):
Log("run %s" % mode, indent=2)
ops.append(op)
self.ExecOrQueue(instance, *ops)
self.CommitQueue()
@_DoBatch(True)
def BurnReplaceDisks2(self):
"""Replace secondary node."""
Log("Changing the secondary node")
......@@ -442,8 +543,9 @@ class Burner(object):
disks=[i for i in range(self.disk_count)])
Log("run %s %s" % (mode, msg), indent=2)
self.ExecOrQueue(instance, op)
self.CommitQueue()
@_DoCheckInstances
@_DoBatch(False)
def BurnFailover(self):
"""Failover the instances."""
Log("Failing over instances")
......@@ -453,10 +555,8 @@ class Burner(object):
ignore_consistency=False)
self.ExecOrQueue(instance, op)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoBatch(False)
def BurnMigrate(self):
"""Migrate the instances."""
Log("Migrating instances")
......@@ -469,8 +569,9 @@ class Burner(object):
cleanup=True)
Log("migration and migration cleanup", indent=2)
self.ExecOrQueue(instance, op1, op2)
self.CommitQueue()
@_DoCheckInstances
@_DoBatch(False)
def BurnImportExport(self):
"""Export the instance, delete it, and import it back.
......@@ -486,7 +587,7 @@ class Burner(object):
# read the full name of the instance
nam_op = opcodes.OpQueryInstances(output_fields=["name"],
names=[instance], use_locking=True)
full_name = self.ExecOp(nam_op)[0][0]
full_name = self.ExecOp(False, nam_op)[0][0]
if self.opts.iallocator:
pnode = snode = None
......@@ -535,10 +636,6 @@ class Burner(object):
Log("remove export", indent=2)
self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
def StopInstanceOp(self, instance):
"""Stop given instance."""
return opcodes.OpShutdownInstance(instance_name=instance)
......@@ -552,6 +649,8 @@ class Burner(object):
return opcodes.OpRenameInstance(instance_name=instance,
new_name=instance_new)
@_DoCheckInstances
@_DoBatch(True)
def BurnStopStart(self):
"""Stop/start the instances."""
Log("Stopping and starting instances")
......@@ -561,11 +660,7 @@ class Burner(object):
op2 = self.StartInstanceOp(instance)
self.ExecOrQueue(instance, op1, op2)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoBatch(False)
def BurnRemove(self):
"""Remove the instances."""
Log("Removing instances")
......@@ -575,8 +670,6 @@ class Burner(object):
ignore_failures=True)
self.ExecOrQueue(instance, op)
self.CommitQueue()
def BurnRename(self):
"""Rename the instances.
......@@ -594,11 +687,13 @@ class Burner(object):
op_rename2 = self.RenameInstanceOp(rename, instance)
op_start1 = self.StartInstanceOp(rename)
op_start2 = self.StartInstanceOp(instance)
self.ExecOp(op_stop1, op_rename1, op_start1)
self.ExecOp(False, op_stop1, op_rename1, op_start1)
self._CheckInstanceAlive(rename)
self.ExecOp(op_stop2, op_rename2, op_start2)
self.ExecOp(False, op_stop2, op_rename2, op_start2)
self._CheckInstanceAlive(instance)
@_DoCheckInstances
@_DoBatch(True)
def BurnReinstall(self):
"""Reinstall the instances."""
Log("Reinstalling instances")
......@@ -613,11 +708,8 @@ class Burner(object):
op4 = self.StartInstanceOp(instance)
self.ExecOrQueue(instance, op1, op2, op3, op4)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoCheckInstances
@_DoBatch(True)
def BurnReboot(self):
"""Reboot the instances."""
Log("Rebooting instances")
......@@ -632,11 +724,8 @@ class Burner(object):
ops.append(op)
self.ExecOrQueue(instance, *ops)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoCheckInstances
@_DoBatch(True)
def BurnActivateDisks(self):
"""Activate and deactivate disks of the instances."""
Log("Activating/deactivating disks")
......@@ -650,10 +739,9 @@ class Burner(object):
Log("activate disks when offline", indent=2)
Log("deactivate disks (when offline)", indent=2)
self.ExecOrQueue(instance, op_act, op_stop, op_act, op_deact, op_start)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoCheckInstances
@_DoBatch(False)
def BurnAddRemoveDisks(self):
"""Add and remove an extra disk for the instances."""
Log("Adding and removing disks")
......@@ -669,10 +757,8 @@ class Burner(object):
Log("adding a disk", indent=2)
Log("removing last disk", indent=2)
self.ExecOrQueue(instance, op_add, op_stop, op_rem, op_start)
self.CommitQueue()
for instance in self.instances:
self._CheckInstanceAlive(instance)
@_DoBatch(False)
def BurnAddRemoveNICs(self):
"""Add and remove an extra NIC for the instances."""
Log("Adding and removing NICs")
......@@ -685,7 +771,6 @@ class Burner(object):
Log("adding a NIC", indent=2)
Log("removing last NIC", indent=2)
self.ExecOrQueue(instance, op_add, op_rem)
self.CommitQueue()
def _CheckInstanceAlive(self, instance):
"""Check if an instance is alive by doing http checks.
......@@ -784,7 +869,14 @@ class Burner(object):
Log(self.GetFeedbackBuf())
Log("\n\n")
if not self.opts.keep_instances:
self.BurnRemove()
try:
self.BurnRemove()
except Exception, err:
if has_err: # already detected errors, so errors in removal
# are quite expected
Log("Note: error detected during instance remove: %s" % str(err))
else: # non-expected error
raise
return 0
......
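As an aside, the retry-counter convention used by MaybeRetry (MAX_RETRIES means the original attempt, 1 the last retry, 0 a non-retryable action) reduces to a few lines; a minimal sketch with illustrative names, not the actual burnin API:

    MAX_RETRIES = 3

    def maybe_retry(retry_count, fn, *args):
      # 0: not retryable; 1: last attempt; MAX_RETRIES: first attempt
      try:
        return fn(*args)
      except Exception:
        if retry_count <= 1:
          raise
        return maybe_retry(retry_count - 1, fn, *args)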
......@@ -46,6 +46,7 @@ import time
from ganeti.utils import RunCmd, ReadFile
from ganeti import constants
from ganeti import cli
USAGE = ("\tlvmstrap diskinfo\n"
"\tlvmstrap [--vgname=NAME] [--allow-removable]"
......@@ -485,8 +486,15 @@ def ShowDiskInfo(opts):
dlist = GetDiskList(opts)
print "------- Disk information -------"
print ("%5s %7s %4s %5s %-10s %s" %
("Name", "Size[M]", "Used", "Mount", "LVM?", "Info"))
headers = {
"name": "Name",
"size": "Size[M]",
"used": "Used",
"mount": "Mount",
"lvm": "LVM?",