Commit c30421e0 authored by René Nussbaumer's avatar René Nussbaumer

Merge branch 'devel-2.2'

hansmi helped me with merging the conflict. Thanks

Conflicts:
	lib/workerpool.py
Signed-off-by: default avatarRené Nussbaumer <rn@google.com>
Reviewed-by: default avatarIustin Pop <iustin@google.com>
parents 14bde528 310a8944
News News
==== ====
Version 2.2.0 rc1
-----------------
*(Released Mon, 23 Aug 2010)*
- Support DRBD versions of the format "a.b.c.d"
- Updated manpages
- Re-introduce support for usage from multiple threads in RAPI client
- Instance renames and modify via RAPI
- Work around race condition between processing and archival in job
queue
- Mark opcodes following failed one as failed, too
- Job field ``lock_status`` was removed due to difficulties making it
work with the changed job queue in Ganeti 2.2; a better way to monitor
locks is expected for a later 2.2.x release
- Fixed dry-run behaviour with many commands
- Support ``ssh-agent`` again when adding nodes
- Many additional bugfixes
Version 2.2.0 rc0 Version 2.2.0 rc0
----------------- -----------------
...@@ -105,6 +125,22 @@ Version 2.2.0 beta 0 ...@@ -105,6 +125,22 @@ Version 2.2.0 beta 0
see the ``ganeti-os-interface(7)`` manpage and look for see the ``ganeti-os-interface(7)`` manpage and look for
``EXP_SIZE_FD`` ``EXP_SIZE_FD``
Version 2.1.7
-------------
*(Released Tue, 24 Aug 2010)*
Bugfixes only:
- Don't ignore secondary node silently on non-mirrored disk templates
(issue 113)
- Fix --master-netdev arg name in gnt-cluster(8) (issue 114)
- Fix usb_mouse parameter breaking with vnc_console (issue 109)
- Properly document the usb_mouse parameter
- Fix path in ganeti-rapi(8) (issue 116)
- Adjust error message when the ganeti user's .ssh directory is
missing
- Add same-node-check when changing the disk template to drbd
Version 2.1.6 Version 2.1.6
------------- -------------
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
m4_define([gnt_version_major], [2]) m4_define([gnt_version_major], [2])
m4_define([gnt_version_minor], [2]) m4_define([gnt_version_minor], [2])
m4_define([gnt_version_revision], [0]) m4_define([gnt_version_revision], [0])
m4_define([gnt_version_suffix], [~rc0]) m4_define([gnt_version_suffix], [~rc1])
m4_define([gnt_version_full], m4_define([gnt_version_full],
m4_format([%d.%d.%d%s], m4_format([%d.%d.%d%s],
gnt_version_major, gnt_version_minor, gnt_version_major, gnt_version_minor,
......
...@@ -277,6 +277,11 @@ class ClientOps: ...@@ -277,6 +277,11 @@ class ClientOps:
op = opcodes.OpGetTags(kind=kind, name=name) op = opcodes.OpGetTags(kind=kind, name=name)
return self._Query(op) return self._Query(op)
elif method == luxi.REQ_QUERY_LOCKS:
(fields, sync) = args
logging.info("Received locks query request")
return self.server.context.glm.QueryLocks(fields, sync)
elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG: elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
drain_flag = args drain_flag = args
logging.info("Received queue drain flag change request to %s", logging.info("Received queue drain flag change request to %s",
......
...@@ -84,6 +84,7 @@ __all__ = [ ...@@ -84,6 +84,7 @@ __all__ = [
"IGNORE_REMOVE_FAILURES_OPT", "IGNORE_REMOVE_FAILURES_OPT",
"IGNORE_SECONDARIES_OPT", "IGNORE_SECONDARIES_OPT",
"IGNORE_SIZE_OPT", "IGNORE_SIZE_OPT",
"INTERVAL_OPT",
"MAC_PREFIX_OPT", "MAC_PREFIX_OPT",
"MAINTAIN_NODE_HEALTH_OPT", "MAINTAIN_NODE_HEALTH_OPT",
"MASTER_NETDEV_OPT", "MASTER_NETDEV_OPT",
...@@ -196,6 +197,7 @@ __all__ = [ ...@@ -196,6 +197,7 @@ __all__ = [
"cli_option", "cli_option",
"SplitNodeOption", "SplitNodeOption",
"CalculateOSNames", "CalculateOSNames",
"ParseFields",
] ]
NO_PREFIX = "no_" NO_PREFIX = "no_"
...@@ -929,6 +931,11 @@ SHUTDOWN_TIMEOUT_OPT = cli_option("--shutdown-timeout", ...@@ -929,6 +931,11 @@ SHUTDOWN_TIMEOUT_OPT = cli_option("--shutdown-timeout",
default=constants.DEFAULT_SHUTDOWN_TIMEOUT, default=constants.DEFAULT_SHUTDOWN_TIMEOUT,
help="Maximum time to wait for instance shutdown") help="Maximum time to wait for instance shutdown")
INTERVAL_OPT = cli_option("--interval", dest="interval", type="int",
default=None,
help=("Number of seconds between repetions of the"
" command"))
EARLY_RELEASE_OPT = cli_option("--early-release", EARLY_RELEASE_OPT = cli_option("--early-release",
dest="early_release", default=False, dest="early_release", default=False,
action="store_true", action="store_true",
...@@ -1208,6 +1215,24 @@ def CalculateOSNames(os_name, os_variants): ...@@ -1208,6 +1215,24 @@ def CalculateOSNames(os_name, os_variants):
return [os_name] return [os_name]
def ParseFields(selected, default):
"""Parses the values of "--field"-like options.
@type selected: string or None
@param selected: User-selected options
@type default: list
@param default: Default fields
"""
if selected is None:
return default
if selected.startswith("+"):
return default + selected[1:].split(",")
return selected.split(",")
UsesRPC = rpc.RunWithRPC UsesRPC = rpc.RunWithRPC
......
...@@ -690,6 +690,8 @@ class _JobQueueWorker(workerpool.BaseWorker): ...@@ -690,6 +690,8 @@ class _JobQueueWorker(workerpool.BaseWorker):
@param job: the job to be processed @param job: the job to be processed
""" """
self.SetTaskName("Job%s" % job.id)
logging.info("Processing job %s", job.id) logging.info("Processing job %s", job.id)
proc = mcpu.Processor(self.pool.queue.context, job.id) proc = mcpu.Processor(self.pool.queue.context, job.id)
queue = job.queue queue = job.queue
......
...@@ -30,6 +30,8 @@ import select ...@@ -30,6 +30,8 @@ import select
import threading import threading
import time import time
import errno import errno
import weakref
import logging
from ganeti import errors from ganeti import errors
from ganeti import utils from ganeti import utils
...@@ -409,6 +411,7 @@ class SharedLock(object): ...@@ -409,6 +411,7 @@ class SharedLock(object):
""" """
__slots__ = [ __slots__ = [
"__weakref__",
"__active_shr_c", "__active_shr_c",
"__inactive_shr_c", "__inactive_shr_c",
"__deleted", "__deleted",
...@@ -421,10 +424,12 @@ class SharedLock(object): ...@@ -421,10 +424,12 @@ class SharedLock(object):
__condition_class = PipeCondition __condition_class = PipeCondition
def __init__(self, name): def __init__(self, name, monitor=None):
"""Construct a new SharedLock. """Construct a new SharedLock.
@param name: the name of the lock @param name: the name of the lock
@type monitor: L{LockMonitor}
@param monitor: Lock monitor with which to register
""" """
object.__init__(self) object.__init__(self)
...@@ -448,6 +453,55 @@ class SharedLock(object): ...@@ -448,6 +453,55 @@ class SharedLock(object):
# is this lock in the deleted state? # is this lock in the deleted state?
self.__deleted = False self.__deleted = False
# Register with lock monitor
if monitor:
monitor.RegisterLock(self)
def GetInfo(self, fields):
"""Retrieves information for querying locks.
@type fields: list of strings
@param fields: List of fields to return
"""
self.__lock.acquire()
try:
info = []
# Note: to avoid unintentional race conditions, no references to
# modifiable objects should be returned unless they were created in this
# function.
for fname in fields:
if fname == "name":
info.append(self.name)
elif fname == "mode":
if self.__deleted:
info.append("deleted")
assert not (self.__exc or self.__shr)
elif self.__exc:
info.append("exclusive")
elif self.__shr:
info.append("shared")
else:
info.append(None)
elif fname == "owner":
if self.__exc:
owner = [self.__exc]
else:
owner = self.__shr
if owner:
assert not self.__deleted
info.append([i.getName() for i in owner])
else:
info.append(None)
else:
raise errors.OpExecError("Invalid query field '%s'" % fname)
return info
finally:
self.__lock.release()
def __check_deleted(self): def __check_deleted(self):
"""Raises an exception if the lock has been deleted. """Raises an exception if the lock has been deleted.
...@@ -671,6 +725,8 @@ class SharedLock(object): ...@@ -671,6 +725,8 @@ class SharedLock(object):
self.__deleted = True self.__deleted = True
self.__exc = None self.__exc = None
assert not (self.__exc or self.__shr), "Found owner during deletion"
# Notify all acquires. They'll throw an error. # Notify all acquires. They'll throw an error.
while self.__pending: while self.__pending:
self.__pending.pop().notifyAll() self.__pending.pop().notifyAll()
...@@ -713,16 +769,21 @@ class LockSet: ...@@ -713,16 +769,21 @@ class LockSet:
@ivar name: the name of the lockset @ivar name: the name of the lockset
""" """
def __init__(self, members, name): def __init__(self, members, name, monitor=None):
"""Constructs a new LockSet. """Constructs a new LockSet.
@type members: list of strings @type members: list of strings
@param members: initial members of the set @param members: initial members of the set
@type monitor: L{LockMonitor}
@param monitor: Lock monitor with which to register member locks
""" """
assert members is not None, "members parameter is not a list" assert members is not None, "members parameter is not a list"
self.name = name self.name = name
# Lock monitor
self.__monitor = monitor
# Used internally to guarantee coherency. # Used internally to guarantee coherency.
self.__lock = SharedLock(name) self.__lock = SharedLock(name)
...@@ -731,7 +792,8 @@ class LockSet: ...@@ -731,7 +792,8 @@ class LockSet:
self.__lockdict = {} self.__lockdict = {}
for mname in members: for mname in members:
self.__lockdict[mname] = SharedLock("%s/%s" % (name, mname)) self.__lockdict[mname] = SharedLock(self._GetLockName(mname),
monitor=monitor)
# The owner dict contains the set of locks each thread owns. For # The owner dict contains the set of locks each thread owns. For
# performance each thread can access its own key without a global lock on # performance each thread can access its own key without a global lock on
...@@ -742,6 +804,12 @@ class LockSet: ...@@ -742,6 +804,12 @@ class LockSet:
# will be trouble. # will be trouble.
self.__owners = {} self.__owners = {}
def _GetLockName(self, mname):
"""Returns the name for a member lock.
"""
return "%s/%s" % (self.name, mname)
def _is_owned(self): def _is_owned(self):
"""Is the current thread a current level owner?""" """Is the current thread a current level owner?"""
return threading.currentThread() in self.__owners return threading.currentThread() in self.__owners
...@@ -1049,7 +1117,7 @@ class LockSet: ...@@ -1049,7 +1117,7 @@ class LockSet:
(invalid_names, self.name)) (invalid_names, self.name))
for lockname in names: for lockname in names:
lock = SharedLock("%s/%s" % (self.name, lockname)) lock = SharedLock(self._GetLockName(lockname), monitor=self.__monitor)
if acquired: if acquired:
lock.acquire(shared=shared) lock.acquire(shared=shared)
...@@ -1187,13 +1255,24 @@ class GanetiLockManager: ...@@ -1187,13 +1255,24 @@ class GanetiLockManager:
self.__class__._instance = self self.__class__._instance = self
self._monitor = LockMonitor()
# The keyring contains all the locks, at their level and in the correct # The keyring contains all the locks, at their level and in the correct
# locking order. # locking order.
self.__keyring = { self.__keyring = {
LEVEL_CLUSTER: LockSet([BGL], "bgl lockset"), LEVEL_CLUSTER: LockSet([BGL], "BGL", monitor=self._monitor),
LEVEL_NODE: LockSet(nodes, "nodes lockset"), LEVEL_NODE: LockSet(nodes, "nodes", monitor=self._monitor),
LEVEL_INSTANCE: LockSet(instances, "instances lockset"), LEVEL_INSTANCE: LockSet(instances, "instances",
} monitor=self._monitor),
}
def QueryLocks(self, fields, sync):
"""Queries information from all locks.
See L{LockMonitor.QueryLocks}.
"""
return self._monitor.QueryLocks(fields, sync)
def _names(self, level): def _names(self, level):
"""List the lock names at the given level. """List the lock names at the given level.
...@@ -1346,3 +1425,59 @@ class GanetiLockManager: ...@@ -1346,3 +1425,59 @@ class GanetiLockManager:
"Cannot remove locks at a level while not owning it or" "Cannot remove locks at a level while not owning it or"
" owning some at a greater one") " owning some at a greater one")
return self.__keyring[level].remove(names) return self.__keyring[level].remove(names)
class LockMonitor(object):
_LOCK_ATTR = "_lock"
def __init__(self):
"""Initializes this class.
"""
self._lock = SharedLock("LockMonitor")
# Tracked locks. Weak references are used to avoid issues with circular
# references and deletion.
self._locks = weakref.WeakKeyDictionary()
@ssynchronized(_LOCK_ATTR)
def RegisterLock(self, lock):
"""Registers a new lock.
"""
logging.debug("Registering lock %s", lock.name)
assert lock not in self._locks, "Duplicate lock registration"
assert not compat.any(lock.name == i.name for i in self._locks.keys()), \
"Found duplicate lock name"
self._locks[lock] = None
@ssynchronized(_LOCK_ATTR)
def _GetLockInfo(self, fields):
"""Get information from all locks while the monitor lock is held.
"""
result = {}
for lock in self._locks.keys():
assert lock.name not in result, "Found duplicate lock name"
result[lock.name] = lock.GetInfo(fields)
return result
def QueryLocks(self, fields, sync):
"""Queries information from all locks.
@type fields: list of strings
@param fields: List of fields to return
@type sync: boolean
@param sync: Whether to operate in synchronous mode
"""
if sync:
raise NotImplementedError("Synchronous queries are not implemented")
# Get all data without sorting
result = self._GetLockInfo(fields)
# Sort by name
return [result[name] for name in utils.NiceSort(result.keys())]
...@@ -59,6 +59,7 @@ REQ_QUERY_EXPORTS = "QueryExports" ...@@ -59,6 +59,7 @@ REQ_QUERY_EXPORTS = "QueryExports"
REQ_QUERY_CONFIG_VALUES = "QueryConfigValues" REQ_QUERY_CONFIG_VALUES = "QueryConfigValues"
REQ_QUERY_CLUSTER_INFO = "QueryClusterInfo" REQ_QUERY_CLUSTER_INFO = "QueryClusterInfo"
REQ_QUERY_TAGS = "QueryTags" REQ_QUERY_TAGS = "QueryTags"
REQ_QUERY_LOCKS = "QueryLocks"
REQ_QUEUE_SET_DRAIN_FLAG = "SetDrainFlag" REQ_QUEUE_SET_DRAIN_FLAG = "SetDrainFlag"
REQ_SET_WATCHER_PAUSE = "SetWatcherPause" REQ_SET_WATCHER_PAUSE = "SetWatcherPause"
...@@ -490,3 +491,6 @@ class Client(object): ...@@ -490,3 +491,6 @@ class Client(object):
def QueryTags(self, kind, name): def QueryTags(self, kind, name):
return self.CallMethod(REQ_QUERY_TAGS, (kind, name)) return self.CallMethod(REQ_QUERY_TAGS, (kind, name))
def QueryLocks(self, fields, sync):
return self.CallMethod(REQ_QUERY_LOCKS, (fields, sync))
...@@ -26,12 +26,25 @@ ...@@ -26,12 +26,25 @@
import os import os
import logging import logging
import re
from ganeti import utils from ganeti import utils
from ganeti import errors from ganeti import errors
from ganeti import constants from ganeti import constants
def FormatParamikoFingerprint(fingerprint):
"""Formats the fingerprint of L{paramiko.PKey.get_fingerprint()}
@type fingerprint: str
@param fingerprint: PKey fingerprint
@return The string hex representation of the fingerprint
"""
assert len(fingerprint) % 2 == 0
return ":".join(re.findall(r"..", fingerprint.lower()))
def GetUserFiles(user, mkdir=False): def GetUserFiles(user, mkdir=False):
"""Return the paths of a user's ssh files. """Return the paths of a user's ssh files.
......
...@@ -69,8 +69,11 @@ class BaseWorker(threading.Thread, object): ...@@ -69,8 +69,11 @@ class BaseWorker(threading.Thread, object):
""" """
super(BaseWorker, self).__init__(name=worker_id) super(BaseWorker, self).__init__(name=worker_id)
self.pool = pool self.pool = pool
self._worker_id = worker_id
self._current_task = None self._current_task = None
assert self.getName() == worker_id
def ShouldTerminate(self): def ShouldTerminate(self):
"""Returns whether this worker should terminate. """Returns whether this worker should terminate.
...@@ -100,6 +103,23 @@ class BaseWorker(threading.Thread, object): ...@@ -100,6 +103,23 @@ class BaseWorker(threading.Thread, object):
finally: finally:
self.pool._lock.release() self.pool._lock.release()
def SetTaskName(self, taskname):
"""Sets the name of the current task.
Should only be called from within L{RunTask}.
@type taskname: string
@param taskname: Task's name
"""
if taskname:
name = "%s/%s" % (self._worker_id, taskname)
else:
name = self._worker_id
# Set thread name
self.setName(name)
def _HasRunningTaskUnlocked(self): def _HasRunningTaskUnlocked(self):
"""Returns whether this worker is currently running a task. """Returns whether this worker is currently running a task.
...@@ -147,7 +167,11 @@ class BaseWorker(threading.Thread, object): ...@@ -147,7 +167,11 @@ class BaseWorker(threading.Thread, object):
# Run the actual task # Run the actual task
assert defer is None assert defer is None
logging.debug("Starting task %r, priority %s", args, priority) logging.debug("Starting task %r, priority %s", args, priority)
self.RunTask(*args) # pylint: disable-msg=W0142 assert self.getName() == self._worker_id
try:
self.RunTask(*args) # pylint: disable-msg=W0142
finally:
self.SetTaskName(None)
logging.debug("Done with task %r, priority %s", args, priority) logging.debug("Done with task %r, priority %s", args, priority)
except DeferTask, err: except DeferTask, err:
defer = err defer = err
...@@ -159,7 +183,6 @@ class BaseWorker(threading.Thread, object): ...@@ -159,7 +183,6 @@ class BaseWorker(threading.Thread, object):
logging.debug("Deferring task %r, new priority %s", defer.priority) logging.debug("Deferring task %r, new priority %s", defer.priority)
assert self._HasRunningTaskUnlocked() assert self._HasRunningTaskUnlocked()
except: # pylint: disable-msg=W0702 except: # pylint: disable-msg=W0702
logging.exception("Caught unhandled exception") logging.exception("Caught unhandled exception")
......
...@@ -192,6 +192,70 @@ ...@@ -192,6 +192,70 @@
</para> </para>
</refsect2> </refsect2>
<refsect2>
<title>LOCKS</title>
<cmdsynopsis>
<command>locks</command>
<arg>--no-headers</arg>
<arg>--separator=<replaceable>SEPARATOR</replaceable></arg>
<sbr>
<arg>-o <replaceable>[+]FIELD,...</replaceable></arg>
<arg>--interval=<replaceable>SECONDS</replaceable></arg>
<sbr>
</cmdsynopsis>
<para>
Shows a list of locks in the master daemon.
</para>
<para>
The <option>--no-headers</option> option will skip the initial
header line. The <option>--separator</option> option takes an
argument which denotes what will be used between the output
fields. Both these options are to help scripting.
</para>
<para>
The <option>-o</option> option takes a comma-separated list of
output fields. The available fields and their meaning are:
<variablelist>
<varlistentry>
<term>name</term>
<listitem>
<simpara>Lock name</simpara>
</listitem>
</varlistentry>
<varlistentry>
<term>mode</term>
<listitem>
<simpara>
Mode in which the lock is currently acquired (exclusive or
shared)
</simpara>
</listitem>
</varlistentry>
<varlistentry>
<term>owner</term>
<listitem>
<simpara>Current lock owner(s)</simpara>