locking.py 34.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Module implementing the Ganeti locking code."""

23
24
import errno
import os
import select
import sys
import threading
import time

from ganeti import errors
from ganeti import utils
31
32


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def ssynchronized(lock, shared=0):
  """Decorator factory running the wrapped function under a lock.

  The returned decorator acquires C{lock} (passing C{shared} on to its
  acquire() method) before calling the decorated function and releases it
  afterwards, no matter whether the call returned or raised.

  @param lock: object with SharedLock semantics, i.e. providing
      acquire(shared=...) and release()
  @param shared: value handed to lock.acquire(); by default the lock is
      acquired exclusively

  """
  def decorate(fn):
    def sync_function(*args, **kwargs):
      lock.acquire(shared=shared)
      try:
        result = fn(*args, **kwargs)
      finally:
        # Runs for both the normal and the exceptional exit
        lock.release()
      return result

    return sync_function

  return decorate


52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
class _SingleActionPipeConditionWaiter(object):
  """Callable helper class for _SingleActionPipeCondition.

  """
  __slots__ = [
    "_cond",
    "_fd",
    "_poller",
    ]

  def __init__(self, cond, poller, fd):
    """Initializes this class.

    @type cond: L{_SingleActionPipeCondition}
    @param cond: Parent condition
    @type poller: select.poll
    @param poller: Poller object
    @type fd: int
    @param fd: File descriptor to wait for

    """
    object.__init__(self)

    self._cond = cond
    self._poller = poller
    self._fd = fd

  def __call__(self, timeout):
    """Wait for something to happen on the pipe.

    @type timeout: float or None
    @param timeout: Timeout for waiting (can be None)

    """
    start_time = time.time()
    remaining_time = timeout

    while timeout is None or remaining_time > 0:
      try:
        result = self._poller.poll(remaining_time)
      except EnvironmentError, err:
        if err.errno != errno.EINTR:
          raise
        result = None

      # Check whether we were notified
      if result and result[0][0] == self._fd:
        break

      # Re-calculate timeout if necessary
      if timeout is not None:
        remaining_time = start_time + timeout - time.time()


class _SingleActionPipeCondition(object):
  """Wrapper around a pipe for usage inside conditions.

  This class contains a POSIX pipe(2) and a poller to poll it. The pipe is
  always allocated when constructing the class. Extra care is taken to always
  close the file descriptors.

  An additional class, L{_SingleActionPipeConditionWaiter}, is used to wait for
  notifications.

  Warning: This class is designed to be used as the underlying component of a
  locking condition, but is not by itself thread safe, and needs to be
  protected by an external lock.

  """
  __slots__ = [
    "_poller",
    "_read_fd",
    "_write_fd",
    "_nwaiters",
    ]

  _waiter_class = _SingleActionPipeConditionWaiter

  def __init__(self):
    """Initializes this class.

    Creates the pipe and registers its read side with a new poller. If
    setting up the poller fails, both pipe descriptors are closed again before
    the error is re-raised.

    """
    object.__init__(self)

    self._nwaiters = 0

    # Give every slot a value before anything can fail. _Cleanup() is also run
    # from __del__ and must not stumble over attributes which were never set.
    self._poller = None
    self._read_fd = None
    self._write_fd = None

    # Just assume the unpacking is successful, otherwise error handling gets
    # very complicated.
    (self._read_fd, self._write_fd) = os.pipe()
    try:
      # The poller looks for closure of the write side
      poller = select.poll()
      poller.register(self._read_fd, select.POLLHUP)

      self._poller = poller
    except:
      # _Cleanup() resets the descriptor attributes to None once they're
      # closed. Closing them here without resetting would make __del__ close
      # the same descriptor numbers a second time.
      self._Cleanup()
      raise

    # There should be no code here anymore, otherwise the pipe file descriptors
    # may be not be cleaned up properly in case of errors.

  def StartWaiting(self):
    """Return function to wait for notification.

    Every call increases the waiter count and must be matched by a call to
    L{DoneWaiting}.

    @rtype: L{_SingleActionPipeConditionWaiter}
    @return: Function to wait for notification
    @raise RuntimeError: if the file descriptors were already cleaned up

    """
    assert self._nwaiters >= 0

    if self._poller is None:
      raise RuntimeError("Already cleaned up")

    # Create waiter function and increase number of waiters
    wait_fn = self._waiter_class(self, self._poller, self._read_fd)
    self._nwaiters += 1
    return wait_fn

  def DoneWaiting(self):
    """Decrement number of waiters and automatic cleanup.

    Must be called after waiting for a notification. Once the last waiter is
    done the file descriptors are closed.

    @rtype: bool
    @return: Whether this was the last waiter

    """
    assert self._nwaiters > 0

    self._nwaiters -= 1

    if self._nwaiters == 0:
      self._Cleanup()
      return True

    return False

  def notifyAll(self):
    """Close the writing side of the pipe to notify all waiters.

    @raise RuntimeError: if the writing side was closed before, a pipe can
        only be used for a single notification

    """
    if self._write_fd is None:
      raise RuntimeError("Can only notify once")

    os.close(self._write_fd)
    self._write_fd = None

  def _Cleanup(self):
    """Close all file descriptors.

    Descriptors already closed (attribute set to None) are skipped, hence this
    function can be called more than once.

    """
    if self._read_fd is not None:
      os.close(self._read_fd)
      self._read_fd = None

    if self._write_fd is not None:
      os.close(self._write_fd)
      self._write_fd = None

    self._poller = None

  def __del__(self):
    """Called on object deletion.

    Ensure no file descriptors are left open.

    """
    self._Cleanup()


226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
class _CountingCondition(object):
  """Wrapper for Python's built-in threading.Condition class.

  This wrapper keeps a count of active waiters. We can't access the internal
  "__waiters" attribute of threading.Condition because it's not thread-safe.

  """
  __slots__ = [
    "_cond",
    "_nwaiters",
    ]

  def __init__(self, lock):
    """Initializes this class.

    """
    object.__init__(self)
    self._cond = threading.Condition(lock=lock)
    self._nwaiters = 0

  def notifyAll(self):
    """Notifies the condition.

    """
    return self._cond.notifyAll()

  def wait(self, timeout=None):
    """Waits for the condition to be notified.

    @type timeout: float or None
    @param timeout: Timeout in seconds

    """
    assert self._nwaiters >= 0

    self._nwaiters += 1
    try:
      return self._cond.wait(timeout=timeout)
    finally:
      self._nwaiters -= 1

  def has_waiting(self):
    """Returns whether there are active waiters.

    """
    return bool(self._nwaiters)


class SharedLock(object):
  """Implements a shared lock.

  Multiple threads can acquire the lock in a shared way, calling
  acquire(shared=1).  In order to acquire the lock in an exclusive way threads
  can call acquire(shared=0), which is the default.

  The lock prevents starvation but does not guarantee that threads will acquire
  the shared lock in the order they queued for it, just that they will
  eventually do so.

  """
  __slots__ = [
    "__active_shr_c",
    "__inactive_shr_c",
    "__deleted",
    "__exc",
    "__lock",
    "__pending",
    "__shr",
    ]

  __condition_class = _CountingCondition

  def __init__(self):
    """Construct a new SharedLock.

    """
    object.__init__(self)

    # Internal lock
    self.__lock = threading.Lock()

    # Queue containing waiting acquires
    self.__pending = []

    # Active and inactive conditions for shared locks
    self.__active_shr_c = self.__condition_class(self.__lock)
    self.__inactive_shr_c = self.__condition_class(self.__lock)

    # Current lock holders
    self.__shr = set()
    self.__exc = None

    # is this lock in the deleted state?
    self.__deleted = False

  def __check_deleted(self):
    """Raises an exception if the lock has been deleted.

    @raise errors.LockError: if the lock is in the deleted state

    """
    if self.__deleted:
      raise errors.LockError("Deleted lock")

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?

    """
    return threading.currentThread() in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?

    """
    return threading.currentThread() == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()
    elif shared:
      return self.__is_sharer()
    else:
      return self.__is_exclusive()

  def _is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    Takes the internal lock for the duration of the check.

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
      return self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

  def _count_pending(self):
    """Returns the number of pending acquires.

    @rtype: int

    """
    self.__lock.acquire()
    try:
      return len(self.__pending)
    finally:
      self.__lock.release()

  def __do_acquire(self, shared):
    """Actually acquire the lock.

    Records the current thread either as one of the sharers or as the
    exclusive holder.

    """
    if shared:
      self.__shr.add(threading.currentThread())
    else:
      self.__exc = threading.currentThread()

  def __can_acquire(self, shared):
    """Determine whether lock can be acquired.

    A shared acquire only needs the lock not to be held exclusively, an
    exclusive acquire needs it not to be held at all.

    """
    if shared:
      return self.__exc is None
    else:
      return len(self.__shr) == 0 and self.__exc is None

  def __is_on_top(self, cond):
    """Checks whether the passed condition is on top of the queue.

    The caller must make sure the queue isn't empty.

    """
    return self.__pending[0] == cond

  def __acquire_unlocked(self, shared=0, timeout=None):
    """Acquire a shared lock.

    The internal lock is not taken in here, the callers in this class hold it
    while calling this function.

    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: True if the lock was acquired, False otherwise
    @raise errors.LockError: if the lock is in the deleted state

    """
    self.__check_deleted()

    # We cannot acquire the lock if we already have it
    assert not self.__is_owned(), "double acquire() on a non-recursive lock"

    # Check whether someone else holds the lock or there are pending acquires.
    if not self.__pending and self.__can_acquire(shared):
      # Apparently not, can acquire lock directly.
      self.__do_acquire(shared)
      return True

    if shared:
      # All shared acquires queueing up wait on the same condition
      wait_condition = self.__active_shr_c

      # Check if we're not yet in the queue
      if wait_condition not in self.__pending:
        self.__pending.append(wait_condition)
    else:
      # Every exclusive acquire gets a condition of its own
      wait_condition = self.__condition_class(self.__lock)
      # Always add to queue
      self.__pending.append(wait_condition)

    try:
      # Wait until we become the topmost acquire in the queue or the timeout
      # expires.
      while not (self.__is_on_top(wait_condition) and
                 self.__can_acquire(shared)):
        # Wait for notification
        wait_condition.wait(timeout)
        self.__check_deleted()

        # A lot of code assumes blocking acquires always succeed. Loop
        # internally for that case.
        if timeout is not None:
          break

      if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
        self.__do_acquire(shared)
        return True
    finally:
      # Remove condition from queue if there are no more waiters
      if not wait_condition.has_waiting() and not self.__deleted:
        self.__pending.remove(wait_condition)

    return False

  def acquire(self, shared=0, timeout=None):
    """Acquire a shared lock.

    @type shared: int
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: True if the lock was acquired, False otherwise
    @raise errors.LockError: if the lock is in the deleted state

    """
    self.__lock.acquire()
    try:
      return self.__acquire_unlocked(shared, timeout)
    finally:
      self.__lock.release()

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    """
    self.__lock.acquire()
    try:
      assert self.__is_exclusive() or self.__is_sharer(), \
        "Cannot release non-owned lock"

      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None
      else:
        self.__shr.remove(threading.currentThread())

      # Notify topmost condition in queue
      if self.__pending:
        first_condition = self.__pending[0]
        first_condition.notifyAll()

        # The condition just notified was the one shared acquires queue on;
        # swap it with the spare one so later shared acquires use the other
        # condition.
        if first_condition == self.__active_shr_c:
          self.__active_shr_c = self.__inactive_shr_c
          self.__inactive_shr_c = first_condition

    finally:
      self.__lock.release()

  def delete(self, timeout=None):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @rtype: bool
    @return: True if the lock was deleted, False if it couldn't be acquired
    @raise errors.LockError: if the lock is already in the deleted state

    """
    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"

      self.__check_deleted()

      # The caller is allowed to hold the lock exclusively already.
      acquired = self.__is_exclusive()

      if not acquired:
        # The timeout must be passed by keyword; the first positional
        # parameter of __acquire_unlocked is "shared".
        acquired = self.__acquire_unlocked(shared=0, timeout=timeout)

      if acquired:
        self.__deleted = True
        self.__exc = None

        # Notify all acquires. They'll throw an error.
        while self.__pending:
          self.__pending.pop().notifyAll()

      return acquired
    finally:
      self.__lock.release()

542

543
# Whenever we want to acquire a full LockSet we pass None as the value
# to acquire.  Hide this behind this nicely named constant.
ALL_SET = None


548
549
550
551
552
553
554
555
556
557
558
559
560
561
class LockSet:
  """Implements a set of locks.

  This abstraction implements a set of shared locks for the same resource type,
  distinguished by name. The user can lock a subset of the resources and the
  LockSet will take care of acquiring the locks always in the same order, thus
  preventing deadlock.

  All the locks needed in the same set must be acquired together, though.

  """
  def __init__(self, members=None):
    """Constructs a new LockSet.

    @param members: initial members of the set

    """
    # Used internally to guarantee coherency.
    self.__lock = SharedLock()

    # The lockdict indexes the relationship name -> lock
    # The order-of-locking is implied by the alphabetical order of names
    self.__lockdict = {}

    if members is not None:
      for name in members:
        self.__lockdict[name] = SharedLock()

    # The owner dict contains the set of locks each thread owns. For
    # performance each thread can access its own key without a global lock on
    # this structure. It is paramount though that *no* other type of access is
    # done to this structure (eg. no looping over its keys). *_owner helper
    # function are defined to guarantee access is correct, but in general never
    # do anything different than __owners[threading.currentThread()], or there
    # will be trouble.
    self.__owners = {}

  def _is_owned(self):
    """Is the current thread a current level owner?"""
    return threading.currentThread() in self.__owners

  def _add_owned(self, name=None):
    """Note the current thread owns the given lock

    Without a name only an (empty) entry for the current thread is created,
    unless it has one already.

    """
    if name is None:
      if not self._is_owned():
        self.__owners[threading.currentThread()] = set()
    else:
      if self._is_owned():
        self.__owners[threading.currentThread()].add(name)
      else:
        self.__owners[threading.currentThread()] = set([name])

  def _del_owned(self, name=None):
    """Note the current thread no longer owns the given lock

    Without a name nothing is removed from the thread's set of names. In both
    cases the thread's entry is dropped once it is empty and the set-level
    lock isn't held by the thread.

    """
    if name is not None:
      self.__owners[threading.currentThread()].remove(name)

    # Only remove the key if we don't hold the set-lock as well
    if (not self.__lock._is_owned() and
        not self.__owners[threading.currentThread()]):
      del self.__owners[threading.currentThread()]

  def _list_owned(self):
    """Get the set of resource names owned by the current thread"""
    if self._is_owned():
      return self.__owners[threading.currentThread()].copy()
    else:
      return set()

  def __names(self):
    """Return the current set of names.

    Only call this function while holding __lock and don't iterate on the
    result after releasing the lock.

    """
    return self.__lockdict.keys()

  def _names(self):
    """Return a copy of the current set of elements.

    Used only for debugging purposes.

    """
    # If we don't already own the set-level lock acquired
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire(shared=1)
    try:
      result = self.__names()
    finally:
      if release_lock:
        self.__lock.release()
    return set(result)

  def acquire(self, names, blocking=1, shared=0):
    """Acquire a set of resource locks.

    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)

    @return: the set of names of the locks which were acquired

    @raise errors.LockError: when any lock we try to acquire has
        been deleted before we succeed. In this case none of the
        locks requested will be acquired.

    """
    if not blocking:
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Check we don't already own locks at this level
    assert not self._is_owned(), "Cannot acquire locks in the same set twice"

    if names is None:
      # If no names are given acquire the whole set by not letting new names
      # being added before we release, and getting the current list of names.
      # Some of them may then be deleted later, but we'll cope with this.
      #
      # We'd like to acquire this lock in a shared way, as it's nice if
      # everybody else can use the instances at the same time. If are acquiring
      # them exclusively though they won't be able to do this anyway, though,
      # so we'll get the list lock exclusively as well in order to be able to
      # do add() on the set while owning it.
      self.__lock.acquire(shared=shared)
      try:
        # note we own the set-lock
        self._add_owned()
        names = self.__names()
      except:
        # We shouldn't have problems adding the lock to the owners list, but
        # if we did we'll try to release this lock and re-raise exception.
        # Of course something is going to be really wrong, after this.
        self.__lock.release()
        raise

    try:
      # Support passing in a single resource to acquire rather than many
      if isinstance(names, basestring):
        names = [names]
      else:
        names = sorted(names)

      acquire_list = []
      # First we look the locks up on __lockdict. We have no way of being sure
      # they will still be there after, but this makes it a lot faster should
      # just one of them be the already wrong
      for lname in utils.UniqueSequence(names):
        try:
          lock = self.__lockdict[lname] # raises KeyError if lock is not there
          acquire_list.append((lname, lock))
        except (KeyError):
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            raise errors.LockError('non-existing lock in set (%s)' % lname)

      # This will hold the locknames we effectively acquired.
      acquired = set()
      # Now acquire_list contains a sorted list of resources and locks we want.
      # In order to get them we loop on this (private) list and acquire() them.
      # We gave no real guarantee they will still exist till this is done but
      # .acquire() itself is safe and will alert us if the lock gets deleted.
      for (lname, lock) in acquire_list:
        try:
          lock.acquire(shared=shared) # raises LockError if the lock is deleted
          # now the lock cannot be deleted, we have it!
          self._add_owned(name=lname)
          acquired.add(lname)
        except (errors.LockError):
          if self.__lock._is_owned():
            # We are acquiring all the set, it doesn't matter if this
            # particular element is not there anymore.
            continue
          else:
            # Give back everything acquired so far before reporting the error
            name_fail = lname
            for lname in self._list_owned():
              self.__lockdict[lname].release()
              self._del_owned(name=lname)
            raise errors.LockError('non-existing lock in set (%s)' % name_fail)
        except:
          # We shouldn't have problems adding the lock to the owners list, but
          # if we did we'll try to release this lock and re-raise exception.
          # Of course something is going to be really wrong, after this.
          if lock._is_owned():
            lock.release()
          raise

    except:
      # If something went wrong and we had the set-lock let's release it...
      if self.__lock._is_owned():
        self.__lock.release()
      raise

    return acquired

  def release(self, names=None):
    """Release a set of resource locks, at the same level.

    You must have acquired the locks, either in shared or in exclusive mode,
    before releasing them.

    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level).

    """
    assert self._is_owned(), "release() on lock set while not owner"

    # Support passing in a single resource to release rather than many
    if isinstance(names, basestring):
      names = [names]

    if names is None:
      names = self._list_owned()
    else:
      names = set(names)
      assert self._list_owned().issuperset(names), (
               "release() on unheld resources %s" %
               names.difference(self._list_owned()))

    # First of all let's release the "all elements" lock, if set.
    # After this 'add' can work again
    if self.__lock._is_owned():
      self.__lock.release()
      self._del_owned()

    for lockname in names:
      # If we are sure the lock doesn't leave __lockdict without being
      # exclusively held we can do this...
      self.__lockdict[lockname].release()
      self._del_owned(name=lockname)

  def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set

    @param names: names of the new elements to add
    @param acquired: pre-acquire the new resource?
    @param shared: is the pre-acquisition shared?

    @return: True
    @raise errors.LockError: if any of the names is in the set already

    """
    # Check we don't already own locks at this level
    assert not self._is_owned() or self.__lock._is_owned(shared=0), \
      "Cannot add locks if the set is only partially owned, or shared"

    # Support passing in a single resource to add rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we don't already own the set-level lock acquired in an exclusive way
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock._is_owned():
      release_lock = True
      self.__lock.acquire()

    try:
      invalid_names = set(self.__names()).intersection(names)
      if invalid_names:
        # This must be an explicit raise, not an assert, because assert is
        # turned off when using optimization, and this can happen because of
        # concurrency even if the user doesn't want it.
        raise errors.LockError("duplicate add() (%s)" % invalid_names)

      for lockname in names:
        lock = SharedLock()

        if acquired:
          lock.acquire(shared=shared)
          # now the lock cannot be deleted, we have it!
          try:
            self._add_owned(name=lockname)
          except:
            # We shouldn't have problems adding the lock to the owners list,
            # but if we did we'll try to release this lock and re-raise
            # exception.  Of course something is going to be really wrong,
            # after this.  On the other hand the lock hasn't been added to the
            # __lockdict yet so no other threads should be pending on it. This
            # release is just a safety measure.
            lock.release()
            raise

        self.__lockdict[lockname] = lock

    finally:
      # Only release __lock if we were not holding it previously.
      if release_lock:
        self.__lock.release()

    return True

  def remove(self, names, blocking=1):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    @param names: names of the resource to remove.
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported
        yet unless you are already holding exclusively the locks)

    @return: a list of locks which we removed; the list is always
        equal to the names list if we were holding all the locks
        exclusively

    """
    if not blocking and not self._is_owned():
      # We don't have non-blocking mode for now
      raise NotImplementedError

    # Support passing in a single resource to remove rather than many
    if isinstance(names, basestring):
      names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be checked
    # by the lock itself.
    assert not self._is_owned() or self._list_owned().issuperset(names), (
      "remove() on acquired lockset while not owning all elements")

    removed = []

    for lname in names:
      # Calling delete() acquires the lock exclusively if we don't already own
      # it, and causes all pending and subsequent lock acquires to fail. It's
      # fine to call it out of order because delete() also implies release(),
      # and the assertion above guarantees that if we either already hold
      # everything we want to delete, or we hold none.
      try:
        self.__lockdict[lname].delete()
        removed.append(lname)
      except (KeyError, errors.LockError):
        # This cannot happen if we were already holding it, verify:
        assert not self._is_owned(), "remove failed while holding lockset"
      else:
        # If no LockError was raised we are the ones who deleted the lock.
        # This means we can safely remove it from lockdict, as any further or
        # pending delete() or acquire() will fail (and nobody can have the lock
        # since before our call to delete()).
        #
        # This is done in an else clause because if the exception was thrown
        # it's the job of the one who actually deleted it.
        del self.__lockdict[lname]
        # And let's remove it from our private list if we owned it.
        if self._is_owned():
          self._del_owned(name=lname)

    return removed
906

907
908
909
910
911
912
913
914
915
916
917

# Locking levels, must be acquired in increasing order.
# Current rules are:
#   - at level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be
#   acquired before performing any operation, either in shared or in exclusive
#   mode. Acquiring the BGL in exclusive mode is discouraged and should be
#   avoided.
#   - at levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks.
#   If you need more than one node, or more than one instance, acquire them at
#   the same time.
LEVEL_CLUSTER = 0
LEVEL_INSTANCE = 1
LEVEL_NODE = 2

# All levels, listed in increasing order
LEVELS = [LEVEL_CLUSTER,
          LEVEL_INSTANCE,
          LEVEL_NODE]

# Lock levels which are modifiable
LEVELS_MOD = [LEVEL_NODE, LEVEL_INSTANCE]

# Human-readable name of each level
LEVEL_NAMES = {
  LEVEL_CLUSTER: "cluster",
  LEVEL_INSTANCE: "instance",
  LEVEL_NODE: "node",
  }

# Constant for the big ganeti lock
BGL = 'BGL'


class GanetiLockManager:
  """The Ganeti Locking Library

  The purpose of this small library is to manage locking for ganeti clusters
  in a central place, while at the same time doing dynamic checks against
  possible deadlocks. It will also make it easier to transition to a different
  lock type should we migrate away from python threads.

  """
  # Class-wide reference to the one manager object; None until one is built
  _instance = None

  def __init__(self, nodes=None, instances=None):
    """Constructs a new GanetiLockManager object.

    There should be only a GanetiLockManager object at any time, so this
    function fails an assertion if another one was already registered on
    the class.

    @param nodes: list of node names
    @param instances: list of instance names

    """
    assert self.__class__._instance is None, \
           "double GanetiLockManager instance"

    # Remember this object on the class so a second construction is detected
    self.__class__._instance = self

    # The keyring contains all the locks, at their level and in the correct
    # locking order.
    self.__keyring = {
      LEVEL_CLUSTER: LockSet([BGL]),
      LEVEL_NODE: LockSet(nodes),
      LEVEL_INSTANCE: LockSet(instances),
    }

  def _names(self, level):
    """Return the lock names known to the lockset of one level.

    Meant as a helper for debugging and testing.

    @param level: the level whose lock names are wanted; it must be a
        member of LEVELS

    """
    assert level in LEVELS, "Invalid locking level %s" % level
    lockset = self.__keyring[level]
    return lockset._names()

  def _is_owned(self, level):
    """Check whether we are owning locks at the given level

    """
    return self.__keyring[level]._is_owned()

989
990
  # Same function as _is_owned, exposed under a name without the underscore
  is_owned = _is_owned

991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
  def _list_owned(self, level):
    """Get the set of owned locks at the given level

    """
    return self.__keyring[level]._list_owned()

  def _upper_owned(self, level):
    """Check whether we own any lock at a level greater than the given one.

    @param level: the reference level; only levels with a greater value
        are inspected
    @return: the result of utils.any() over the per-level ownership checks,
        i.e. true when something is owned above the given level

    """
    # Compare level values instead of slicing LEVELS by position, so the
    # check does not depend on LEVELS[i] == i.
    return utils.any((self._is_owned(lvl) for lvl in LEVELS if lvl > level))

  def _BGL_owned(self):
    """Tell whether the Big Ganeti Lock is among the owned cluster locks.

    It does not matter whether the BGL was acquired in exclusive or in
    shared mode.

    """
    owned_cluster_locks = self.__keyring[LEVEL_CLUSTER]._list_owned()
    return BGL in owned_cluster_locks

  def _contains_BGL(self, level, names):
    """Check if the level contains the BGL.

    Check if acting on the given level and set of names will change
    the status of the Big Ganeti Lock.

    @param level: the level being acted upon
    @param names: the lock names being acted upon; None is treated as
        including the BGL
    @return: True only for LEVEL_CLUSTER when names is None or holds BGL

    """
    return level == LEVEL_CLUSTER and (names is None or BGL in names)

  def acquire(self, level, names, blocking=1, shared=0):
    """Acquire a set of resource locks, at the same level.

    @param level: the level at which the locks shall be acquired;
        it must be a member of LEVELS.
    @param names: the names of the locks which shall be acquired
        (special lock names, or instance/node names)
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)
    @param shared: whether to acquire in shared mode; by default
        an exclusive lock will be acquired
    @return: whatever the acquire() call of the level's lockset returns

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Check that we are either acquiring the Big Ganeti Lock or we already own
    # it. Some "legacy" opcodes need to be sure they are run non-concurrently
    # so even if we've migrated we need to at least share the BGL to be
    # compatible with them. Of course if we own the BGL exclusively there's no
    # point in acquiring any other lock, unless perhaps we are half way through
    # the migration of the current opcode.
    assert (self._contains_BGL(level, names) or self._BGL_owned()), (
            "You must own the Big Ganeti Lock before acquiring any other")

    # Check we don't own locks at upper levels (the level itself is not
    # covered by this check).
    assert not self._upper_owned(level), ("Cannot acquire locks at a level"
           " while owning some at a greater one")

    # Acquire the locks in the set.
    return self.__keyring[level].acquire(names, shared=shared,
                                         blocking=blocking)

  def release(self, level, names=None):
    """Release locks previously acquired at one level.

    The locks must currently be held, in shared or in exclusive mode.

    @param level: the level at which the locks shall be released;
        it must be a member of LEVELS
    @param names: the names of the locks which shall be released
        (defaults to all the locks acquired at that level)

    """
    assert level in LEVELS, "Invalid locking level %s" % level

    # Giving up the BGL is refused while anything above the cluster level
    # is still held.
    assert not (self._contains_BGL(level, names) and
                self._upper_owned(LEVEL_CLUSTER)), (
            "Cannot release the Big Ganeti Lock while holding something"
            " at upper levels")

    # The lockset itself complains when the locks are not owned
    lockset = self.__keyring[level]
    return lockset.release(names)

  def add(self, level, names, acquired=0, shared=0):
    """Create new locks at one of the modifiable levels.

    @param level: the level at which the locks shall be added;
        it must be a member of LEVELS_MOD.
    @param names: names of the locks to add
    @param acquired: whether to acquire the newly added locks
    @param shared: whether the acquisition will be shared

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level

    # The BGL has to be held for any operation on the other levels
    assert self._BGL_owned(), ("You must own the BGL before performing other"
                               " operations")

    # Nothing may be held above the level that is being extended
    assert not self._upper_owned(level), ("Cannot add locks at a level"
                                          " while owning some at a greater one")

    lockset = self.__keyring[level]
    return lockset.add(names, acquired=acquired, shared=shared)

  def remove(self, level, names, blocking=1):
    """Remove locks from the specified level.

    You must either already own the locks you are trying to remove
    exclusively or not own any lock at an upper level.

    @param level: the level at which the locks shall be removed;
        it must be a member of LEVELS_MOD
    @param names: the names of the locks which shall be removed
        (special lock names, or instance/node names)
    @param blocking: whether to block while trying to acquire or to
        operate in try-lock mode (this locking mode is not supported yet)
    @return: whatever the remove() call of the level's lockset returns

    """
    assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
    assert self._BGL_owned(), ("You must own the BGL before performing other"
           " operations")

    # Check we either own the level or don't own anything from here
    # up. LockSet.remove() will check the case in which we don't own
    # all the needed resources, or we have a shared ownership.
    assert self._is_owned(level) or not self._upper_owned(level), (
           "Cannot remove locks at a level while not owning it or"
           " owning some at a greater one")
    return self.__keyring[level].remove(names, blocking=blocking)