From d5cd389c61faca21f200400d2e65b2148da79ddd Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Mon, 15 Feb 2010 14:34:07 +0100
Subject: [PATCH] Release all node locks during disk replace

This patch extends commit 7ea7bcf by releasing all node locks during
disk replacement when early release mode is enabled. The rationale
behind this is:

- LUCreateInstance already releases all node locks while waiting for
  disk synchronization, and starts the instance afterwards
- WaitForSync (for the 'drbd' disk template) only runs 'lvs' and reads
  /proc/drbd on the primary node, which should be safe to run in
  parallel (modulo bugs in LVM)

In any case, the worst I can foresee is a node having N 'lvs' commands
running on it in parallel while it acts as a primary node for disk
storage. Given that instance creation already does this safely, and
that burnin with more than two instances per node is also safe, I
think this change can be applied.
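
For illustration only (not part of the patch): a minimal, self-contained
Python sketch of the early-release pattern described above. The helper
names (FakeLockManager, wait_for_sync, replace_disks) are made up for
this sketch; the real logic lives in TLReplaceDisks in lib/cmdlib.py and
goes through the Ganeti locking and RPC layers.

import time


class FakeLockManager:
    """Stand-in for the Ganeti lock manager, tracking held node locks."""

    def __init__(self, nodes):
        self.held = set(nodes)

    def release(self, nodes):
        # Early release: other jobs may acquire these node locks while
        # we are still waiting for the disks to resync.
        self.held -= set(nodes)


def wait_for_sync(primary_node, poll=lambda: True):
    # In the real code this repeatedly runs 'lvs' and reads /proc/drbd
    # on the primary node only; here we just poll a callback.
    while not poll():
        time.sleep(1)


def replace_disks(lock_mgr, primary_node, target_node, other_node):
    # ...new storage has been attached and old storage removed here...
    # WARNING (as in the patch): after releasing all node locks, the
    # only remaining RPC is the sync polling against the primary node.
    lock_mgr.release([target_node, other_node])
    wait_for_sync(primary_node)


if __name__ == "__main__":
    mgr = FakeLockManager(["node1", "node2"])
    replace_disks(mgr, primary_node="node1", target_node="node1",
                  other_node="node2")
    assert not mgr.held  # all node locks were released before the sync wait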

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
---
 lib/cmdlib.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 8d4adffc5..8e2cf3a7f 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -7012,10 +7012,9 @@ class TLReplaceDisks(Tasklet):
       self.lu.LogStep(cstep, steps_total, "Removing old storage")
       cstep += 1
       self._RemoveOldStorage(self.target_node, iv_names)
-      # only release the lock if we're doing secondary replace, since
-      # we use the primary node later
-      if self.target_node != self.instance.primary_node:
-        self._ReleaseNodeLock(self.target_node)
+      # WARNING: we release both node locks here, do not do other RPCs
+      # than WaitForSync to the primary node
+      self._ReleaseNodeLock([self.target_node, self.other_node])
 
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
@@ -7170,7 +7169,11 @@ class TLReplaceDisks(Tasklet):
       self.lu.LogStep(cstep, steps_total, "Removing old storage")
       cstep += 1
       self._RemoveOldStorage(self.target_node, iv_names)
-      self._ReleaseNodeLock([self.target_node, self.new_node])
+      # WARNING: we release all node locks here, do not do other RPCs
+      # than WaitForSync to the primary node
+      self._ReleaseNodeLock([self.instance.primary_node,
+                             self.target_node,
+                             self.new_node])
 
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
-- 
GitLab