From 2395c32265fcffe7e78ed2e4e8ed91e2b2dc108b Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Fri, 9 Nov 2007 20:13:33 +0000
Subject: [PATCH] Soften the requirements for hooks execution

Currently, an unreachable node (or one that return undetermined failure)
in the hooks pre-phase will abort the curren operation. This is not
good, as a down node could prevent many operation on the cluster.

This patch changes a RPC-level failure (and not a hook execution
failure) into a warning. It also modifies the related test cases.

This fixes issue 11.

Reviewed-by: ultrotter
---
 doc/hooks.sgml                | 12 ++++++++++++
 lib/mcpu.py                   | 11 ++++++-----
 test/ganeti.hooks_unittest.py | 17 +++++++++--------
 test/mocks.py                 | 10 ++++++++++
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/doc/hooks.sgml b/doc/hooks.sgml
index 26771fd93..eb89c7ebd 100644
--- a/doc/hooks.sgml
+++ b/doc/hooks.sgml
@@ -60,6 +60,18 @@
         the script(s) run again with exactly the same
         parameters.</para>
 
+      <para>
+        Note that if a node is unreachable at the time a hooks is run,
+        this will not be interpreted as a deny for the execution. In
+        other words, only an actual error returned from a script will
+        cause abort, and not an unreachable node.
+      </para>
+
+      <para>
+        Therefore, if you want to guarantee that a hook script is run
+        and denies an action, it's best to put it on the master node.
+      </para>
+
       </section>
 
       <section>
diff --git a/lib/mcpu.py b/lib/mcpu.py
index 1cba4c2ee..26512bc88 100644
--- a/lib/mcpu.py
+++ b/lib/mcpu.py
@@ -122,7 +122,7 @@ class Processor(object):
       write_count = 0
     lu = lu_class(self, op, self.cfg, self.sstore)
     lu.CheckPrereq()
-    hm = HooksMaster(rpc.call_hooks_runner, lu)
+    hm = HooksMaster(rpc.call_hooks_runner, self, lu)
     hm.RunPhase(constants.HOOKS_PHASE_PRE)
     result = lu.Exec(self._feedback_fn)
     hm.RunPhase(constants.HOOKS_PHASE_POST)
@@ -159,7 +159,7 @@ class Processor(object):
     lu = lu_class(self, op, self.cfg, self.sstore)
     lu.CheckPrereq()
     #if do_hooks:
-    #  hm = HooksMaster(rpc.call_hooks_runner, lu)
+    #  hm = HooksMaster(rpc.call_hooks_runner, self, lu)
     #  hm.RunPhase(constants.HOOKS_PHASE_PRE)
     result = lu.Exec(self._feedback_fn)
     #if do_hooks:
@@ -202,8 +202,9 @@ class HooksMaster(object):
   which behaves the same works.
 
   """
-  def __init__(self, callfn, lu):
+  def __init__(self, callfn, proc, lu):
     self.callfn = callfn
+    self.proc = proc
     self.lu = lu
     self.op = lu.op
     self.env, node_list_pre, node_list_post = self._BuildEnv()
@@ -272,8 +273,8 @@ class HooksMaster(object):
       for node_name in results:
         res = results[node_name]
         if res is False or not isinstance(res, list):
-          raise errors.HooksFailure("Communication failure to node %s" %
-                                    node_name)
+          self.proc.LogWarning("Communication failure to node %s" % node_name)
+          continue
         for script, hkr, output in res:
           if hkr == constants.HKR_FAIL:
             output = output.strip().encode("string_escape")
diff --git a/test/ganeti.hooks_unittest.py b/test/ganeti.hooks_unittest.py
index c95b7adb4..5183096c9 100755
--- a/test/ganeti.hooks_unittest.py
+++ b/test/ganeti.hooks_unittest.py
@@ -36,7 +36,7 @@ from ganeti import constants
 from ganeti import cmdlib
 from ganeti.constants import HKR_SUCCESS, HKR_FAIL, HKR_SKIP
 
-from mocks import FakeConfig, FakeSStore
+from mocks import FakeConfig, FakeSStore, FakeProc
 
 class FakeLU(cmdlib.LogicalUnit):
   HPATH = "test"
@@ -231,20 +231,21 @@ class TestHooksMaster(unittest.TestCase):
     sstore = FakeSStore()
     op = opcodes.OpCode()
     lu = FakeLU(None, op, cfg, sstore)
-    hm = mcpu.HooksMaster(self._call_false, lu)
+    hm = mcpu.HooksMaster(self._call_false, FakeProc(), lu)
     self.failUnlessRaises(errors.HooksFailure,
                           hm.RunPhase, constants.HOOKS_PHASE_PRE)
     hm.RunPhase(constants.HOOKS_PHASE_POST)
 
   def testIndividualFalse(self):
-    """Test individual rpc failure"""
+    """Test individual node failure"""
     cfg = FakeConfig()
     sstore = FakeSStore()
     op = opcodes.OpCode()
     lu = FakeLU(None, op, cfg, sstore)
-    hm = mcpu.HooksMaster(self._call_nodes_false, lu)
-    self.failUnlessRaises(errors.HooksFailure,
-                          hm.RunPhase, constants.HOOKS_PHASE_PRE)
+    hm = mcpu.HooksMaster(self._call_nodes_false, FakeProc(), lu)
+    hm.RunPhase(constants.HOOKS_PHASE_PRE)
+    #self.failUnlessRaises(errors.HooksFailure,
+    #                      hm.RunPhase, constants.HOOKS_PHASE_PRE)
     hm.RunPhase(constants.HOOKS_PHASE_POST)
 
   def testScriptFalse(self):
@@ -253,7 +254,7 @@ class TestHooksMaster(unittest.TestCase):
     op = opcodes.OpCode()
     sstore = FakeSStore()
     lu = FakeLU(None, op, cfg, sstore)
-    hm = mcpu.HooksMaster(self._call_script_fail, lu)
+    hm = mcpu.HooksMaster(self._call_script_fail, FakeProc(), lu)
     self.failUnlessRaises(errors.HooksAbort,
                           hm.RunPhase, constants.HOOKS_PHASE_PRE)
     hm.RunPhase(constants.HOOKS_PHASE_POST)
@@ -264,7 +265,7 @@ class TestHooksMaster(unittest.TestCase):
     op = opcodes.OpCode()
     sstore = FakeSStore()
     lu = FakeLU(None, op, cfg, sstore)
-    hm = mcpu.HooksMaster(self._call_script_succeed, lu)
+    hm = mcpu.HooksMaster(self._call_script_succeed, FakeProc(), lu)
     for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST):
       hm.RunPhase(phase)
 
diff --git a/test/mocks.py b/test/mocks.py
index eca00d88e..70b6c400d 100644
--- a/test/mocks.py
+++ b/test/mocks.py
@@ -45,3 +45,13 @@ class FakeSStore:
 
     def GetMasterNode(self):
         return utils.HostInfo().name
+
+
+class FakeProc:
+    """Fake processor object"""
+
+    def LogWarning(self, msg):
+        pass
+
+    def LogInfo(self, msg):
+        pass
-- 
GitLab