diff --git a/NEWS b/NEWS index 5f0728475e04192b029d1c474e7291e397f45e57..293378c9ea1d6072691e89aa1b94dfaddbf8d3fe 100644 --- a/NEWS +++ b/NEWS @@ -37,6 +37,9 @@ Version 2.7.0 beta1 a cluster from a machine by stopping all daemons, removing certificates and ssconf files. Unless the ``--no-backup`` option is given, copies of the certificates are made. +- Draining (``gnt-cluster queue drain``) and un-draining the job queue + (``gnt-cluster queue undrain``) now affect all nodes in a cluster and + the flag is not reset after a master failover. Version 2.6.1 diff --git a/lib/jqueue.py b/lib/jqueue.py index 5bde17d2887a5abf9d6ae382ab9e412e294ffd82..b855b77b2b8abb8c72cba1d020fecdc2102b9814 100644 --- a/lib/jqueue.py +++ b/lib/jqueue.py @@ -1800,6 +1800,15 @@ class JobQueue(object): logging.error("Failed to upload file %s to node %s: %s", file_name, node_name, msg) + # Set queue drained flag + result = \ + self._GetRpc(addrs).call_jobqueue_set_drain_flag([node_name], + self._drained) + msg = result[node_name].fail_msg + if msg: + logging.error("Failed to set queue drained flag on node %s: %s", + node_name, msg) + self._nodes[node_name] = node.primary_ip @locking.ssynchronized(_LOCK) @@ -2123,10 +2132,18 @@ class JobQueue(object): @param drain_flag: Whether to set or unset the drain flag """ + # Change flag locally jstore.SetDrainFlag(drain_flag) self._drained = drain_flag + # ... 
and on all nodes + (names, addrs) = self._GetNodeIp() + result = \ + self._GetRpc(addrs).call_jobqueue_set_drain_flag(names, drain_flag) + self._CheckRpcResult(result, self._nodes, + "Setting queue drain flag to %s" % drain_flag) + return True @_RequireOpenQueue diff --git a/lib/rpc_defs.py b/lib/rpc_defs.py index 6ecd2b26fc8b74364d8f173c31be1a768f82efcb..ecea05437685a9b53f8d1529d0a7df6540adb06b 100644 --- a/lib/rpc_defs.py +++ b/lib/rpc_defs.py @@ -522,6 +522,9 @@ CALLS = { ("jobqueue_rename", MULTI, None, constants.RPC_TMO_URGENT, [ ("rename", None, None), ], None, None, "Rename job queue file"), + ("jobqueue_set_drain_flag", MULTI, None, constants.RPC_TMO_URGENT, [ + ("flag", None, None), + ], None, None, "Set job queue drain flag"), ]), "RpcClientBootstrap": _Prepare([ ("node_start_master_daemons", SINGLE, None, constants.RPC_TMO_FAST, [ diff --git a/lib/server/noded.py b/lib/server/noded.py index e04a2c787d887c3f8ccc1f71f9bf39282599b131..91ce4cebcbc452ad8e8660b36d1324735c5dca8f 100644 --- a/lib/server/noded.py +++ b/lib/server/noded.py @@ -944,6 +944,16 @@ class NodeRequestHandler(http.server.HttpServerHandler): # TODO: What if a file fails to rename? return [backend.JobQueueRename(old, new) for old, new in params[0]] + @staticmethod + @_RequireJobQueueLock + def perspective_jobqueue_set_drain_flag(params): + """Set job queue's drain flag. + + """ + (flag, ) = params + + return jstore.SetDrainFlag(flag) + # hypervisor --------------- @staticmethod