diff --git a/daemons/ganeti-noded b/daemons/ganeti-noded index 7582597b3a2407964dd1df1f7ab7b7785fb245a6..3eaf7efc71d839d6b7d8098d9e84fa0eb47e4240 100755 --- a/daemons/ganeti-noded +++ b/daemons/ganeti-noded @@ -502,7 +502,8 @@ class NodeHttpServer(http.server.HttpServer): """ instance = objects.Instance.FromDict(params[0]) reboot_type = params[1] - return backend.InstanceReboot(instance, reboot_type) + shutdown_timeout = params[2] + return backend.InstanceReboot(instance, reboot_type, shutdown_timeout) @staticmethod def perspective_instance_info(params): diff --git a/lib/backend.py b/lib/backend.py index 1248aa67259f45cf423d169b1bbdf6a810ee6b10..4e40cb7381ff96b53be86e1ddbe0ed389b0939bb 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -1015,7 +1015,7 @@ def InstanceShutdown(instance, timeout): _RemoveBlockDevLinks(iname, instance.disks) -def InstanceReboot(instance, reboot_type): +def InstanceReboot(instance, reboot_type, shutdown_timeout): """Reboot an instance. @type instance: L{objects.Instance} @@ -1031,6 +1031,8 @@ def InstanceReboot(instance, reboot_type): not accepted here, since that mode is handled differently, in cmdlib, and translates into full stop and start of the instance (instead of a call_instance_reboot RPC) + @type shutdown_timeout: integer + @param shutdown_timeout: maximum timeout for soft shutdown @rtype: None """ @@ -1047,7 +1049,7 @@ def InstanceReboot(instance, reboot_type): _Fail("Failed to soft reboot instance %s: %s", instance.name, err) elif reboot_type == constants.INSTANCE_REBOOT_HARD: try: - InstanceShutdown(instance) + InstanceShutdown(instance, shutdown_timeout) return StartInstance(instance) except errors.HypervisorError, err: _Fail("Failed to hard reboot instance %s: %s", instance.name, err) diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 0dbdba1377d9bfcc6308941a3676c99cc74a0cf5..a0722825d42a9c8e47ef275bfbb68f521f0fb289 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -3598,6 +3598,13 @@ class LURebootInstance(LogicalUnit): _OP_REQP = 
["instance_name", "ignore_secondaries", "reboot_type"] REQ_BGL = False + def CheckArguments(self): + """Check the arguments. + + """ + self.shutdown_timeout = getattr(self.op, "shutdown_timeout", + constants.DEFAULT_SHUTDOWN_TIMEOUT) + def ExpandNames(self): if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT, constants.INSTANCE_REBOOT_HARD, @@ -3617,6 +3624,7 @@ class LURebootInstance(LogicalUnit): env = { "IGNORE_SECONDARIES": self.op.ignore_secondaries, "REBOOT_TYPE": self.op.reboot_type, + "SHUTDOWN_TIMEOUT": self.shutdown_timeout, } env.update(_BuildInstanceHookEnvByObject(self, self.instance)) nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) @@ -3652,10 +3660,12 @@ class LURebootInstance(LogicalUnit): for disk in instance.disks: self.cfg.SetDiskID(disk, node_current) result = self.rpc.call_instance_reboot(node_current, instance, - reboot_type) + reboot_type, + self.shutdown_timeout) result.Raise("Could not reboot instance") else: - result = self.rpc.call_instance_shutdown(node_current, instance) + result = self.rpc.call_instance_shutdown(node_current, instance, + self.shutdown_timeout) result.Raise("Could not shutdown instance for full reboot") _ShutdownInstanceDisks(self, instance) _StartInstanceDisks(self, instance, ignore_secondaries) @@ -4008,6 +4018,13 @@ class LURemoveInstance(LogicalUnit): _OP_REQP = ["instance_name", "ignore_failures"] REQ_BGL = False + def CheckArguments(self): + """Check the arguments. 
+ + """ + self.shutdown_timeout = getattr(self.op, "shutdown_timeout", + constants.DEFAULT_SHUTDOWN_TIMEOUT) + def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODE] = [] @@ -4024,6 +4041,7 @@ class LURemoveInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self, self.instance) + env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout nl = [self.cfg.GetMasterNode()] return env, nl, nl @@ -4045,7 +4063,8 @@ class LURemoveInstance(LogicalUnit): logging.info("Shutting down instance %s on node %s", instance.name, instance.primary_node) - result = self.rpc.call_instance_shutdown(instance.primary_node, instance) + result = self.rpc.call_instance_shutdown(instance.primary_node, instance, + self.shutdown_timeout) msg = result.fail_msg if msg: if self.op.ignore_failures: @@ -4356,6 +4375,13 @@ class LUFailoverInstance(LogicalUnit): _OP_REQP = ["instance_name", "ignore_consistency"] REQ_BGL = False + def CheckArguments(self): + """Check the arguments. + + """ + self.shutdown_timeout = getattr(self.op, "shutdown_timeout", + constants.DEFAULT_SHUTDOWN_TIMEOUT) + def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODE] = [] @@ -4373,6 +4399,7 @@ class LUFailoverInstance(LogicalUnit): """ env = { "IGNORE_CONSISTENCY": self.op.ignore_consistency, + "SHUTDOWN_TIMEOUT": self.shutdown_timeout, } env.update(_BuildInstanceHookEnvByObject(self, self.instance)) nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes) @@ -4437,7 +4464,8 @@ class LUFailoverInstance(LogicalUnit): logging.info("Shutting down instance %s on node %s", instance.name, source_node) - result = self.rpc.call_instance_shutdown(source_node, instance) + result = self.rpc.call_instance_shutdown(source_node, instance, + self.shutdown_timeout) msg = result.fail_msg if msg: if self.op.ignore_consistency: @@ -4529,6 +4557,13 @@ class LUMoveInstance(LogicalUnit): _OP_REQP = ["instance_name", "target_node"] REQ_BGL = False + def 
CheckArguments(self): + """Check the arguments. + + """ + self.shutdown_timeout = getattr(self.op, "shutdown_timeout", + constants.DEFAULT_SHUTDOWN_TIMEOUT) + def ExpandNames(self): self._ExpandAndLockInstance() target_node = self.cfg.ExpandNodeName(self.op.target_node) @@ -4551,6 +4586,7 @@ class LUMoveInstance(LogicalUnit): """ env = { "TARGET_NODE": self.op.target_node, + "SHUTDOWN_TIMEOUT": self.shutdown_timeout, } env.update(_BuildInstanceHookEnvByObject(self, self.instance)) nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node, @@ -4614,7 +4650,8 @@ class LUMoveInstance(LogicalUnit): self.LogInfo("Shutting down instance %s on source node %s", instance.name, source_node) - result = self.rpc.call_instance_shutdown(source_node, instance) + result = self.rpc.call_instance_shutdown(source_node, instance, + self.shutdown_timeout) msg = result.fail_msg if msg: if self.op.ignore_consistency: @@ -7687,6 +7724,13 @@ class LUExportInstance(LogicalUnit): _OP_REQP = ["instance_name", "target_node", "shutdown"] REQ_BGL = False + def CheckArguments(self): + """Check the arguments. 
+ + """ + self.shutdown_timeout = getattr(self.op, "shutdown_timeout", + constants.DEFAULT_SHUTDOWN_TIMEOUT) + def ExpandNames(self): self._ExpandAndLockInstance() # FIXME: lock only instance primary and destination node @@ -7712,6 +7756,7 @@ class LUExportInstance(LogicalUnit): env = { "EXPORT_NODE": self.op.target_node, "EXPORT_DO_SHUTDOWN": self.op.shutdown, + "SHUTDOWN_TIMEOUT": self.shutdown_timeout, } env.update(_BuildInstanceHookEnvByObject(self, self.instance)) nl = [self.cfg.GetMasterNode(), self.instance.primary_node, @@ -7756,7 +7801,8 @@ class LUExportInstance(LogicalUnit): if self.op.shutdown: # shutdown the instance, but not the disks feedback_fn("Shutting down instance %s" % instance.name) - result = self.rpc.call_instance_shutdown(src_node, instance) + result = self.rpc.call_instance_shutdown(src_node, instance, + self.shutdown_timeout) result.Raise("Could not shutdown instance %s on" " node %s" % (instance.name, src_node)) diff --git a/lib/opcodes.py b/lib/opcodes.py index 5ff1eb1ff34d94a9212613ed178de8496bf61a87..b0d34cb2bc2ab0324dd140cacb5835f1597251a3 100644 --- a/lib/opcodes.py +++ b/lib/opcodes.py @@ -497,7 +497,7 @@ class OpRebootInstance(OpCode): OP_ID = "OP_INSTANCE_REBOOT" OP_DSC_FIELD = "instance_name" __slots__ = OpCode.__slots__ + [ - "instance_name", "reboot_type", "ignore_secondaries", + "instance_name", "reboot_type", "ignore_secondaries", "shutdown_timeout", ] @@ -514,7 +514,9 @@ class OpFailoverInstance(OpCode): """Failover an instance.""" OP_ID = "OP_INSTANCE_FAILOVER" OP_DSC_FIELD = "instance_name" - __slots__ = OpCode.__slots__ + ["instance_name", "ignore_consistency"] + __slots__ = OpCode.__slots__ + [ + "instance_name", "ignore_consistency", "shutdown_timeout", + ] class OpMigrateInstance(OpCode): @@ -543,7 +545,9 @@ class OpMoveInstance(OpCode): """ OP_ID = "OP_INSTANCE_MOVE" OP_DSC_FIELD = "instance_name" - __slots__ = OpCode.__slots__ + ["instance_name", "target_node"] + __slots__ = OpCode.__slots__ + [ + "instance_name", 
"target_node", "shutdown_timeout", + ] class OpConnectConsole(OpCode): @@ -624,7 +628,9 @@ class OpExportInstance(OpCode): """Export an instance.""" OP_ID = "OP_BACKUP_EXPORT" OP_DSC_FIELD = "instance_name" - __slots__ = OpCode.__slots__ + ["instance_name", "target_node", "shutdown"] + __slots__ = OpCode.__slots__ + [ + "instance_name", "target_node", "shutdown", "shutdown_timeout", + ] class OpRemoveExport(OpCode): diff --git a/lib/rpc.py b/lib/rpc.py index 227e355cf1a24b00e8602990c90db2a946b5e4f7..61895d56304be0ef954784648bee61c3260bd5a5 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -548,14 +548,15 @@ class RpcRunner(object): return self._SingleNodeCall(node, "instance_migrate", [self._InstDict(instance), target, live]) - def call_instance_reboot(self, node, instance, reboot_type): + def call_instance_reboot(self, node, inst, reboot_type, shutdown_timeout): """Reboots an instance. This is a single-node call. """ return self._SingleNodeCall(node, "instance_reboot", - [self._InstDict(instance), reboot_type]) + [self._InstDict(inst), reboot_type, + shutdown_timeout]) def call_instance_os_add(self, node, inst, reinstall): """Installs an OS on the given instance. diff --git a/scripts/gnt-backup b/scripts/gnt-backup index 8189243c0a3485f0e87f86d0f4f7700279bcd4cc..dc381dc0f1322fed027c67eb87873b565cbe5fd2 100755 --- a/scripts/gnt-backup +++ b/scripts/gnt-backup @@ -72,7 +72,8 @@ def ExportInstance(opts, args): """ op = opcodes.OpExportInstance(instance_name=args[0], target_node=opts.node, - shutdown=opts.shutdown) + shutdown=opts.shutdown, + shutdown_timeout=opts.timeout) fin_resu, dlist = SubmitOpCode(op) if not isinstance(dlist, list): @@ -150,7 +151,7 @@ commands = { "", "Lists instance exports available in the ganeti cluster"), 'export': ( ExportInstance, ARGS_ONE_INSTANCE, - [FORCE_OPT, SINGLE_NODE_OPT, NOSHUTDOWN_OPT], + [FORCE_OPT, SINGLE_NODE_OPT, NOSHUTDOWN_OPT, SHUTDOWN_TIMEOUT_OPT], "-n <target_node> [opts...] 
<name>", "Exports an instance to an image"), 'import': ( diff --git a/scripts/gnt-instance b/scripts/gnt-instance index 8d5351384925fa05a07d77688ff52c0e41522a06..a0941c200d3a6addba774e757dc3a706e6e2d511 100755 --- a/scripts/gnt-instance +++ b/scripts/gnt-instance @@ -563,7 +563,8 @@ def RemoveInstance(opts, args): return 1 op = opcodes.OpRemoveInstance(instance_name=instance_name, - ignore_failures=opts.ignore_failures) + ignore_failures=opts.ignore_failures, + shutdown_timeout=opts.timeout) SubmitOrSend(op, opts, cl=cl) return 0 @@ -713,7 +714,8 @@ def _RebootInstance(name, opts): """ return opcodes.OpRebootInstance(instance_name=name, reboot_type=opts.reboot_type, - ignore_secondaries=opts.ignore_secondaries) + ignore_secondaries=opts.ignore_secondaries, + shutdown_timeout=opts.timeout) def _ShutdownInstance(name, opts): @@ -803,7 +805,8 @@ def FailoverInstance(opts, args): return 1 op = opcodes.OpFailoverInstance(instance_name=instance_name, - ignore_consistency=opts.ignore_consistency) + ignore_consistency=opts.ignore_consistency, + shutdown_timeout=opts.timeout) SubmitOrSend(op, opts, cl=cl) return 0 @@ -868,7 +871,8 @@ def MoveInstance(opts, args): return 1 op = opcodes.OpMoveInstance(instance_name=instance_name, - target_node=opts.node) + target_node=opts.node, + shutdown_timeout=opts.timeout) SubmitOrSend(op, opts, cl=cl) return 0 @@ -1289,7 +1293,7 @@ commands = { "[--show-cmd] <instance>", "Opens a console on the specified instance"), 'failover': ( FailoverInstance, ARGS_ONE_INSTANCE, - [FORCE_OPT, IGNORE_CONSIST_OPT, SUBMIT_OPT], + [FORCE_OPT, IGNORE_CONSIST_OPT, SUBMIT_OPT, SHUTDOWN_TIMEOUT_OPT], "[-f] <instance>", "Stops the instance and starts it on the backup node," " using the remote mirror (only for instances of type drbd)"), 'migrate': ( @@ -1299,7 +1303,7 @@ commands = { " (only for instances of type drbd)"), 'move': ( MoveInstance, ARGS_ONE_INSTANCE, - [FORCE_OPT, SUBMIT_OPT, SINGLE_NODE_OPT], + [FORCE_OPT, SUBMIT_OPT, SINGLE_NODE_OPT, 
SHUTDOWN_TIMEOUT_OPT], "[-f] <instance>", 
"Move instance to an arbitrary node" " (only for instances of type file and lv)"), 'info': ( @@ -1327,7 +1331,7 @@ commands = { "[-f] <instance>", "Reinstall a stopped instance"), 'remove': ( RemoveInstance, ARGS_ONE_INSTANCE, - [FORCE_OPT, IGNORE_FAILURES_OPT, SUBMIT_OPT], + [FORCE_OPT, SHUTDOWN_TIMEOUT_OPT, IGNORE_FAILURES_OPT, SUBMIT_OPT], "[-f] <instance>", "Shuts down the instance and removes it"), 'rename': ( RenameInstance, @@ -1358,7 +1362,8 @@ commands = { 'reboot': ( GenericManyOps("reboot", _RebootInstance), [ArgInstance()], [m_force_multi, REBOOT_TYPE_OPT, IGNORE_SECONDARIES_OPT, m_node_opt, - m_pri_node_opt, m_sec_node_opt, m_clust_opt, m_inst_opt, SUBMIT_OPT], + m_pri_node_opt, m_sec_node_opt, m_clust_opt, m_inst_opt, SUBMIT_OPT, + SHUTDOWN_TIMEOUT_OPT], "<instance>", "Reboots an instance"), 'activate-disks': ( ActivateDisks, ARGS_ONE_INSTANCE, [SUBMIT_OPT, IGNORE_SIZE_OPT],