From 1d67656e70a81cffec19633646c191ada9ba0ecf Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Tue, 16 Oct 2007 13:51:51 +0000 Subject: [PATCH] Allow force removal of instances This patch adds a new option to the instance removal command "--ignore-failures" that forces the removal of the instance from the configuration even if the removal process encounters errors. In order to be able to do this when the remote node(s) is(are) down, we need to restrict the execution of the instance removal hook to the master only. I think this is a reasonable trade-off (but I'm not sure). Reviewed-by: imsnah --- doc/hooks.sgml | 1 + lib/cmdlib.py | 18 ++++++++++++------ lib/opcodes.py | 2 +- man/gnt-instance.sgml | 10 ++++++++++ scripts/gnt-instance | 12 ++++++++++-- 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/doc/hooks.sgml b/doc/hooks.sgml index daa1d07e8..b6cf2e956 100644 --- a/doc/hooks.sgml +++ b/doc/hooks.sgml @@ -256,6 +256,7 @@ <entry>Remove an instance</entry> <entry><computeroutput>gnt-instance remove</computeroutput></entry> <entry><constant>INSTANCE_NAME</constant>, <constant>INSTANCE_PRIMARY</constant>, <constant>INSTANCE_SECONDARIES</constant></entry> + <entry spanname="bothhooks">master node</entry> </row> <row> <entry>OP_INSTANCE_ADD_MDDRBD</entry> diff --git a/lib/cmdlib.py b/lib/cmdlib.py index ea5634591..d41ed5af9 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -2193,8 +2193,7 @@ class LURemoveInstance(LogicalUnit): """ env = _BuildInstanceHookEnvByObject(self.instance) - nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] + - list(self.instance.secondary_nodes)) + nl = [self.sstore.GetMasterNode()] return env, nl, nl def CheckPrereq(self): @@ -2219,12 +2218,19 @@ class LURemoveInstance(LogicalUnit): (instance.name, instance.primary_node)) if not rpc.call_instance_shutdown(instance.primary_node, instance): - raise errors.OpExecError("Could not shutdown instance %s on node %s" % - (instance.name, instance.primary_node)) + if self.op.ignore_failures: + feedback_fn("Warning: can't shutdown instance") + else: + raise errors.OpExecError("Could not shutdown instance %s on node %s" % + (instance.name, instance.primary_node)) logger.Info("removing block devices for instance %s" % instance.name) - _RemoveDisks(instance, self.cfg) + if not _RemoveDisks(instance, self.cfg): + if self.op.ignore_failures: + feedback_fn("Warning: can't remove instance's disks") + else: + raise errors.OpExecError("Can't remove instance's disks") logger.Info("removing instance %s out of cluster config" % instance.name) @@ -2649,7 +2655,7 @@ def _RemoveDisks(instance, cfg): This abstracts away some work from `AddInstance()` and `RemoveInstance()`. Note that in case some of the devices couldn't - be remove, the removal will continue with the other ones (compare + be removed, the removal will continue with the other ones (compare with `_CreateDisks()`). Args: diff --git a/lib/opcodes.py b/lib/opcodes.py index 7beb299d6..5e11fbae8 100644 --- a/lib/opcodes.py +++ b/lib/opcodes.py @@ -150,7 +150,7 @@ class OpReinstallInstance(OpCode): class OpRemoveInstance(OpCode): """Remove an instance.""" OP_ID = "OP_INSTANCE_REMOVE" - __slots__ = ["instance_name"] + __slots__ = ["instance_name", "ignore_failures"] class OpRenameInstance(OpCode): diff --git a/man/gnt-instance.sgml b/man/gnt-instance.sgml index 9ada7c76f..f31dca2fa 100644 --- a/man/gnt-instance.sgml +++ b/man/gnt-instance.sgml @@ -196,6 +196,7 @@ <cmdsynopsis> <command>remove</command> + <arg>--ignore-failures</arg> <arg choice="req"><replaceable>instance</replaceable></arg> </cmdsynopsis> @@ -205,6 +206,15 @@ you are not sure if you use an instance again, use <command>shutdown</command> first and leave it in the shutdown state for a while. + + </para> + + <para> + The <option>--ignore-failures</option> option will cause the + removal to proceed even in the presence of errors during the + removal of the instance (e.g. during the shutdown or the + disk removal). If this option is not given, the command will + stop at the first error. </para> <para> diff --git a/scripts/gnt-instance b/scripts/gnt-instance index 5768b44ec..943f4899b 100755 --- a/scripts/gnt-instance +++ b/scripts/gnt-instance @@ -270,7 +270,8 @@ def RemoveInstance(opts, args): if not AskUser(usertext): return 1 - op = opcodes.OpRemoveInstance(instance_name=instance_name) + op = opcodes.OpRemoveInstance(instance_name=instance_name, + ignore_failures=opts.ignore_failures) SubmitOpCode(op) return 0 @@ -689,7 +690,14 @@ commands = { "", "Lists the instances and their status"), 'reinstall': (ReinstallInstance, ARGS_ONE, [DEBUG_OPT, FORCE_OPT, os_opt], "[-f] <instance>", "Reinstall the instance"), - 'remove': (RemoveInstance, ARGS_ONE, [DEBUG_OPT, FORCE_OPT], + 'remove': (RemoveInstance, ARGS_ONE, + [DEBUG_OPT, FORCE_OPT, + make_option("--ignore-failures", dest="ignore_failures", + action="store_true", default=False, + help=("Remove the instance from the cluster even" + " if there are failures during the removal" + " process (shutdown, disk removal, etc.)")), + ], "[-f] <instance>", "Shuts down the instance and removes it"), 'remove-mirror': (RemoveMDDRBDComponent, ARGS_ONE, [DEBUG_OPT, node_opt, -- GitLab