diff --git a/lib/backend.py b/lib/backend.py
index f73d87ebca2a3aca1a3790e62fae871becc58bea..75c4bdbdff3319502bf86c60158d23468d8df422 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -975,14 +975,14 @@ def MigrateInstance(instance, target, live):
     - msg is a string with details in case of failure
 
   """
-  hyper = hypervisor.GetHypervisor(instance.hypervisor_name)
+  hyper = hypervisor.GetHypervisor(instance.hypervisor)
 
   try:
     hyper.MigrateInstance(instance.name, target, live)
   except errors.HypervisorError, err:
-    msg = "Failed to migrate instance: %s" % str(err)
-    logging.error(msg)
-    return (False, msg)
+    msg = "Failed to migrate instance"
+    logging.exception(msg)
+    return (False, "%s: %s" % (msg, err))
   return (True, "Migration successfull")
@@ -2223,9 +2223,9 @@ def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster):
     return status, bdevs
 
   if multimaster:
-    for cf, rd in zip(disks, bdevs):
+    for idx, rd in enumerate(bdevs):
       try:
-        _SymlinkBlockDev(instance_name, rd.dev_path, cf.iv_name)
+        _SymlinkBlockDev(instance_name, rd.dev_path, idx)
       except EnvironmentError, err:
         return (False, "Can't create symlink: %s" % str(err))
   # reconnect disks, switch to new master configuration and if
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index cfd031f4790a1a7c5043ffb288c7d808bd0388e4..f9820428343ee0d376967baafd58fd8d7f2d4f23 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -3387,6 +3387,310 @@ class LUFailoverInstance(LogicalUnit):
                                  (instance.name, target_node))
 
+
+class LUMigrateInstance(LogicalUnit):
+  """Migrate an instance.
+
+  This is migration without shutting down, compared to the failover,
+  which is done with shutdown.
+
+  """
+  HPATH = "instance-migrate"
+  HTYPE = constants.HTYPE_INSTANCE
+  _OP_REQP = ["instance_name", "live", "cleanup"]
+
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self._ExpandAndLockInstance()
+    self.needed_locks[locking.LEVEL_NODE] = []
+    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      self._LockInstancesNodes()
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on master, primary and secondary nodes of the instance.
+
+    """
+    env = _BuildInstanceHookEnvByObject(self, self.instance)
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that the instance is in the cluster.
+
+    """
+    instance = self.cfg.GetInstanceInfo(
+      self.cfg.ExpandInstanceName(self.op.instance_name))
+    if instance is None:
+      raise errors.OpPrereqError("Instance '%s' not known" %
+                                 self.op.instance_name)
+
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Instance's disk layout is not"
+                                 " drbd8, cannot migrate.")
+
+    secondary_nodes = instance.secondary_nodes
+    if not secondary_nodes:
+      raise errors.ProgrammerError("no secondary node but using "
+                                   "drbd8 disk template")
+
+    i_be = self.cfg.GetClusterInfo().FillBE(instance)
+
+    target_node = secondary_nodes[0]
+    # check memory requirements on the secondary node
+    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
+                         instance.name, i_be[constants.BE_MEMORY],
+                         instance.hypervisor)
+
+    # check bridge existence
+    brlist = [nic.bridge for nic in instance.nics]
+    result = self.rpc.call_bridges_exist(target_node, brlist)
+    if result.failed or not result.data:
+      raise errors.OpPrereqError("One or more target bridges %s does not"
+                                 " exist on destination node '%s'" %
+                                 (brlist, target_node))
+
+    if not self.op.cleanup:
+      result = self.rpc.call_instance_migratable(instance.primary_node,
+                                                 instance)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+                                   msg)
+
+    self.instance = instance
+
+  def _WaitUntilSync(self):
+    """Poll with custom rpc for disk sync.
+
+    This uses our own step-based rpc call.
+
+    """
+    self.feedback_fn("* wait until resync is done")
+    all_done = False
+    while not all_done:
+      all_done = True
+      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
+                                            self.nodes_ip,
+                                            self.instance.disks)
+      min_percent = 100
+      for node, nres in result.items():
+        msg = nres.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
+                                   (node, msg))
+        node_done, node_percent = nres.data[1]
+        all_done = all_done and node_done
+        if node_percent is not None:
+          min_percent = min(min_percent, node_percent)
+      if not all_done:
+        if min_percent < 100:
+          self.feedback_fn(" - progress: %.1f%%" % min_percent)
+        time.sleep(2)
+
+  def _EnsureSecondary(self, node):
+    """Demote a node to secondary.
+
+    """
+    self.feedback_fn("* switching node %s to secondary mode" % node)
+
+    for dev in self.instance.disks:
+      self.cfg.SetDiskID(dev, node)
+
+    result = self.rpc.call_blockdev_close(node, self.instance.name,
+                                          self.instance.disks)
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+                               " error %s" % (node, msg))
+
+  def _GoStandalone(self):
+    """Disconnect from the network.
+
+    """
+    self.feedback_fn("* changing into standalone mode")
+    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
+                                               self.instance.disks)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot disconnect disks on node %s,"
+                                 " error %s" % (node, msg))
+
+  def _GoReconnect(self, multimaster):
+    """Reconnect to the network.
+
+    """
+    if multimaster:
+      msg = "dual-master"
+    else:
+      msg = "single-master"
+    self.feedback_fn("* changing disks into %s mode" % msg)
+    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
+                                           self.instance.disks,
+                                           self.instance.name, multimaster)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot change disks config on node %s,"
+                                 " error: %s" % (node, msg))
+
+  def _ExecCleanup(self):
+    """Try to cleanup after a failed migration.
+
+    The cleanup is done by:
+      - check that the instance is running only on one node
+        (and update the config if needed)
+      - change disks on its secondary node to secondary
+      - wait until disks are fully synchronized
+      - disconnect from the network
+      - change disks into single-master mode
+      - wait again until disks are fully synchronized
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    # check running on only one node
+    self.feedback_fn("* checking where the instance actually runs"
+                     " (if this hangs, the hypervisor might be in"
+                     " a bad state)")
+    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
+    for node, result in ins_l.items():
+      result.Raise()
+      if not isinstance(result.data, list):
+        raise errors.OpExecError("Can't contact node '%s'" % node)
+
+    runningon_source = instance.name in ins_l[source_node].data
+    runningon_target = instance.name in ins_l[target_node].data
+
+    if runningon_source and runningon_target:
+      raise errors.OpExecError("Instance seems to be running on two nodes,"
+                               " or the hypervisor is confused. You will have"
+                               " to ensure manually that it runs only on one"
+                               " and restart this operation.")
+
+    if not (runningon_source or runningon_target):
+      raise errors.OpExecError("Instance does not seem to be running at all."
+                               " In this case, it's safer to repair by"
+                               " running 'gnt-instance stop' to ensure disk"
+                               " shutdown, and then restarting it.")
+
+    if runningon_target:
+      # the migration has actually succeeded, we need to update the config
+      self.feedback_fn("* instance running on secondary node (%s),"
+                       " updating config" % target_node)
+      instance.primary_node = target_node
+      self.cfg.Update(instance)
+      demoted_node = source_node
+    else:
+      self.feedback_fn("* instance confirmed to be running on its"
+                       " primary node (%s)" % source_node)
+      demoted_node = target_node
+
+    self._EnsureSecondary(demoted_node)
+    try:
+      self._WaitUntilSync()
+    except errors.OpExecError:
+      # we ignore errors here, since if the device is standalone, it
+      # won't be able to sync
+      pass
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def _ExecMigration(self):
+    """Migrate an instance.
+
+    The migration is done by:
+      - change the disks into dual-master mode
+      - wait until disks are fully synchronized again
+      - migrate the instance
+      - change disks on the new secondary node (the old primary) to secondary
+      - wait until disks are fully synchronized
+      - change disks into single-master mode
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    self.feedback_fn("* checking disk consistency between source and target")
+    for dev in instance.disks:
+      if not _CheckDiskConsistency(self, dev, target_node, False):
+        raise errors.OpExecError("Disk %s is degraded or not fully"
+                                 " synchronized on target node,"
+                                 " aborting migration." % dev.iv_name)
+
+    self._EnsureSecondary(target_node)
+    self._GoStandalone()
+    self._GoReconnect(True)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* migrating instance to %s" % target_node)
+    time.sleep(10)
+    result = self.rpc.call_instance_migrate(source_node, instance,
+                                            self.nodes_ip[target_node],
+                                            self.op.live)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration failed, trying to revert"
+                    " disk status: %s", msg)
+      try:
+        self._EnsureSecondary(target_node)
+        self._GoStandalone()
+        self._GoReconnect(False)
+        self._WaitUntilSync()
+      except errors.OpExecError, err:
+        self.LogWarning("Migration failed and I can't reconnect the"
+                        " drives: error '%s'\n"
+                        "Please look and recover the instance status" %
+                        str(err))
+
+      raise errors.OpExecError("Could not migrate instance %s: %s" %
+                               (instance.name, msg))
+    time.sleep(10)
+
+    instance.primary_node = target_node
+    # distribute new instance config to the other nodes
+    self.cfg.Update(instance)
+
+    self._EnsureSecondary(source_node)
+    self._WaitUntilSync()
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def Exec(self, feedback_fn):
+    """Perform the migration.
+
+    """
+    self.feedback_fn = feedback_fn
+
+    self.source_node = self.instance.primary_node
+    self.target_node = self.instance.secondary_nodes[0]
+    self.all_nodes = [self.source_node, self.target_node]
+    self.nodes_ip = {
+      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+      }
+    if self.op.cleanup:
+      return self._ExecCleanup()
+    else:
+      return self._ExecMigration()
+
+
 def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
   """Create a tree of block devices on the primary node.
diff --git a/lib/hypervisor/hv_xen.py b/lib/hypervisor/hv_xen.py
index a0a59ad3496131a6d80ba303004e9a808654f1b3..0aa956032acc0950f88a46d04d0130ce41a61d97 100644
--- a/lib/hypervisor/hv_xen.py
+++ b/lib/hypervisor/hv_xen.py
@@ -51,11 +51,11 @@ class XenHypervisor(hv_base.BaseHypervisor):
     raise NotImplementedError
 
   @staticmethod
-  def _RemoveConfigFile(instance):
+  def _RemoveConfigFile(instance_name):
     """Remove the xen configuration file.
 
     """
-    utils.RemoveFile("/etc/xen/%s" % instance.name)
+    utils.RemoveFile("/etc/xen/%s" % instance_name)
 
   @staticmethod
   def _GetXMList(include_node):
@@ -155,7 +155,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     """Stop an instance.
""" - self._RemoveConfigFile(instance) + self._RemoveConfigFile(instance.name) if force: command = ["xm", "destroy", instance.name] else: @@ -290,6 +290,12 @@ class XenHypervisor(hv_base.BaseHypervisor): if result.failed: raise errors.HypervisorError("Failed to migrate instance %s: %s" % (instance, result.output)) + # remove old xen file after migration succeeded + try: + self._RemoveConfigFile(instance) + except EnvironmentError, err: + logger.Error("Failure while removing instance config file: %s" % + str(err)) class XenPvmHypervisor(XenHypervisor): diff --git a/lib/mcpu.py b/lib/mcpu.py index 1c3384da35c8fcc6dd8b812fdae0f65e16e9d288..23107005632f17b094c5601ff98d1e3bca06e2fc 100644 --- a/lib/mcpu.py +++ b/lib/mcpu.py @@ -68,6 +68,7 @@ class Processor(object): opcodes.OpDeactivateInstanceDisks: cmdlib.LUDeactivateInstanceDisks, opcodes.OpReplaceDisks: cmdlib.LUReplaceDisks, opcodes.OpFailoverInstance: cmdlib.LUFailoverInstance, + opcodes.OpMigrateInstance: cmdlib.LUMigrateInstance, opcodes.OpConnectConsole: cmdlib.LUConnectConsole, opcodes.OpQueryInstances: cmdlib.LUQueryInstances, opcodes.OpQueryInstanceData: cmdlib.LUQueryInstanceData, diff --git a/lib/opcodes.py b/lib/opcodes.py index 32edfaa7e68c1a01dd51f84303649106708a4177..6848c4e85fd88177c491e4bfb41e363749df4bc8 100644 --- a/lib/opcodes.py +++ b/lib/opcodes.py @@ -413,6 +413,19 @@ class OpFailoverInstance(OpCode): __slots__ = ["instance_name", "ignore_consistency"] +class OpMigrateInstance(OpCode): + """Migrate an instance. + + This migrates (without shutting down an instance) to its secondary + node. + + @var instance_name: the name of the instance + + """ + OP_ID = "OP_INSTANCE_MIGRATE" + __slots__ = ["instance_name", "live", "cleanup"] + + class OpConnectConsole(OpCode): """Connect to an instance's console.""" OP_ID = "OP_INSTANCE_CONSOLE" diff --git a/scripts/gnt-instance b/scripts/gnt-instance index b197174801f4f914904b9dc2a044a363a6c93f3e..f1063d8b4ae74abc58b95a08f384e64164ccc5ff 100755 --- a/scripts/gnt-instance +++ b/scripts/gnt-instance @@ -821,6 +821,41 @@ def FailoverInstance(opts, args): return 0 +def MigrateInstance(opts, args): + """Migrate an instance. + + The migrate is done without shutdown. + + Args: + opts - class with options as members + args - list with a single element, the instance name + Opts used: + force - whether to migrate without asking questions. + + """ + instance_name = args[0] + force = opts.force + + if not force: + if opts.cleanup: + usertext = ("Instance %s will be recovered from a failed migration." + " Note that the migration procedure (including cleanup)" % + (instance_name,)) + else: + usertext = ("Instance %s will be migrated. Note that migration" % + (instance_name,)) + usertext += (" is **experimental** in this version." + " This might impact the instance if anything goes wrong." + " Continue?") + if not AskUser(usertext): + return 1 + + op = opcodes.OpMigrateInstance(instance_name=instance_name, live=opts.live, + cleanup=opts.cleanup) + SubmitOpCode(op) + return 0 + + def ConnectToInstanceConsole(opts, args): """Connect to the console of an instance. 
@@ -1269,6 +1304,26 @@ commands = {
                "[-f] <instance>",
                "Stops the instance and starts it on the backup node, using"
                " the remote mirror (only for instances of type drbd)"),
+  'migrate': (MigrateInstance, ARGS_ONE,
+              [DEBUG_OPT, FORCE_OPT,
+               make_option("--non-live", dest="live",
+                           default=True, action="store_false",
+                           help="Do a non-live migration (this usually means"
+                           " freeze the instance, save the state,"
+                           " transfer and only then resume running on the"
+                           " secondary node)"),
+               make_option("--cleanup", dest="cleanup",
+                           default=False, action="store_true",
+                           help="Instead of performing the migration, try to"
+                           " recover from a failed migration. This is safe"
+                           " to run even if the instance is healthy, but it"
+                           " will create extra replication traffic and"
+                           " briefly disrupt the replication (like during the"
+                           " migration)"),
+               ],
+              "[-f] <instance>",
+              "Migrate instance to its secondary node"
+              " (only for instances of type drbd)"),
   'info': (ShowInstanceConfig, ARGS_ANY,
            [DEBUG_OPT,
             make_option("-s", "--static", dest="static",
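
Usage sketch (not part of the patch): the snippet below shows how the OP_INSTANCE_MIGRATE opcode added above could be submitted from Python, mirroring what the new 'gnt-instance migrate' command does internally. The instance name is a placeholder, and calling ganeti.cli.SubmitOpCode directly outside the CLI wrapper is an illustrative assumption, not something this patch requires.

    # Illustrative only: submit the opcode introduced by this patch.
    from ganeti import cli
    from ganeti import opcodes

    # live migration of the instance to its secondary node
    op = opcodes.OpMigrateInstance(instance_name="instance1.example.com",
                                   live=True, cleanup=False)
    cli.SubmitOpCode(op)

    # after a failed attempt, re-run with cleanup=True to recover
    cleanup_op = opcodes.OpMigrateInstance(instance_name="instance1.example.com",
                                           live=False, cleanup=True)
    cli.SubmitOpCode(cleanup_op)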