diff --git a/daemons/ganeti-noded b/daemons/ganeti-noded index c146e8b6909b65a7900e1e7f8a90fbea5611985f..08984e957e35271aa05e6bda28b9e4822dab979b 100755 --- a/daemons/ganeti-noded +++ b/daemons/ganeti-noded @@ -402,6 +402,32 @@ class NodeHttpServer(http.server.HttpServer): extra_args = params[1] return backend.StartInstance(instance, extra_args) + @staticmethod + def perspective_migration_info(params): + """Gather information about an instance to be migrated. + + """ + instance = objects.Instance.FromDict(params[0]) + return backend.MigrationInfo(instance) + + @staticmethod + def perspective_accept_instance(params): + """Prepare the node to accept an instance. + + """ + instance, info, target = params + instance = objects.Instance.FromDict(instance) + return backend.AcceptInstance(instance, info, target) + + @staticmethod + def perspective_finalize_migration(params): + """Finalize the instance migration. + + """ + instance, info, success = params + instance = objects.Instance.FromDict(instance) + return backend.FinalizeMigration(instance, info, success) + @staticmethod def perspective_instance_migrate(params): """Migrates an instance. diff --git a/lib/backend.py b/lib/backend.py index cb9261a0ddd4ec0cf48b9052bcc00744fa34712b..6147a468418f9162f6fecabe84db5296cb1d7138 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -973,6 +973,44 @@ def RebootInstance(instance, reboot_type, extra_args): return True +def MigrationInfo(instance): + """Gather information about an instance to be migrated. + + @type instance: L{objects.Instance} + @param instance: the instance definition + + """ + return (True, '') + + +def AcceptInstance(instance, info, target): + """Prepare the node to accept an instance. 
+ + @type instance: L{objects.Instance} + @param instance: the instance definition + @type info: string/data (opaque) + @param info: migration information, from the source node + @type target: string + @param target: target host (usually ip), on this node + + """ + return (True, "Accept successfull") + + +def FinalizeMigration(instance, info, success): + """Finalize any preparation to accept an instance. + + @type instance: L{objects.Instance} + @param instance: the instance definition + @type info: string/data (opaque) + @param info: migration information, from the source node + @type success: boolean + @param success: whether the migration was a success or a failure + + """ + return (True, "Migration Finalized") + + def MigrateInstance(instance, target, live): """Migrates an instance to another node. diff --git a/lib/cmdlib.py b/lib/cmdlib.py index f1b4ab226e4885ac565228ec26bb7886ca203ae0..b06cddc17e9586f2573d7b284283d56628d900c7 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -3631,6 +3631,41 @@ class LUMigrateInstance(LogicalUnit): self.feedback_fn("* done") + def _RevertDiskStatus(self): + """Try to revert the disk status after a failed migration. + + """ + target_node = self.target_node + try: + self._EnsureSecondary(target_node) + self._GoStandalone() + self._GoReconnect(False) + self._WaitUntilSync() + except errors.OpExecError, err: + self.LogWarning("Migration failed and I can't reconnect the" + " drives: error '%s'\n" + "Please look and recover the instance status" % + str(err)) + + def _AbortMigration(self): + """Call the hypervisor code to abort a started migration. 
+ + """ + instance = self.instance + target_node = self.target_node + migration_info = self.migration_info + + abort_result = self.rpc.call_finalize_migration(target_node, + instance, + migration_info, + False) + abort_msg = abort_result.RemoteFailMsg() + if abort_msg: + logging.error("Aborting migration failed on target node %s: %s" % + (target_node, abort_msg)) + # Don't raise an exception here, as we still have to try to revert the + # disk status, even if this step failed. + def _ExecMigration(self): """Migrate an instance. @@ -3654,11 +3689,38 @@ class LUMigrateInstance(LogicalUnit): " synchronized on target node," " aborting migrate." % dev.iv_name) + # First get the migration information from the remote node + result = self.rpc.call_migration_info(source_node, instance) + msg = result.RemoteFailMsg() + if msg: + log_err = ("Failed fetching source migration information from %s: %s" % + (source_node, msg)) + logging.error(log_err) + raise errors.OpExecError(log_err) + + self.migration_info = migration_info = result.data[1] + + # Then switch the disks to master/master mode self._EnsureSecondary(target_node) self._GoStandalone() self._GoReconnect(True) self._WaitUntilSync() + self.feedback_fn("* preparing %s to accept the instance" % target_node) + result = self.rpc.call_accept_instance(target_node, + instance, + migration_info, + self.nodes_ip[target_node]) + + msg = result.RemoteFailMsg() + if msg: + logging.error("Instance pre-migration failed, trying to revert" + " disk status: %s", msg) + self._AbortMigration() + self._RevertDiskStatus() + raise errors.OpExecError("Could not pre-migrate instance %s: %s" % + (instance.name, msg)) + self.feedback_fn("* migrating instance to %s" % target_node) time.sleep(10) result = self.rpc.call_instance_migrate(source_node, instance, @@ -3668,17 +3730,8 @@ class LUMigrateInstance(LogicalUnit): if msg: logging.error("Instance migration failed, trying to revert" " disk status: %s", msg) - try: - 
self._EnsureSecondary(target_node) - self._GoStandalone() - self._GoReconnect(False) - self._WaitUntilSync() - except errors.OpExecError, err: - self.LogWarning("Migration failed and I can't reconnect the" - " drives: error '%s'\n" - "Please look and recover the instance status" % - str(err)) - + self._AbortMigration() + self._RevertDiskStatus() raise errors.OpExecError("Could not migrate instance %s: %s" % (instance.name, msg)) time.sleep(10) @@ -3687,6 +3740,17 @@ class LUMigrateInstance(LogicalUnit): # distribute new instance config to the other nodes self.cfg.Update(instance) + result = self.rpc.call_finalize_migration(target_node, + instance, + migration_info, + True) + msg = result.RemoteFailMsg() + if msg: + logging.error("Instance migration succeeded, but finalization failed:" + " %s" % msg) + raise errors.OpExecError("Could not finalize instance migration: %s" % + msg) + self._EnsureSecondary(source_node) self._WaitUntilSync() self._GoStandalone() diff --git a/lib/rpc.py b/lib/rpc.py index 467163e8b7e541bfa3947a1f0107c1313c1f9e1d..fc71a9335031cb3c322f17dbc16c16fde23c3b81 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -432,6 +432,59 @@ class RpcRunner(object): return self._SingleNodeCall(node, "instance_shutdown", [self._InstDict(instance)]) + def call_migration_info(self, node, instance): + """Gather the information necessary to prepare an instance migration. + + This is a single-node call. + + @type node: string + @param node: the node on which the instance is currently running + @type instance: C{objects.Instance} + @param instance: the instance definition + + """ + return self._SingleNodeCall(node, "migration_info", + [self._InstDict(instance)]) + + def call_accept_instance(self, node, instance, info, target): + """Prepare a node to accept an instance. + + This is a single-node call. 
+ + @type node: string + @param node: the target node for the migration + @type instance: C{objects.Instance} + @param instance: the instance definition + @type info: opaque/hypervisor specific (string/data) + @param info: result for the call_migration_info call + @type target: string + @param target: target hostname (usually ip address) (on the node itself) + + """ + return self._SingleNodeCall(node, "accept_instance", + [self._InstDict(instance), info, target]) + + def call_finalize_migration(self, node, instance, info, success): + """Finalize any target-node migration specific operation. + + This is called both in case of a successful migration and in case of error + (in which case it should abort the migration). + + This is a single-node call. + + @type node: string + @param node: the target node for the migration + @type instance: C{objects.Instance} + @param instance: the instance definition + @type info: opaque/hypervisor specific (string/data) + @param info: result for the call_migration_info call + @type success: boolean + @param success: whether the migration was a success or a failure + + """ + return self._SingleNodeCall(node, "finalize_migration", + [self._InstDict(instance), info, success]) + def call_instance_migrate(self, node, instance, target, live): """Migrate an instance.