From 7baf741d25bcb2f5c08715c9d6d5b28de59e5633 Mon Sep 17 00:00:00 2001 From: Guido Trotter <ultrotter@google.com> Date: Thu, 11 Sep 2008 09:44:33 +0000 Subject: [PATCH] Parallelize LUCreateInstance Finally, instance create on different node, without iallocator, can run in parallel. Iallocator usage still needs all nodes to be locked, unfortunately. As a bonus most checks which could have been moved to ExpandNames, before any locking is done. Reviewed-by: imsnah --- lib/cmdlib.py | 235 +++++++++++++++++++++++++++++--------------------- 1 file changed, 136 insertions(+), 99 deletions(-) diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 88ea24b82..6636d9c97 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -3071,6 +3071,125 @@ class LUCreateInstance(LogicalUnit): _OP_REQP = ["instance_name", "mem_size", "disk_size", "disk_template", "swap_size", "mode", "start", "vcpus", "wait_for_sync", "ip_check", "mac"] + REQ_BGL = False + + def _ExpandNode(self, node): + """Expands and checks one node name. + + """ + node_full = self.cfg.ExpandNodeName(node) + if node_full is None: + raise errors.OpPrereqError("Unknown node %s" % node) + return node_full + + def ExpandNames(self): + """ExpandNames for CreateInstance. + + Figure out the right locks for instance creation. + + """ + self.needed_locks = {} + + # set optional parameters to none if they don't exist + for attr in ["kernel_path", "initrd_path", "pnode", "snode", + "iallocator", "hvm_boot_order", "hvm_acpi", "hvm_pae", + "hvm_cdrom_image_path", "hvm_nic_type", "hvm_disk_type", + "vnc_bind_address"]: + if not hasattr(self.op, attr): + setattr(self.op, attr, None) + + # verify creation mode + if self.op.mode not in (constants.INSTANCE_CREATE, + constants.INSTANCE_IMPORT): + raise errors.OpPrereqError("Invalid instance creation mode '%s'" % + self.op.mode) + # disk template and mirror node verification + if self.op.disk_template not in constants.DISK_TEMPLATES: + raise errors.OpPrereqError("Invalid disk template name") + + #### instance parameters check + + # instance name verification + hostname1 = utils.HostInfo(self.op.instance_name) + self.op.instance_name = instance_name = hostname1.name + + # this is just a preventive check, but someone might still add this + # instance in the meantime, and creation will fail at lock-add time + if instance_name in self.cfg.GetInstanceList(): + raise errors.OpPrereqError("Instance '%s' is already in the cluster" % + instance_name) + + self.add_locks[locking.LEVEL_INSTANCE] = instance_name + + # ip validity checks + ip = getattr(self.op, "ip", None) + if ip is None or ip.lower() == "none": + inst_ip = None + elif ip.lower() == "auto": + inst_ip = hostname1.ip + else: + if not utils.IsValidIP(ip): + raise errors.OpPrereqError("given IP address '%s' doesn't look" + " like a valid IP" % ip) + inst_ip = ip + self.inst_ip = self.op.ip = inst_ip + # used in CheckPrereq for ip ping check + self.check_ip = hostname1.ip + + # MAC address verification + if self.op.mac != "auto": + if not utils.IsValidMac(self.op.mac.lower()): + raise errors.OpPrereqError("invalid MAC address specified: %s" % + self.op.mac) + + # boot order verification + if self.op.hvm_boot_order is not None: + if len(self.op.hvm_boot_order.strip("acdn")) != 0: + raise errors.OpPrereqError("invalid boot order specified," + " must be one or more of [acdn]") + # file storage checks + if (self.op.file_driver and + not self.op.file_driver in constants.FILE_DRIVER): + raise errors.OpPrereqError("Invalid file driver name '%s'" % + self.op.file_driver) + + if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir): + raise errors.OpPrereqError("File storage directory path not absolute") + + ### Node/iallocator related checks + if [self.op.iallocator, self.op.pnode].count(None) != 1: + raise errors.OpPrereqError("One and only one of iallocator and primary" + " node must be given") + + if self.op.iallocator: + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + else: + self.op.pnode = self._ExpandNode(self.op.pnode) + nodelist = [self.op.pnode] + if self.op.snode is not None: + self.op.snode = self._ExpandNode(self.op.snode) + nodelist.append(self.op.snode) + self.needed_locks[locking.LEVEL_NODE] = nodelist + + # in case of import lock the source node too + if self.op.mode == constants.INSTANCE_IMPORT: + src_node = getattr(self.op, "src_node", None) + src_path = getattr(self.op, "src_path", None) + + if src_node is None or src_path is None: + raise errors.OpPrereqError("Importing an instance requires source" + " node and path options") + + if not os.path.isabs(src_path): + raise errors.OpPrereqError("The source path must be absolute") + + self.op.src_node = src_node = self._ExpandNode(src_node) + if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: + self.needed_locks[locking.LEVEL_NODE].append(src_node) + + else: # INSTANCE_CREATE + if getattr(self.op, "os_type", None) is None: + raise errors.OpPrereqError("No guest OS specified") def _RunAllocator(self): """Run the allocator based on input opcode. @@ -3146,36 +3265,14 @@ class LUCreateInstance(LogicalUnit): """Check prerequisites. """ - # set optional parameters to none if they don't exist - for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode", - "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path", - "hvm_nic_type", "hvm_disk_type", "vnc_bind_address"]: - if not hasattr(self.op, attr): - setattr(self.op, attr, None) - - if self.op.mode not in (constants.INSTANCE_CREATE, - constants.INSTANCE_IMPORT): - raise errors.OpPrereqError("Invalid instance creation mode '%s'" % - self.op.mode) - if (not self.cfg.GetVGName() and self.op.disk_template not in constants.DTS_NOT_LVM): raise errors.OpPrereqError("Cluster does not support lvm-based" " instances") if self.op.mode == constants.INSTANCE_IMPORT: - src_node = getattr(self.op, "src_node", None) - src_path = getattr(self.op, "src_path", None) - if src_node is None or src_path is None: - raise errors.OpPrereqError("Importing an instance requires source" - " node and path options") - src_node_full = self.cfg.ExpandNodeName(src_node) - if src_node_full is None: - raise errors.OpPrereqError("Unknown source node '%s'" % src_node) - self.op.src_node = src_node = src_node_full - - if not os.path.isabs(src_path): - raise errors.OpPrereqError("The source path must be absolute") + src_node = self.op.src_node + src_path = self.op.src_path export_info = rpc.call_export_info(src_node, src_path) @@ -3199,52 +3296,17 @@ class LUCreateInstance(LogicalUnit): diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS, 'disk0_dump')) self.src_image = diskimage - else: # INSTANCE_CREATE - if getattr(self.op, "os_type", None) is None: - raise errors.OpPrereqError("No guest OS specified") - - #### instance parameters check - - # disk template and mirror node verification - if self.op.disk_template not in constants.DISK_TEMPLATES: - raise errors.OpPrereqError("Invalid disk template name") - - # instance name verification - hostname1 = utils.HostInfo(self.op.instance_name) - - self.op.instance_name = instance_name = hostname1.name - instance_list = self.cfg.GetInstanceList() - if instance_name in instance_list: - raise errors.OpPrereqError("Instance '%s' is already in the cluster" % - instance_name) - # ip validity checks - ip = getattr(self.op, "ip", None) - if ip is None or ip.lower() == "none": - inst_ip = None - elif ip.lower() == "auto": - inst_ip = hostname1.ip - else: - if not utils.IsValidIP(ip): - raise errors.OpPrereqError("given IP address '%s' doesn't look" - " like a valid IP" % ip) - inst_ip = ip - self.inst_ip = self.op.ip = inst_ip + # ip ping checks (we use the same ip that was resolved in ExpandNames) if self.op.start and not self.op.ip_check: raise errors.OpPrereqError("Cannot ignore IP address conflicts when" " adding an instance in start mode") if self.op.ip_check: - if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT): + if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("IP %s of instance %s already in use" % - (hostname1.ip, instance_name)) - - # MAC address verification - if self.op.mac != "auto": - if not utils.IsValidMac(self.op.mac.lower()): - raise errors.OpPrereqError("invalid MAC address specified: %s" % - self.op.mac) + (self.check_ip, instance_name)) # bridge verification bridge = getattr(self.op, "bridge", None) @@ -3253,54 +3315,28 @@ class LUCreateInstance(LogicalUnit): else: self.op.bridge = bridge - # boot order verification - if self.op.hvm_boot_order is not None: - if len(self.op.hvm_boot_order.strip("acdn")) != 0: - raise errors.OpPrereqError("invalid boot order specified," - " must be one or more of [acdn]") - # file storage checks - if (self.op.file_driver and - not self.op.file_driver in constants.FILE_DRIVER): - raise errors.OpPrereqError("Invalid file driver name '%s'" % - self.op.file_driver) - - if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir): - raise errors.OpPrereqError("File storage directory not a relative" - " path") #### allocator run - if [self.op.iallocator, self.op.pnode].count(None) != 1: - raise errors.OpPrereqError("One and only one of iallocator and primary" - " node must be given") - if self.op.iallocator is not None: self._RunAllocator() #### node related checks # check primary node - pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode)) - if pnode is None: - raise errors.OpPrereqError("Primary node '%s' is unknown" % - self.op.pnode) - self.op.pnode = pnode.name - self.pnode = pnode + self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) + assert self.pnode is not None, \ + "Cannot retrieve locked node %s" % self.op.pnode self.secondaries = [] # mirror node verification if self.op.disk_template in constants.DTS_NET_MIRROR: - if getattr(self.op, "snode", None) is None: + if self.op.snode is None: raise errors.OpPrereqError("The networked disk templates need" " a mirror node") - - snode_name = self.cfg.ExpandNodeName(self.op.snode) - if snode_name is None: - raise errors.OpPrereqError("Unknown secondary node '%s'" % - self.op.snode) - elif snode_name == pnode.name: + if self.op.snode == pnode.name: raise errors.OpPrereqError("The secondary node cannot be" " the primary node.") - self.secondaries.append(snode_name) + self.secondaries.append(self.op.snode) req_size = _ComputeDiskSize(self.op.disk_template, self.op.disk_size, self.op.swap_size) @@ -3332,7 +3368,6 @@ class LUCreateInstance(LogicalUnit): if self.op.kernel_path == constants.VALUE_NONE: raise errors.OpPrereqError("Can't set instance kernel to none") - # bridge check on primary node if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]): raise errors.OpPrereqError("target bridge '%s' does not exist on" @@ -3347,6 +3382,7 @@ class LUCreateInstance(LogicalUnit): # hvm_cdrom_image_path verification if self.op.hvm_cdrom_image_path is not None: + # FIXME (als): shouldn't these checks happen on the destination node? if not os.path.isabs(self.op.hvm_cdrom_image_path): raise errors.OpPrereqError("The path to the HVM CDROM image must" " be an absolute path or None, not %s" % @@ -3450,8 +3486,9 @@ class LUCreateInstance(LogicalUnit): feedback_fn("adding instance %s to cluster config" % instance) self.cfg.AddInstance(iobj) - # Add the new instance to the Ganeti Lock Manager - self.context.glm.add(locking.LEVEL_INSTANCE, instance) + # Declare that we don't want to remove the instance lock anymore, as we've + # added the instance to the config + del self.remove_locks[locking.LEVEL_INSTANCE] if self.op.wait_for_sync: disk_abort = not _WaitForSync(self.cfg, iobj, self.proc) @@ -3466,8 +3503,8 @@ class LUCreateInstance(LogicalUnit): if disk_abort: _RemoveDisks(iobj, self.cfg) self.cfg.RemoveInstance(iobj.name) - # Remove the new instance from the Ganeti Lock Manager - self.context.glm.remove(locking.LEVEL_INSTANCE, iobj.name) + # Make sure the instance lock gets removed + self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name raise errors.OpExecError("There are some degraded disks for" " this instance") -- GitLab