diff --git a/Makefile.am b/Makefile.am index ff7e00fb1e8e89403ecaf18e59a4b58f03b84bb2..3d2ae8e94d2ab0e42e285f132bfa2301f34b24f6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -160,6 +160,7 @@ EXTRA_DIST = \ $(MAINTAINERCLEANFILES) \ NEWS \ DEVNOTES \ + pylintrc \ autotools/docbook-wrapper \ devel/upload.in \ $(docdot) \ diff --git a/daemons/ganeti-masterd b/daemons/ganeti-masterd index 5cf8c61c93679b30cfb41fb3c1a0178eb130d560..e5f291a4767e59da39293ffa5548112b957ba2cb 100755 --- a/daemons/ganeti-masterd +++ b/daemons/ganeti-masterd @@ -195,6 +195,7 @@ class ClientRqHandler(SocketServer.BaseRequestHandler): def send_message(self, msg): #print "sending", msg + # TODO: sendall is not guaranteed to send everything self.request.sendall(msg + self.EOM) @@ -403,6 +404,10 @@ def ParseOptions(): help="Do not check that the nodes agree on this node" " being the master and start the daemon unconditionally", default=False, action="store_true") + parser.add_option("--yes-do-it", dest="yes_do_it", + help="Override interactive check for --no-voting", + default=False, action="store_true") + options, args = parser.parse_args() return options, args @@ -479,7 +484,7 @@ def main(): ssconf.CheckMaster(options.debug) # we believe we are the master, let's ask the other nodes... - if options.no_voting: + if options.no_voting and not options.yes_do_it: sys.stdout.write("The 'no voting' option has been selected.\n") sys.stdout.write("This is dangerous, please confirm by" " typing uppercase 'yes': ") @@ -488,7 +493,7 @@ def main(): if confirmation != "YES": print "Aborting." 
return - else: + elif not options.no_voting: if not CheckAgreement(): return diff --git a/doc/hooks.rst b/doc/hooks.rst index 7dbe7d5e92ad0bafb1f1a74bbd5cd3dbeadbd3a7..b2f05ce5b00f9bf68dd219749e9813d4734b970e 100644 --- a/doc/hooks.rst +++ b/doc/hooks.rst @@ -104,7 +104,7 @@ The scripts will be run as follows: be left -All informations about the cluster is passed using environment +All information about the cluster is passed using environment variables. Different operations will have sligthly different environments, but most of the variables are common. diff --git a/doc/iallocator.rst b/doc/iallocator.rst index 58719a0b87ea55f2e1289e0c1e46c22571a8c602..467f79da9d2f59e070b5d29c8faa7bb914f1af26 100644 --- a/doc/iallocator.rst +++ b/doc/iallocator.rst @@ -233,7 +233,7 @@ The response message is much more simple than the input one. It is also a dict having three keys: success - a boolean value denoting if the allocation was successfull or not + a boolean value denoting if the allocation was successful or not info a string with information from the scripts; if the allocation fails, diff --git a/lib/backend.py b/lib/backend.py index 0860e0f8467478f50993d1e88ad5d0be6171b60f..456ba69e65246d958f3737b712cc45c3b9b6fadc 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -174,7 +174,7 @@ def GetMasterInfo(): master_node = cfg.GetMasterNode() except errors.ConfigurationError, err: _Fail("Cluster configuration incomplete: %s", err, exc=True) - return master_netdev, master_ip, master_node + return (master_netdev, master_ip, master_node) def StartMaster(start_daemons): @@ -337,7 +337,7 @@ def LeaveCluster(): def GetNodeInfo(vgname, hypervisor_type): - """Gives back a hash with different informations about the node. + """Gives back a hash with different information about the node. 
@type vgname: C{string} @param vgname: the name of the volume group to ask for disk space information @@ -609,7 +609,7 @@ def GetInstanceList(hypervisor_list): def GetInstanceInfo(instance, hname): - """Gives back the informations about an instance as a dictionary. + """Gives back the information about an instance as a dictionary. @type instance: string @param instance: the instance name @@ -764,7 +764,7 @@ def RunRenameInstance(instance, old_name): def _GetVGInfo(vg_name): - """Get informations about the volume group. + """Get information about the volume group. @type vg_name: str @param vg_name: the volume group which we query @@ -931,7 +931,7 @@ def InstanceShutdown(instance): # test every 10secs for 2min time.sleep(1) - for dummy in range(11): + for _ in range(11): if instance.name not in GetInstanceList([hv_name]): break time.sleep(10) @@ -1242,7 +1242,7 @@ def BlockdevAssemble(disk, owner, as_primary): def BlockdevShutdown(disk): """Shut down a block device. - First, if the device is assembled (Attach() is successfull), then + First, if the device is assembled (Attach() is successful), then the device is shutdown. Then the children of the device are shutdown. @@ -1348,7 +1348,7 @@ def BlockdevGetmirrorstatus(disks): def _RecursiveFindBD(disk): """Check if a device is activated. - If so, return informations about the real device. + If so, return information about the real device. @type disk: L{objects.Disk} @param disk: the disk object we need to find @@ -1368,7 +1368,7 @@ def _RecursiveFindBD(disk): def BlockdevFind(disk): """Check if a device is activated. - If it is, return informations about the real device. + If it is, return information about the real device. 
@type disk: L{objects.Disk} @param disk: the disk to find @@ -2068,7 +2068,7 @@ def RemoveFileStorageDir(file_storage_dir): @param file_storage_dir: the directory we should cleanup @rtype: tuple (success,) @return: tuple of one element, C{success}, denoting - whether the operation was successfull + whether the operation was successful """ file_storage_dir = _TransformFileStorageDir(file_storage_dir) @@ -2254,7 +2254,8 @@ def DemoteFromMC(): if utils.IsProcessAlive(utils.ReadPidFile(pid_file)): _Fail("The master daemon is running, will not demote") try: - utils.CreateBackup(constants.CLUSTER_CONF_FILE) + if os.path.isfile(constants.CLUSTER_CONF_FILE): + utils.CreateBackup(constants.CLUSTER_CONF_FILE) except EnvironmentError, err: if err.errno != errno.ENOENT: _Fail("Error while backing up cluster file: %s", err, exc=True) diff --git a/lib/bdev.py b/lib/bdev.py index 5f94d5078c123f59a93c18b91cf7c9a8bd8eb083..4971b53b1e33d1a9164c811f4aae5f62c2d0b0fe 100644 --- a/lib/bdev.py +++ b/lib/bdev.py @@ -161,7 +161,7 @@ class BlockDev(object): """Remove this device. This makes sense only for some of the device types: LV and file - storeage. Also note that if the device can't attach, the removal + storage. Also note that if the device can't attach, the removal can't be completed. """ @@ -444,7 +444,7 @@ class LogicalVolume(BlockDev): def Assemble(self): """Assemble the device. - We alway run `lvchange -ay` on the LV to ensure it's active before + We always run `lvchange -ay` on the LV to ensure it's active before use, as there were cases when xenvg was not active after boot (also possibly after disk issues). 
@@ -828,7 +828,13 @@ class BaseDRBD(BlockDev): bytes = sectors * 512 if bytes < 128 * 1024 * 1024: # less than 128MiB _ThrowError("Meta device too small (%.2fMib)", (bytes / 1024 / 1024)) - if bytes > (128 + 32) * 1024 * 1024: # account for an extra (big) PE on LVM + # the maximum *valid* size of the meta device when living on top + # of LVM is hard to compute: it depends on the number of stripes + # and the PE size; e.g. a 2-stripe, 64MB PE will result in a 128MB + # (normal size), but an eight-stripe 128MB PE will result in a 1GB + # size meta device; as such, we restrict it to 1GB (a little bit + # too generous, but making assumptions about PE size is hard) + if bytes > 1024 * 1024 * 1024: _ThrowError("Meta device too big (%.2fMiB)", (bytes / 1024 / 1024)) def Rename(self, new_id): @@ -1252,14 +1258,14 @@ class DRBD8(BaseDRBD): If sync_percent is None, it means all is ok - If estimated_time is None, it means we can't esimate + If estimated_time is None, it means we can't estimate the time needed, otherwise it's the time left in seconds. We set the is_degraded parameter to True on two conditions: network not connected or local disk missing. - We compute the ldisk parameter based on wheter we have a local + We compute the ldisk parameter based on whether we have a local disk or not. 
@rtype: tuple @@ -1329,14 +1335,14 @@ class DRBD8(BaseDRBD): ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor) timeout_limit = time.time() + self._NET_RECONFIG_TIMEOUT - sleep_time = 0.100 # we start the retry time at 100 miliseconds + sleep_time = 0.100 # we start the retry time at 100 milliseconds while time.time() < timeout_limit: status = self.GetProcStatus() if status.is_standalone: break # retry the disconnect, it seems possible that due to a # well-time disconnect on the peer, my disconnect command might - # be ingored and forgotten + # be ignored and forgotten ever_disconnected = _IgnoreError(self._ShutdownNet, self.minor) or \ ever_disconnected time.sleep(sleep_time) @@ -1641,7 +1647,7 @@ class FileStorage(BlockDev): def Shutdown(self): """Shutdown the device. - This is a no-op for the file type, as we don't deacivate + This is a no-op for the file type, as we don't deactivate the file on shutdown. """ diff --git a/lib/bootstrap.py b/lib/bootstrap.py index a3811a889d3b4585b01f2e5b7b03f2b758555896..e576c93524aca2e618f9123430d6c720645da9dc 100644 --- a/lib/bootstrap.py +++ b/lib/bootstrap.py @@ -79,24 +79,27 @@ def _GenerateSelfSignedSslCert(file_name, validity=(365 * 5)): """ (fd, tmp_file_name) = tempfile.mkstemp(dir=os.path.dirname(file_name)) try: - # Set permissions before writing key - os.chmod(tmp_file_name, 0600) - - result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024", - "-days", str(validity), "-nodes", "-x509", - "-keyout", tmp_file_name, "-out", tmp_file_name, - "-batch"]) - if result.failed: - raise errors.OpExecError("Could not generate SSL certificate, command" - " %s had exitcode %s and error message %s" % - (result.cmd, result.exit_code, result.output)) - - # Make read-only - os.chmod(tmp_file_name, 0400) - - os.rename(tmp_file_name, file_name) + try: + # Set permissions before writing key + os.chmod(tmp_file_name, 0600) + + result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024", + "-days", 
str(validity), "-nodes", "-x509", + "-keyout", tmp_file_name, "-out", tmp_file_name, + "-batch"]) + if result.failed: + raise errors.OpExecError("Could not generate SSL certificate, command" + " %s had exitcode %s and error message %s" % + (result.cmd, result.exit_code, result.output)) + + # Make read-only + os.chmod(tmp_file_name, 0400) + + os.rename(tmp_file_name, file_name) + finally: + utils.RemoveFile(tmp_file_name) finally: - utils.RemoveFile(tmp_file_name) + os.close(fd) def _InitGanetiServerSetup(): @@ -384,13 +387,17 @@ def SetupNodeDaemon(cluster_name, node, ssh_key_check): (node, result.fail_reason, result.output)) -def MasterFailover(): +def MasterFailover(no_voting=False): """Failover the master node. This checks that we are not already the master, and will cause the current master to cease being master, and the non-master to become new master. + @type no_voting: boolean + @param no_voting: force the operation without remote nodes agreement + (dangerous) + """ sstore = ssconf.SimpleStore() @@ -412,18 +419,20 @@ def MasterFailover(): " master candidates is:\n" "%s" % ('\n'.join(mc_no_master))) - vote_list = GatherMasterVotes(node_list) - - if vote_list: - voted_master = vote_list[0][0] - if voted_master is None: - raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not" - " respond.") - elif voted_master != old_master: - raise errors.OpPrereqError("I have wrong configuration, I believe the" - " master is %s but the other nodes voted for" - " %s. Please resync the configuration of" - " this node." % (old_master, voted_master)) + if not no_voting: + vote_list = GatherMasterVotes(node_list) + + if vote_list: + voted_master = vote_list[0][0] + if voted_master is None: + raise errors.OpPrereqError("Cluster is inconsistent, most nodes did" + " not respond.") + elif voted_master != old_master: + raise errors.OpPrereqError("I have a wrong configuration, I believe" + " the master is %s but the other nodes" + " voted %s. 
Please resync the configuration" + " of this node." % + (old_master, voted_master)) # end checks rcode = 0 @@ -448,7 +457,8 @@ def MasterFailover(): # cluster info cfg.Update(cluster_info) - result = rpc.RpcRunner.call_node_start_master(new_master, True) + # 2.0.X: Don't start the master if no_voting is true + result = rpc.RpcRunner.call_node_start_master(new_master, not no_voting) msg = result.RemoteFailMsg() if msg: logging.error("Could not start the master role on the new master" @@ -490,7 +500,7 @@ def GatherMasterVotes(node_list): @type node_list: list @param node_list: the list of nodes to query for master info; the current - node wil be removed if it is in the list + node will be removed if it is in the list @rtype: list @return: list of (node, votes) diff --git a/lib/cli.py b/lib/cli.py index 6b40c72d0a48dfcdfc0d43f4cff9610649e26103..f9a9628e0e14a3a0961d92d00da924e618a1cd29 100644 --- a/lib/cli.py +++ b/lib/cli.py @@ -341,7 +341,7 @@ keyval_option = KeyValOption def _ParseArgs(argv, commands, aliases): """Parser for the command line arguments. - This function parses the arguements and returns the function which + This function parses the arguments and returns the function which must be executed together with its (modified) arguments. 
@param argv: the command line @@ -459,10 +459,10 @@ def AskUser(text, choices=None): choices = [('y', True, 'Perform the operation'), ('n', False, 'Do not perform the operation')] if not choices or not isinstance(choices, list): - raise errors.ProgrammerError("Invalid choiches argument to AskUser") + raise errors.ProgrammerError("Invalid choices argument to AskUser") for entry in choices: if not isinstance(entry, tuple) or len(entry) < 3 or entry[0] == '?': - raise errors.ProgrammerError("Invalid choiches element to AskUser") + raise errors.ProgrammerError("Invalid choices element to AskUser") answer = choices[-1][1] new_text = [] @@ -778,7 +778,7 @@ def GenericMain(commands, override=None, aliases=None): except (errors.GenericError, luxi.ProtocolError, JobSubmittedException), err: result, err_msg = FormatError(err) - logging.exception("Error durring command processing") + logging.exception("Error during command processing") ToStderr(err_msg) return result diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 6f0b518434d718479cdea1095d5070b414abd1ea..25e598bd0d4c53c58d48b7a27c2f54ee249fd631 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -68,7 +68,7 @@ class LogicalUnit(object): def __init__(self, processor, op, context, rpc): """Constructor for LogicalUnit. - This needs to be overriden in derived classes in order to check op + This needs to be overridden in derived classes in order to check op validity. 
""" @@ -118,7 +118,7 @@ class LogicalUnit(object): CheckPrereq, doing these separate is better because: - ExpandNames is left as as purely a lock-related function - - CheckPrereq is run after we have aquired locks (and possible + - CheckPrereq is run after we have acquired locks (and possible waited for them) The function is allowed to change the self.op attribute so that @@ -456,7 +456,7 @@ def _CheckNodeNotDrained(lu, node): def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, memory, vcpus, nics, disk_template, disks, - bep, hvp, hypervisor): + bep, hvp, hypervisor_name): """Builds instance related env variables for hooks This builds the hook environment from individual variables. @@ -479,15 +479,15 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, @param nics: list of tuples (ip, mac, mode, link) representing the NICs the instance has @type disk_template: string - @param disk_template: the distk template of the instance + @param disk_template: the disk template of the instance @type disks: list @param disks: the list of (size, mode) pairs @type bep: dict @param bep: the backend parameters for the instance @type hvp: dict @param hvp: the hypervisor parameters for the instance - @type hypervisor: string - @param hypervisor: the hypervisor for the instance + @type hypervisor_name: string + @param hypervisor_name: the hypervisor for the instance @rtype: dict @return: the hook environment for this instance @@ -506,7 +506,7 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, "INSTANCE_MEMORY": memory, "INSTANCE_VCPUS": vcpus, "INSTANCE_DISK_TEMPLATE": disk_template, - "INSTANCE_HYPERVISOR": hypervisor, + "INSTANCE_HYPERVISOR": hypervisor_name, } if nics: @@ -654,7 +654,7 @@ class LUDestroyCluster(NoHooksLU): This checks whether the cluster is empty. - Any errors are signalled by raising errors.OpPrereqError. + Any errors are signaled by raising errors.OpPrereqError. 
""" master = self.cfg.GetMasterNode() @@ -705,7 +705,7 @@ class LUVerifyCluster(LogicalUnit): Test list: - compares ganeti version - - checks vg existance and size > 20G + - checks vg existence and size > 20G - checks config file checksum - checks ssh to other nodes @@ -787,8 +787,8 @@ class LUVerifyCluster(LogicalUnit): else: # not candidate and this is not a must-have file bad = True - feedback_fn(" - ERROR: non master-candidate has old/wrong file" - " '%s'" % file_name) + feedback_fn(" - ERROR: file '%s' should not exist on non master" + " candidates (and the file is outdated)" % file_name) else: # all good, except non-master/non-must have combination if not node_is_mc and not must_have_file: @@ -944,7 +944,7 @@ class LUVerifyCluster(LogicalUnit): if bep[constants.BE_AUTO_BALANCE]: needed_mem += bep[constants.BE_MEMORY] if nodeinfo['mfree'] < needed_mem: - feedback_fn(" - ERROR: not enough memory on node %s to accomodate" + feedback_fn(" - ERROR: not enough memory on node %s to accommodate" " failovers should node %s fail" % (node, prinode)) bad = True return bad @@ -963,7 +963,7 @@ class LUVerifyCluster(LogicalUnit): def BuildHooksEnv(self): """Build hooks env. - Cluster-Verify hooks just rone in the post phase and their failure makes + Cluster-Verify hooks just ran in the post phase and their failure makes the output be logged in the verify output and the verification to fail. """ @@ -1231,7 +1231,7 @@ class LUVerifyCluster(LogicalUnit): return not bad def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result): - """Analize the post-hooks' result + """Analyze the post-hooks' result This method analyses the hook result, handles it, and sends some nicely-formatted feedback back to the user. 
@@ -1337,7 +1337,6 @@ class LUVerifyDisks(NoHooksLU): node_lvs = self.rpc.call_lv_list(nodes, vg_name) - to_act = set() for node in nodes: # node_volume node_res = node_lvs[node] @@ -1453,7 +1452,7 @@ def _RecursiveCheckIfLVMBased(disk): @type disk: L{objects.Disk} @param disk: the disk to check - @rtype: booleean + @rtype: boolean @return: boolean indicating whether a LD_LV dev_type was found or not """ @@ -1909,7 +1908,7 @@ class LURemoveNode(LogicalUnit): - it does not have primary or secondary instances - it's not the master - Any errors are signalled by raising errors.OpPrereqError. + Any errors are signaled by raising errors.OpPrereqError. """ node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name)) @@ -2239,7 +2238,7 @@ class LUAddNode(LogicalUnit): - it is resolvable - its parameters (single/dual homed) matches the cluster - Any errors are signalled by raising errors.OpPrereqError. + Any errors are signaled by raising errors.OpPrereqError. """ node_name = self.op.node_name @@ -2293,7 +2292,7 @@ class LUAddNode(LogicalUnit): raise errors.OpPrereqError("The master has a private ip but the" " new node doesn't have one") - # checks reachablity + # checks reachability if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("Node not reachable by ping") @@ -2305,14 +2304,24 @@ class LUAddNode(LogicalUnit): " based ping to noded port") cp_size = self.cfg.GetClusterInfo().candidate_pool_size - mc_now, _ = self.cfg.GetMasterCandidateStats() - master_candidate = mc_now < cp_size + if self.op.readd: + exceptions = [node] + else: + exceptions = [] + mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions) + # the new node will increase mc_max with one, so: + mc_max = min(mc_max + 1, cp_size) + self.master_candidate = mc_now < mc_max - self.new_node = objects.Node(name=node, - primary_ip=primary_ip, - secondary_ip=secondary_ip, - master_candidate=master_candidate, - offline=False, drained=False) + if self.op.readd: 
+ self.new_node = self.cfg.GetNodeInfo(node) + assert self.new_node is not None, "Can't retrieve locked node %s" % node + else: + self.new_node = objects.Node(name=node, + primary_ip=primary_ip, + secondary_ip=secondary_ip, + master_candidate=self.master_candidate, + offline=False, drained=False) def Exec(self, feedback_fn): """Adds the new node to the cluster. @@ -2321,6 +2330,20 @@ class LUAddNode(LogicalUnit): new_node = self.new_node node = new_node.name + # for re-adds, reset the offline/drained/master-candidate flags; + # we need to reset here, otherwise offline would prevent RPC calls + # later in the procedure; this also means that if the re-add + # fails, we are left with a non-offlined, broken node + if self.op.readd: + new_node.drained = new_node.offline = False + self.LogInfo("Readding a node, the offline/drained flags were reset") + # if we demote the node, we do cleanup later in the procedure + new_node.master_candidate = self.master_candidate + + # notify the user about any possible mc promotion + if new_node.master_candidate: + self.LogInfo("Node will be a master candidate") + # check connectivity result = self.rpc.call_version([node])[node] result.Raise("Can't get version information from node %s" % node) @@ -2386,6 +2409,15 @@ class LUAddNode(LogicalUnit): if self.op.readd: _RedistributeAncillaryFiles(self) self.context.ReaddNode(new_node) + # make sure we redistribute the config + self.cfg.Update(new_node) + # and make sure the new node will not have old files around + if not new_node.master_candidate: + result = self.rpc.call_node_demote_from_mc(new_node.name) + msg = result.RemoteFailMsg() + if msg: + self.LogWarning("Node failed to demote itself from master" + " candidate status: %s" % msg) else: _RedistributeAncillaryFiles(self, additional_nodes=[node]) self.context.AddNode(new_node) @@ -2505,6 +2537,10 @@ class LUSetNodeParams(LogicalUnit): node.master_candidate = False changed_mc = True result.append(("master_candidate", "auto-demotion due 
to drain")) + rrc = self.rpc.call_node_demote_from_mc(node.name) + msg = rrc.RemoteFailMsg() + if msg: + self.LogWarning("Node failed to demote itself: %s" % msg) if node.offline: node.offline = False result.append(("offline", "clear offline status due to drain")) @@ -2593,8 +2629,8 @@ class LUQueryClusterInfo(NoHooksLU): "master": cluster.master_node, "default_hypervisor": cluster.default_hypervisor, "enabled_hypervisors": cluster.enabled_hypervisors, - "hvparams": dict([(hvname, cluster.hvparams[hvname]) - for hvname in cluster.enabled_hypervisors]), + "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) + for hypervisor_name in cluster.enabled_hypervisors]), "beparams": cluster.beparams, "nicparams": cluster.nicparams, "candidate_pool_size": cluster.candidate_pool_size, @@ -2757,7 +2793,7 @@ def _StartInstanceDisks(lu, instance, force): """Start the disks of an instance. """ - disks_ok, dummy = _AssembleInstanceDisks(lu, instance, + disks_ok, _ = _AssembleInstanceDisks(lu, instance, ignore_secondaries=force) if not disks_ok: _ShutdownInstanceDisks(lu, instance) @@ -2943,7 +2979,7 @@ class LUStartupInstance(LogicalUnit): _CheckNodeOnline(self, instance.primary_node) bep = self.cfg.GetClusterInfo().FillBE(instance) - # check bridges existance + # check bridges existence _CheckInstanceBridgesExist(self, instance) remote_info = self.rpc.call_instance_info(instance.primary_node, @@ -3022,7 +3058,7 @@ class LURebootInstance(LogicalUnit): _CheckNodeOnline(self, instance.primary_node) - # check bridges existance + # check bridges existence _CheckInstanceBridgesExist(self, instance) def Exec(self, feedback_fn): @@ -3762,7 +3798,7 @@ class LUFailoverInstance(LogicalUnit): logging.info("Starting instance %s on node %s", instance.name, target_node) - disks_ok, dummy = _AssembleInstanceDisks(self, instance, + disks_ok, _ = _AssembleInstanceDisks(self, instance, ignore_secondaries=True) if not disks_ok: _ShutdownInstanceDisks(self, instance) @@ -5501,7 +5537,6 @@ 
class LUReplaceDisks(LogicalUnit): logging.debug("Allocated minors %s" % (minors,)) self.proc.LogStep(4, steps_total, "changing drbd configuration") for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)): - size = dev.size info("activating a new drbd on %s for disk/%d" % (new_node, idx)) # create new devices on new_node; note that we create two IDs: # one without port, so the drbd will be activated without @@ -6077,7 +6112,7 @@ class LUSetInstanceParams(LogicalUnit): This only checks the instance list against the existing names. """ - force = self.force = self.op.force + self.force = self.op.force # checking the new params on the primary/secondary nodes @@ -6435,7 +6470,7 @@ class LUExportInstance(LogicalUnit): # remove it from its current node. In the future we could fix this by: # - making a tasklet to search (share-lock all), then create the new one, # then one to remove, after - # - removing the removal operation altoghether + # - removing the removal operation altogether self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET def DeclareLocks(self, level): @@ -7097,7 +7132,6 @@ class IAllocator(object): """ if call_fn is None: call_fn = self.lu.rpc.call_iallocator_runner - data = self.in_text result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text) result.Raise("Failure while running the iallocator script") diff --git a/lib/config.py b/lib/config.py index 21f6c18baadb7850b9a10e436315cb77bf3bdbad..acdca2ea0580eee573d2a2151493dd72f78f7387 100644 --- a/lib/config.py +++ b/lib/config.py @@ -474,8 +474,8 @@ class ConfigWriter: def _AppendUsedPorts(instance_name, disk, used): duplicates = [] if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5: - nodeA, nodeB, dummy, minorA, minorB = disk.logical_id[:5] - for node, port in ((nodeA, minorA), (nodeB, minorB)): + node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5] + for node, port in ((node_a, minor_a), (node_b, minor_b)): assert node in used, ("Node '%s' of instance '%s' not 
found" " in node list" % (node, instance_name)) if port in used[node]: @@ -796,7 +796,7 @@ class ConfigWriter: self._config_data.instances.keys()) def _UnlockedGetInstanceInfo(self, instance_name): - """Returns informations about an instance. + """Returns information about an instance. This function is for internal use, when the config lock is already held. @@ -808,9 +808,9 @@ class ConfigWriter: @locking.ssynchronized(_config_lock, shared=1) def GetInstanceInfo(self, instance_name): - """Returns informations about an instance. + """Returns information about an instance. - It takes the information from the configuration file. Other informations of + It takes the information from the configuration file. Other information of an instance are taken from the live systems. @param instance_name: name of the instance, e.g. @@ -945,15 +945,19 @@ class ConfigWriter: for node in self._UnlockedGetNodeList()]) return my_dict - def _UnlockedGetMasterCandidateStats(self): + def _UnlockedGetMasterCandidateStats(self, exceptions=None): """Get the number of current and maximum desired and possible candidates. + @type exceptions: list + @param exceptions: if passed, list of nodes that should be ignored @rtype: tuple @return: tuple of (current, desired and possible) """ mc_now = mc_max = 0 - for node in self._config_data.nodes.itervalues(): + for node in self._config_data.nodes.values(): + if exceptions and node.name in exceptions: + continue if not (node.offline or node.drained): mc_max += 1 if node.master_candidate: @@ -962,16 +966,18 @@ class ConfigWriter: return (mc_now, mc_max) @locking.ssynchronized(_config_lock, shared=1) - def GetMasterCandidateStats(self): + def GetMasterCandidateStats(self, exceptions=None): """Get the number of current and maximum possible candidates. This is just a wrapper over L{_UnlockedGetMasterCandidateStats}. 
+ @type exceptions: list + @param exceptions: if passed, list of nodes that should be ignored @rtype: tuple @return: tuple of (current, max) """ - return self._UnlockedGetMasterCandidateStats() + return self._UnlockedGetMasterCandidateStats(exceptions) @locking.ssynchronized(_config_lock) def MaintainCandidatePool(self): @@ -1203,7 +1209,7 @@ class ConfigWriter: @locking.ssynchronized(_config_lock, shared=1) def GetClusterInfo(self): - """Returns informations about the cluster + """Returns information about the cluster @rtype: L{objects.Cluster} @return: the cluster object diff --git a/lib/http/__init__.py b/lib/http/__init__.py index 008cf9cb6712592dd08a718fe66a256df6cf29db..c98fa586dd10323f265ec11cd01f0b5e884f93e7 100644 --- a/lib/http/__init__.py +++ b/lib/http/__init__.py @@ -367,15 +367,12 @@ def SocketOperation(sock, op, arg1, timeout): # TODO: event_poll/event_check/override if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE): event_poll = select.POLLOUT - event_check = select.POLLOUT elif op == SOCKOP_RECV: event_poll = select.POLLIN - event_check = select.POLLIN | select.POLLPRI elif op == SOCKOP_SHUTDOWN: event_poll = None - event_check = None # The timeout is only used when OpenSSL requests polling for a condition. # It is not advisable to have no timeout for shutdown. @@ -744,7 +741,7 @@ class HttpMessageWriter(object): def HasMessageBody(self): """Checks whether the HTTP message contains a body. - Can be overriden by subclasses. + Can be overridden by subclasses. """ return bool(self._msg.body) @@ -937,7 +934,7 @@ class HttpMessageReader(object): def ParseStartLine(self, start_line): """Parses the start line of a message. - Must be overriden by subclass. + Must be overridden by subclass. 
@type start_line: string @param start_line: Start line string diff --git a/lib/http/auth.py b/lib/http/auth.py index 8a8d7201e604bcc99ea8e9cce57f844affa2498d..670b897b3a8d1fc6253a158f398cbd14999ead7c 100644 --- a/lib/http/auth.py +++ b/lib/http/auth.py @@ -23,12 +23,10 @@ """ import logging -import time import re import base64 import binascii -from ganeti import constants from ganeti import utils from ganeti import http @@ -80,7 +78,7 @@ class HttpServerRequestAuthentication(object): def GetAuthRealm(self, req): """Returns the authentication realm for a request. - MAY be overriden by a subclass, which then can return different realms for + MAY be overridden by a subclass, which then can return different realms for different paths. Returning "None" means no authentication is needed for a request. @@ -195,7 +193,7 @@ class HttpServerRequestAuthentication(object): def Authenticate(self, req, user, password): """Checks the password for a user. - This function MUST be overriden by a subclass. + This function MUST be overridden by a subclass. 
""" raise NotImplementedError() diff --git a/lib/http/client.py b/lib/http/client.py index 776fadeae37545f8659f1bb6cc896feddc840e90..717581f6f4295dfdab325a6bae15780d3793c4cc 100644 --- a/lib/http/client.py +++ b/lib/http/client.py @@ -22,23 +22,13 @@ """ -import BaseHTTPServer -import cgi -import logging -import OpenSSL import os import select import socket -import sys -import time -import signal import errno import threading -from ganeti import constants -from ganeti import serializer from ganeti import workerpool -from ganeti import utils from ganeti import http diff --git a/lib/http/server.py b/lib/http/server.py index b74eb3674121dc64958c3a81916971fde2aaad19..0afdcd00d3da52e9d25541f250a8d04a20b7c28b 100644 --- a/lib/http/server.py +++ b/lib/http/server.py @@ -31,9 +31,6 @@ import socket import time import signal -from ganeti import constants -from ganeti import serializer -from ganeti import utils from ganeti import http @@ -498,7 +495,7 @@ class HttpServer(http.HttpBase): # As soon as too many children run, we'll not respond to new # requests. The real solution would be to add a timeout for children # and killing them after some time. - pid, status = os.waitpid(0, 0) + pid, _ = os.waitpid(0, 0) except os.error: pid = None if pid and pid in self._children: @@ -536,14 +533,14 @@ class HttpServer(http.HttpBase): def PreHandleRequest(self, req): """Called before handling a request. - Can be overriden by a subclass. + Can be overridden by a subclass. """ def HandleRequest(self, req): """Handles a request. - Must be overriden by subclass. + Must be overridden by subclass. 
""" raise NotImplementedError() diff --git a/lib/hypervisor/hv_fake.py b/lib/hypervisor/hv_fake.py index 52d85bc96352a12c11e16b6be3161dc70a4aa1c0..38599d76bf12daa8ec3b9952ac6e3c538c177149 100644 --- a/lib/hypervisor/hv_fake.py +++ b/lib/hypervisor/hv_fake.py @@ -25,7 +25,6 @@ import os import os.path -import re from ganeti import utils from ganeti import constants diff --git a/lib/hypervisor/hv_xen.py b/lib/hypervisor/hv_xen.py index b3390f4f15325e04567d4ee661b02c151227a40f..552ff2f41eb5f29ac8896ffa5dd980d80d96a32f 100644 --- a/lib/hypervisor/hv_xen.py +++ b/lib/hypervisor/hv_xen.py @@ -94,7 +94,7 @@ class XenHypervisor(hv_base.BaseHypervisor): @return: list of (name, id, memory, vcpus, state, time spent) """ - for dummy in range(5): + for _ in range(5): result = utils.RunCmd(["xm", "list"]) if not result.failed: break diff --git a/lib/jqueue.py b/lib/jqueue.py index ed2b9a90858089e207ab5ea453d677277d41d750..74139b7b5aac54bfc78d2eecec128531f4a3fd47 100644 --- a/lib/jqueue.py +++ b/lib/jqueue.py @@ -69,7 +69,7 @@ def TimeStampNow(): class _QueuedOpCode(object): - """Encasulates an opcode object. + """Encapsulates an opcode object. @ivar log: holds the execution log and consists of tuples of the form C{(log_serial, timestamp, level, message)} @@ -286,7 +286,7 @@ class _QueuedJob(object): """Selectively returns the log entries. @type newer_than: None or int - @param newer_than: if this is None, return all log enties, + @param newer_than: if this is None, return all log entries, otherwise return only the log entries with serial higher than this value @rtype: list @@ -469,7 +469,7 @@ class _JobQueueWorkerPool(workerpool.WorkerPool): class JobQueue(object): - """Quue used to manaage the jobs. + """Queue used to manage the jobs. 
@cvar _RE_JOB_FILE: regex matching the valid job file names @@ -657,7 +657,7 @@ class JobQueue(object): Since we aim to keep consistency should this node (the current master) fail, we will log errors if our rpc fail, and especially - log the case when more than half of the nodes failes. + log the case when more than half of the nodes fails. @param result: the data as returned from the rpc call @type nodes: list @@ -940,7 +940,7 @@ class JobQueue(object): and in the future we might merge them. @type drain_flag: boolean - @param drain_flag: wheter to set or unset the drain flag + @param drain_flag: Whether to set or unset the drain flag """ if drain_flag: diff --git a/lib/jstore.py b/lib/jstore.py index 4d9189e39eee121bd861a713a20116f2387ba904..5c5996807e7881d8f1ead4a4bb551a6a653f6188 100644 --- a/lib/jstore.py +++ b/lib/jstore.py @@ -22,9 +22,7 @@ """Module implementing the job queue handling.""" import os -import logging import errno -import re from ganeti import constants from ganeti import errors diff --git a/lib/locking.py b/lib/locking.py index 647e14f4c030b393dc3addfe144bfa645c5e8627..16f302e73a0e72647d87f16271c91719af6317a2 100644 --- a/lib/locking.py +++ b/lib/locking.py @@ -297,7 +297,7 @@ class SharedLock: # Whenever we want to acquire a full LockSet we pass None as the value -# to acquire. Hide this behing this nicely named constant. +# to acquire. Hide this behind this nicely named constant. ALL_SET = None @@ -689,7 +689,7 @@ BGL = 'BGL' class GanetiLockManager: """The Ganeti Locking Library - The purpouse of this small library is to manage locking for ganeti clusters + The purpose of this small library is to manage locking for ganeti clusters in a central place, while at the same time doing dynamic checks against possible deadlocks. It will also make it easier to transition to a different lock type should we migrate away from python threads. @@ -774,7 +774,7 @@ class GanetiLockManager: """Acquire a set of resource locks, at the same level. 
@param level: the level at which the locks shall be acquired; - it must be a memmber of LEVELS. + it must be a member of LEVELS. @param names: the names of the locks which shall be acquired (special lock names, or instance/node names) @param shared: whether to acquire in shared mode; by default @@ -809,7 +809,7 @@ class GanetiLockManager: mode, before releasing them. @param level: the level at which the locks shall be released; - it must be a memmber of LEVELS + it must be a member of LEVELS @param names: the names of the locks which shall be released (defaults to all the locks acquired at that level) @@ -827,7 +827,7 @@ class GanetiLockManager: """Add locks at the specified level. @param level: the level at which the locks shall be added; - it must be a memmber of LEVELS_MOD. + it must be a member of LEVELS_MOD. @param names: names of the locks to acquire @param acquired: whether to acquire the newly added locks @param shared: whether the acquisition will be shared diff --git a/lib/luxi.py b/lib/luxi.py index 1c3ca6d3aa5a7fb7a3f4d954132dbc60fdb4f1ba..11ea61d29d830724401f0f189bf95abbdc1654dd 100644 --- a/lib/luxi.py +++ b/lib/luxi.py @@ -187,12 +187,13 @@ class Transport: raise EncodingError("Message terminator found in payload") self._CheckSocket() try: + # TODO: sendall is not guaranteed to send everything self.socket.sendall(msg + self.eom) except socket.timeout, err: raise TimeoutError("Sending timeout: %s" % str(err)) def Recv(self): - """Try to receive a messae from the socket. + """Try to receive a message from the socket. In case we already have messages queued, we just return from the queue. 
Otherwise, we try to read data with a _rwtimeout network @@ -205,10 +206,16 @@ class Transport: while not self._msgs: if time.time() > etime: raise TimeoutError("Extended receive timeout") - try: - data = self.socket.recv(4096) - except socket.timeout, err: - raise TimeoutError("Receive timeout: %s" % str(err)) + while True: + try: + data = self.socket.recv(4096) + except socket.error, err: + if err.args and err.args[0] == errno.EAGAIN: + continue + raise + except socket.timeout, err: + raise TimeoutError("Receive timeout: %s" % str(err)) + break if not data: raise ConnectionClosedError("Connection closed while reading") new_msgs = (self._buffer + data).split(self.eom) @@ -278,7 +285,7 @@ class Client(object): old_transp = self.transport self.transport = None old_transp.Close() - except Exception, err: + except Exception: pass def CallMethod(self, method, args): diff --git a/lib/mcpu.py b/lib/mcpu.py index 0844b7409dac85337df362f18b9c2331ec8ea5c6..78faa37ddbf5e3344f68310efce853a8a4529308 100644 --- a/lib/mcpu.py +++ b/lib/mcpu.py @@ -168,7 +168,7 @@ class Processor(object): self.context.glm.add(level, add_locks, acquired=1, shared=share) except errors.LockError: raise errors.OpPrereqError( - "Coudn't add locks (%s), probably because of a race condition" + "Couldn't add locks (%s), probably because of a race condition" " with another job, who added them first" % add_locks) try: try: @@ -197,7 +197,7 @@ class Processor(object): @type run_notifier: callable (no arguments) or None @param run_notifier: this function (if callable) will be called when we are about to call the lu's Exec() method, that - is, after we have aquired all locks + is, after we have acquired all locks """ if not isinstance(op, opcodes.OpCode): diff --git a/lib/objects.py b/lib/objects.py index 5ae1bb97881ec816d441c347114cd95e221e5338..106b704aa09f5e27de6f2cb9cb8eca9b627c7858 100644 --- a/lib/objects.py +++ b/lib/objects.py @@ -578,10 +578,10 @@ class Disk(ConfigObject): """Checks that this disk is 
correctly configured. """ - errs = [] + all_errors = [] if self.mode not in constants.DISK_ACCESS_SET: - errs.append("Disk access mode '%s' is invalid" % (self.mode, )) - return errs + all_errors.append("Disk access mode '%s' is invalid" % (self.mode, )) + return all_errors class Instance(TaggableObject): diff --git a/lib/rapi/baserlib.py b/lib/rapi/baserlib.py index a5522a5948ef1e38603c8fd01d2f80edad2c9652..be1524ed34a393b27ceeb7a32b48b6a36507b8ef 100644 --- a/lib/rapi/baserlib.py +++ b/lib/rapi/baserlib.py @@ -25,8 +25,6 @@ import logging -import ganeti.cli - from ganeti import luxi from ganeti import rapi from ganeti import http @@ -247,7 +245,7 @@ class R_Generic(object): val = 0 try: val = int(val) - except (ValueError, TypeError), err: + except (ValueError, TypeError): raise http.HttpBadRequest("Invalid value for the" " '%s' parameter" % (name,)) return val diff --git a/lib/rpc.py b/lib/rpc.py index b19c048cca24331e151a1e6d5c25f3a69eda973e..ed11ae4e41e0ac4af682306bf2a6ce7d4baabc42 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -31,7 +31,6 @@ # R0904: Too many public methods import os -import socket import logging import zlib import base64 @@ -83,7 +82,7 @@ class RpcResult(object): calls we can't raise an exception just because one one out of many failed, and therefore we use this class to encapsulate the result. - @ivar data: the data payload, for successfull results, or None + @ivar data: the data payload, for successful results, or None @type failed: boolean @ivar failed: whether the operation failed at transport level (not application level on the remote node) @@ -169,7 +168,7 @@ class Client: list of nodes, will contact (in parallel) all nodes, and return a dict of results (key: node name, value: result). - One current bug is that generic failure is still signalled by + One current bug is that generic failure is still signaled by 'False' result, which is not good. This overloading of values can cause bugs. 
@@ -228,7 +227,7 @@ class Client: @return: List of RPC results """ - assert _http_manager, "RPC module not intialized" + assert _http_manager, "RPC module not initialized" _http_manager.ExecRequests(self.nc.values()) @@ -277,9 +276,9 @@ class RpcRunner(object): @type instance: L{objects.Instance} @param instance: an Instance object @type hvp: dict or None - @param hvp: a dictionary with overriden hypervisor parameters + @param hvp: a dictionary with overridden hypervisor parameters @type bep: dict or None - @param bep: a dictionary with overriden backend parameters + @param bep: a dictionary with overridden backend parameters @rtype: dict @return: the instance dict, with the hvparams filled with the cluster defaults diff --git a/lib/ssh.py b/lib/ssh.py index 40df9996500dd1228399f37b89366bed03a27202..f0362b4b81933a3939d0f2975def532948be2ea2 100644 --- a/lib/ssh.py +++ b/lib/ssh.py @@ -201,7 +201,7 @@ class SshRunner: connected to). This is used to detect problems in ssh known_hosts files - (conflicting known hosts) and incosistencies between dns/hosts + (conflicting known hosts) and inconsistencies between dns/hosts entries and local machine names @param node: nodename of a host to check; can be short or diff --git a/lib/utils.py b/lib/utils.py index b552915e324e042c57010a0cabef6e582973ba4d..aace5eccea89b3cf5e1e8258736f6bc950c8d629 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -27,7 +27,6 @@ the command line scripts. 
""" -import sys import os import time import subprocess @@ -59,7 +58,6 @@ from ganeti import constants _locksheld = [] _re_shell_unquoted = re.compile('^[-.,=:/_+@A-Za-z0-9]+$') -debug = False debug_locks = False #: when set to True, L{RunCmd} is disabled @@ -136,7 +134,7 @@ def RunCmd(cmd, env=None, output=None, cwd='/'): directory for the command; the default will be / @rtype: L{RunResult} @return: RunResult instance - @raise erors.ProgrammerError: if we call this when forks are disabled + @raise errors.ProgrammerError: if we call this when forks are disabled """ if no_fork: @@ -665,7 +663,7 @@ def TryConvert(fn, val): """ try: nv = fn(val) - except (ValueError, TypeError), err: + except (ValueError, TypeError): nv = val return nv @@ -679,7 +677,7 @@ def IsValidIP(ip): @type ip: str @param ip: the address to be checked @rtype: a regular expression match object - @return: a regular epression match object, or None if the + @return: a regular expression match object, or None if the address is not valid """ @@ -712,7 +710,7 @@ def BuildShellCmd(template, *args): This function will check all arguments in the args list so that they are valid shell parameters (i.e. they don't contain shell - metacharaters). If everything is ok, it will return the result of + metacharacters). If everything is ok, it will return the result of template % args. 
@type template: str @@ -1041,7 +1039,7 @@ def ShellQuoteArgs(args): @type args: list @param args: list of arguments to be quoted @rtype: str - @return: the quoted arguments concatenaned with spaces + @return: the quoted arguments concatenated with spaces """ return ' '.join([ShellQuote(i) for i in args]) @@ -1058,7 +1056,7 @@ def TcpPing(target, port, timeout=10, live_port_needed=False, source=None): @type port: int @param port: the port to connect to @type timeout: int - @param timeout: the timeout on the connection attemp + @param timeout: the timeout on the connection attempt @type live_port_needed: boolean @param live_port_needed: whether a closed port will cause the function to return failure, as if there was a timeout @@ -1075,7 +1073,7 @@ def TcpPing(target, port, timeout=10, live_port_needed=False, source=None): if source is not None: try: sock.bind((source, 0)) - except socket.error, (errcode, errstring): + except socket.error, (errcode, _): if errcode == errno.EADDRNOTAVAIL: success = False @@ -1100,7 +1098,7 @@ def OwnIpAddress(address): address. 
@type address: string - @param address: the addres to check + @param address: the address to check @rtype: bool @return: True if we own the address @@ -1196,7 +1194,7 @@ def ReadFile(file_name, size=None): @type size: None or int @param size: Read at most size bytes @rtype: str - @return: the (possibly partial) conent of the file + @return: the (possibly partial) content of the file """ f = open(file_name, "r") @@ -1338,14 +1336,14 @@ def FirstFree(seq, base=0): def all(seq, pred=bool): "Returns True if pred(x) is True for every element in the iterable" - for elem in itertools.ifilterfalse(pred, seq): + for _ in itertools.ifilterfalse(pred, seq): return False return True def any(seq, pred=bool): "Returns True if pred(x) is True for at least one element in the iterable" - for elem in itertools.ifilter(pred, seq): + for _ in itertools.ifilter(pred, seq): return True return False @@ -1356,7 +1354,7 @@ def UniqueSequence(seq): Element order is preserved. @type seq: sequence - @param seq: the sequence with the source elementes + @param seq: the sequence with the source elements @rtype: list @return: list of unique elements from seq @@ -1368,7 +1366,7 @@ def UniqueSequence(seq): def IsValidMac(mac): """Predicate to check if a MAC address is valid. - Checks wether the supplied MAC address is formally correct, only + Checks whether the supplied MAC address is formally correct, only accepts colon separated format. 
@type mac: str @@ -1815,7 +1813,7 @@ def SafeEncode(text): """ if isinstance(text, unicode): - # onli if unicode; if str already, we handle it below + # only if unicode; if str already, we handle it below text = text.encode('ascii', 'backslashreplace') resu = "" for char in text: diff --git a/man/gnt-cluster.sgml b/man/gnt-cluster.sgml index 21f717003f77377ff9b0f5805bc172a65622a192..2634adb2e4e7970beafe0853d31a80c937ad0f5b 100644 --- a/man/gnt-cluster.sgml +++ b/man/gnt-cluster.sgml @@ -465,11 +465,32 @@ <cmdsynopsis> <command>masterfailover</command> + <arg>--no-voting</arg> </cmdsynopsis> <para> Failover the master role to the current node. </para> + + <para> + The <option>--no-voting</option> option skips the remote node agreement + checks. This is dangerous, but necessary in some cases (for example + failing over the master role in a 2 node cluster with the original master + down). If the original master then comes up, it won't be able to start + its master daemon because it won't have enough votes, but so won't the + new master, if the master daemon ever needs a restart. You can pass + --no-voting to ganeti-masterd on the new master to solve this problem, + and gnt-cluster redist-conf to make sure the cluster is consistent again. + </para> + + <para> + In version 2.0.X ganeti-masterd will not be able to start if + masterfailover is called with the --no-voting option (which, again, + should only be used on 2 nodes clusters with the former master being + down). In that case just start it manually passing --no-voting to it + as well, until you have restored cluster redundancy. + </para> + </refsect2> <refsect2> diff --git a/man/gnt-node.sgml b/man/gnt-node.sgml index f4836707107a3c62520548805d59e0563e6e89fd..99f80bffd62d957cd01b81439fdfc98fea3d672a 100644 --- a/man/gnt-node.sgml +++ b/man/gnt-node.sgml @@ -91,7 +91,7 @@ discussion in <citerefentry> <refentrytitle>gnt-cluster</refentrytitle> <manvolnum>8</manvolnum> </citerefentry> for more - informations. 
+ information. </para> <para> diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..61163e7f2a3f33490e68cde8fddb35f70d98a178 --- /dev/null +++ b/pylintrc @@ -0,0 +1,78 @@ +# Configuration file for pylint (http://www.logilab.org/project/pylint). See +# http://www.logilab.org/card/pylintfeatures for more detailed variable +# descriptions. + +[MASTER] +profile = no +ignore = +persistent = no +cache-size = 50000 +load-plugins = + +[REPORTS] +output-format = colorized +include-ids = no +files-output = no +reports = no +evaluation = 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) +comment = yes + +[BASIC] +required-attributes = +no-docstring-rgx = __.*__ +module-rgx = (([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ +const-rgx = ((_{0,2}[A-Z][A-Z0-9_]*)|(__.*__))$ +class-rgx = _?[A-Z][a-zA-Z0-9]+$ +function-rgx = (_?([A-Z]+[a-z0-9]+([A-Z]+[a-z0-9]*)*)|main)$ +method-rgx = (_{0,2}[A-Z]+[a-z0-9]+([A-Z]+[a-z0-9]*)*|__.*__)$ +attr-rgx = [a-z_][a-z0-9_]{1,30}$ +argument-rgx = [a-z_][a-z0-9_]*$ +variable-rgx = (_?([a-z_][a-z0-9_]*)|([A-Z0-9_]+))$ +inlinevar-rgx = [A-Za-z_][A-Za-z0-9_]*$ +good-names = i,j,k,_ +bad-names = foo,bar,baz,toto,tutu,tata +bad-functions = + +[TYPECHECK] +ignore-mixin-members = yes +zope = no +acquired-members = + +[VARIABLES] +init-import = no +dummy-variables-rgx = _ +additional-builtins = + +[CLASSES] +ignore-iface-methods = +defining-attr-methods = __init__,__new__,setUp + +[DESIGN] +max-args = 6 +max-locals = 15 +max-returns = 6 +max-branchs = 12 +max-statements = 50 +max-parents = 7 +max-attributes = 7 +min-public-methods = 2 +max-public-methods = 20 + +[IMPORTS] +deprecated-modules = regsub,string,TERMIOS,Bastion,rexec +import-graph = +ext-import-graph = +int-import-graph = + +[FORMAT] +max-line-length = 80 +max-module-lines = 1000 +indent-string = " " + +[MISCELLANEOUS] +notes = FIXME,XXX,TODO + +[SIMILARITIES] +min-similarity-lines = 4 +ignore-comments = yes 
+ignore-docstrings = yes diff --git a/scripts/gnt-cluster b/scripts/gnt-cluster index 505ffa2187a01b55ecee379dd8a0ae5a44eae718..66c9c65ebec9a66fcb897fe1ef03af608059a9ee 100755 --- a/scripts/gnt-cluster +++ b/scripts/gnt-cluster @@ -424,7 +424,15 @@ def MasterFailover(opts, args): @return: the desired exit code """ - return bootstrap.MasterFailover() + if opts.no_voting: + usertext = ("This will perform the failover even if most other nodes" + " are down, or if this node is outdated. This is dangerous" + " as it can lead to a non-consistent cluster. Check the" + " gnt-cluster(8) man page before proceeding. Continue?") + if not AskUser(usertext): + return 1 + + return bootstrap.MasterFailover(no_voting=opts.no_voting) def SearchTags(opts, args): @@ -619,7 +627,12 @@ commands = { "", "Does a check on the cluster configuration"), 'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT], "", "Does a check on the cluster disk status"), - 'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT], + 'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT, + make_option("--no-voting", dest="no_voting", + help="Skip node agreement check (dangerous)", + action="store_true", + default=False,), + ], "", "Makes the current node the master"), 'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT], "", "Shows the cluster version"), diff --git a/scripts/gnt-node b/scripts/gnt-node index 6dd4e1dbf0e2dadfa7cee8752e611e58ea1f2bd8..c2b2f88bbd1450a6d3b6a6cd83db6ba3479ab000 100755 --- a/scripts/gnt-node +++ b/scripts/gnt-node @@ -100,18 +100,7 @@ def AddNode(opts, args): output = cl.QueryConfigValues(['cluster_name']) cluster_name = output[0] - if readd: - # clear the offline and drain flags on the node - ToStdout("Resetting the 'offline' and 'drained' flags due to re-add") - op = opcodes.OpSetNodeParams(node_name=node, force=True, - offline=False, drained=False) - - result = SubmitOpCode(op, cl=cl) - if result: - ToStdout("Modified:") - for param, data in result: - ToStdout(" - %-5s -> 
%s", param, data) - else: + if not readd: ToStderr("-- WARNING -- \n" "Performing this operation is going to replace the ssh daemon" " keypair\n" diff --git a/tools/lvmstrap b/tools/lvmstrap index 0a17a5f7c27b39b7bfe95107c1f12f60fa1dfca1..ed92a12789f1feef1b7c79159d7d8e293a071c9d 100755 --- a/tools/lvmstrap +++ b/tools/lvmstrap @@ -267,7 +267,7 @@ def CheckSysDev(name, devnum): devnum: the device number, e.g. 0x803 (2051 in decimal) for sda3 Returns: - None; failure of the check is signalled by raising a + None; failure of the check is signaled by raising a SysconfigError exception """ @@ -447,7 +447,7 @@ def GetMountInfo(): def DevInfo(name, dev, mountinfo): - """Computes miscellaneous informations about a block device. + """Computes miscellaneous information about a block device. Args: name: the device name, e.g. sda @@ -476,7 +476,7 @@ def DevInfo(name, dev, mountinfo): def ShowDiskInfo(opts): """Shows a nicely formatted block device list for this system. - This function shows the user a table with the informations gathered + This function shows the user a table with the information gathered by the other functions defined, in order to help the user make a choice about which disks should be allocated to our volume group.