diff --git a/daemons/ganeti-master b/daemons/ganeti-master index 1863d909a6d9bef451cf90c15d72c636b25db264..75c0d6c3ee6b25f1c3ed74d927dc993dd035f99a 100755 --- a/daemons/ganeti-master +++ b/daemons/ganeti-master @@ -35,7 +35,6 @@ generic errors as other python code can cause exit with code 1. import os import sys -import socket from optparse import OptionParser @@ -150,6 +149,12 @@ def main(): """ options, args = ParseOptions() debug = options.debug + try: + myself = utils.HostInfo() + except errors.ResolverError, err: + sys.stderr.write("Cannot resolve my own name (%s)\n" % err.args[0]) + return EXIT_NODESETUP_ERROR + result = CheckNodeSetup(debug) if not result: if debug: @@ -157,7 +162,7 @@ def main(): return EXIT_NODESETUP_ERROR master_node, master_netdev, master_ip = result - if socket.gethostname() != master_node and args[0] == "start": + if myself.name != master_node and args[0] == "start": if debug: sys.stderr.write("Not master, ignoring request.\n") return EXIT_NOTMASTER diff --git a/daemons/ganeti-watcher b/daemons/ganeti-watcher index ff798b124454d04ecf9cb166f4dce4d17c751117..ea2bf624962c2698190e53b1f5e4fe6117560cc8 100755 --- a/daemons/ganeti-watcher +++ b/daemons/ganeti-watcher @@ -39,13 +39,13 @@ import sys import time import fcntl import errno -import socket from optparse import OptionParser from ganeti import utils from ganeti import constants from ganeti import ssconf +from ganeti import errors class Error(Exception): @@ -263,7 +263,7 @@ class Restarter(object): def __init__(self): sstore = ssconf.SimpleStore() master = sstore.GetMasterNode() - if master != socket.gethostname(): + if master != utils.HostInfo().name: raise NotMasterError("This is not the master node") self.instances = InstanceList() self.messages = [] @@ -357,6 +357,9 @@ def main(): if options.debug: sys.stderr.write("Not master, exiting.\n") sys.exit(constants.EXIT_NOTMASTER) + except errors.ResolverError, err: + sys.stderr.write("Cannot resolve hostname '%s', exiting.\n" % err.args[0]) + sys.exit(constants.EXIT_NODESETUP_ERROR) except Error, err: print err diff --git a/lib/cli.py b/lib/cli.py index 26fa0a0b0aa3de7494d3c9edbf57d81a9f849b24..fbc7f12ea643078c07e412c030874797e8c8c067 100644 --- a/lib/cli.py +++ b/lib/cli.py @@ -296,6 +296,14 @@ def GenericMain(commands): except errors.HooksFailure, err: logger.ToStderr("Failure: hooks general failure: %s" % str(err)) result = 1 + except errors.ResolverError, err: + this_host = utils.HostInfo.SysName() + if err.args[0] == this_host: + msg = "Failure: can't resolve my own hostname ('%s')" + else: + msg = "Failure: can't resolve hostname '%s'" + logger.ToStderr(msg % err.args[0]) + result = 1 except errors.OpPrereqError, err: logger.ToStderr("Failure: prerequisites not met for this" " operation:\n%s" % str(err)) diff --git a/lib/cmdlib.py b/lib/cmdlib.py index d3f583c8ff74a38e44254a4642c7246e3cb93413..25996873d187c155c8110934e41967cab3b3e859 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -85,7 +85,7 @@ class LogicalUnit(object): " use 'gnt-cluster init' first.") if self.REQ_MASTER: master = sstore.GetMasterNode() - if master != socket.gethostname(): + if master != utils.HostInfo().name: raise errors.OpPrereqError("Commands must be run on the master" " node %s" % master) @@ -558,27 +558,14 @@ class LUInitCluster(LogicalUnit): if config.ConfigWriter.IsCluster(): raise errors.OpPrereqError("Cluster is already initialised") - hostname_local = socket.gethostname() - self.hostname = hostname = utils.LookupHostname(hostname_local) - if not hostname: - raise errors.OpPrereqError("Cannot resolve my own hostname ('%s')" % - hostname_local) - - if hostname.name != hostname_local: - raise errors.OpPrereqError("My own hostname (%s) does not match the" - " resolver (%s): probably not using FQDN" - " for hostname." % - (hostname_local, hostname.name)) + self.hostname = hostname = utils.HostInfo() if hostname.ip.startswith("127."): raise errors.OpPrereqError("This host's IP resolves to the private" " range (%s). Please fix DNS or /etc/hosts." % (hostname.ip,)) - self.clustername = clustername = utils.LookupHostname(self.op.cluster_name) - if not clustername: - raise errors.OpPrereqError("Cannot resolve given cluster name ('%s')" - % self.op.cluster_name) + self.clustername = clustername = utils.HostInfo(self.op.cluster_name) result = utils.RunCmd(["fping", "-S127.0.0.1", "-q", hostname.ip]) if result.failed: @@ -961,10 +948,7 @@ class LURenameCluster(LogicalUnit): """Verify that the passed name is a valid one. """ - hostname = utils.LookupHostname(self.op.name) - if not hostname: - raise errors.OpPrereqError("Cannot resolve the new cluster name ('%s')" % - self.op.name) + hostname = utils.HostInfo(self.op.name) new_name = hostname.name self.ip = new_ip = hostname.ip @@ -1404,9 +1388,7 @@ class LUAddNode(LogicalUnit): node_name = self.op.node_name cfg = self.cfg - dns_data = utils.LookupHostname(node_name) - if not dns_data: - raise errors.OpPrereqError("Node %s is not resolvable" % node_name) + dns_data = utils.HostInfo(node_name) node = dns_data.name primary_ip = self.op.primary_ip = dns_data.ip @@ -1614,8 +1596,7 @@ class LUMasterFailover(LogicalUnit): This checks that we are not already the master. """ - self.new_master = socket.gethostname() - + self.new_master = utils.HostInfo().name self.old_master = self.sstore.GetMasterNode() if self.old_master == self.new_master: @@ -1716,7 +1697,7 @@ class LUClusterCopyFile(NoHooksLU): """ filename = self.op.filename - myname = socket.gethostname() + myname = utils.HostInfo().name for node in self.nodes: if node == myname: @@ -2152,18 +2133,15 @@ class LURenameInstance(LogicalUnit): self.instance = instance # new name verification - hostname1 = utils.LookupHostname(self.op.new_name) - if not hostname1: - raise errors.OpPrereqError("New instance name '%s' not found in dns" % - self.op.new_name) + name_info = utils.HostInfo(self.op.new_name) - self.op.new_name = new_name = hostname1.name + self.op.new_name = new_name = name_info.name if not getattr(self.op, "ignore_ip", False): - command = ["fping", "-q", hostname1.ip] + command = ["fping", "-q", name_info.ip] result = utils.RunCmd(command) if not result.failed: raise errors.OpPrereqError("IP %s of instance %s already in use" % - (hostname1.ip, new_name)) + (name_info.ip, new_name)) def Exec(self, feedback_fn): @@ -2839,10 +2817,7 @@ class LUCreateInstance(LogicalUnit): " primary node" % self.op.os_type) # instance verification - hostname1 = utils.LookupHostname(self.op.instance_name) - if not hostname1: - raise errors.OpPrereqError("Instance name '%s' not found in dns" % - self.op.instance_name) + hostname1 = utils.HostInfo(self.op.instance_name) self.op.instance_name = instance_name = hostname1.name instance_list = self.cfg.GetInstanceList() diff --git a/lib/config.py b/lib/config.py index ef53cde551ed05ae0a3e5a85ddfdd4aaee67308f..05141a438a2983e53f193aac7ebd931ee2ecfc2b 100644 --- a/lib/config.py +++ b/lib/config.py @@ -35,7 +35,6 @@ we reverted to pickle using custom Unpicklers. """ import os -import socket import tempfile import random @@ -78,6 +77,11 @@ class ConfigWriter: else: self._cfg_file = cfg_file self._temporary_ids = set() + # Note: in order to prevent errors when resolving our name in + # _DistributeConfig, we compute it here once and reuse it; it's + # better to raise an error before starting to modify the config + # file than after it was modified + self._my_hostname = utils.HostInfo().name # this method needs to be static, so that we can call it on the class @staticmethod @@ -527,7 +531,7 @@ class ConfigWriter: return True bad = False nodelist = self.GetNodeList() - myhostname = socket.gethostname() + myhostname = self._my_hostname tgt_list = [] for node in nodelist: diff --git a/lib/errors.py b/lib/errors.py index 546ed27b7c3f2ba288299501104141477b09f8f2..3d3317d3d520f83ccbaf8a641029ff5326798ff2 100644 --- a/lib/errors.py +++ b/lib/errors.py @@ -151,6 +151,19 @@ class OpCodeUnknown(GenericError): """ +class ResolverError(GenericError): + """Host name cannot be resolved. + + This is not a normal situation for Ganeti, as we rely on having a + working resolver. + + The non-resolvable hostname is available as the first element of the + args tuple; the other two elements of the tuple are the first two + args of the socket.gaierror exception (error code and description). + + """ + + class HooksFailure(GenericError): """A generic hook failure. diff --git a/lib/utils.py b/lib/utils.py index e9f426f80440a1a867ae2a5bd77af16183c2446a..d964f8687ae225d9c0416765a30106738ce4583b 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -397,38 +397,51 @@ def MatchNameComponent(key, name_list): class HostInfo: - """Class holding host info as returned by gethostbyname + """Class implementing resolver and hostname functionality """ - def __init__(self, name, aliases, ipaddrs): + def __init__(self, name=None): """Initialize the host name object. - Arguments are the same as returned by socket.gethostbyname_ex() + If the name argument is not passed, it will use this system's + name. """ - self.name = name - self.aliases = aliases - self.ipaddrs = ipaddrs + if name is None: + name = self.SysName() + + self.query = name + self.name, self.aliases, self.ipaddrs = self.LookupHostname(name) self.ip = self.ipaddrs[0] + @staticmethod + def SysName(): + """Return the current system's name. -def LookupHostname(hostname): - """Look up hostname + This is simply a wrapper over socket.gethostname() - Args: - hostname: hostname to look up, can be also be a non FQDN + """ + return socket.gethostname() - Returns: - a HostInfo object + @staticmethod + def LookupHostname(hostname): + """Look up hostname - """ - try: - (name, aliases, ipaddrs) = socket.gethostbyname_ex(hostname) - except socket.gaierror: - # hostname not found in DNS - return None + Args: + hostname: hostname to look up + + Returns: + a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex + in case of errors in resolving, we raise a ResolverError + + """ + try: + result = socket.gethostbyname_ex(hostname) + except socket.gaierror, err: + # hostname not found in DNS + raise errors.ResolverError(hostname, err.args[0], err.args[1]) - return HostInfo(name, aliases, ipaddrs) + return result def ListVolumeGroups(): diff --git a/test/ganeti.config_unittest.py b/test/ganeti.config_unittest.py index f2bc1e880005bf6f902b98401a1a4553cb884eb8..c5e51158bfa7dce7b530ba2cef0a5aa820e579fd 100755 --- a/test/ganeti.config_unittest.py +++ b/test/ganeti.config_unittest.py @@ -54,7 +54,7 @@ class TestConfigRunner(unittest.TestCase): def _init_cluster(self, cfg): """Initializes the cfg object""" - cfg.InitConfig(socket.gethostname(), '127.0.0.1', None, '', 'aa:00:00', + cfg.InitConfig(utils.HostInfo().name, '127.0.0.1', None, '', 'aa:00:00', 'xenvg', constants.DEFAULT_BRIDGE) def _create_instance(self): diff --git a/test/mocks.py b/test/mocks.py index 866e212c0c0da1cf3f5243c875836442e59c38bc..4f569a68b8c97ef1d8694c041ff88482c04c25a1 100644 --- a/test/mocks.py +++ b/test/mocks.py @@ -22,6 +22,7 @@ """Module implementing a fake ConfigWriter""" import socket +from ganeti import utils class FakeConfig: """Fake configuration object""" @@ -33,7 +34,7 @@ class FakeConfig: return ["a", "b", "c"] def GetMaster(self): - return socket.gethostname() + return utils.HostInfo().name class FakeSStore: @@ -43,4 +44,4 @@ class FakeSStore: return "test.cluster" def GetMasterNode(self): - return socket.gethostname() + return utils.HostInfo().name