From 89e1fc26888652d244bc5ff09cd95081eaf2561b Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Fri, 21 Sep 2007 13:37:49 +0000
Subject: [PATCH] Remove requirement that host names are FQDN

We currently require that hostnames are FQDN not short names
(node1.example.com instead of node1). We can allow short names as long
as:
  - we always resolve the names as returned by socket.gethostname()
  - we rely on having a working resolver

These issues are not as big as may seem, as we only did gethostname() in
a few places in order to check for the master; we already required
working resolver all over the code for the other nodes names (and thus
requiring the same for the current node name is normal).  The patch
moves some resolver calls from within execution path to the checking
path (which can abort without any problems). It is important that after
this patch is applied, no name resolving is called from the execution
path (LU.Exec() or other code that is called from within those methods)
as in this case we get much better code flow.

This patch also changes the functions for doing name lookups and
encapsulates all functionality in a single class.

The final change is that, by requiring working resolver at all times, we
can change the 'return None' into an exception and thus we don't have to
check manually each time; only some special cases will check
(ganeti-daemon and ganeti-watcher which are not covered by the
generalized exception handling in cli.py). The code is cleaner this way.

Reviewed-by: imsnah
---
 daemons/ganeti-master          |  9 ++++--
 daemons/ganeti-watcher         |  7 +++--
 lib/cli.py                     |  8 ++++++
 lib/cmdlib.py                  | 49 ++++++++------------------------
 lib/config.py                  |  8 ++++--
 lib/errors.py                  | 13 +++++++++
 lib/utils.py                   | 51 +++++++++++++++++++++-------------
 test/ganeti.config_unittest.py |  2 +-
 test/mocks.py                  |  5 ++--
 9 files changed, 87 insertions(+), 65 deletions(-)

diff --git a/daemons/ganeti-master b/daemons/ganeti-master
index 1863d909a..75c0d6c3e 100755
--- a/daemons/ganeti-master
+++ b/daemons/ganeti-master
@@ -35,7 +35,6 @@ generic errors as other python code can cause exit with code 1.
 
 import os
 import sys
-import socket
 
 from optparse import OptionParser
 
@@ -150,6 +149,12 @@ def main():
   """
   options, args = ParseOptions()
   debug = options.debug
+  try:
+    myself = utils.HostInfo()
+  except errors.ResolverError, err:
+    sys.stderr.write("Cannot resolve my own name (%s)\n" % err.args[0])
+    return EXIT_NODESETUP_ERROR
+
   result = CheckNodeSetup(debug)
   if not result:
     if debug:
@@ -157,7 +162,7 @@ def main():
     return EXIT_NODESETUP_ERROR
 
   master_node, master_netdev, master_ip = result
-  if socket.gethostname() != master_node and args[0] == "start":
+  if myself.name != master_node and args[0] == "start":
     if debug:
       sys.stderr.write("Not master, ignoring request.\n")
     return EXIT_NOTMASTER
diff --git a/daemons/ganeti-watcher b/daemons/ganeti-watcher
index ff798b124..ea2bf6249 100755
--- a/daemons/ganeti-watcher
+++ b/daemons/ganeti-watcher
@@ -39,13 +39,13 @@ import sys
 import time
 import fcntl
 import errno
-import socket
 from optparse import OptionParser
 
 
 from ganeti import utils
 from ganeti import constants
 from ganeti import ssconf
+from ganeti import errors
 
 
 class Error(Exception):
@@ -263,7 +263,7 @@ class Restarter(object):
   def __init__(self):
     sstore = ssconf.SimpleStore()
     master = sstore.GetMasterNode()
-    if master != socket.gethostname():
+    if master != utils.HostInfo().name:
       raise NotMasterError("This is not the master node")
     self.instances = InstanceList()
     self.messages = []
@@ -357,6 +357,9 @@ def main():
     if options.debug:
       sys.stderr.write("Not master, exiting.\n")
     sys.exit(constants.EXIT_NOTMASTER)
+  except errors.ResolverError, err:
+    sys.stderr.write("Cannot resolve hostname '%s', exiting.\n" % err.args[0])
+    sys.exit(constants.EXIT_NODESETUP_ERROR)
   except Error, err:
     print err
 
diff --git a/lib/cli.py b/lib/cli.py
index 26fa0a0b0..fbc7f12ea 100644
--- a/lib/cli.py
+++ b/lib/cli.py
@@ -296,6 +296,14 @@ def GenericMain(commands):
     except errors.HooksFailure, err:
       logger.ToStderr("Failure: hooks general failure: %s" % str(err))
       result = 1
+    except errors.ResolverError, err:
+      this_host = utils.HostInfo.SysName()
+      if err.args[0] == this_host:
+        msg = "Failure: can't resolve my own hostname ('%s')"
+      else:
+        msg = "Failure: can't resolve hostname '%s'"
+      logger.ToStderr(msg % err.args[0])
+      result = 1
     except errors.OpPrereqError, err:
       logger.ToStderr("Failure: prerequisites not met for this"
                       " operation:\n%s" % str(err))
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index d3f583c8f..25996873d 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -85,7 +85,7 @@ class LogicalUnit(object):
                                    " use 'gnt-cluster init' first.")
       if self.REQ_MASTER:
         master = sstore.GetMasterNode()
-        if master != socket.gethostname():
+        if master != utils.HostInfo().name:
           raise errors.OpPrereqError("Commands must be run on the master"
                                      " node %s" % master)
 
@@ -558,27 +558,14 @@ class LUInitCluster(LogicalUnit):
     if config.ConfigWriter.IsCluster():
       raise errors.OpPrereqError("Cluster is already initialised")
 
-    hostname_local = socket.gethostname()
-    self.hostname = hostname = utils.LookupHostname(hostname_local)
-    if not hostname:
-      raise errors.OpPrereqError("Cannot resolve my own hostname ('%s')" %
-                                 hostname_local)
-
-    if hostname.name != hostname_local:
-      raise errors.OpPrereqError("My own hostname (%s) does not match the"
-                                 " resolver (%s): probably not using FQDN"
-                                 " for hostname." %
-                                 (hostname_local, hostname.name))
+    self.hostname = hostname = utils.HostInfo()
 
     if hostname.ip.startswith("127."):
       raise errors.OpPrereqError("This host's IP resolves to the private"
                                  " range (%s). Please fix DNS or /etc/hosts." %
                                  (hostname.ip,))
 
-    self.clustername = clustername = utils.LookupHostname(self.op.cluster_name)
-    if not clustername:
-      raise errors.OpPrereqError("Cannot resolve given cluster name ('%s')"
-                                 % self.op.cluster_name)
+    self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
 
     result = utils.RunCmd(["fping", "-S127.0.0.1", "-q", hostname.ip])
     if result.failed:
@@ -961,10 +948,7 @@ class LURenameCluster(LogicalUnit):
     """Verify that the passed name is a valid one.
 
     """
-    hostname = utils.LookupHostname(self.op.name)
-    if not hostname:
-      raise errors.OpPrereqError("Cannot resolve the new cluster name ('%s')" %
-                                 self.op.name)
+    hostname = utils.HostInfo(self.op.name)
 
     new_name = hostname.name
     self.ip = new_ip = hostname.ip
@@ -1404,9 +1388,7 @@ class LUAddNode(LogicalUnit):
     node_name = self.op.node_name
     cfg = self.cfg
 
-    dns_data = utils.LookupHostname(node_name)
-    if not dns_data:
-      raise errors.OpPrereqError("Node %s is not resolvable" % node_name)
+    dns_data = utils.HostInfo(node_name)
 
     node = dns_data.name
     primary_ip = self.op.primary_ip = dns_data.ip
@@ -1614,8 +1596,7 @@ class LUMasterFailover(LogicalUnit):
     This checks that we are not already the master.
 
     """
-    self.new_master = socket.gethostname()
-
+    self.new_master = utils.HostInfo().name
     self.old_master = self.sstore.GetMasterNode()
 
     if self.old_master == self.new_master:
@@ -1716,7 +1697,7 @@ class LUClusterCopyFile(NoHooksLU):
     """
     filename = self.op.filename
 
-    myname = socket.gethostname()
+    myname = utils.HostInfo().name
 
     for node in self.nodes:
       if node == myname:
@@ -2152,18 +2133,15 @@ class LURenameInstance(LogicalUnit):
     self.instance = instance
 
     # new name verification
-    hostname1 = utils.LookupHostname(self.op.new_name)
-    if not hostname1:
-      raise errors.OpPrereqError("New instance name '%s' not found in dns" %
-                                 self.op.new_name)
+    name_info = utils.HostInfo(self.op.new_name)
 
-    self.op.new_name = new_name = hostname1.name
+    self.op.new_name = new_name = name_info.name
     if not getattr(self.op, "ignore_ip", False):
-      command = ["fping", "-q", hostname1.ip]
+      command = ["fping", "-q", name_info.ip]
       result = utils.RunCmd(command)
       if not result.failed:
         raise errors.OpPrereqError("IP %s of instance %s already in use" %
-                                   (hostname1.ip, new_name))
+                                   (name_info.ip, new_name))
 
 
   def Exec(self, feedback_fn):
@@ -2839,10 +2817,7 @@ class LUCreateInstance(LogicalUnit):
                                  " primary node"  % self.op.os_type)
 
     # instance verification
-    hostname1 = utils.LookupHostname(self.op.instance_name)
-    if not hostname1:
-      raise errors.OpPrereqError("Instance name '%s' not found in dns" %
-                                 self.op.instance_name)
+    hostname1 = utils.HostInfo(self.op.instance_name)
 
     self.op.instance_name = instance_name = hostname1.name
     instance_list = self.cfg.GetInstanceList()
diff --git a/lib/config.py b/lib/config.py
index ef53cde55..05141a438 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -35,7 +35,6 @@ we reverted to pickle using custom Unpicklers.
 """
 
 import os
-import socket
 import tempfile
 import random
 
@@ -78,6 +77,11 @@ class ConfigWriter:
     else:
       self._cfg_file = cfg_file
     self._temporary_ids = set()
+    # Note: in order to prevent errors when resolving our name in
+    # _DistributeConfig, we compute it here once and reuse it; it's
+    # better to raise an error before starting to modify the config
+    # file than after it was modified
+    self._my_hostname = utils.HostInfo().name
 
   # this method needs to be static, so that we can call it on the class
   @staticmethod
@@ -527,7 +531,7 @@ class ConfigWriter:
       return True
     bad = False
     nodelist = self.GetNodeList()
-    myhostname = socket.gethostname()
+    myhostname = self._my_hostname
 
     tgt_list = []
     for node in nodelist:
diff --git a/lib/errors.py b/lib/errors.py
index 546ed27b7..3d3317d3d 100644
--- a/lib/errors.py
+++ b/lib/errors.py
@@ -151,6 +151,19 @@ class OpCodeUnknown(GenericError):
   """
 
 
+class ResolverError(GenericError):
+  """Host name cannot be resolved.
+
+  This is not a normal situation for Ganeti, as we rely on having a
+  working resolver.
+
+  The non-resolvable hostname is available as the first element of the
+  args tuple; the other two elements of the tuple are the first two
+  args of the socket.gaierror exception (error code and description).
+
+  """
+
+
 class HooksFailure(GenericError):
   """A generic hook failure.
 
diff --git a/lib/utils.py b/lib/utils.py
index e9f426f80..d964f8687 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -397,38 +397,51 @@ def MatchNameComponent(key, name_list):
 
 
 class HostInfo:
-  """Class holding host info as returned by gethostbyname
+  """Class implementing resolver and hostname functionality
 
   """
-  def __init__(self, name, aliases, ipaddrs):
+  def __init__(self, name=None):
     """Initialize the host name object.
 
-    Arguments are the same as returned by socket.gethostbyname_ex()
+    If the name argument is not passed, it will use this system's
+    name.
 
     """
-    self.name = name
-    self.aliases = aliases
-    self.ipaddrs = ipaddrs
+    if name is None:
+      name = self.SysName()
+
+    self.query = name
+    self.name, self.aliases, self.ipaddrs = self.LookupHostname(name)
     self.ip = self.ipaddrs[0]
 
+  @staticmethod
+  def SysName():
+    """Return the current system's name.
 
-def LookupHostname(hostname):
-  """Look up hostname
+    This is simply a wrapper over socket.gethostname()
 
-  Args:
-    hostname: hostname to look up, can be also be a non FQDN
+    """
+    return socket.gethostname()
 
-  Returns:
-    a HostInfo object
+  @staticmethod
+  def LookupHostname(hostname):
+    """Look up hostname
 
-  """
-  try:
-    (name, aliases, ipaddrs) = socket.gethostbyname_ex(hostname)
-  except socket.gaierror:
-    # hostname not found in DNS
-    return None
+    Args:
+      hostname: hostname to look up
+
+    Returns:
+      a tuple (name, aliases, ipaddrs) as returned by socket.gethostbyname_ex
+      in case of errors in resolving, we raise a ResolverError
+
+    """
+    try:
+      result = socket.gethostbyname_ex(hostname)
+    except socket.gaierror, err:
+      # hostname not found in DNS
+      raise errors.ResolverError(hostname, err.args[0], err.args[1])
 
-  return HostInfo(name, aliases, ipaddrs)
+    return result
 
 
 def ListVolumeGroups():
diff --git a/test/ganeti.config_unittest.py b/test/ganeti.config_unittest.py
index f2bc1e880..c5e51158b 100755
--- a/test/ganeti.config_unittest.py
+++ b/test/ganeti.config_unittest.py
@@ -54,7 +54,7 @@ class TestConfigRunner(unittest.TestCase):
 
   def _init_cluster(self, cfg):
     """Initializes the cfg object"""
-    cfg.InitConfig(socket.gethostname(), '127.0.0.1', None, '', 'aa:00:00',
+    cfg.InitConfig(utils.HostInfo().name, '127.0.0.1', None, '', 'aa:00:00',
                    'xenvg', constants.DEFAULT_BRIDGE)
 
   def _create_instance(self):
diff --git a/test/mocks.py b/test/mocks.py
index 866e212c0..4f569a68b 100644
--- a/test/mocks.py
+++ b/test/mocks.py
@@ -22,6 +22,7 @@
 """Module implementing a fake ConfigWriter"""
 
 import socket
+from ganeti import utils
 
 class FakeConfig:
     """Fake configuration object"""
@@ -33,7 +34,7 @@ class FakeConfig:
         return ["a", "b", "c"]
 
     def GetMaster(self):
-        return socket.gethostname()
+        return utils.HostInfo().name
 
 
 class FakeSStore:
@@ -43,4 +44,4 @@ class FakeSStore:
         return "test.cluster"
 
     def GetMasterNode(self):
-        return socket.gethostname()
+        return utils.HostInfo().name
-- 
GitLab