diff --git a/lib/backend.py b/lib/backend.py index f5f258bd25c1cb6439b4d343015420beed28b716..cce99aba726755574160655942abb0865f53409f 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -551,6 +551,10 @@ def VerifyNode(what, cluster_name): tmpr.append("The procfs filesystem doesn't seem to be mounted" " under /proc, missing required directory /proc/sys and" " the file /proc/sysrq-trigger") + + if constants.NV_TIME in what: + result[constants.NV_TIME] = utils.SplitTime(time.time()) + return result diff --git a/lib/cmdlib.py b/lib/cmdlib.py index c149a229d49f404db72a84c049eda77bffd32b7a..9bbc1965534cf5e9b3db599521504539095db335 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -944,6 +944,7 @@ class LUVerifyCluster(LogicalUnit): ENODESSH = (TNODE, "ENODESSH") ENODEVERSION = (TNODE, "ENODEVERSION") ENODESETUP = (TNODE, "ENODESETUP") + ENODETIME = (TNODE, "ENODETIME") ETYPE_FIELD = "code" ETYPE_ERROR = "ERROR" @@ -1326,14 +1327,23 @@ class LUVerifyCluster(LogicalUnit): constants.NV_VERSION: None, constants.NV_HVINFO: self.cfg.GetHypervisorType(), constants.NV_NODESETUP: None, + constants.NV_TIME: None, } + if vg_name is not None: node_verify_param[constants.NV_VGLIST] = None node_verify_param[constants.NV_LVLIST] = vg_name node_verify_param[constants.NV_PVLIST] = [vg_name] node_verify_param[constants.NV_DRBDLIST] = None + + # Due to the way our RPC system works, exact response times cannot be + # guaranteed (e.g. a broken node could run into a timeout). By keeping the + # time before and after executing the request, we can at least have a time + # window. 
+ nvinfo_starttime = time.time() all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, self.cfg.GetClusterName()) + nvinfo_endtime = time.time() cluster = self.cfg.GetClusterInfo() master_node = self.cfg.GetMasterNode() @@ -1380,6 +1390,7 @@ class LUVerifyCluster(LogicalUnit): else: instance = instanceinfo[instance] node_drbd[minor] = (instance.name, instance.admin_up) + self._VerifyNode(node_i, file_names, local_checksums, nresult, master_files, node_drbd, vg_name) @@ -1413,6 +1424,30 @@ class LUVerifyCluster(LogicalUnit): if test: continue + # Node time + ntime = nresult.get(constants.NV_TIME, None) + try: + ntime_merged = utils.MergeTime(ntime) + except (ValueError, TypeError): + # "test" is always false here, so report the error unconditionally and + # skip the skew check; ntime_merged is unbound or stale at this point + _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time") + continue + + if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): + ntime_diff = abs(nvinfo_starttime - ntime_merged) + elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): + ntime_diff = abs(ntime_merged - nvinfo_endtime) + else: + ntime_diff = None + + _ErrorIf(ntime_diff is not None, self.ENODETIME, node, + "Node time diverges by at least %0.1fs from master node time", + ntime_diff) + + if ntime_diff is not None: + continue + try: node_info[node] = { "mfree": int(nodeinfo['memory_free']), diff --git a/lib/constants.py b/lib/constants.py index 0f035ef79eaf4d9564095637e44869ad0df340d0..9ff39588c16b4c31512420badaa353b56318db90 100644 --- a/lib/constants.py +++ b/lib/constants.py @@ -320,7 +320,7 @@ DEFAULT_MAC_PREFIX = "aa:00:00" LVM_STRIPECOUNT = _autoconf.LVM_STRIPECOUNT # default maximum instance wait time, in seconds. 
DEFAULT_SHUTDOWN_TIMEOUT = 120 - +NODE_MAX_CLOCK_SKEW = 150 # RPC constants (RPC_ENCODING_NONE, @@ -526,6 +526,7 @@ NV_LVLIST = "lvlist" NV_PVLIST = "pvlist" NV_DRBDLIST = "drbd-list" NV_NODESETUP = "nodesetup" +NV_TIME = "time" # Allocator framework constants IALLOCATOR_VERSION = 2 @@ -722,7 +723,7 @@ CONFD_ERROR_ARGUMENT = 3 # Each request is "salted" by the current timestamp. # This constants decides how many seconds of skew to accept. # TODO: make this a default and allow the value to be more configurable -CONFD_MAX_CLOCK_SKEW = 300 +CONFD_MAX_CLOCK_SKEW = 2 * NODE_MAX_CLOCK_SKEW # When we haven't reloaded the config for more than this amount of seconds, we # force a test to see if inotify is betraying us. diff --git a/test/ganeti.constants_unittest.py b/test/ganeti.constants_unittest.py index 219afee8459d1c1857d16abeca89fe27873dbea1..7f1f0cba7f781b42f3f1f59d35c591980f06771a 100755 --- a/test/ganeti.constants_unittest.py +++ b/test/ganeti.constants_unittest.py @@ -60,6 +60,10 @@ class TestConstants(unittest.TestCase): self.failUnless(constants.LDS_OKAY < constants.LDS_UNKNOWN) self.failUnless(constants.LDS_UNKNOWN < constants.LDS_FAULTY) + def testClockSkew(self): + self.failUnless(constants.NODE_MAX_CLOCK_SKEW < + (0.8 * constants.CONFD_MAX_CLOCK_SKEW)) + class TestParameterNames(unittest.TestCase): """HV/BE parameter tests""" @@ -81,7 +85,7 @@ class TestConfdConstants(unittest.TestCase): def testFourCc(self): self.failUnlessEqual(len(constants.CONFD_MAGIC_FOURCC), 4, - "Invalid fourcc len, should be 4") + "Invalid fourcc len, should be 4") def _IsUniqueSequence(self, sequence): seen = set()