From 4a89c54a18904f71afc93d6af639c02114fdeabc Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Fri, 23 Jan 2009 12:36:00 +0000 Subject: [PATCH] Introduce more configuration consistency checks This patch enhances the duplicate DRBD minors checks (currently just a few) and adds automatic checks of configuration consistency at configuration file writing time. In order to do so and show meaningful error messages, the _UnlockedComputeDRBDMap function is changed to not raise errors in case of duplicates, but instead return both the minors map and the duplicate list, and its callers now raise the error. This allows the VerifyConfig function to return a complete list of duplicates. The new checks required some small updates to the unittests for the config module. Reviewed-by: ultrotter --- lib/config.py | 93 ++++++++++++++++++++++++++-------- test/ganeti.config_unittest.py | 8 +-- test/mocks.py | 4 -- 3 files changed, 77 insertions(+), 28 deletions(-) diff --git a/lib/config.py b/lib/config.py index f34aa9e75..9884d9242 100644 --- a/lib/config.py +++ b/lib/config.py @@ -227,10 +227,13 @@ class ConfigWriter: return result - @locking.ssynchronized(_config_lock, shared=1) - def VerifyConfig(self): + def _UnlockedVerifyConfig(self): """Verify function. + @rtype: list + @return: a list of error messages; a non-empty list signifies + configuration errors + """ result = [] seen_macs = [] @@ -290,13 +293,33 @@ class ConfigWriter: if not data.nodes[data.cluster.master_node].master_candidate: result.append("Master node is not a master candidate") + # master candidate checks mc_now, mc_max = self._UnlockedGetMasterCandidateStats() if mc_now < mc_max: result.append("Not enough master candidates: actual %d, target %d" % (mc_now, mc_max)) + # drbd minors check + d_map, duplicates = self._UnlockedComputeDRBDMap() + for node, minor, instance_a, instance_b in duplicates: + result.append("DRBD minor %d on node %s is assigned twice to instances" + " %s and %s" % (minor, node, instance_a, instance_b)) + return result + @locking.ssynchronized(_config_lock, shared=1) + def VerifyConfig(self): + """Verify function. + + This is just a wrapper over L{_UnlockedVerifyConfig}. + + @rtype: list + @return: a list of error messages; a non-empty list signifies + configuration errors + + """ + return self._UnlockedVerifyConfig() + def _UnlockedSetDiskID(self, disk, node_name): """Convert the unique ID to the ID needed on the target nodes. @@ -392,34 +415,41 @@ class ConfigWriter: def _UnlockedComputeDRBDMap(self): """Compute the used DRBD minor/nodes. + @rtype: (dict, list) @return: dictionary of node_name: dict of minor: instance_name; the returned dict will have all the nodes in it (even if with - an empty list). + an empty list), and a list of duplicates; if the duplicates + list is not empty, the configuration is corrupted and its caller + should raise an exception """ def _AppendUsedPorts(instance_name, disk, used): + duplicates = [] if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5: nodeA, nodeB, dummy, minorA, minorB = disk.logical_id[:5] for node, port in ((nodeA, minorA), (nodeB, minorB)): - assert node in used, "Instance node not found in node list" + assert node in used, ("Node '%s' of instance '%s' not found" + " in node list" % (node, instance_name)) if port in used[node]: - raise errors.ProgrammerError("DRBD minor already used:" - " %s/%s, %s/%s" % - (node, port, instance_name, - used[node][port])) - - used[node][port] = instance_name + duplicates.append((node, port, instance_name, used[node][port])) + else: + used[node][port] = instance_name if disk.children: for child in disk.children: - _AppendUsedPorts(instance_name, child, used) + duplicates.extend(_AppendUsedPorts(instance_name, child, used)) + return duplicates + duplicates = [] my_dict = dict((node, {}) for node in self._config_data.nodes) for (node, minor), instance in self._temporary_drbds.iteritems(): - my_dict[node][minor] = instance + if minor in my_dict[node]: + duplicates.append((node, minor, instance, my_dict[node][minor])) + else: + my_dict[node][minor] = instance for instance in self._config_data.instances.itervalues(): for disk in instance.disks: - _AppendUsedPorts(instance.name, disk, my_dict) - return my_dict + duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict)) + return my_dict, duplicates @locking.ssynchronized(_config_lock) def ComputeDRBDMap(self): @@ -432,7 +462,11 @@ class ConfigWriter: an empty list). """ - return self._UnlockedComputeDRBDMap() + d_map, duplicates = self._UnlockedComputeDRBDMap() + if duplicates: + raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" % + str(duplicates)) + return d_map @locking.ssynchronized(_config_lock) def AllocateDRBDMinor(self, nodes, instance): @@ -448,9 +482,12 @@ class ConfigWriter: """ assert isinstance(instance, basestring), \ - "Invalid argument passed to AllocateDRBDMinor" + "Invalid argument '%s' passed to AllocateDRBDMinor" % instance - d_map = self._UnlockedComputeDRBDMap() + d_map, duplicates = self._UnlockedComputeDRBDMap() + if duplicates: + raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" % + str(duplicates)) result = [] for nname in nodes: ndata = d_map[nname] @@ -469,11 +506,20 @@ class ConfigWriter: minor = keys[-1] + 1 else: minor = ffree - result.append(minor) + # double-check minor against current instances + assert minor not in d_map[nname], \ + ("Attempt to reuse allocated DRBD minor %d on node %s," + " already allocated to instance %s" % + (minor, nname, d_map[nname][minor])) ndata[minor] = instance - assert (nname, minor) not in self._temporary_drbds, \ - "Attempt to reuse reserved DRBD minor" - self._temporary_drbds[(nname, minor)] = instance + # double-check minor against reservation + r_key = (nname, minor) + assert r_key not in self._temporary_drbds, \ + ("Attempt to reuse reserved DRBD minor %d on node %s," + " reserved for instance %s" % + (minor, nname, self._temporary_drbds[r_key])) + self._temporary_drbds[r_key] = instance + result.append(minor) logging.debug("Request to allocate drbd minors, input: %s, returning %s", nodes, result) return result @@ -973,6 +1019,11 @@ class ConfigWriter: """Write the configuration data to persistent storage. """ + config_errors = self._UnlockedVerifyConfig() + if config_errors: + raise errors.ConfigurationError("Configuration data is not" + " consistent: %s" % + (", ".join(config_errors))) if destination is None: destination = self._cfg_file self._BumpSerialNo() diff --git a/test/ganeti.config_unittest.py b/test/ganeti.config_unittest.py index eb3ebc797..24c9491ca 100755 --- a/test/ganeti.config_unittest.py +++ b/test/ganeti.config_unittest.py @@ -79,15 +79,17 @@ class TestConfigRunner(unittest.TestCase): master_node_config = objects.Node(name=me.name, primary_ip=me.ip, secondary_ip=ip, - serial_no=1) + serial_no=1, + master_candidate=True) bootstrap.InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config, self.cfg_file) def _create_instance(self): """Create and return an instance object""" - inst = objects.Instance(name="test.example.com", disks=[], - disk_template=constants.DT_DISKLESS) + inst = objects.Instance(name="test.example.com", disks=[], nics=[], + disk_template=constants.DT_DISKLESS, + primary_node=self._get_object().GetMasterNode()) return inst def testEmpty(self): diff --git a/test/mocks.py b/test/mocks.py index bea8dd399..249cd7252 100644 --- a/test/mocks.py +++ b/test/mocks.py @@ -42,9 +42,6 @@ class FakeConfig: def GetNodeList(self): return ["a", "b", "c"] - def GetMaster(self): - return utils.HostInfo().name - def GetHostKey(self): return FAKE_CLUSTER_KEY @@ -71,4 +68,3 @@ class FakeContext: self.cfg = FakeConfig() # TODO: decide what features a mock Ganeti Lock Manager must have self.GLM = None - -- GitLab