From b0d851783270136284624df761e232398dd3efa6 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Wed, 16 Jun 2010 00:40:44 +0200 Subject: [PATCH] Add OS verification support to cluster verify For this, we needed to extend the NodeImage class with a few extra variables, and we do a trick in the node verification where we pick the first node that returned valid OS data as the reference node, and then we compare all other nodes against it. The checks added are: - consistency of DiagnoseOS responses - multiple paths for an OS - inconsistent OS between a reference node and the current node Signed-off-by: Iustin Pop <iustin@google.com> Reviewed-by: Guido Trotter <ultrotter@google.com> --- lib/backend.py | 3 ++ lib/cmdlib.py | 120 +++++++++++++++++++++++++++++++++++++++++++++-- lib/constants.py | 13 ++--- 3 files changed, 127 insertions(+), 9 deletions(-) diff --git a/lib/backend.py b/lib/backend.py index 366a3b459..15ac873e5 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -599,6 +599,9 @@ def VerifyNode(what, cluster_name): if constants.NV_TIME in what: result[constants.NV_TIME] = utils.SplitTime(time.time()) + if constants.NV_OSLIST in what: + result[constants.NV_OSLIST] = DiagnoseOS() + return result diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 0941fa5e5..a027c24a3 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -1115,6 +1115,7 @@ class LUVerifyCluster(LogicalUnit): ENODELVM = (TNODE, "ENODELVM") ENODEN1 = (TNODE, "ENODEN1") ENODENET = (TNODE, "ENODENET") + ENODEOS = (TNODE, "ENODEOS") ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE") ENODEORPHANLV = (TNODE, "ENODEORPHANLV") ENODERPC = (TNODE, "ENODERPC") @@ -1130,6 +1131,8 @@ class LUVerifyCluster(LogicalUnit): class NodeImage(object): """A class representing the logical and physical status of a node. + @type name: string + @ivar name: the node name to which this object refers @ivar volumes: a structure as returned from L{ganeti.backend.GetVolumeList} (runtime) @ivar instances: a list of running instances (runtime) @@ -1149,9 +1152,14 @@ class LUVerifyCluster(LogicalUnit): @ivar hyp_fail: whether the RPC call didn't return the instance list @type ghost: boolean @ivar ghost: whether this is a known node or not (config) + @type os_fail: boolean + @ivar os_fail: whether the RPC call didn't return valid OS data + @type oslist: list + @ivar oslist: list of OSes as diagnosed by DiagnoseOS """ - def __init__(self, offline=False): + def __init__(self, offline=False, name=None): + self.name = name self.volumes = {} self.instances = [] self.pinst = [] @@ -1164,6 +1172,8 @@ class LUVerifyCluster(LogicalUnit): self.lvm_fail = False self.hyp_fail = False self.ghost = False + self.os_fail = False + self.oslist = {} def ExpandNames(self): self.needed_locks = { @@ -1574,6 +1584,100 @@ class LUVerifyCluster(LogicalUnit): _ErrorIf(test, self.ENODEDRBD, node, "unallocated drbd minor %d is in use", minor) + def _UpdateNodeOS(self, ninfo, nresult, nimg): + """Builds the node OS structures. + + @type ninfo: L{objects.Node} + @param ninfo: the node to check + @param nresult: the remote results for the node + @param nimg: the node image object + + """ + node = ninfo.name + _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 + + remote_os = nresult.get(constants.NV_OSLIST, None) + test = (not isinstance(remote_os, list) or + not compat.all(remote_os, + lambda v: isinstance(v, list) and len(v) == 7)) + + _ErrorIf(test, self.ENODEOS, node, + "node hasn't returned valid OS data") + + nimg.os_fail = test + + if test: + return + + os_dict = {} + + for (name, os_path, status, diagnose, + variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: + + if name not in os_dict: + os_dict[name] = [] + + # parameters is a list of lists instead of list of tuples due to + # JSON lacking a real tuple type, fix it: + parameters = [tuple(v) for v in parameters] + os_dict[name].append((os_path, status, diagnose, + set(variants), set(parameters), set(api_ver))) + + nimg.oslist = os_dict + + def _VerifyNodeOS(self, ninfo, nimg, base): + """Verifies the node OS list. + + @type ninfo: L{objects.Node} + @param ninfo: the node to check + @param nimg: the node image object + @param base: the 'template' node we match against (e.g. from the master) + + """ + node = ninfo.name + _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 + + assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" + + for os_name, os_data in nimg.oslist.items(): + assert os_data, "Empty OS status for OS %s?!" % os_name + f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] + _ErrorIf(not f_status, self.ENODEOS, node, + "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) + _ErrorIf(len(os_data) > 1, self.ENODEOS, node, + "OS '%s' has multiple entries (first one shadows the rest): %s", + os_name, utils.CommaJoin([v[0] for v in os_data])) + # this will catched in backend too + _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15) + and not f_var, self.ENODEOS, node, + "OS %s with API at least %d does not declare any variant", + os_name, constants.OS_API_V15) + # comparisons with the 'base' image + test = os_name not in base.oslist + _ErrorIf(test, self.ENODEOS, node, + "Extra OS %s not present on reference node (%s)", + os_name, base.name) + if test: + continue + assert base.oslist[os_name], "Base node has empty OS status?" + _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] + if not b_status: + # base OS is invalid, skipping + continue + for kind, a, b in [("API version", f_api, b_api), + ("variants list", f_var, b_var), + ("parameters", f_param, b_param)]: + _ErrorIf(a != b, self.ENODEOS, node, + "OS %s %s differs from reference node %s: %s vs. %s", + kind, os_name, base.name, + utils.CommaJoin(a), utils.CommaJoin(a)) + + # check any missing OSes + missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) + _ErrorIf(missing, self.ENODEOS, node, + "OSes present on reference node %s but missing on this node: %s", + base.name, utils.CommaJoin(missing)) + def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name): """Verifies and updates the node volume data. @@ -1751,6 +1855,7 @@ class LUVerifyCluster(LogicalUnit): constants.NV_NODESETUP: None, constants.NV_TIME: None, constants.NV_MASTERIP: (master_node, master_ip), + constants.NV_OSLIST: None, } if vg_name is not None: @@ -1760,7 +1865,8 @@ class LUVerifyCluster(LogicalUnit): node_verify_param[constants.NV_DRBDLIST] = None # Build our expected cluster state - node_image = dict((node.name, self.NodeImage(offline=node.offline)) + node_image = dict((node.name, self.NodeImage(offline=node.offline, + name=node.name)) for node in nodeinfo) for instance in instancelist: @@ -1769,7 +1875,7 @@ class LUVerifyCluster(LogicalUnit): for nname in inst_config.all_nodes: if nname not in node_image: # ghost node - gnode = self.NodeImage() + gnode = self.NodeImage(name=nname) gnode.ghost = True node_image[nname] = gnode @@ -1800,6 +1906,9 @@ class LUVerifyCluster(LogicalUnit): all_drbd_map = self.cfg.ComputeDRBDMap() feedback_fn("* Verifying node status") + + refos_img = None + for node_i in nodeinfo: node = node_i.name nimg = node_image[node] @@ -1841,6 +1950,11 @@ class LUVerifyCluster(LogicalUnit): self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) self._UpdateNodeInstances(node_i, nresult, nimg) self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) + self._UpdateNodeOS(node_i, nresult, nimg) + if not nimg.os_fail: + if refos_img is None: + refos_img = nimg + self._VerifyNodeOS(node_i, nimg, refos_img) feedback_fn("* Verifying instance status") for instance in instancelist: diff --git a/lib/constants.py b/lib/constants.py index 60dd81586..8c04d8bae 100644 --- a/lib/constants.py +++ b/lib/constants.py @@ -694,20 +694,21 @@ VERIFY_NPLUSONE_MEM = 'nplusone_mem' VERIFY_OPTIONAL_CHECKS = frozenset([VERIFY_NPLUSONE_MEM]) # Node verify constants +NV_DRBDLIST = "drbd-list" NV_FILELIST = "filelist" NV_HVINFO = "hvinfo" NV_HYPERVISOR = "hypervisor" NV_INSTANCELIST = "instancelist" +NV_LVLIST = "lvlist" +NV_MASTERIP = "master-ip" NV_NODELIST = "nodelist" NV_NODENETTEST = "node-net-test" -NV_VERSION = "version" -NV_VGLIST = "vglist" -NV_LVLIST = "lvlist" -NV_PVLIST = "pvlist" -NV_DRBDLIST = "drbd-list" NV_NODESETUP = "nodesetup" +NV_OSLIST = "oslist" +NV_PVLIST = "pvlist" NV_TIME = "time" -NV_MASTERIP = "master-ip" +NV_VERSION = "version" +NV_VGLIST = "vglist" # SSL certificate check constants (in days) SSL_CERT_EXPIRATION_WARN = 30 -- GitLab