Commit 78519c10 authored by Michael Hanselmann
Browse files

Change “node_info” RPC to accept multiple VGs/hypervisors



Keeping the node state up to date will require information from multiple
VGs and hypervisors. Instead of requiring multiple calls this change
allows a single call to return all needed information. Existing users
are changed.
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
parent c6a622cf
...@@ -522,44 +522,71 @@ def LeaveCluster(modify_ssh_setup): ...@@ -522,44 +522,71 @@ def LeaveCluster(modify_ssh_setup):
raise errors.QuitGanetiException(True, "Shutdown scheduled") raise errors.QuitGanetiException(True, "Shutdown scheduled")
def GetNodeInfo(vgname, hypervisor_type): def _GetVgInfo(name):
"""Retrieves information about a LVM volume group.
"""
# TODO: GetVGInfo supports returning information for multiple VGs at once
vginfo = bdev.LogicalVolume.GetVGInfo([name])
if vginfo:
vg_free = int(round(vginfo[0][0], 0))
vg_size = int(round(vginfo[0][1], 0))
else:
vg_free = None
vg_size = None
return {
"name": name,
"free": vg_free,
"size": vg_size,
}
def _GetHvInfo(name):
  """Queries a single hypervisor for information about this node.

  The result is whatever the C{GetNodeInfo} method of the selected hypervisor
  returns, so the exact contents depend on the hypervisor. Commonly reported
  items:
    - memory_dom0: the memory allocated for domain0 in MiB
    - memory_free: the currently available (free) RAM in MiB
    - memory_total: the total amount of RAM in MiB
    - hv_version: the hypervisor version, if available

  NOTE(review): earlier documentation also listed vg_size/vg_free here;
  volume group data appears to be collected by L{_GetVgInfo} instead --
  confirm before relying on those keys in this result.

  @type name: string
  @param name: Name of the hypervisor to query
  @return: The value returned by the hypervisor's C{GetNodeInfo}

  """
  hv = hypervisor.GetHypervisor(name)
  return hv.GetNodeInfo()
def _GetNamedNodeInfo(names, fn):
"""Calls C{fn} for all names in C{names} and returns a dictionary.
@rtype: None or dict
"""
if names is None:
return None
else:
return dict((name, fn(name)) for name in names)
def GetNodeInfo(vg_names, hv_names):
"""Gives back a hash with different information about the node. """Gives back a hash with different information about the node.
@type vgname: C{string} @type vg_names: list of string
@param vgname: the name of the volume group to ask for disk space information @param vg_names: Names of the volume groups to ask for disk space information
@type hypervisor_type: C{str} @type hv_names: list of string
@param hypervisor_type: the name of the hypervisor to ask for @param hv_names: Names of the hypervisors to ask for node information
memory information @rtype: tuple; (string, None/dict, None/dict)
@rtype: C{dict} @return: Tuple containing boot ID, volume group information and hypervisor
@return: dictionary with the following keys: information
- vg_size is the size of the configured volume group in MiB
- vg_free is the free size of the volume group in MiB """
- memory_dom0 is the memory allocated for domain0 in MiB bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
- memory_free is the currently available (free) ram in MiB vg_info = _GetNamedNodeInfo(vg_names, _GetVgInfo)
- memory_total is the total number of ram in MiB hv_info = _GetNamedNodeInfo(hv_names, _GetHvInfo)
- hv_version: the hypervisor version, if available
return (bootid, vg_info, hv_info)
"""
outputarray = {}
if vgname is not None:
vginfo = bdev.LogicalVolume.GetVGInfo([vgname])
vg_free = vg_size = None
if vginfo:
vg_free = int(round(vginfo[0][0], 0))
vg_size = int(round(vginfo[0][1], 0))
outputarray["vg_size"] = vg_size
outputarray["vg_free"] = vg_free
if hypervisor_type is not None:
hyper = hypervisor.GetHypervisor(hypervisor_type)
hyp_info = hyper.GetNodeInfo()
if hyp_info is not None:
outputarray.update(hyp_info)
outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
return outputarray
def VerifyNode(what, cluster_name): def VerifyNode(what, cluster_name):
......
...@@ -573,6 +573,20 @@ def _ShareAll(): ...@@ -573,6 +573,20 @@ def _ShareAll():
return dict.fromkeys(locking.LEVELS, 1) return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Flattens a L{rpc.RpcRunner.call_node_info} result into one dictionary.

  The input must describe exactly one volume group and exactly one
  hypervisor; anything else fails during unpacking. The two per-item
  dictionaries and the boot ID (stored under the key C{"bootid"}) are merged
  via L{utils.JoinDisjointDicts}. This flat form is sufficient for most
  users; code needing data from several volume groups or hypervisors has to
  work with the unprocessed result instead.

  @param data: Three-element sequence of boot ID, a one-element sequence of
    volume group information and a one-element sequence of hypervisor
    information
  @rtype: dict
  @return: Single dictionary combining all three pieces of information

  """
  (bootid, vg_data, hv_data) = data
  # Single-element unpacking enforces "exactly one" VG and hypervisor
  (vg_info, ) = vg_data
  (hv_info, ) = hv_data

  # presumably JoinDisjointDicts rejects overlapping keys -- verify in utils
  merged = utils.JoinDisjointDicts(vg_info, hv_info)
  return utils.JoinDisjointDicts(merged, {
    "bootid": bootid,
    })
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups): def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
"""Checks if the owned node groups are still correct for an instance. """Checks if the owned node groups are still correct for an instance.
...@@ -4591,9 +4605,9 @@ class _NodeQuery(_QueryBase): ...@@ -4591,9 +4605,9 @@ class _NodeQuery(_QueryBase):
# filter out non-vm_capable nodes # filter out non-vm_capable nodes
toquery_nodes = [name for name in nodenames if all_info[name].vm_capable] toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(), node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
lu.cfg.GetHypervisorType()) [lu.cfg.GetHypervisorType()])
live_data = dict((name, nresult.payload) live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
for (name, nresult) in node_data.items() for (name, nresult) in node_data.items()
if not nresult.fail_msg and nresult.payload) if not nresult.fail_msg and nresult.payload)
else: else:
...@@ -6012,10 +6026,12 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name): ...@@ -6012,10 +6026,12 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
we cannot check the node we cannot check the node
""" """
nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name) nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
nodeinfo[node].Raise("Can't get data from node %s" % node, nodeinfo[node].Raise("Can't get data from node %s" % node,
prereq=True, ecode=errors.ECODE_ENVIRON) prereq=True, ecode=errors.ECODE_ENVIRON)
free_mem = nodeinfo[node].payload.get("memory_free", None) (_, _, (hv_info, )) = nodeinfo[node].payload
free_mem = hv_info.get("memory_free", None)
if not isinstance(free_mem, int): if not isinstance(free_mem, int):
raise errors.OpPrereqError("Can't compute free memory on node %s, result" raise errors.OpPrereqError("Can't compute free memory on node %s, result"
" was '%s'" % (node, free_mem), " was '%s'" % (node, free_mem),
...@@ -6070,12 +6086,13 @@ def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested): ...@@ -6070,12 +6086,13 @@ def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
or we cannot check the node or we cannot check the node
""" """
nodeinfo = lu.rpc.call_node_info(nodenames, vg, None) nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
for node in nodenames: for node in nodenames:
info = nodeinfo[node] info = nodeinfo[node]
info.Raise("Cannot get current information from node %s" % node, info.Raise("Cannot get current information from node %s" % node,
prereq=True, ecode=errors.ECODE_ENVIRON) prereq=True, ecode=errors.ECODE_ENVIRON)
vg_free = info.payload.get("vg_free", None) (_, (vg_info, ), _) = info.payload
vg_free = vg_info.get("vg_free", None)
if not isinstance(vg_free, int): if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on node" raise errors.OpPrereqError("Can't compute free disk space on node"
" %s for vg %s, result was '%s'" % " %s for vg %s, result was '%s'" %
...@@ -6105,12 +6122,13 @@ def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name): ...@@ -6105,12 +6122,13 @@ def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
or we cannot check the node or we cannot check the node
""" """
nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name) nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
for node in nodenames: for node in nodenames:
info = nodeinfo[node] info = nodeinfo[node]
info.Raise("Cannot get current information from node %s" % node, info.Raise("Cannot get current information from node %s" % node,
prereq=True, ecode=errors.ECODE_ENVIRON) prereq=True, ecode=errors.ECODE_ENVIRON)
num_cpus = info.payload.get("cpu_total", None) (_, _, (hv_info, )) = info.payload
num_cpus = hv_info.get("cpu_total", None)
if not isinstance(num_cpus, int): if not isinstance(num_cpus, int):
raise errors.OpPrereqError("Can't compute the number of physical CPUs" raise errors.OpPrereqError("Can't compute the number of physical CPUs"
" on node %s, result was '%s'" % " on node %s, result was '%s'" %
...@@ -7678,14 +7696,17 @@ class TLMigrateInstance(Tasklet): ...@@ -7678,14 +7696,17 @@ class TLMigrateInstance(Tasklet):
# Check for hypervisor version mismatch and warn the user. # Check for hypervisor version mismatch and warn the user.
nodeinfo = self.rpc.call_node_info([source_node, target_node], nodeinfo = self.rpc.call_node_info([source_node, target_node],
None, self.instance.hypervisor) None, [self.instance.hypervisor])
src_info = nodeinfo[source_node] for ninfo in nodeinfo.items():
dst_info = nodeinfo[target_node] ninfo.Raise("Unable to retrieve node information from node '%s'" %
ninfo.node)
if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and (_, _, (src_info, )) = nodeinfo[source_node].payload
(constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)): (_, _, (dst_info, )) = nodeinfo[target_node].payload
src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION] if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
(constants.HV_NODEINFO_KEY_VERSION in dst_info)):
src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
if src_version != dst_version: if src_version != dst_version:
self.feedback_fn("* warning: hypervisor version mismatch between" self.feedback_fn("* warning: hypervisor version mismatch between"
" source (%s) and target (%s) node" % " source (%s) and target (%s) node" %
...@@ -11377,35 +11398,39 @@ class LUInstanceSetParams(LogicalUnit): ...@@ -11377,35 +11398,39 @@ class LUInstanceSetParams(LogicalUnit):
instance_info = self.rpc.call_instance_info(pnode, instance.name, instance_info = self.rpc.call_instance_info(pnode, instance.name,
instance.hypervisor) instance.hypervisor)
nodeinfo = self.rpc.call_node_info(mem_check_list, None, nodeinfo = self.rpc.call_node_info(mem_check_list, None,
instance.hypervisor) [instance.hypervisor])
pninfo = nodeinfo[pnode] pninfo = nodeinfo[pnode]
msg = pninfo.fail_msg msg = pninfo.fail_msg
if msg: if msg:
# Assume the primary node is unreachable and go ahead # Assume the primary node is unreachable and go ahead
self.warn.append("Can't get info from primary node %s: %s" % self.warn.append("Can't get info from primary node %s: %s" %
(pnode, msg)) (pnode, msg))
elif not isinstance(pninfo.payload.get("memory_free", None), int):
self.warn.append("Node data from primary node %s doesn't contain"
" free memory information" % pnode)
elif instance_info.fail_msg:
self.warn.append("Can't get instance runtime information: %s" %
instance_info.fail_msg)
else: else:
if instance_info.payload: (_, _, (pnhvinfo, )) = pninfo.payload
current_mem = int(instance_info.payload["memory"]) if not isinstance(pnhvinfo.get("memory_free", None), int):
self.warn.append("Node data from primary node %s doesn't contain"
" free memory information" % pnode)
elif instance_info.fail_msg:
self.warn.append("Can't get instance runtime information: %s" %
instance_info.fail_msg)
else: else:
# Assume instance not running if instance_info.payload:
# (there is a slight race condition here, but it's not very probable, current_mem = int(instance_info.payload["memory"])
# and we have no other way to check) else:
current_mem = 0 # Assume instance not running
#TODO(dynmem): do the appropriate check involving MINMEM # (there is a slight race condition here, but it's not very
miss_mem = (be_new[constants.BE_MAXMEM] - current_mem - # probable, and we have no other way to check)
pninfo.payload["memory_free"]) # TODO: Describe race condition
if miss_mem > 0: current_mem = 0
raise errors.OpPrereqError("This change will prevent the instance" #TODO(dynmem): do the appropriate check involving MINMEM
" from starting, due to %d MB of memory" miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
" missing on its primary node" % miss_mem, pninfo.payload["memory_free"])
errors.ECODE_NORES) if miss_mem > 0:
raise errors.OpPrereqError("This change will prevent the instance"
" from starting, due to %d MB of memory"
" missing on its primary node" %
miss_mem,
errors.ECODE_NORES)
if be_new[constants.BE_AUTO_BALANCE]: if be_new[constants.BE_AUTO_BALANCE]:
for node, nres in nodeinfo.items(): for node, nres in nodeinfo.items():
...@@ -11413,12 +11438,13 @@ class LUInstanceSetParams(LogicalUnit): ...@@ -11413,12 +11438,13 @@ class LUInstanceSetParams(LogicalUnit):
continue continue
nres.Raise("Can't get info from secondary node %s" % node, nres.Raise("Can't get info from secondary node %s" % node,
prereq=True, ecode=errors.ECODE_STATE) prereq=True, ecode=errors.ECODE_STATE)
if not isinstance(nres.payload.get("memory_free", None), int): (_, _, (nhvinfo, )) = nres.payload
if not isinstance(nhvinfo.get("memory_free", None), int):
raise errors.OpPrereqError("Secondary node %s didn't return free" raise errors.OpPrereqError("Secondary node %s didn't return free"
" memory information" % node, " memory information" % node,
errors.ECODE_STATE) errors.ECODE_STATE)
#TODO(dynmem): do the appropriate check involving MINMEM #TODO(dynmem): do the appropriate check involving MINMEM
elif be_new[constants.BE_MAXMEM] > nres.payload["memory_free"]: elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
raise errors.OpPrereqError("This change will prevent the instance" raise errors.OpPrereqError("This change will prevent the instance"
" from failover to its secondary node" " from failover to its secondary node"
" %s, due to not enough memory" % node, " %s, due to not enough memory" % node,
...@@ -13491,8 +13517,8 @@ class IAllocator(object): ...@@ -13491,8 +13517,8 @@ class IAllocator(object):
else: else:
hypervisor_name = cluster_info.enabled_hypervisors[0] hypervisor_name = cluster_info.enabled_hypervisors[0]
node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(), node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
hypervisor_name) [hypervisor_name])
node_iinfo = \ node_iinfo = \
self.rpc.call_all_instances_info(node_list, self.rpc.call_all_instances_info(node_list,
cluster_info.enabled_hypervisors) cluster_info.enabled_hypervisors)
...@@ -13565,7 +13591,7 @@ class IAllocator(object): ...@@ -13565,7 +13591,7 @@ class IAllocator(object):
nresult.Raise("Can't get data for node %s" % nname) nresult.Raise("Can't get data for node %s" % nname)
node_iinfo[nname].Raise("Can't get node instance info from node %s" % node_iinfo[nname].Raise("Can't get node instance info from node %s" %
nname) nname)
remote_info = nresult.payload remote_info = _MakeLegacyNodeInfo(nresult.payload)
for attr in ["memory_total", "memory_free", "memory_dom0", for attr in ["memory_total", "memory_free", "memory_dom0",
"vg_size", "vg_free", "cpu_total"]: "vg_size", "vg_free", "cpu_total"]:
......
...@@ -412,10 +412,10 @@ _NODE_CALLS = [ ...@@ -412,10 +412,10 @@ _NODE_CALLS = [
("address", None, "IP address"), ("address", None, "IP address"),
], None, "Checks if a node has the given IP address"), ], None, "Checks if a node has the given IP address"),
("node_info", MULTI, TMO_URGENT, [ ("node_info", MULTI, TMO_URGENT, [
("vg_name", None, ("vg_names", None,
"Name of the volume group to ask for disk space information"), "Names of the volume groups to ask for disk space information"),
("hypervisor_type", None, ("hv_names", None,
"Name of the hypervisor to ask for memory information"), "Names of the hypervisors to ask for node information"),
], None, "Return node information"), ], None, "Return node information"),
("node_verify", MULTI, TMO_NORMAL, [ ("node_verify", MULTI, TMO_NORMAL, [
("checkdict", None, None), ("checkdict", None, None),
......
...@@ -666,8 +666,8 @@ class NodeHttpServer(http.server.HttpServer): ...@@ -666,8 +666,8 @@ class NodeHttpServer(http.server.HttpServer):
"""Query node information. """Query node information.
""" """
vgname, hypervisor_type = params (vg_names, hv_names) = params
return backend.GetNodeInfo(vgname, hypervisor_type) return backend.GetNodeInfo(vg_names, hv_names)
@staticmethod @staticmethod
def perspective_etc_hosts_modify(params): def perspective_etc_hosts_modify(params):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment