Commit e69d05fd authored by Iustin Pop

Move the hypervisor attribute to the instances

This (big) patch moves the hypervisor type from the cluster to the
instance level; the cluster attribute remains as the default hypervisor,
and will be renamed accordingly in a later patch. The cluster also gains
the 'enabled_hypervisors' attribute, and instances can be created with
any of the enabled hypervisors (no provision yet for changing that
attribute).
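
A rough sketch of what the new layout implies (toy stand-ins, not the
real Ganeti objects code; the class shapes, hypervisor names and the
creation-time check shown here are illustrative):

  class Cluster(object):
    def __init__(self, default_hypervisor, enabled_hypervisors):
      # the old cluster-level attribute stays around as the default
      self.hypervisor = default_hypervisor
      self.enabled_hypervisors = enabled_hypervisors

  class Instance(object):
    def __init__(self, name, cluster, hypervisor=None):
      # the hypervisor now lives on the instance; creation is only
      # allowed for one of the enabled hypervisors
      hv = hypervisor or cluster.hypervisor
      if hv not in cluster.enabled_hypervisors:
        raise ValueError("hypervisor %s is not enabled" % hv)
      self.name = name
      self.hypervisor = hv

  cluster = Cluster("xen-3.0", ["xen-3.0", "fake"])
  inst = Instance("instance1.example.com", cluster, hypervisor="fake")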

The many changes in the rpc/backend layer are due to the fact that the
backend code used to read the hypervisor from the local copy of the
config; now it has to be sent for each function, either in the instance
object or as a separate parameter.
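
In other words, where the backend used to call
hypervisor.GetHypervisor(_GetConfig()), the hypervisor type now arrives
as an argument. A toy sketch of the new calling convention (the
FakeHypervisor class and registry below are stand-ins for the real
hypervisor module):

  class FakeHypervisor(object):
    def ListInstances(self):
      return ["instance1.example.com"]

  _HYPERVISORS = {"fake": FakeHypervisor()}

  def GetHypervisor(name):
    # stand-in for hypervisor.GetHypervisor(); no config lookup involved
    return _HYPERVISORS[name]

  def GetInstanceList(hypervisor_list):
    # mirrors the new backend.GetInstanceList: the caller names the
    # hypervisors to query instead of the node reading its own config
    results = []
    for hname in hypervisor_list:
      results.extend(GetHypervisor(hname).ListInstances())
    return results

  print(GetInstanceList(["fake"]))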

By default, the node list shows the node free/total memory for the
default hypervisor; a new flag should be added to select another
hypervisor. The instance list has a new field, hypervisor, that shows
the instance's hypervisor. Cluster verify runs for all enabled
hypervisor types.
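
Memory data is now reported per hypervisor, so a node query has to name
the hypervisor it is asking about; by default that is the cluster-wide
default. A made-up illustration (the numbers and hypervisor names are
invented):

  node_memory = {
    "xen-3.0": {"memory_total": 4096, "memory_free": 1024, "memory_dom0": 512},
    "fake": {"memory_total": 4096, "memory_free": 4096, "memory_dom0": 0},
  }

  def node_info(hypervisor_type):
    # memory part of what GetNodeInfo(vgname, hypervisor_type) returns
    return node_memory[hypervisor_type]

  default_hv = "xen-3.0"        # the cluster default hypervisor
  print(node_info(default_hv))  # what the node listing reports by default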

The new FIXMEs are related to IAllocator, since the node
total/free/used memory counts are now wrong (we can't reliably compute
the free memory).

Reviewed-by: imsnah
parent 6884c0ca
@@ -387,14 +387,14 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler):
     """Query information about all instances.

     """
-    return backend.GetAllInstancesInfo()
+    return backend.GetAllInstancesInfo(params[0])

   @staticmethod
   def perspective_instance_list(params):
     """Query the list of running instances.

     """
-    return backend.GetInstanceList()
+    return backend.GetInstanceList(params[0])

   # node --------------------------

@@ -411,8 +411,8 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler):
     """Query node information.

     """
-    vgname = params[0]
-    return backend.GetNodeInfo(vgname)
+    vgname, hypervisor_type = params
+    return backend.GetNodeInfo(vgname, hypervisor_type)

   @staticmethod
   def perspective_node_add(params):
...
@@ -231,18 +231,21 @@ def LeaveCluster():
   raise errors.QuitGanetiException(False, 'Shutdown scheduled')


-def GetNodeInfo(vgname):
+def GetNodeInfo(vgname, hypervisor_type):
   """Gives back a hash with different informations about the node.

-  Returns:
-    { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
-      'memory_free' : xxx, 'memory_total' : xxx }
-    where
-    vg_size is the size of the configured volume group in MiB
-    vg_free is the free size of the volume group in MiB
-    memory_dom0 is the memory allocated for domain0 in MiB
-    memory_free is the currently available (free) ram in MiB
-    memory_total is the total number of ram in MiB
+  @type vgname: C{string}
+  @param vgname: the name of the volume group to ask for disk space information
+  @type hypervisor_type: C{str}
+  @param hypervisor_type: the name of the hypervisor to ask for
+      memory information
+  @rtype: C{dict}
+  @return: dictionary with the following keys:
+      - vg_size is the size of the configured volume group in MiB
+      - vg_free is the free size of the volume group in MiB
+      - memory_dom0 is the memory allocated for domain0 in MiB
+      - memory_free is the currently available (free) ram in MiB
+      - memory_total is the total number of ram in MiB

   """
   outputarray = {}
@@ -250,7 +253,7 @@ def GetNodeInfo(vgname):
   outputarray['vg_size'] = vginfo['vg_size']
   outputarray['vg_free'] = vginfo['vg_free']

-  hyper = hypervisor.GetHypervisor(_GetConfig())
+  hyper = hypervisor.GetHypervisor(hypervisor_type)
   hyp_info = hyper.GetNodeInfo()
   if hyp_info is not None:
     outputarray.update(hyp_info)
@@ -267,25 +270,36 @@ def GetNodeInfo(vgname):
 def VerifyNode(what, cluster_name):
   """Verify the status of the local node.

-  Args:
-    what - a dictionary of things to check:
-      'filelist' : list of files for which to compute checksums
-      'nodelist' : list of nodes we should check communication with
-      'hypervisor': run the hypervisor-specific verify
-
-  Requested files on local node are checksummed and the result returned.
-
-  The nodelist is traversed, with the following checks being made
-  for each node:
-    - known_hosts key correct
-    - correct resolving of node name (target node returns its own hostname
-      by ssh-execution of 'hostname', result compared against name in list.
+  Based on the input L{what} parameter, various checks are done on the
+  local node.
+
+  If the I{filelist} key is present, this list of
+  files is checksummed and the file/checksum pairs are returned.
+
+  If the I{nodelist} key is present, we check that we have
+  connectivity via ssh with the target nodes (and check the hostname
+  report).
+
+  If the I{node-net-test} key is present, we check that we have
+  connectivity to the given nodes via both primary IP and, if
+  applicable, secondary IPs.
+
+  @type what: C{dict}
+  @param what: a dictionary of things to check:
+      - filelist: list of files for which to compute checksums
+      - nodelist: list of nodes we should check ssh communication with
+      - node-net-test: list of nodes we should check node daemon port
+        connectivity with
+      - hypervisor: list with hypervisors to run the verify for

   """
   result = {}

   if 'hypervisor' in what:
-    result['hypervisor'] = hypervisor.GetHypervisor(_GetConfig()).Verify()
+    result['hypervisor'] = my_dict = {}
+    for hv_name in what['hypervisor']:
+      my_dict[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()

   if 'filelist' in what:
     result['filelist'] = utils.FingerprintFiles(what['filelist'])
@@ -415,41 +429,49 @@ def BridgesExist(bridges_list):
   return True


-def GetInstanceList():
+def GetInstanceList(hypervisor_list):
   """Provides a list of instances.

-  Returns:
-    A list of all running instances on the current node
-    - instance1.example.com
-    - instance2.example.com
+  @type hypervisor_list: list
+  @param hypervisor_list: the list of hypervisors to query information
+
+  @rtype: list
+  @return: a list of all running instances on the current node
+    - instance1.example.com
+    - instance2.example.com

   """
-  try:
-    names = hypervisor.GetHypervisor(_GetConfig()).ListInstances()
-  except errors.HypervisorError, err:
-    logging.exception("Error enumerating instances")
-    raise
+  results = []
+  for hname in hypervisor_list:
+    try:
+      names = hypervisor.GetHypervisor(hname).ListInstances()
+      results.extend(names)
+    except errors.HypervisorError, err:
+      logging.exception("Error enumerating instances for hypevisor %s", hname)
+      # FIXME: should we somehow not propagate this to the master?
+      raise

-  return names
+  return results


-def GetInstanceInfo(instance):
+def GetInstanceInfo(instance, hname):
   """Gives back the informations about an instance as a dictionary.

-  Args:
-    instance: name of the instance (ex. instance1.example.com)
+  @type instance: string
+  @param instance: the instance name
+  @type hname: string
+  @param hname: the hypervisor type of the instance

-  Returns:
-    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
-    where
-    memory: memory size of instance (int)
-    state: xen state of instance (string)
-    time: cpu time of instance (float)
+  @rtype: dict
+  @return: dictionary with the following keys:
+      - memory: memory size of instance (int)
+      - state: xen state of instance (string)
+      - time: cpu time of instance (float)

   """
   output = {}
-  iinfo = hypervisor.GetHypervisor(_GetConfig()).GetInstanceInfo(instance)
+  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
   if iinfo is not None:
     output['memory'] = iinfo[2]
     output['state'] = iinfo[4]
@@ -458,34 +480,38 @@ def GetInstanceInfo(instance):
   return output


-def GetAllInstancesInfo():
+def GetAllInstancesInfo(hypervisor_list):
   """Gather data about all instances.

   This is the equivalent of `GetInstanceInfo()`, except that it
   computes data for all instances at once, thus being faster if one
   needs data about more than one instance.

-  Returns: a dictionary of dictionaries, keys being the instance name,
-    and with values:
-    { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
-    where
-    memory: memory size of instance (int)
-    state: xen state of instance (string)
-    time: cpu time of instance (float)
-    vcpus: the number of cpus
+  @type hypervisor_list: list
+  @param hypervisor_list: list of hypervisors to query for instance data
+
+  @rtype: dict of dicts
+  @return: dictionary of instance: data, with data having the following keys:
+      - memory: memory size of instance (int)
+      - state: xen state of instance (string)
+      - time: cpu time of instance (float)
+      - vcpuus: the number of vcpus

   """
   output = {}
-  iinfo = hypervisor.GetHypervisor(_GetConfig()).GetAllInstancesInfo()
-  if iinfo:
-    for name, inst_id, memory, vcpus, state, times in iinfo:
-      output[name] = {
-        'memory': memory,
-        'vcpus': vcpus,
-        'state': state,
-        'time': times,
-        }
+  for hname in hypervisor_list:
+    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
+    if iinfo:
+      for name, inst_id, memory, vcpus, state, times in iinfo:
+        if name in output:
+          raise errors.HypervisorError("Instance %s running duplicate" % name)
+        output[name] = {
+          'memory': memory,
+          'vcpus': vcpus,
+          'state': state,
+          'time': times,
+          }

   return output
@@ -499,7 +525,6 @@ def AddOSToInstance(instance, os_disk, swap_disk):
     swap_disk: the instance-visible name of the swap device

   """
-  cfg = _GetConfig()
   inst_os = OSFromDisk(instance.os)

   create_script = inst_os.create_script

@@ -535,7 +560,7 @@ def AddOSToInstance(instance, os_disk, swap_disk):
                                inst_os.path, create_script, instance.name,
                                real_os_dev.dev_path, real_swap_dev.dev_path,
                                logfile)
-  env = {'HYPERVISOR': cfg.GetHypervisorType()}
+  env = {'HYPERVISOR': instance.hypervisor}

   result = utils.RunCmd(command, env=env)
   if result.failed:
@@ -666,17 +691,19 @@ def _GatherBlockDevs(instance):
 def StartInstance(instance, extra_args):
   """Start an instance.

-  Args:
-    instance - name of instance to start.
+  @type instance: instance object
+  @param instance: the instance object
+  @rtype: boolean
+  @return: whether the startup was successful or not

   """
-  running_instances = GetInstanceList()
+  running_instances = GetInstanceList([instance.hypervisor])

   if instance.name in running_instances:
     return True

   block_devices = _GatherBlockDevs(instance)
-  hyper = hypervisor.GetHypervisor(_GetConfig())
+  hyper = hypervisor.GetHypervisor(instance.hypervisor)

   try:
     hyper.StartInstance(instance, block_devices, extra_args)
@@ -690,16 +717,19 @@ def StartInstance(instance, extra_args):
 def ShutdownInstance(instance):
   """Shut an instance down.

-  Args:
-    instance - name of instance to shutdown.
+  @type instance: instance object
+  @param instance: the instance object
+  @rtype: boolean
+  @return: whether the startup was successful or not

   """
-  running_instances = GetInstanceList()
+  hv_name = instance.hypervisor
+  running_instances = GetInstanceList([hv_name])

   if instance.name not in running_instances:
     return True

-  hyper = hypervisor.GetHypervisor(_GetConfig())
+  hyper = hypervisor.GetHypervisor(hv_name)
   try:
     hyper.StopInstance(instance)
   except errors.HypervisorError, err:
@@ -711,7 +741,7 @@ def ShutdownInstance(instance):
   time.sleep(1)
   for dummy in range(11):
-    if instance.name not in GetInstanceList():
+    if instance.name not in GetInstanceList([hv_name]):
       break
     time.sleep(10)
   else:

@@ -725,7 +755,7 @@ def ShutdownInstance(instance):
       return False

   time.sleep(1)
-  if instance.name in GetInstanceList():
+  if instance.name in GetInstanceList([hv_name]):
     logging.error("could not shutdown instance '%s' even by destroy",
                   instance.name)
     return False
@@ -741,13 +771,13 @@ def RebootInstance(instance, reboot_type, extra_args):
     reboot_type - how to reboot [soft,hard,full]

   """
-  running_instances = GetInstanceList()
+  running_instances = GetInstanceList([instance.hypervisor])

   if instance.name not in running_instances:
     logging.error("Cannot reboot instance that is not running")
     return False

-  hyper = hypervisor.GetHypervisor(_GetConfig())
+  hyper = hypervisor.GetHypervisor(instance.hypervisor)
   if reboot_type == constants.INSTANCE_REBOOT_SOFT:
     try:
       hyper.RebootInstance(instance)

@@ -764,7 +794,6 @@ def RebootInstance(instance, reboot_type, extra_args):
   else:
     raise errors.ParameterError("reboot_type invalid")

   return True
@@ -784,7 +813,7 @@ def MigrateInstance(instance, target, live):
       - msg is a string with details in case of failure

   """
-  hyper = hypervisor.GetHypervisor(_GetConfig())
+  hyper = hypervisor.GetHypervisor(instance.hypervisor_name)

   try:
     hyper.MigrateInstance(instance.name, target, live)

@@ -1464,7 +1493,6 @@ def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image,
     False in case of error, True otherwise.

   """
-  cfg = _GetConfig()
   inst_os = OSFromDisk(instance.os)

   import_script = inst_os.import_script

@@ -1507,7 +1535,7 @@ def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image,
                                logfile)

   command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
-  env = {'HYPERVISOR': cfg.GetHypervisorType()}
+  env = {'HYPERVISOR': instance.hypervisor}

   result = utils.RunCmd(command, env=env)
...
@@ -586,8 +586,11 @@ class LUVerifyCluster(LogicalUnit):
                       (node, node_result['node-net-test'][node]))

     hyp_result = node_result.get('hypervisor', None)
-    if hyp_result is not None:
-      feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
+    if isinstance(hyp_result, dict):
+      for hv_name, hv_result in hyp_result.iteritems():
+        if hv_result is not None:
+          feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" %
+                      (hv_name, hv_result))
     return bad

   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
@@ -721,6 +724,7 @@ class LUVerifyCluster(LogicalUnit):
       feedback_fn(" - ERROR: %s" % msg)

     vg_name = self.cfg.GetVGName()
+    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())

@@ -739,19 +743,20 @@ class LUVerifyCluster(LogicalUnit):
     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
     all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
-    all_instanceinfo = rpc.call_instance_list(nodelist)
+    all_instanceinfo = rpc.call_instance_list(nodelist, hypervisors)
     all_vglist = rpc.call_vg_list(nodelist)
     node_verify_param = {
       'filelist': file_names,
       'nodelist': nodelist,
-      'hypervisor': None,
+      'hypervisor': hypervisors,
       'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
                         for node in nodeinfo]
       }
     all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param,
                                       self.cfg.GetClusterName())
     all_rversion = rpc.call_version(nodelist)
-    all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
+    all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName(),
+                                   self.cfg.GetHypervisorType())

     for node in nodelist:
       feedback_fn("* Verifying node %s" % node)
@@ -1470,7 +1475,8 @@ class LUQueryNodes(NoHooksLU):
     if self.dynamic_fields.intersection(self.op.output_fields):
       live_data = {}
-      node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+      node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName(),
+                                     self.cfg.GetHypervisorType())
       for name in nodenames:
         nodeinfo = node_data.get(name, None)
         if nodeinfo:

@@ -1808,7 +1814,7 @@ class LUAddNode(LogicalUnit):
                     (fname, to_node))

     to_copy = []
-    if self.cfg.GetHypervisorType() == constants.HT_XEN_HVM31:
+    if constants.HT_XEN_HVM31 in self.cfg.GetClusterInfo().enabled_hypervisors:
       to_copy.append(constants.VNC_PASSWORD_FILE)
     for fname in to_copy:
       result = rpc.call_upload_file([node], fname)
@@ -1852,6 +1858,7 @@ class LUQueryClusterInfo(NoHooksLU):
       "master": self.cfg.GetMasterNode(),
       "architecture": (platform.architecture()[0], platform.machine()),
       "hypervisor_type": self.cfg.GetHypervisorType(),
+      "enabled_hypervisors": self.cfg.GetClusterInfo().enabled_hypervisors,
       }

     return result

@@ -2047,7 +2054,8 @@ def _SafeShutdownInstanceDisks(instance, cfg):
   _ShutdownInstanceDisks.

   """
-  ins_l = rpc.call_instance_list([instance.primary_node])
+  ins_l = rpc.call_instance_list([instance.primary_node],
+                                 [instance.hypervisor])
   ins_l = ins_l[instance.primary_node]
   if not type(ins_l) is list:
     raise errors.OpExecError("Can't contact node '%s'" %
@@ -2081,7 +2089,7 @@ def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
   return result


-def _CheckNodeFreeMemory(cfg, node, reason, requested):
+def _CheckNodeFreeMemory(cfg, node, reason, requested, hypervisor):
   """Checks if a node has enough free memory.

   This function check if a given node has the needed amount of free

@@ -2089,14 +2097,21 @@ def _CheckNodeFreeMemory(cfg, node, reason, requested):
   information from the node, this function raise an OpPrereqError
   exception.

-  Args:
+  @type cfg: C{config.ConfigWriter}