Commit e69d05fd authored by Iustin Pop

Move the hypervisor attribute to the instances

This (big) patch moves the hypervisor type from the cluster to the
instance level; the cluster attribute remains as the default hypervisor,
and will be renamed accordingly in a later patch. The cluster also gains
the 'enabled_hypervisors' attribute, and instances can be created with
any of the enabled ones (no provision yet for changing that attribute).
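
As a rough sketch of the new split (simplified stand-in classes, not the
real lib/objects.py definitions; the check mirrors the LUCreateInstance
hunk further down):

  # Minimal sketch, assuming stand-in classes; attribute names follow the diff.
  class Cluster(object):
    def __init__(self, hypervisor_type, enabled_hypervisors):
      # old cluster-wide attribute, kept as the default for new instances
      self.hypervisor_type = hypervisor_type
      # new attribute: the hypervisors instances are allowed to use
      self.enabled_hypervisors = enabled_hypervisors

  class Instance(object):
    def __init__(self, name, hypervisor):
      self.name = name
      self.hypervisor = hypervisor  # new per-instance attribute

  def CheckInstanceHypervisor(cluster, hv_name):
    # falls back to the cluster default, then validates against the
    # enabled list, like the new LUCreateInstance prereq check
    if hv_name is None:
      hv_name = cluster.hypervisor_type
    if hv_name not in cluster.enabled_hypervisors:
      raise ValueError("Selected hypervisor (%s) not enabled in the"
                       " cluster (%s)" %
                       (hv_name, ",".join(cluster.enabled_hypervisors)))
    return hv_name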

The many changes in the rpc/backend layer are due to the fact that all
the backend code used to read the hypervisor from the local copy of the
config; now we have to send it (either in the instance object, or as a
separate parameter) for each function.
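
A hedged sketch of the new calling convention (the dispatcher and the
data below are illustrative stand-ins, not the real rpc/ganeti-noded
code):

  # The master now ships the hypervisor list in the rpc params, and the
  # backend queries exactly those hypervisors instead of its local config.
  _FAKE_HV_INSTANCES = {
    "xen-3.0": ["inst1.example.com"],
    "fake": ["inst2.example.com"],
  }

  def perspective_instance_list(params):
    # node daemon side: unpack the hypervisor list from the rpc params
    return get_instance_list(params[0])

  def get_instance_list(hypervisor_list):
    # backend side, mirroring the new GetInstanceList in the diff below
    results = []
    for hv_name in hypervisor_list:
      # stand-in for hypervisor.GetHypervisor(hv_name).ListInstances()
      results.extend(_FAKE_HV_INSTANCES.get(hv_name, []))
    return results

  # e.g. perspective_instance_list([["xen-3.0", "fake"]]) returns the
  # running instances of both hypervisors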

By default, the node list will show the node free/total memory for the
default hypervisor; a new flag should be added later to select another
hypervisor. The instance list has a new field, hypervisor, that shows
the instance's hypervisor. Cluster verify runs for all enabled
hypervisor types.

The new FIXMEs are related to IAllocator, since now the node
total/free/used memory counts are wrong (we can't reliably compute the
free memory).
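
To illustrate with made-up numbers why free memory is now ambiguous:

  # Made-up numbers: each enabled hypervisor reports node memory from its
  # own point of view, accounting only for its own instances.
  node_reports = {
    "xen-3.0": {"memory_total": 4096, "memory_free": 1024},
    "fake": {"memory_total": 4096, "memory_free": 4096},
  }
  # Both views describe the same physical RAM, so neither sum() nor min()
  # of the per-hypervisor 'memory_free' values is correct in general --
  # hence the FIXMEs until IAllocator learns about multiple hypervisors.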

Reviewed-by: imsnah
parent 6884c0ca
@@ -387,14 +387,14 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler):
"""Query information about all instances.
"""
return backend.GetAllInstancesInfo()
return backend.GetAllInstancesInfo(params[0])
@staticmethod
def perspective_instance_list(params):
"""Query the list of running instances.
"""
return backend.GetInstanceList()
return backend.GetInstanceList(params[0])
# node --------------------------
@@ -411,8 +411,8 @@ class NodeDaemonRequestHandler(http.HTTPRequestHandler):
"""Query node information.
"""
vgname = params[0]
return backend.GetNodeInfo(vgname)
vgname, hypervisor_type = params
return backend.GetNodeInfo(vgname, hypervisor_type)
@staticmethod
def perspective_node_add(params):
......
@@ -231,18 +231,21 @@ def LeaveCluster():
raise errors.QuitGanetiException(False, 'Shutdown scheduled')
def GetNodeInfo(vgname):
def GetNodeInfo(vgname, hypervisor_type):
"""Gives back a hash with different informations about the node.
Returns:
{ 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
'memory_free' : xxx, 'memory_total' : xxx }
where
vg_size is the size of the configured volume group in MiB
vg_free is the free size of the volume group in MiB
memory_dom0 is the memory allocated for domain0 in MiB
memory_free is the currently available (free) ram in MiB
memory_total is the total number of ram in MiB
@type vgname: C{string}
@param vgname: the name of the volume group to ask for disk space information
@type hypervisor_type: C{str}
@param hypervisor_type: the name of the hypervisor to ask for
memory information
@rtype: C{dict}
@return: dictionary with the following keys:
- vg_size is the size of the configured volume group in MiB
- vg_free is the free size of the volume group in MiB
- memory_dom0 is the memory allocated for domain0 in MiB
- memory_free is the currently available (free) ram in MiB
- memory_total is the total amount of memory in MiB
"""
outputarray = {}
@@ -250,7 +253,7 @@ def GetNodeInfo(vgname):
outputarray['vg_size'] = vginfo['vg_size']
outputarray['vg_free'] = vginfo['vg_free']
hyper = hypervisor.GetHypervisor(_GetConfig())
hyper = hypervisor.GetHypervisor(hypervisor_type)
hyp_info = hyper.GetNodeInfo()
if hyp_info is not None:
outputarray.update(hyp_info)
@@ -267,25 +270,36 @@ def GetNodeInfo(vgname):
def VerifyNode(what, cluster_name):
"""Verify the status of the local node.
Args:
what - a dictionary of things to check:
'filelist' : list of files for which to compute checksums
'nodelist' : list of nodes we should check communication with
'hypervisor': run the hypervisor-specific verify
Based on the input L{what} parameter, various checks are done on the
local node.
If the I{filelist} key is present, this list of
files is checksummed and the file/checksum pairs are returned.
If the I{nodelist} key is present, we check that we have
connectivity via ssh with the target nodes (and check the hostname
report).
If the I{node-net-test} key is present, we check that we have
connectivity to the given nodes via both primary IP and, if
applicable, secondary IPs.
@type what: C{dict}
@param what: a dictionary of things to check:
- filelist: list of files for which to compute checksums
- nodelist: list of nodes we should check ssh communication with
- node-net-test: list of nodes we should check node daemon port
connectivity with
- hypervisor: list with hypervisors to run the verify for
The nodelist is traversed, with the following checks being made
for each node:
- known_hosts key correct
- correct resolving of node name (target node returns its own hostname
by ssh-execution of 'hostname', result compared against name in list)
"""
result = {}
if 'hypervisor' in what:
result['hypervisor'] = hypervisor.GetHypervisor(_GetConfig()).Verify()
result['hypervisor'] = my_dict = {}
for hv_name in what['hypervisor']:
my_dict[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()
if 'filelist' in what:
result['filelist'] = utils.FingerprintFiles(what['filelist'])
@@ -415,41 +429,49 @@ def BridgesExist(bridges_list):
return True
def GetInstanceList():
def GetInstanceList(hypervisor_list):
"""Provides a list of instances.
Returns:
A list of all running instances on the current node
- instance1.example.com
- instance2.example.com
@type hypervisor_list: list
@param hypervisor_list: the list of hypervisors to query for instance data
@rtype: list
@return: a list of all running instances on the current node
- instance1.example.com
- instance2.example.com
"""
try:
names = hypervisor.GetHypervisor(_GetConfig()).ListInstances()
except errors.HypervisorError, err:
logging.exception("Error enumerating instances")
raise
results = []
for hname in hypervisor_list:
try:
names = hypervisor.GetHypervisor(hname).ListInstances()
results.extend(names)
except errors.HypervisorError, err:
logging.exception("Error enumerating instances for hypervisor %s", hname)
# FIXME: should we somehow not propagate this to the master?
raise
return names
return results
def GetInstanceInfo(instance):
def GetInstanceInfo(instance, hname):
"""Gives back the informations about an instance as a dictionary.
Args:
instance: name of the instance (ex. instance1.example.com)
@type instance: string
@param instance: the instance name
@type hname: string
@param hname: the hypervisor type of the instance
Returns:
{ 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
where
memory: memory size of instance (int)
state: xen state of instance (string)
time: cpu time of instance (float)
@rtype: dict
@return: dictionary with the following keys:
- memory: memory size of instance (int)
- state: xen state of instance (string)
- time: cpu time of instance (float)
"""
output = {}
iinfo = hypervisor.GetHypervisor(_GetConfig()).GetInstanceInfo(instance)
iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
if iinfo is not None:
output['memory'] = iinfo[2]
output['state'] = iinfo[4]
@@ -458,34 +480,38 @@ def GetInstanceInfo(instance):
return output
def GetAllInstancesInfo():
def GetAllInstancesInfo(hypervisor_list):
"""Gather data about all instances.
This is the equivalent of `GetInstanceInfo()`, except that it
computes data for all instances at once, thus being faster if one
needs data about more than one instance.
Returns: a dictionary of dictionaries, keys being the instance name,
and with values:
{ 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
where
memory: memory size of instance (int)
state: xen state of instance (string)
time: cpu time of instance (float)
vcpus: the number of cpus
@type hypervisor_list: list
@param hypervisor_list: list of hypervisors to query for instance data
@rtype: dict of dicts
@return: dictionary of instance: data, with data having the following keys:
- memory: memory size of instance (int)
- state: xen state of instance (string)
- time: cpu time of instance (float)
- vcpus: the number of vcpus
"""
output = {}
iinfo = hypervisor.GetHypervisor(_GetConfig()).GetAllInstancesInfo()
if iinfo:
for name, inst_id, memory, vcpus, state, times in iinfo:
output[name] = {
'memory': memory,
'vcpus': vcpus,
'state': state,
'time': times,
}
for hname in hypervisor_list:
iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
if iinfo:
for name, inst_id, memory, vcpus, state, times in iinfo:
if name in output:
raise errors.HypervisorError("Instance %s is running more than once" % name)
output[name] = {
'memory': memory,
'vcpus': vcpus,
'state': state,
'time': times,
}
return output
@@ -499,7 +525,6 @@ def AddOSToInstance(instance, os_disk, swap_disk):
swap_disk: the instance-visible name of the swap device
"""
cfg = _GetConfig()
inst_os = OSFromDisk(instance.os)
create_script = inst_os.create_script
@@ -535,7 +560,7 @@ def AddOSToInstance(instance, os_disk, swap_disk):
inst_os.path, create_script, instance.name,
real_os_dev.dev_path, real_swap_dev.dev_path,
logfile)
env = {'HYPERVISOR': cfg.GetHypervisorType()}
env = {'HYPERVISOR': instance.hypervisor}
result = utils.RunCmd(command, env=env)
if result.failed:
@@ -666,17 +691,19 @@ def _GatherBlockDevs(instance):
def StartInstance(instance, extra_args):
"""Start an instance.
Args:
instance - name of instance to start.
@type instance: instance object
@param instance: the instance object
@rtype: boolean
@return: whether the startup was successful or not
"""
running_instances = GetInstanceList()
running_instances = GetInstanceList([instance.hypervisor])
if instance.name in running_instances:
return True
block_devices = _GatherBlockDevs(instance)
hyper = hypervisor.GetHypervisor(_GetConfig())
hyper = hypervisor.GetHypervisor(instance.hypervisor)
try:
hyper.StartInstance(instance, block_devices, extra_args)
@@ -690,16 +717,19 @@ def StartInstance(instance, extra_args):
def ShutdownInstance(instance):
"""Shut an instance down.
Args:
instance - name of instance to shutdown.
@type instance: instance object
@param instance: the instance object
@rtype: boolean
@return: whether the shutdown was successful or not
"""
running_instances = GetInstanceList()
hv_name = instance.hypervisor
running_instances = GetInstanceList([hv_name])
if instance.name not in running_instances:
return True
hyper = hypervisor.GetHypervisor(_GetConfig())
hyper = hypervisor.GetHypervisor(hv_name)
try:
hyper.StopInstance(instance)
except errors.HypervisorError, err:
@@ -711,7 +741,7 @@ def ShutdownInstance(instance):
time.sleep(1)
for dummy in range(11):
if instance.name not in GetInstanceList():
if instance.name not in GetInstanceList([hv_name]):
break
time.sleep(10)
else:
@@ -725,7 +755,7 @@ def ShutdownInstance(instance):
return False
time.sleep(1)
if instance.name in GetInstanceList():
if instance.name in GetInstanceList([hv_name]):
logging.error("could not shutdown instance '%s' even by destroy",
instance.name)
return False
@@ -741,13 +771,13 @@ def RebootInstance(instance, reboot_type, extra_args):
reboot_type - how to reboot [soft,hard,full]
"""
running_instances = GetInstanceList()
running_instances = GetInstanceList([instance.hypervisor])
if instance.name not in running_instances:
logging.error("Cannot reboot instance that is not running")
return False
hyper = hypervisor.GetHypervisor(_GetConfig())
hyper = hypervisor.GetHypervisor(instance.hypervisor)
if reboot_type == constants.INSTANCE_REBOOT_SOFT:
try:
hyper.RebootInstance(instance)
@@ -764,7 +794,6 @@ def RebootInstance(instance, reboot_type, extra_args):
else:
raise errors.ParameterError("reboot_type invalid")
return True
@@ -784,7 +813,7 @@ def MigrateInstance(instance, target, live):
- msg is a string with details in case of failure
"""
hyper = hypervisor.GetHypervisor(_GetConfig())
hyper = hypervisor.GetHypervisor(instance.hypervisor)
try:
hyper.MigrateInstance(instance.name, target, live)
@@ -1464,7 +1493,6 @@ def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image,
False in case of error, True otherwise.
"""
cfg = _GetConfig()
inst_os = OSFromDisk(instance.os)
import_script = inst_os.import_script
@@ -1507,7 +1535,7 @@ def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image,
logfile)
command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
env = {'HYPERVISOR': cfg.GetHypervisorType()}
env = {'HYPERVISOR': instance.hypervisor}
result = utils.RunCmd(command, env=env)
......
@@ -586,8 +586,11 @@ class LUVerifyCluster(LogicalUnit):
(node, node_result['node-net-test'][node]))
hyp_result = node_result.get('hypervisor', None)
if hyp_result is not None:
feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
if isinstance(hyp_result, dict):
for hv_name, hv_result in hyp_result.iteritems():
if hv_result is not None:
feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" %
(hv_name, hv_result))
return bad
def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
@@ -721,6 +724,7 @@ class LUVerifyCluster(LogicalUnit):
feedback_fn(" - ERROR: %s" % msg)
vg_name = self.cfg.GetVGName()
hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
nodelist = utils.NiceSort(self.cfg.GetNodeList())
nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
@@ -739,19 +743,20 @@ class LUVerifyCluster(LogicalUnit):
feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
all_instanceinfo = rpc.call_instance_list(nodelist)
all_instanceinfo = rpc.call_instance_list(nodelist, hypervisors)
all_vglist = rpc.call_vg_list(nodelist)
node_verify_param = {
'filelist': file_names,
'nodelist': nodelist,
'hypervisor': None,
'hypervisor': hypervisors,
'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
for node in nodeinfo]
}
all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param,
self.cfg.GetClusterName())
all_rversion = rpc.call_version(nodelist)
all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName(),
self.cfg.GetHypervisorType())
for node in nodelist:
feedback_fn("* Verifying node %s" % node)
@@ -1470,7 +1475,8 @@ class LUQueryNodes(NoHooksLU):
if self.dynamic_fields.intersection(self.op.output_fields):
live_data = {}
node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName(),
self.cfg.GetHypervisorType())
for name in nodenames:
nodeinfo = node_data.get(name, None)
if nodeinfo:
@@ -1808,7 +1814,7 @@ class LUAddNode(LogicalUnit):
(fname, to_node))
to_copy = []
if self.cfg.GetHypervisorType() == constants.HT_XEN_HVM31:
if constants.HT_XEN_HVM31 in self.cfg.GetClusterInfo().enabled_hypervisors:
to_copy.append(constants.VNC_PASSWORD_FILE)
for fname in to_copy:
result = rpc.call_upload_file([node], fname)
@@ -1852,6 +1858,7 @@ class LUQueryClusterInfo(NoHooksLU):
"master": self.cfg.GetMasterNode(),
"architecture": (platform.architecture()[0], platform.machine()),
"hypervisor_type": self.cfg.GetHypervisorType(),
"enabled_hypervisors": self.cfg.GetClusterInfo().enabled_hypervisors,
}
return result
@@ -2047,7 +2054,8 @@ def _SafeShutdownInstanceDisks(instance, cfg):
_ShutdownInstanceDisks.
"""
ins_l = rpc.call_instance_list([instance.primary_node])
ins_l = rpc.call_instance_list([instance.primary_node],
[instance.hypervisor])
ins_l = ins_l[instance.primary_node]
if not type(ins_l) is list:
raise errors.OpExecError("Can't contact node '%s'" %
@@ -2081,7 +2089,7 @@ def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
return result
def _CheckNodeFreeMemory(cfg, node, reason, requested):
def _CheckNodeFreeMemory(cfg, node, reason, requested, hypervisor):
"""Checks if a node has enough free memory.
This function checks if a given node has the needed amount of free
@@ -2089,14 +2097,21 @@ def _CheckNodeFreeMemory(cfg, node, reason, requested):
information from the node, this function raises an OpPrereqError
exception.
Args:
- cfg: a ConfigWriter instance
- node: the node name
- reason: string to use in the error message
- requested: the amount of memory in MiB
@type cfg: C{config.ConfigWriter}
@param cfg: the ConfigWriter instance from which we get configuration data
@type node: C{str}
@param node: the node to check
@type reason: C{str}
@param reason: string to use in the error message
@type requested: C{int}
@param requested: the amount of memory in MiB to check for
@type hypervisor: C{str}
@param hypervisor: the hypervisor to ask for memory stats
@raise errors.OpPrereqError: if the node doesn't have enough memory, or
we cannot check the node
"""
nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
nodeinfo = rpc.call_node_info([node], cfg.GetVGName(), hypervisor)
if not nodeinfo or not isinstance(nodeinfo, dict):
raise errors.OpPrereqError("Could not contact node %s for resource"
" information" % (node,))
@@ -2158,7 +2173,7 @@ class LUStartupInstance(LogicalUnit):
_CheckNodeFreeMemory(self.cfg, instance.primary_node,
"starting instance %s" % instance.name,
instance.memory)
instance.memory, instance.hypervisor)
def Exec(self, feedback_fn):
"""Start the instance.
@@ -2357,7 +2372,8 @@ class LUReinstallInstance(LogicalUnit):
if instance.status != "down":
raise errors.OpPrereqError("Instance '%s' is marked to be up" %
self.op.instance_name)
remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
remote_info = rpc.call_instance_info(instance.primary_node, instance.name,
instance.hypervisor)
if remote_info:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
@@ -2434,7 +2450,8 @@ class LURenameInstance(LogicalUnit):
if instance.status != "down":
raise errors.OpPrereqError("Instance '%s' is marked to be up" %
self.op.instance_name)
remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
remote_info = rpc.call_instance_info(instance.primary_node, instance.name,
instance.hypervisor)
if remote_info:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
@@ -2590,7 +2607,7 @@ class LUQueryInstances(NoHooksLU):
"hvm_boot_order", "hvm_acpi", "hvm_pae",
"hvm_cdrom_image_path", "hvm_nic_type",
"hvm_disk_type", "vnc_bind_address",
"serial_no",
"serial_no", "hypervisor",
])
_CheckOutputFields(static=self.static_fields,
dynamic=self.dynamic_fields,
@@ -2642,11 +2659,12 @@ class LUQueryInstances(NoHooksLU):
# begin data gathering
nodes = frozenset([inst.primary_node for inst in instance_list])
hv_list = list(set([inst.hypervisor for inst in instance_list]))
bad_nodes = []
if self.dynamic_fields.intersection(self.op.output_fields):
live_data = {}
node_data = rpc.call_all_instances_info(nodes)
node_data = rpc.call_all_instances_info(nodes, hv_list)
for name in nodes:
result = node_data[name]
if result:
@@ -2734,6 +2752,8 @@ class LUQueryInstances(NoHooksLU):
val = "default"
else:
val = "-"
elif field == "hypervisor":
val = instance.hypervisor
else:
raise errors.ParameterError(field)
iout.append(val)
@@ -2795,7 +2815,8 @@ class LUFailoverInstance(LogicalUnit):
target_node = secondary_nodes[0]
# check memory requirements on the secondary node
_CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" %
instance.name, instance.memory)
instance.name, instance.memory,
instance.hypervisor)
# check bridge existance
brlist = [nic.bridge for nic in instance.nics]
@@ -3150,7 +3171,7 @@ class LUCreateInstance(LogicalUnit):
for attr in ["kernel_path", "initrd_path", "pnode", "snode",
"iallocator", "hvm_boot_order", "hvm_acpi", "hvm_pae",
"hvm_cdrom_image_path", "hvm_nic_type", "hvm_disk_type",
"vnc_bind_address"]:
"vnc_bind_address", "hypervisor"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
@@ -3327,6 +3348,19 @@ class LUCreateInstance(LogicalUnit):
raise errors.OpPrereqError("Cluster does not support lvm-based"
" instances")
# cheap checks (from the config only)
if self.op.hypervisor is None:
self.op.hypervisor = self.cfg.GetHypervisorType()
enabled_hvs = self.cfg.GetClusterInfo().enabled_hypervisors
if self.op.hypervisor not in enabled_hvs:
raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
" cluster (%s)" % (self.op.hypervisor,
",".join(enabled_hvs)))
# costly checks (from nodes)
if self.op.mode == constants.INSTANCE_IMPORT:
src_node = self.op.src_node
src_path = self.op.src_path
@@ -3401,7 +3435,8 @@ class LUCreateInstance(LogicalUnit):
# Check lv size requirements
if req_size is not None:
nodenames = [pnode.name] + self.secondaries
nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName(),
self.op.hypervisor)
for node in nodenames:
info = nodeinfo.get(node, None)
if not info:
@@ -3435,7 +3470,7 @@ class LUCreateInstance(LogicalUnit):
if self.op.start:
_CheckNodeFreeMemory(self.cfg, self.pnode.name,
"creating instance %s" % self.op.instance_name,
self.op.mem_size)
self.op.mem_size, self.op.hypervisor)
# hvm_cdrom_image_path verification
if self.op.hvm_cdrom_image_path is not None:
@@ -3458,7 +3493,7 @@ class LUCreateInstance(LogicalUnit):
self.op.vnc_bind_address)
# Xen HVM device type checks
if self.cfg.GetHypervisorType() == constants.HT_XEN_HVM31:
if self.op.hypervisor == constants.HT_XEN_HVM31:
if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
raise errors.OpPrereqError("Invalid NIC type %s specified for Xen HVM"
" hypervisor" % self.op.hvm_nic_type)
@@ -3487,7 +3522,7 @@ class LUCreateInstance(LogicalUnit):
if self.inst_ip is not None:
nic.ip = self.inst_ip
ht_kind = self.cfg.GetHypervisorType()
ht_kind = self.op.hypervisor
if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
else:
@@ -3533,6 +3568,7 @@ class LUCreateInstance(LogicalUnit):
vnc_bind_address=self.op.vnc_bind_address,
hvm_nic_type=self.op.hvm_nic_type,
hvm_disk_type=self.op.hvm_disk_type,
hypervisor=self.op.hypervisor,
)
feedback_fn("* creating instance disks...")
@@ -3632,7 +3668,8 @@ class LUConnectConsole(NoHooksLU):
instance = self.instance
node = instance.primary_node
node_insts = rpc.call_instance_list([node])[node]
node_insts = rpc.call_instance_list([node],
[instance.hypervisor])[node]