Commit e431074f authored by René Nussbaumer

Fix instance list for instances running multiple times

If for some reason (e.g. a failed migration) an instance is running
on multiple nodes, the output can become inconsistent. To detect that error
and make the output consistent between runs, we make the call on the secondary
too and check whether the instance is running there. If so, we report the
instance as ERROR_wrongnode.
Signed-off-by: René Nussbaumer <rn@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
parent b9e478fe
......@@ -3873,10 +3873,12 @@ class _InstanceQuery(_QueryBase):
instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
instance_list = [all_info[name] for name in instance_names]
nodes = frozenset([inst.primary_node for inst in instance_list])
nodes = frozenset(itertools.chain(*(inst.all_nodes
for inst in instance_list)))
hv_list = list(set([inst.hypervisor for inst in instance_list]))
bad_nodes = []
offline_nodes = []
wrongnode_inst = set()
# Gather data as requested
if query.IQ_LIVE in self.requested_data:
......@@ -3891,7 +3893,11 @@ class _InstanceQuery(_QueryBase):
if result.fail_msg:
bad_nodes.append(name)
elif result.payload:
live_data.update(result.payload)
for inst in result.payload:
if all_info[inst].primary_node == name:
live_data.update(result.payload)
else:
wrongnode_inst.add(inst)
# else no instance is alive
else:
live_data = {}
......@@ -3907,7 +3913,7 @@ class _InstanceQuery(_QueryBase):
return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
disk_usage, offline_nodes, bad_nodes,
live_data)
live_data, wrongnode_inst)
class LUQuery(NoHooksLU):
......
......@@ -664,7 +664,7 @@ class InstanceQueryData:
"""
def __init__(self, instances, cluster, disk_usage, offline_nodes, bad_nodes,
live_data):
live_data, wrongnode_inst):
"""Initializes this class.
@param instances: List of instance objects
......@@ -677,6 +677,8 @@ class InstanceQueryData:
@param bad_nodes: List of faulty nodes
@type live_data: dict; instance name as key
@param live_data: Per-instance live data
@type wrongnode_inst: set
@param wrongnode_inst: Set of instances running on wrong node(s)
"""
assert len(set(bad_nodes) & set(offline_nodes)) == len(offline_nodes), \
......@@ -690,6 +692,7 @@ class InstanceQueryData:
self.offline_nodes = offline_nodes
self.bad_nodes = bad_nodes
self.live_data = live_data
self.wrongnode_inst = wrongnode_inst
# Used for individual rows
self.inst_hvparams = None
......@@ -774,7 +777,9 @@ def _GetInstStatus(ctx, inst):
return "ERROR_nodedown"
if bool(ctx.live_data.get(inst.name)):
if inst.admin_up:
if inst.name in ctx.wrongnode_inst:
return "ERROR_wrongnode"
elif inst.admin_up:
return "running"
else:
return "ERROR_up"
......
......@@ -672,12 +672,13 @@ oper\_state
"running", "stopped", "(node down)"
status
combined form of admin\_state and oper\_stat; this can be one of:
ERROR\_nodedown if the node of the instance is down, ERROR\_down if
the instance should run but is down, ERROR\_up if the instance
should be stopped but is actually running, ADMIN\_down if the
instance has been stopped (and is stopped) and running if the
instance is set to be running (and is running)
combined form of ``admin_state`` and ``oper_state``; this can be one of:
``ERROR_nodedown`` if the node of the instance is down, ``ERROR_down`` if
the instance should run but is down, ``ERROR_up`` if the instance should be
stopped but is actually running, ``ERROR_wrongnode`` if the instance is
running but not on the primary, ``ADMIN_down`` if the instance has been
stopped (and is stopped) and ``running`` if the instance is set to be
running (and is running)
oper\_ram
the actual memory usage of the instance as seen by the hypervisor
......
......@@ -539,7 +539,7 @@ class TestInstanceQuery(unittest.TestCase):
nics=[objects.NIC(ip="192.0.2.99", nicparams={})]),
]
iqd = query.InstanceQueryData(instances, cluster, None, [], [], {})
iqd = query.InstanceQueryData(instances, cluster, None, [], [], {}, set())
self.assertEqual(q.Query(iqd),
[[(constants.RS_NORMAL, "inst1"),
(constants.RS_NORMAL, 128),
......@@ -694,9 +694,11 @@ class TestInstanceQuery(unittest.TestCase):
"memory": 768,
},
}
wrongnode_inst = set("inst2")
iqd = query.InstanceQueryData(instances, cluster, disk_usage,
offline_nodes, bad_nodes, live_data)
offline_nodes, bad_nodes, live_data,
wrongnode_inst)
result = q.Query(iqd)
self.assertEqual(len(result), len(instances))
self.assert_(compat.all(len(row) == len(selected)
......@@ -718,7 +720,9 @@ class TestInstanceQuery(unittest.TestCase):
elif inst.primary_node in bad_nodes:
exp_status = "ERROR_nodedown"
elif inst.name in live_data:
if inst.admin_up:
if inst.name in wrongnode_inst:
exp_status = "ERROR_wrongnode"
elif inst.admin_up:
exp_status = "running"
else:
exp_status = "ERROR_up"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment