Commit 33fe9eb8 authored by Christos Stavrakakis's avatar Christos Stavrakakis
Browse files

cyclades: Refactor reconciliation of pools

Move reconciliation of IP pools from 'snf-manage reconcile-networks'
to 'snf-manage reconcile-pools'. Also change the way reconciliation
works: Instead of reconciling the IP pools in DB with the IP pools in
Ganeti, the new reconciliation just checks that:
* there is no IPv4 address that is used by more than one NIC
* the only reserved values in the pools are the ones that are currently
  used by NICs
The reason why reconciliation of pools with Ganeti has been removed is
that there are too many race conditions, and that it may result in
strange inconsistencies. As far as external reservations are concerned,
the new reconciliation mechanism just checks that each IP that is
reserved in a Ganeti backend is also reserved in the Cyclades DB.
parent b7dae491
......@@ -763,10 +763,16 @@ class PoolTable(models.Model):
class BridgePoolTable(PoolTable):
    """Pool table holding the bridges available to networks."""
    manager = pools.BridgePool

    def __unicode__(self):
        # Keep the exact representation format used elsewhere in the file.
        template = u"<Bridge Pool id:%s>"
        return template % self.id
class MacPrefixPoolTable(PoolTable):
    """Pool table holding the MAC prefixes available to networks."""
    manager = pools.MacPrefixPool

    def __unicode__(self):
        # Keep the exact representation format used elsewhere in the file.
        template = u"<MAC Prefix Pool id:%s>"
        return template % self.id
class IPPoolTable(PoolTable):
manager = pools.IPPool
......
......@@ -49,7 +49,6 @@ Network reconciliation can detect and fix the following cases:
- Missing Ganeti networks
- Ganeti networks that are not connected to all Ganeti nodegroups
- Networks that have unsynced state
- Networks that have unsynced IP pools
- Orphan networks in the Ganeti backend
"""
......@@ -58,13 +57,9 @@ Network reconciliation can detect and fix the following cases:
make_option('--fix-all', action='store_true',
dest='fix', default=False,
help='Fix all issues.'),
make_option('--conflicting-ips', action='store_true',
dest='conflicting_ips', default=False,
help='Detect conflicting ips')
)
def handle(self, **options):
conflicting_ips = options['conflicting_ips']
verbosity = int(options["verbosity"])
fix = options["fix"]
......@@ -84,8 +79,5 @@ Network reconciliation can detect and fix the following cases:
logger.setLevel(logging.WARNING)
logger.addHandler(log_handler)
reconciler = reconciliation.NetworkReconciler(
logger=logger,
fix=fix,
conflicting_ips=conflicting_ips)
reconciler = reconciliation.NetworkReconciler(logger=logger, fix=fix)
reconciler.reconcile_networks()
# Copyright 2011-2012 GRNET S.A. All rights reserved.
# Copyright 2011-2013 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
......@@ -27,122 +27,53 @@
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of GRNET S.A.
#
import logging
from optparse import make_option
from django.core.management.base import BaseCommand
from synnefo.logic import reconciliation
from synnefo.db.models import (Network, BackendNetwork,
BridgePoolTable, MacPrefixPoolTable)
from synnefo.db.pools import EmptyPool
HELP_MSG = """\
Check the consistency of pools of resources and fix them if possible.
class Command(BaseCommand):
help = 'Check consistency of unique resources.'
def handle(self, **options):
self.detect_bridges()
self.detect_mac_prefixes()
self.detect_unique_mac_prefixes()
def detect_bridges(self):
write = self.stdout.write
write("---------------------------------------\n")
write("Checking consistency of the Bridge Pool\n")
write("---------------------------------------\n")
try:
bridge_pool = BridgePoolTable.get_pool()
except EmptyPool:
write("No Bridge Pool\n")
return
bridges = []
for i in xrange(0, bridge_pool.size()):
used_bridge = not (bridge_pool.is_available(i, index=True) or
bridge_pool.is_reserved(i, index=True))
if used_bridge:
bridges.append(bridge_pool.index_to_value(i))
write("Used bridges from Pool: %d\n" % len(bridges))
network_bridges = Network.objects.filter(flavor='PHYSICAL_VLAN',
deleted=False)\
.values_list('link', flat=True)
write("Used bridges from Networks: %d\n" % len(network_bridges))
This command checks that values that come from pools are not used more than
once. Also, it checks that there are no stale reserved values in a pool by checking
that the reserved values are only the ones that are currently used.
set_network_bridges = set(network_bridges)
if len(network_bridges) > len(set_network_bridges):
write("Found duplicated bridges:\n")
duplicates = list(network_bridges)
for bridge in set_network_bridges:
duplicates.remove(bridge)
for bridge in set(duplicates):
write("Duplicated bridge: %s. " % bridge)
write("Used by the following Networks:\n")
nets = Network.objects.filter(deleted=False, link=bridge)
write(" " + "\n ".join([str(net.id) for net in nets]) + "\n")
The pools for the following resources are checked:
* Pool of bridges
* Pool of MAC prefixes
* Pool of IPv4 addresses for each network"""
def detect_mac_prefixes(self):
# Report the MAC prefixes that are marked as used in the
# MacPrefixPoolTable, cross-check them against the MAC prefixes
# actually assigned to non-deleted MAC_FILTERED networks, and print
# any prefix that is used by more than one network.
write = self.stdout.write
write("---------------------------------------\n")
write("Checking consistency of the MAC Prefix Pool\n")
write("---------------------------------------\n")
try:
macp_pool = MacPrefixPoolTable.get_pool()
except EmptyPool:
# No MAC-prefix pool exists, so there is nothing to check.
write("No mac-prefix pool\n")
return
macs = []
# A value is "used" when it is neither available nor reserved.
# NOTE(review): iteration starts at index 1, not 0 — presumably index
# 0 holds the base prefix; confirm against the pool implementation.
for i in xrange(1, macp_pool.size()):
used_macp = not (macp_pool.is_available(i, index=True) or
macp_pool.is_reserved(i, index=True))
if used_macp:
value = macp_pool.index_to_value(i)
macs.append(value)
write("Used MAC prefixes from Pool: %d\n" % len(macs))
network_mac_prefixes = \
Network.objects.filter(deleted=False, flavor='MAC_FILTERED')\
.values_list('mac_prefix', flat=True)
write("Used MAC prefixes from Networks: %d\n" %
len(network_mac_prefixes))
set_network_mac_prefixes = set(network_mac_prefixes)
if len(network_mac_prefixes) > len(set_network_mac_prefixes):
# Duplicates exist: list each duplicated prefix once, together
# with the networks that use it.
write("Found duplicated mac_prefixes:\n")
duplicates = list(network_mac_prefixes)
# Removing one occurrence of every distinct prefix leaves only
# the surplus occurrences, i.e. the duplicated values.
for mac_prefix in set_network_mac_prefixes:
duplicates.remove(mac_prefix)
for mac_prefix in set(duplicates):
write("Duplicated mac_prefix: %s. " % mac_prefix)
write("Used by the following Networks:\n")
nets = Network.objects.filter(deleted=False,
mac_prefix=mac_prefix)
write(" " + "\n ".join([str(net.id) for net in nets]) + "\n")
def detect_unique_mac_prefixes(self):
write = self.stdout.write
class Command(BaseCommand):
help = HELP_MSG
write("---------------------------------------\n")
write("Checking uniqueness of BackendNetwork prefixes.\n")
write("---------------------------------------\n")
option_list = BaseCommand.option_list + (
make_option("--fix", action="store_true",
dest="fix", default=False,
help='Fix all issues.'),
)
back_networks = BackendNetwork.objects
mac_prefixes = back_networks.filter(deleted=False,
network__flavor='MAC_FILTERED')\
.values_list('mac_prefix', flat=True)
set_mac_prefixes = set(mac_prefixes)
if len(mac_prefixes) > len(set_mac_prefixes):
write("Found duplicated mac_prefixes:\n")
duplicates = list(mac_prefixes)
for mac_prefix in set_mac_prefixes:
duplicates.remove(mac_prefix)
for mac_prefix in set(duplicates):
write("Duplicated mac_prefix: %s. " % mac_prefix)
write("Used by the following BackendNetworks:\n")
nets = BackendNetwork.objects.filter(deleted=False,
mac_prefix=mac_prefix)
write(" " + "\n ".join([str(net.id) for net in nets]) + "\n")
def handle(self, **options):
# Entry point of the management command: configure a dedicated logger
# according to --verbosity and run the pool reconciliation, optionally
# fixing the detected inconsistencies (--fix).
verbosity = int(options["verbosity"])
fix = options["fix"]
logger = logging.getLogger("reconcile-pools")
# Do not propagate to the root logger; this command formats its own
# output on a dedicated handler.
logger.propagate = 0
formatter = logging.Formatter("%(message)s")
log_handler = logging.StreamHandler()
log_handler.setFormatter(formatter)
if verbosity == 2:
# Highest verbosity: prepend timestamps and enable debug messages.
formatter = logging.Formatter("%(asctime)s: %(message)s")
log_handler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)
elif verbosity == 1:
logger.setLevel(logging.INFO)
else:
logger.setLevel(logging.WARNING)
logger.addHandler(log_handler)
reconciler = reconciliation.PoolReconciler(logger=logger, fix=fix)
reconciler.reconcile()
......@@ -66,8 +66,9 @@ from datetime import datetime
from django.db import transaction
from synnefo.db.models import (Backend, VirtualMachine, Flavor,
pooled_rapi_client, Network,
BackendNetwork)
from synnefo.db.pools import IPPool
BackendNetwork, BridgePoolTable,
MacPrefixPoolTable)
from synnefo.db import pools
from synnefo.logic import utils, backend as backend_mod
logger = logging.getLogger()
......@@ -457,95 +458,102 @@ def disks_from_instance(i):
class NetworkReconciler(object):
def __init__(self, logger, fix=False, conflicting_ips=False):
def __init__(self, logger, fix=False):
self.log = logger
self.conflicting_ips = conflicting_ips
self.fix = fix
@transaction.commit_on_success
def reconcile_networks(self):
# Get models from DB
backends = Backend.objects.exclude(offline=True)
networks = Network.objects.filter(deleted=False)
self.backends = Backend.objects.exclude(offline=True)
self.networks = Network.objects.filter(deleted=False)
self.event_time = datetime.now()
# Get info from all ganeti backends
ganeti_networks = {}
ganeti_hanging_networks = {}
for b in backends:
self.ganeti_networks = {}
self.ganeti_hanging_networks = {}
for b in self.backends:
g_nets = get_networks_from_ganeti(b)
ganeti_networks[b] = g_nets
self.ganeti_networks[b] = g_nets
g_hanging_nets = hanging_networks(b, g_nets)
ganeti_hanging_networks[b] = g_hanging_nets
# Perform reconciliation for each network
for network in networks:
ip_available_maps = []
ip_reserved_maps = []
for bend in backends:
bnet = get_backend_network(network, bend)
gnet = ganeti_networks[bend].get(network.id)
if not bnet:
if network.floating_ip_pool:
# Network is a floating IP pool and does not exist in
# backend. We need to create it
bnet = self.reconcile_parted_network(network, bend)
elif not gnet:
# Network exists neither in Ganeti nor in the DB.
continue
else:
# Network exists in Ganeti and not in DB.
if network.action != "DESTROY" and not network.public:
bnet = self.reconcile_parted_network(network, bend)
else:
continue
if not gnet:
# Network does not exist in Ganeti. If the network action
# is DESTROY, we have to mark as deleted in DB, else we
# have to create it in Ganeti.
if network.action == "DESTROY":
if bnet.operstate != "DELETED":
self.reconcile_stale_network(bnet)
else:
self.reconcile_missing_network(network, bend)
# Skip rest reconciliation!
continue
try:
hanging_groups = ganeti_hanging_networks[bend][network.id]
except KeyError:
# Network is connected to all nodegroups
hanging_groups = []
self.ganeti_hanging_networks[b] = g_hanging_nets
if hanging_groups:
# CASE-3: Ganeti networks not connected to all nodegroups
self.reconcile_hanging_groups(network, bend,
hanging_groups)
continue
self._reconcile_orphan_networks()
if bnet.operstate != 'ACTIVE':
# CASE-4: Unsynced network state. At this point the network
# exists and is connected to all nodes so is must be
# active!
self.reconcile_unsynced_network(network, bend, bnet)
for network in self.networks:
self._reconcile_network(network)
# Get ganeti IP Pools
available_map, reserved_map = get_network_pool(gnet)
ip_available_maps.append(available_map)
ip_reserved_maps.append(reserved_map)
@transaction.commit_on_success
def _reconcile_network(self, network):
"""Reconcile a network with corresponging Ganeti networks.
Reconcile a Network and the associated BackendNetworks with the
corresponding Ganeti networks in all Ganeti backends.
"""
network_ip_pool = network.get_pool() # X-Lock on IP Pool
for bend in self.backends:
bnet = get_backend_network(network, bend)
gnet = self.ganeti_networks[bend].get(network.id)
if not bnet:
if network.floating_ip_pool:
# Network is a floating IP pool and does not exist in
# backend. We need to create it
bnet = self.reconcile_parted_network(network, bend)
elif not gnet:
# Network exists neither in Ganeti nor in the DB.
continue
else:
# Network exists in Ganeti and not in DB.
if network.action != "DESTROY" and not network.public:
bnet = self.reconcile_parted_network(network, bend)
else:
continue
if ip_available_maps or ip_reserved_maps:
# CASE-5: Unsynced IP Pools
self.reconcile_ip_pools(network, ip_available_maps,
ip_reserved_maps)
if not gnet:
# Network does not exist in Ganeti. If the network action
# is DESTROY, we have to mark as deleted in DB, else we
# have to create it in Ganeti.
if network.action == "DESTROY":
if bnet.operstate != "DELETED":
self.reconcile_stale_network(bnet)
else:
self.reconcile_missing_network(network, bend)
# Skip rest reconciliation!
continue
if self.conflicting_ips:
self.detect_conflicting_ips()
try:
hanging_groups = self.ganeti_hanging_networks[bend][network.id]
except KeyError:
# Network is connected to all nodegroups
hanging_groups = []
if hanging_groups:
# CASE-3: Ganeti networks not connected to all nodegroups
self.reconcile_hanging_groups(network, bend,
hanging_groups)
continue
# CASE-6: Orphan networks
self.reconcile_orphan_networks(networks, ganeti_networks)
if bnet.operstate != 'ACTIVE':
# CASE-4: Unsynced network state. At this point the network
# exists and is connected to all nodes so is must be
# active!
self.reconcile_unsynced_network(network, bend, bnet)
# Check that externally reserved IPs of the network in Ganeti are
# also externally reserved to the IP pool
externally_reserved = gnet['external_reservations']
for ip in externally_reserved.split(","):
ip = ip.strip()
if not network_ip_pool.is_reserved(ip):
msg = ("D: IP '%s' is reserved for network '%s' in"
" backend '%s' but not in DB.")
self.log.info(msg, ip, network, bend)
if self.fix:
network_ip_pool.reserve(ip, external=True)
network_ip_pool.save()
self.log.info("F: Reserved IP '%s'", ip)
def reconcile_parted_network(self, network, backend):
self.log.info("D: Missing DB entry for network %s in backend %s",
......@@ -595,50 +603,9 @@ class NetworkReconciler(object):
"success",
"Reconciliation simulated eventd")
def reconcile_ip_pools(self, network, available_maps, reserved_maps):
available_map = reduce(lambda x, y: x & y, available_maps)
reserved_map = reduce(lambda x, y: x & y, reserved_maps)
pool = network.get_pool()
# Temporary release unused floating IPs
temp_pool = network.get_pool()
used_ips = network.nics.values_list("ipv4", flat=True)
unused_static_ips = network.floating_ips.exclude(ipv4__in=used_ips)
map(lambda ip: temp_pool.put(ip.ipv4), unused_static_ips)
if temp_pool.available != available_map:
self.log.info("D: Unsynced available map of network %s:\n"
"\tDB: %r\n\tGB: %r", network,
temp_pool.available.to01(),
available_map.to01())
if self.fix:
pool.available = available_map
# Release unsued floating IPs, as they are not included in the
# available map
map(lambda ip: pool.reserve(ip.ipv4), unused_static_ips)
pool.save()
if pool.reserved != reserved_map:
self.log.info("D: Unsynced reserved map of network %s:\n"
"\tDB: %r\n\tGB: %r", network, pool.reserved.to01(),
reserved_map.to01())
if self.fix:
pool.reserved = reserved_map
pool.save()
def detect_conflicting_ips(self, network):
"""Detect NIC's that have the same IP in the same network."""
machine_ips = network.nics.all().values_list('ipv4', 'machine')
ips = map(lambda x: x[0], machine_ips)
distinct_ips = set(ips)
if len(distinct_ips) < len(ips):
for i in distinct_ips:
ips.remove(i)
for i in ips:
machines = [utils.id_to_instance_name(x[1])
for x in machine_ips if x[0] == i]
self.log.info('D: Conflicting IP:%s Machines: %s',
i, ', '.join(machines))
def reconcile_orphan_networks(self, db_networks, ganeti_networks):
def _reconcile_orphan_networks(self):
db_networks = self.networks
ganeti_networks = self.ganeti_networks
# Detect Orphan Networks in Ganeti
db_network_ids = set([net.id for net in db_networks])
for back_end, ganeti_networks in ganeti_networks.items():
......@@ -669,39 +636,107 @@ def get_backend_network(network, backend):
return None
def get_network_pool(gnet):
"""Return available and reserved IP maps.
class PoolReconciler(object):
def __init__(self, logger, fix=False):
# logger: logger instance used to report detected inconsistencies.
# fix: when True, detected inconsistencies are also repaired.
self.log = logger
self.fix = fix
def reconcile(self):
# Run every pool check: the bridge pool, the MAC-prefix pool and the
# IPv4 address pool of each non-deleted network.
self.reconcile_bridges()
self.reconcile_mac_prefixes()
for network in Network.objects.filter(deleted=False):
self.reconcile_ip_pool(network)
@transaction.commit_on_success
def reconcile_bridges(self):
# Verify that no bridge (the 'link' field) is shared between
# PHYSICAL_VLAN networks, and that the bridge pool's bookkeeping
# agrees with the bridges that are actually in use.
networks = Network.objects.filter(deleted=False,
flavor="PHYSICAL_VLAN")
check_unique_values(objects=networks, field='link', logger=self.log)
try:
pool = BridgePoolTable.get_pool()
except pools.EmptyPool:
# No bridge pool configured; only the uniqueness check applies.
self.log.info("There is no available pool for bridges.")
return
used_bridges = set(networks.values_list('link', flat=True))
check_pool_consistent(pool=pool, pool_class=pools.BridgePool,
used_values=used_bridges, fix=self.fix,
logger=self.log)
Extract the available and reserved IP map from the info return from Ganeti
for a network.
@transaction.commit_on_success
def reconcile_mac_prefixes(self):
# Verify that no MAC prefix is shared between MAC_FILTERED networks,
# and that the MAC-prefix pool's bookkeeping agrees with the prefixes
# that are actually in use.
networks = Network.objects.filter(deleted=False, flavor="MAC_FILTERED")
check_unique_values(objects=networks, field='mac_prefix',
logger=self.log)
try:
pool = MacPrefixPoolTable.get_pool()
except pools.EmptyPool:
# No MAC-prefix pool configured; only the uniqueness check applies.
self.log.info("There is no available pool for MAC prefixes.")
return
used_mac_prefixes = set(networks.values_list('mac_prefix', flat=True))
check_pool_consistent(pool=pool, pool_class=pools.MacPrefixPool,
used_values=used_mac_prefixes, fix=self.fix,
logger=self.log)
"""
converter = IPPool(Foo(gnet['network']))
a_map = bitarray_from_map(gnet['map'])
a_map.invert()
reserved = gnet['external_reservations']
r_map = a_map.copy()
r_map.setall(True)
if reserved:
for address in reserved.split(','):
index = converter.value_to_index(address.strip())
a_map[index] = True
r_map[index] = False
return a_map, r_map
def bitarray_from_map(bitmap):
    """Translate a Ganeti network bitmap string into a bitarray.

    Every 'X' character becomes a 1 bit and every '.' becomes a 0 bit.
    """
    binary_map = bitmap.replace("X", "1")
    binary_map = binary_map.replace(".", "0")
    return bitarray.bitarray(binary_map)
class Foo():
def __init__(self, subnet):
self.available_map = ''
self.reserved_map = ''
self.size = 0
self.network = Foo.Foo1(subnet)
class Foo1():
def __init__(self, subnet):
self.subnet = subnet
self.gateway = None
@transaction.commit_on_success
def reconcile_ip_pool(self, network):
# Verify the IPv4 pool of 'network': no address may be used by more
# than one NIC or floating IP, and the pool's reservations must match
# the addresses actually in use.
# Check that all NICs have unique IPv4 address
nics = network.nics.filter(ipv4__isnull=False)
check_unique_values(objects=nics, field='ipv4', logger=self.log)
# Check that all Floating IPs have unique IPv4 address
floating_ips = network.floating_ips.filter(deleted=False)
check_unique_values(objects=floating_ips, field='ipv4',
logger=self.log)
# First get(lock) the IP pool of the network to prevent new NICs
# from being created.
network_ip_pool = network.get_pool()
used_ips = set(list(nics.values_list("ipv4", flat=True)) +
list(floating_ips.values_list("ipv4", flat=True)))
check_pool_consistent(pool=network_ip_pool,
pool_class=pools.IPPool,
used_values=used_ips,
fix=self.fix, logger=self.log)
def check_unique_values(objects, field, logger):
    """Check that the value of 'field' is unique across 'objects'.

    Arguments:
        objects: a Django queryset-like object supporting 'values_list'
                 and 'filter'.
        field:   name of the field whose values must be unique.
        logger:  logger used to report duplicated values.

    Returns:
        True if every value is unique, False otherwise. For each
        duplicated value an error is logged together with the objects
        that use it.
    """
    used_values = list(objects.values_list(field, flat=True))
    if len(used_values) == len(set(used_values)):
        logger.debug("Values for field '%s' are unique.", field)
        return True
    # Detect duplicates in a single O(n) pass instead of calling
    # list.count() per element (O(n^2)), and report each duplicated
    # value only once instead of once per occurrence.
    seen = set()
    duplicate_values = set()
    for value in used_values:
        if value in seen:
            duplicate_values.add(value)
        else:
            seen.add(value)
    for value in duplicate_values:
        filter_args = {field: value}
        using_objects = objects.filter(**filter_args)
        msg = "Value '%s' is used as %s for more than one object: %s"
        logger.error(msg, value, field, ",".join(map(str, using_objects)))
    return False
def check_pool_consistent(pool, pool_class, used_values, fix, logger):
dummy_pool = create_empty_pool(pool, pool_class)
[dummy_pool.reserve(value) for value in used_values]
if dummy_pool.available != pool.available:
msg = "'%s' is not consistent!\nPool: %s\nUsed: %s"
pool_diff = dummy_pool.available ^ pool.available
for index in pool_diff.itersearch(bitarray.bitarray("1")):
value = pool.index_to_value(int(index))