Commit 4ab23c89 authored by Giorgos Korfiatis's avatar Giorgos Korfiatis Committed by Christos Stavrakakis
Browse files

cyclades: Enforce quota

Introduce command enforce-resources-cyclades, which checks for quota
violations and applies actions to enforce the limits. By default, it
operates only on these resources: cpu, ram, floating_ip.
parent c3cc2f3f
......@@ -970,6 +970,7 @@ backend-list List backends
backend-modify Modify a backend
backend-update-status Update backend statistics for instance allocation
backend-remove Remove a Ganeti backend
enforce-resources-cyclades Check and fix quota violations for Cyclades resources
server-create Create a new server
server-show Show server details
server-list List servers
......
......@@ -732,10 +732,10 @@ class IPAddress(models.Model):
% (self.address, self.network_id, self.subnet_id, ip_type)
def in_use(self):
if self.machine is None:
if self.nic is None or self.nic.machine is None:
return False
else:
return (not self.machine.deleted)
return (not self.nic.machine.deleted)
class Meta:
unique_together = ("network", "address")
......
# Copyright 2013 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
import time
from synnefo.db.models import VirtualMachine, IPAddress, NetworkInterface
from synnefo.logic import servers
from synnefo.logic import ips as logic_ips
from synnefo.logic import backend
MiB = 2 ** 20
GiB = 2 ** 30
def _partition_by(f, l, convert=None):
if convert is None:
convert = lambda x: x
d = {}
for x in l:
group = f(x)
group_l = d.get(group, [])
group_l.append(convert(x))
d[group] = group_l
return d
CHANGE = {
"cyclades.ram": lambda vm: vm.flavor.ram * MiB,
"cyclades.cpu": lambda vm: vm.flavor.cpu,
"cyclades.vm": lambda vm: 1,
"cyclades.total_ram": lambda vm: vm.flavor.ram * MiB,
"cyclades.total_cpu": lambda vm: vm.flavor.cpu,
"cyclades.disk": lambda vm: vm.flavor.disk * GiB,
"cyclades.floating_ip": lambda vm: 1,
}
def wait_server_job(server):
jobID = server.task_job_id
client = server.get_client()
status, error = backend.wait_for_job(client, jobID)
if status != "success":
raise ValueError(error)
VM_SORT_LEVEL = {
"ERROR": 4,
"BUILD": 3,
"STOPPED": 2,
"STARTED": 1,
"RESIZE": 1,
"DESTROYED": 0,
}
def sort_vms():
def f(vm):
level = VM_SORT_LEVEL[vm.operstate]
return (level, vm.id)
return f
def handle_stop_active(viol_id, resource, vms, diff, actions):
vm_actions = actions["vm"]
vms = [vm for vm in vms if vm.operstate in ["STARTED", "BUILD", "ERROR"]]
vms = sorted(vms, key=sort_vms(), reverse=True)
for vm in vms:
if diff < 1:
break
diff -= CHANGE[resource](vm)
if vm_actions.get(vm.id) is None:
action = "REMOVE" if vm.operstate == "ERROR" else "SHUTDOWN"
vm_actions[vm.id] = viol_id, vm.operstate, action
def handle_destroy(viol_id, resource, vms, diff, actions):
vm_actions = actions["vm"]
vms = sorted(vms, key=sort_vms(), reverse=True)
for vm in vms:
if diff < 1:
break
diff -= CHANGE[resource](vm)
vm_actions[vm.id] = viol_id, vm.operstate, "REMOVE"
def _state_after_action(vm, action):
if action == "REMOVE":
return "ERROR" # highest
if action == "SHUTDOWN":
return "STOPPED"
return vm.operstate # no action
def sort_ips(vm_actions):
def f(ip):
if not ip.in_use():
level = 5
else:
machine = ip.nic.machine
_, _, action = vm_actions.get(machine.id, (None, None, None))
level = VM_SORT_LEVEL[_state_after_action(machine, action)]
return (level, ip.id)
return f
def handle_floating_ip(viol_id, resource, ips, diff, actions):
vm_actions = actions.get("vm", {})
ip_actions = actions["floating_ip"]
ips = sorted(ips, key=sort_ips(vm_actions), reverse=True)
for ip in ips:
if diff < 1:
break
diff -= CHANGE[resource](ip)
state = "USED" if ip.in_use() else "FREE"
ip_actions[ip.id] = viol_id, state, "REMOVE"
def get_vms(users=None):
vms = VirtualMachine.objects.filter(deleted=False).\
select_related("flavor").order_by('-id')
if users is not None:
vms = vms.filter(userid__in=users)
return _partition_by(lambda vm: vm.userid, vms)
def get_floating_ips(users=None):
ips = IPAddress.objects.filter(deleted=False, floating_ip=True).\
select_related("nic__machine")
if users is not None:
ips = ips.filter(userid__in=users)
return _partition_by(lambda ip: ip.userid, ips)
def get_actual_resources(resource_type, users=None):
ACTUAL_RESOURCES = {
"vm": get_vms,
"floating_ip": get_floating_ips,
}
return ACTUAL_RESOURCES[resource_type](users=users)
VM_ACTION = {
"REMOVE": servers.destroy,
"SHUTDOWN": servers.stop,
}
def apply_to_vm(action, vm_id):
try:
vm = VirtualMachine.objects.select_for_update().get(id=vm_id)
VM_ACTION[action](vm)
return True
except BaseException:
return False
def perform_vm_actions(actions, fix=False):
log = []
for vm_id, (viol_id, state, vm_action) in actions.iteritems():
data = ("vm", vm_id, state, vm_action, viol_id)
if fix:
r = apply_to_vm(vm_action, vm_id)
data += ("DONE" if r else "FAILED",)
log.append(data)
return log
def wait_for_ip(ip_id):
for i in range(100):
ip = IPAddress.objects.get(id=ip_id)
if ip.nic_id is None:
objs = IPAddress.objects.select_for_update()
return objs.get(id=ip_id)
time.sleep(1)
raise ValueError(
"Floating_ip %s: Waiting for port delete timed out." % ip_id)
def remove_ip(ip_id):
try:
ip = IPAddress.objects.select_for_update().get(id=ip_id)
port_id = ip.nic_id
if port_id:
objs = NetworkInterface.objects.select_for_update()
port = objs.get(id=port_id)
servers.delete_port(port)
if port.machine:
wait_server_job(port.machine)
ip = wait_for_ip(ip_id)
logic_ips.delete_floating_ip(ip)
return True
except BaseException:
return False
def perform_floating_ip_actions(actions, fix=False):
log = []
for ip_id, (viol_id, state, ip_action) in actions.iteritems():
data = ("floating_ip", ip_id, state, ip_action, viol_id)
if ip_action == "REMOVE":
if fix:
r = remove_ip(ip_id)
data += ("DONE" if r else "FAILED",)
log.append(data)
return log
def perform_actions(actions, fix=False):
ACTION_HANDLING = [
("floating_ip", perform_floating_ip_actions),
("vm", perform_vm_actions),
]
logs = []
for resource_type, handler in ACTION_HANDLING:
t_actions = actions.get(resource_type, {})
log = handler(t_actions, fix=fix)
logs += log
return logs
# It is important to check resources in this order, especially
# floating_ip after vm resources.
RESOURCE_HANDLING = [
("cyclades.cpu", handle_stop_active, "vm"),
("cyclades.ram", handle_stop_active, "vm"),
("cyclades.total_cpu", handle_destroy, "vm"),
("cyclades.total_ram", handle_destroy, "vm"),
("cyclades.disk", handle_destroy, "vm"),
("cyclades.vm", handle_destroy, "vm"),
("cyclades.floating_ip", handle_floating_ip, "floating_ip"),
]
# Copyright 2013 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
import string
from optparse import make_option
from django.db import transaction
from synnefo.quotas import util
from synnefo.quotas import enforce
from synnefo.quotas import errors
from snf_django.management.commands import SynnefoCommand, CommandError
from snf_django.management.utils import pprint_table
DEFAULT_RESOURCES = ["cyclades.cpu",
"cyclades.ram",
"cyclades.floating_ip",
]
class Command(SynnefoCommand):
help = """Check and fix quota violations for Cyclades resources.
"""
option_list = SynnefoCommand.option_list + (
make_option("--users", dest="users",
help=("Enforce resources only for the specified list "
"of users, e.g uuid1,uuid2")),
make_option("--resources",
help="Specify resources to check, default: %s" %
",".join(DEFAULT_RESOURCES)),
make_option("--fix",
default=False,
action="store_true",
help="Fix violations"),
make_option("--force",
default=False,
action="store_true",
help=("Confirm actions that may permanently "
"remove a vm")),
)
def confirm(self):
self.stderr.write("Confirm? [y/N] ")
response = raw_input()
if string.lower(response) not in ['y', 'yes']:
self.stdout.write("Aborted.\n")
exit()
def get_handlers(self, resources):
def rem(v):
try:
resources.remove(v)
return True
except ValueError:
return False
if resources is None:
resources = list(DEFAULT_RESOURCES)
else:
resources = resources.split(",")
handlers = [h for h in enforce.RESOURCE_HANDLING if rem(h[0])]
if resources:
m = "No such resource '%s'" % resources[0]
raise CommandError(m)
return handlers
@transaction.commit_on_success
def handle(self, *args, **options):
write = self.stderr.write
fix = options["fix"]
force = options["force"]
users = options['users']
if users is not None:
users = users.split(',')
handlers = self.get_handlers(options["resources"])
try:
qh_holdings = util.get_qh_users_holdings(users)
except errors.AstakosClientException as e:
raise CommandError(e)
resources = set(h[0] for h in handlers)
dangerous = bool(resources.difference(DEFAULT_RESOURCES))
actions = {}
overlimit = []
viol_id = 0
for resource, handle_resource, resource_type in handlers:
if resource_type not in actions:
actions[resource_type] = {}
actual_resources = enforce.get_actual_resources(resource_type,
users)
for user, user_quota in qh_holdings.iteritems():
for source, source_quota in user_quota.iteritems():
try:
qh = util.transform_quotas(source_quota)
qh_value, qh_limit, qh_pending = qh[resource]
except KeyError:
write("Resource '%s' does not exist in Quotaholder"
" for user '%s' and source '%s'!\n" %
(resource, user, source))
continue
if qh_pending:
write("Pending commission for user '%s', source '%s', "
"resource '%s'. Skipping\n" %
(user, source, resource))
continue
diff = qh_value - qh_limit
if diff > 0:
viol_id += 1
overlimit.append((viol_id, user, source, resource,
qh_limit, qh_value))
relevant_resources = actual_resources[user]
handle_resource(viol_id, resource, relevant_resources,
diff, actions)
if not overlimit:
write("No violations.\n")
return
headers = ("#", "User", "Source", "Resource", "Limit", "Usage")
pprint_table(self.stderr, overlimit, headers,
options["output_format"], title="Violations")
if any(actions.values()):
write("\n")
if fix:
if dangerous and not force:
write("You are enforcing resources that may permanently "
"remove a vm.\n")
self.confirm()
write("Applying actions. Please wait...\n")
title = "Applied Actions" if fix else "Suggested Actions"
log = enforce.perform_actions(actions, fix=fix)
headers = ("Type", "ID", "State", "Action", "Violation")
if fix:
headers += ("Result",)
pprint_table(self.stderr, log, headers,
options["output_format"], title=title)
......@@ -103,6 +103,26 @@ def get_quotaholder_holdings(user=None):
return qh.service_get_quotas(user)
def get_qh_users_holdings(users=None):
qh = Quotaholder.get()
if users is None or len(users) != 1:
req = None
else:
req = users[0]
quotas = qh.service_get_quotas(req)
if users is None:
return quotas
qs = {}
for user in users:
try:
qs[user] = quotas[user]
except KeyError:
pass
return qs
def transform_quotas(quotas):
d = {}
for resource, counters in quotas.iteritems():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment