Commit 84fcfa69 authored by Christos Stavrakakis's avatar Christos Stavrakakis
Browse files

cyclades: Workaround for race condition in Ganeti

Workaround for race where OP_INSTANCE_REMOVE starts executing on Ganeti
before OP_INSTANCE_CREATE. If this is the case, OP_INSTANCE_REMOVE will
not wait for locks, instead it will fail because the instance has not
yet been created. Dispatcher will receive the failed OP_INSTANCE_REMOVE
job and will query Ganeti to see if the instance exists. However, if the
OP_INSTANCE_CREATE has not yet started, the query will return 404, and
dispatcher will delete the instance from DB and release its IP
addresses. Following instance creations will fail because the IPv4
address will be already used in Ganeti.

This commit is a workaround for this issue until OP_INSTANCE_REMOVE
supports the 'depends' attribute. Until then, the API will raise 409 if
the job has not yet completed and the instance has not been added to
Ganeti.
parent 9efc056e
......@@ -51,7 +51,8 @@ from synnefo.api.actions import server_actions
from synnefo.db.models import (VirtualMachine, VirtualMachineMetadata,
NetworkInterface)
from synnefo.logic.backend import (create_instance, delete_instance,
process_op_status)
process_op_status, job_is_still_running,
vm_exists_in_backend)
from synnefo.logic.utils import get_rsapi_state
from synnefo.logic.backend_allocator import BackendAllocator
from synnefo import quotas
......@@ -403,6 +404,7 @@ def do_create_server(userid, name, password, flavor, image, metadata={},
jobID = create_instance(vm, nic, flavor, image)
# At this point the job is enqueued in the Ganeti backend
vm.backendopcode = "OP_INSTANCE_CREATE"
vm.backendjobid = jobID
vm.save()
transaction.commit()
......@@ -482,6 +484,14 @@ def delete_server(request, server_id):
log.info('delete_server %s', server_id)
vm = util.get_vm(server_id, request.user_uniq, for_update=True,
non_suspended=True)
# XXX: Workaround for race where OP_INSTANCE_REMOVE starts executing on
# Ganeti before OP_INSTANCE_CREATE. This will be fixed when
# OP_INSTANCE_REMOVE supports the 'depends' request attribute.
if (vm.backendopcode == "OP_INSTANCE_CREATE" and
vm.backendjobstatus != "success"):
if job_is_still_running(vm) and not vm_exists_in_backend(vm):
raise faults.BuildInProgress("Server is being build")
start_action(vm, 'DESTROY')
delete_instance(vm)
return HttpResponse(status=204)
......
......@@ -41,7 +41,7 @@ from synnefo.logic import utils
from synnefo import quotas
from synnefo.api.util import release_resource
from synnefo.util.mac2eui64 import mac2eui64
from synnefo.logic.rapi import GanetiApiError
from synnefo.logic.rapi import GanetiApiError, JOB_STATUS_FINALIZED
from logging import getLogger
log = getLogger(__name__)
......@@ -522,6 +522,15 @@ def network_exists_in_backend(backend_network):
return False
def job_is_still_running(vm):
    """Report whether the backend job recorded on ``vm`` is not yet finalized.

    The status of job ``vm.backendjobid`` is fetched through a pooled RAPI
    client and compared against ``JOB_STATUS_FINALIZED``.

    Returns ``True`` when the reported status is not a finalized one, and
    ``False`` when it is finalized or when the status lookup raises
    ``GanetiApiError``.
    """
    with pooled_rapi_client(vm) as client:
        try:
            status = client.GetJobStatus(vm.backendjobid)["status"]
        except GanetiApiError:
            # The job could not be queried; report it as not running.
            return False
        return status not in JOB_STATUS_FINALIZED
def create_network(network, backend, connect=True):
"""Create a network in a Ganeti backend"""
log.debug("Creating network %s in backend %s", network, backend)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment