Commit 9bdab621 authored by Michael Hanselmann
Browse files

jqueue: Remove lock status field



With the job queue changes for Ganeti 2.2, watched and queried jobs are
loaded directly from disk, rendering the in-memory “lock_status” field
useless. Writing it to disk would be possible, but has a huge cost at
runtime (when tested, processing 1'000 opcodes involved 4'000 additional
writes to job files, even with replication turned off).

Using an additional in-memory dictionary to just manage this field turned
out to be a complicated task due to the necessary locking.

The plan is to introduce a more generic lock debugging mechanism in the
near future. Hence the decision is to remove this field now instead of
spending a lot of time making it work again.
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
parent 1377433b
...@@ -167,13 +167,11 @@ class _QueuedJob(object): ...@@ -167,13 +167,11 @@ class _QueuedJob(object):
@ivar received_timestamp: the timestamp for when the job was received @ivar received_timestamp: the timestamp for when the job was received
@ivar start_timestmap: the timestamp for start of execution @ivar start_timestmap: the timestamp for start of execution
@ivar end_timestamp: the timestamp for end of execution @ivar end_timestamp: the timestamp for end of execution
@ivar lock_status: In-memory locking information for debugging
""" """
# pylint: disable-msg=W0212 # pylint: disable-msg=W0212
__slots__ = ["queue", "id", "ops", "log_serial", __slots__ = ["queue", "id", "ops", "log_serial",
"received_timestamp", "start_timestamp", "end_timestamp", "received_timestamp", "start_timestamp", "end_timestamp",
"lock_status", "change",
"__weakref__"] "__weakref__"]
def __init__(self, queue, job_id, ops): def __init__(self, queue, job_id, ops):
...@@ -199,9 +197,6 @@ class _QueuedJob(object): ...@@ -199,9 +197,6 @@ class _QueuedJob(object):
self.start_timestamp = None self.start_timestamp = None
self.end_timestamp = None self.end_timestamp = None
# In-memory attributes
self.lock_status = None
def __repr__(self): def __repr__(self):
status = ["%s.%s" % (self.__class__.__module__, self.__class__.__name__), status = ["%s.%s" % (self.__class__.__module__, self.__class__.__name__),
"id=%s" % self.id, "id=%s" % self.id,
...@@ -228,9 +223,6 @@ class _QueuedJob(object): ...@@ -228,9 +223,6 @@ class _QueuedJob(object):
obj.start_timestamp = state.get("start_timestamp", None) obj.start_timestamp = state.get("start_timestamp", None)
obj.end_timestamp = state.get("end_timestamp", None) obj.end_timestamp = state.get("end_timestamp", None)
# In-memory attributes
obj.lock_status = None
obj.ops = [] obj.ops = []
obj.log_serial = 0 obj.log_serial = 0
for op_state in state["ops"]: for op_state in state["ops"]:
...@@ -368,8 +360,6 @@ class _QueuedJob(object): ...@@ -368,8 +360,6 @@ class _QueuedJob(object):
row.append(self.start_timestamp) row.append(self.start_timestamp)
elif fname == "end_ts": elif fname == "end_ts":
row.append(self.end_timestamp) row.append(self.end_timestamp)
elif fname == "lock_status":
row.append(self.lock_status)
elif fname == "summary": elif fname == "summary":
row.append([op.input.Summary() for op in self.ops]) row.append([op.input.Summary() for op in self.ops])
else: else:
...@@ -439,16 +429,15 @@ class _OpExecCallbacks(mcpu.OpExecCbBase): ...@@ -439,16 +429,15 @@ class _OpExecCallbacks(mcpu.OpExecCbBase):
Processor.ExecOpCode) set to OP_STATUS_WAITLOCK. Processor.ExecOpCode) set to OP_STATUS_WAITLOCK.
""" """
assert self._op in self._job.ops
assert self._op.status in (constants.OP_STATUS_WAITLOCK, assert self._op.status in (constants.OP_STATUS_WAITLOCK,
constants.OP_STATUS_CANCELING) constants.OP_STATUS_CANCELING)
# All locks are acquired by now
self._job.lock_status = None
# Cancel here if we were asked to # Cancel here if we were asked to
self._CheckCancel() self._CheckCancel()
logging.debug("Opcode is now running") logging.debug("Opcode is now running")
self._op.status = constants.OP_STATUS_RUNNING self._op.status = constants.OP_STATUS_RUNNING
self._op.exec_timestamp = TimeStampNow() self._op.exec_timestamp = TimeStampNow()
...@@ -490,9 +479,6 @@ class _OpExecCallbacks(mcpu.OpExecCbBase): ...@@ -490,9 +479,6 @@ class _OpExecCallbacks(mcpu.OpExecCbBase):
assert self._op.status in (constants.OP_STATUS_WAITLOCK, assert self._op.status in (constants.OP_STATUS_WAITLOCK,
constants.OP_STATUS_CANCELING) constants.OP_STATUS_CANCELING)
# Not getting the queue lock because this is a single assignment
self._job.lock_status = msg
# Cancel here if we were asked to # Cancel here if we were asked to
self._CheckCancel() self._CheckCancel()
...@@ -755,7 +741,6 @@ class _JobQueueWorker(workerpool.BaseWorker): ...@@ -755,7 +741,6 @@ class _JobQueueWorker(workerpool.BaseWorker):
op.result = result op.result = result
op.end_timestamp = TimeStampNow() op.end_timestamp = TimeStampNow()
if idx == count - 1: if idx == count - 1:
job.lock_status = None
job.end_timestamp = TimeStampNow() job.end_timestamp = TimeStampNow()
# Consistency check # Consistency check
...@@ -797,7 +782,6 @@ class _JobQueueWorker(workerpool.BaseWorker): ...@@ -797,7 +782,6 @@ class _JobQueueWorker(workerpool.BaseWorker):
errors.GetEncodedError(i.result) errors.GetEncodedError(i.result)
for i in job.ops[idx:]) for i in job.ops[idx:])
finally: finally:
job.lock_status = None
job.end_timestamp = TimeStampNow() job.end_timestamp = TimeStampNow()
queue.UpdateJobUnlocked(job) queue.UpdateJobUnlocked(job)
finally: finally:
...@@ -809,7 +793,6 @@ class _JobQueueWorker(workerpool.BaseWorker): ...@@ -809,7 +793,6 @@ class _JobQueueWorker(workerpool.BaseWorker):
try: try:
job.MarkUnfinishedOps(constants.OP_STATUS_CANCELED, job.MarkUnfinishedOps(constants.OP_STATUS_CANCELED,
"Job canceled by request") "Job canceled by request")
job.lock_status = None
job.end_timestamp = TimeStampNow() job.end_timestamp = TimeStampNow()
queue.UpdateJobUnlocked(job) queue.UpdateJobUnlocked(job)
finally: finally:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment