From f64247416d5c17ee3512194f6659bf8716459578 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Sun, 19 Jul 2009 04:12:11 +0200 Subject: [PATCH] job queue: fix interrupted job processing If a job with more than one opcode is being processed, and the master daemon crashes between two opcodes, we have the first N opcodes marked successful, and the rest marked as queued. This means that the overall job status is queued, and thus on master daemon restart it will be resent for completion. However, the RunTask() function in jqueue.py doesn't deal with partially-completed jobs. This patch makes it simply skip such opcodes. An alternative option would be to not mark partially-completed jobs as QUEUED but instead RUNNING, which would result in aborting of the job at restart time. Signed-off-by: Iustin Pop <iustin@google.com> Reviewed-by: Guido Trotter <ultrotter@google.com> --- lib/jqueue.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/jqueue.py b/lib/jqueue.py index 3f533323b..c13e30352 100644 --- a/lib/jqueue.py +++ b/lib/jqueue.py @@ -362,6 +362,15 @@ class _JobQueueWorker(workerpool.BaseWorker): count = len(job.ops) for idx, op in enumerate(job.ops): op_summary = op.input.Summary() + if op.status == constants.OP_STATUS_SUCCESS: + # this is a job that was partially completed before master + # daemon shutdown, so it can be expected that some opcodes + # are already completed successfully (if any did error + # out, then the whole job should have been aborted and not + # resubmitted for processing) + logging.info("Op %s/%s: opcode %s already processed, skipping", + idx + 1, count, op_summary) + continue try: logging.info("Op %s/%s: Starting opcode %s", idx + 1, count, op_summary) -- GitLab