From f64247416d5c17ee3512194f6659bf8716459578 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Sun, 19 Jul 2009 04:12:11 +0200
Subject: [PATCH] job queue: fix interrupted job processing

If a job with more than one opcodes is being processed, and the master
daemon crashes between two opcodes, we have the first N opcodes marked
successful, and the rest marked as queued. This means that the overall
jbo status is queued, and thus on master daemon restart it will be
resent for completion.

However, the RunTask() function in jqueue.py doesn't deal with
partially-completed jobs. This patch makes it simply skip such opcodes.

An alternative option would be to not mark partially-completed jobs as
QUEUED but instead RUNNING, which would result in aborting of the job at
restart time.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
---
 lib/jqueue.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lib/jqueue.py b/lib/jqueue.py
index 3f533323b..c13e30352 100644
--- a/lib/jqueue.py
+++ b/lib/jqueue.py
@@ -362,6 +362,15 @@ class _JobQueueWorker(workerpool.BaseWorker):
         count = len(job.ops)
         for idx, op in enumerate(job.ops):
           op_summary = op.input.Summary()
+          if op.status == constants.OP_STATUS_SUCCESS:
+            # this is a job that was partially completed before master
+            # daemon shutdown, so it can be expected that some opcodes
+            # are already completed successfully (if any did error
+            # out, then the whole job should have been aborted and not
+            # resubmitted for processing)
+            logging.info("Op %s/%s: opcode %s already processed, skipping",
+                         idx + 1, count, op_summary)
+            continue
           try:
             logging.info("Op %s/%s: Starting opcode %s", idx + 1, count,
                          op_summary)
-- 
GitLab