Newer
Older
self.assertEqual(queue.GetNextUpdate(), (job, False))
self.assertRaises(IndexError, queue.GetNextUpdate)
opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart)
for remaining in reversed(range(len(job.ops))):
self.assertRaises(IndexError, queue.GetNextUpdate)
result = jqueue._JobProcessor(queue, opexec, job)()
self.assertEqual(queue.GetNextUpdate(), (job, True))
self.assertRaises(IndexError, queue.GetNextUpdate)
if remaining == 0:
# Last opcode
self.assert_(result)
break
self.assertFalse(result)
self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED)
self.assertRaises(IndexError, queue.GetNextUpdate)
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS)
self.assertEqual(job.GetInfo(["opresult"]),
[[op.input.result for op in job.ops]])
logmsgcount = sum(len(m) for m in messages.values())
self._CheckLogMessages(job, logmsgcount)
# Serialize and restore (simulates program restart)
newjob = jqueue._QueuedJob.Restore(queue, job.Serialize())
self._CheckLogMessages(newjob, logmsgcount)
# Check each message
prevserial = -1
for idx, oplog in enumerate(job.GetInfo(["oplog"])[0]):
for (serial, timestamp, log_type, msg) in oplog:
(exptype, expmsg) = messages.get(idx).pop(0)
if exptype:
self.assertEqual(log_type, exptype)
else:
self.assertEqual(log_type, constants.ELOG_MESSAGE)
self.assertEqual(expmsg, msg)
self.assert_(serial > prevserial)
prevserial = serial
def _CheckLogMessages(self, job, count):
# Check serial
self.assertEqual(job.log_serial, count)
# No filter
self.assertEqual(job.GetLogEntries(None),
[entry for entries in job.GetInfo(["oplog"])[0] if entries
for entry in entries])
# Filter with serial
assert count > 3
self.assert_(job.GetLogEntries(3))
self.assertEqual(job.GetLogEntries(3),
[entry for entries in job.GetInfo(["oplog"])[0] if entries
for entry in entries][3:])
# No log message after highest serial
self.assertFalse(job.GetLogEntries(count))
self.assertFalse(job.GetLogEntries(count + 3))
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
class _FakeTimeoutStrategy:
def __init__(self, timeouts):
self.timeouts = timeouts
self.attempts = 0
self.last_timeout = None
def NextAttempt(self):
self.attempts += 1
if self.timeouts:
timeout = self.timeouts.pop(0)
else:
timeout = None
self.last_timeout = timeout
return timeout
class TestJobProcessorTimeouts(unittest.TestCase, _JobProcessorTestUtils):
  """Tests the job processor's handling of lock acquisition timeouts.

  The callbacks given to the fake opcode executor and the timeout strategy
  factory are methods of this class. They share state through instance
  attributes to script, per opcode, how many lock acquisitions time out
  before one is allowed to succeed.

  """
  def setUp(self):
    """Creates a fresh fake queue and resets all bookkeeping attributes."""
    self.queue = _FakeQueueForProc()
    self.job = None
    # Index of the opcode currently processed and the counter producing it
    self.curop = None
    self.opcounter = None
    # Most recently created fake timeout strategy
    self.timeout_strategy = None
    # Number of acquisitions which still have to time out
    self.retries = 0
    # Values remembered from earlier steps to detect changes
    self.prev_tsop = None
    self.prev_prio = None
    self.prev_status = None
    # Priority of the last acquisition attempt and whether it was granted
    self.lock_acq_prio = None
    self.gave_lock = None
    self.done_lock_before_blocking = False

  def _BeforeStart(self, timeout, priority):
    """Callback deciding whether the simulated lock acquisition succeeds.

    Records the attempt in C{gave_lock} and C{lock_acq_prio}. While
    C{retries} is positive the attempt is turned into a timeout.

    @param timeout: Timeout of this attempt; C{None} or a number
    @param priority: Priority of this attempt
    @raise mcpu.LockAcquireTimeout: If this attempt has to time out

    """
    job = self.job

    # If status has changed, job must've been written
    if self.prev_status != self.job.ops[self.curop].status:
      self.assertEqual(self.queue.GetNextUpdate(), (job, True))
    self.assertRaises(IndexError, self.queue.GetNextUpdate)

    self.assertFalse(self.queue.IsAcquired())
    self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITLOCK)

    ts = self.timeout_strategy

    self.assert_(timeout is None or isinstance(timeout, (int, float)))
    self.assertEqual(timeout, ts.last_timeout)
    self.assertEqual(priority, job.ops[self.curop].priority)

    self.gave_lock = True
    self.lock_acq_prio = priority

    if (self.curop == 3 and
        job.ops[self.curop].priority == constants.OP_PRIO_HIGHEST + 3):
      # Give locks before running into blocking acquire
      assert self.retries == 7
      self.retries = 0
      self.done_lock_before_blocking = True
      return

    if self.retries > 0:
      self.assert_(timeout is not None)
      self.retries -= 1
      self.gave_lock = False
      raise mcpu.LockAcquireTimeout()

    if job.ops[self.curop].priority == constants.OP_PRIO_HIGHEST:
      assert self.retries == 0, "Didn't exhaust all retries at highest priority"
      assert not ts.timeouts
      self.assert_(timeout is None)

  def _AfterStart(self, op, cbs):
    """Callback checking the job's state once an opcode is running.

    @param op: Unused here
    @param cbs: Unused here

    """
    job = self.job

    # Setting to "running" requires an update
    self.assertEqual(self.queue.GetNextUpdate(), (job, True))
    self.assertRaises(IndexError, self.queue.GetNextUpdate)

    self.assertFalse(self.queue.IsAcquired())
    self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING)

    # Job is running, cancelling shouldn't be possible
    (success, _) = job.Cancel()
    self.assertFalse(success)

  def _NextOpcode(self):
    """Advances to the next opcode and remembers its priority and status."""
    self.curop = self.opcounter.next()
    self.prev_prio = self.job.ops[self.curop].priority
    self.prev_status = self.job.ops[self.curop].status

  def _NewTimeoutStrategy(self):
    """Factory creating the fake timeout strategy for the current opcode.

    Besides building the strategy, this sets C{retries} to the number of
    acquisitions which have to time out for the current opcode.

    @return: A new L{_FakeTimeoutStrategy}

    """
    job = self.job

    self.assertEqual(self.retries, 0)

    if self.prev_tsop == self.curop:
      # Still on the same opcode, priority must've been increased
      self.assertEqual(self.prev_prio, job.ops[self.curop].priority + 1)

    if self.curop == 1:
      # Normal retry
      timeouts = range(10, 31, 10)
      self.retries = len(timeouts) - 1

    elif self.curop == 2:
      # Let this run into a blocking acquire
      timeouts = range(11, 61, 12)
      self.retries = len(timeouts)

    elif self.curop == 3:
      # Wait for priority to increase, but give lock before blocking acquire
      timeouts = range(12, 100, 14)
      self.retries = len(timeouts)

      self.assertFalse(self.done_lock_before_blocking)

    elif self.curop == 4:
      self.assert_(self.done_lock_before_blocking)

      # Timeouts, but no need to retry
      timeouts = range(10, 31, 10)
      self.retries = 0

    elif self.curop == 5:
      # Normal retry
      timeouts = range(19, 100, 11)
      self.retries = len(timeouts)

    else:
      timeouts = []
      self.retries = 0

    assert len(job.ops) == 10
    assert self.retries <= len(timeouts)

    ts = _FakeTimeoutStrategy(timeouts)

    self.timeout_strategy = ts
    self.prev_tsop = self.curop
    self.prev_prio = job.ops[self.curop].priority

    return ts

  def testTimeout(self):
    """Runs a ten-opcode job while the callbacks simulate lock timeouts.

    The processor is invoked repeatedly until it reports the job as
    finished. After every invocation the job's state is checked against
    whether the last acquisition attempt was granted.

    """
    ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False)
           for i in range(10)]

    # Create job
    job_id = 15801
    job = self._CreateJob(self.queue, job_id, ops)
    self.job = job

    self.opcounter = itertools.count(0)

    opexec = _FakeExecOpCodeForProc(self.queue, self._BeforeStart,
                                    self._AfterStart)
    tsf = self._NewTimeoutStrategy

    self.assertFalse(self.done_lock_before_blocking)

    while True:
      proc = jqueue._JobProcessor(self.queue, opexec, job,
                                  _timeout_strategy_factory=tsf)

      self.assertRaises(IndexError, self.queue.GetNextUpdate)

      if self.curop is not None:
        self.prev_status = self.job.ops[self.curop].status

      self.lock_acq_prio = None

      result = proc(_nextop_fn=self._NextOpcode)
      assert self.curop is not None

      if result or self.gave_lock:
        # Got lock and/or job is done, result must've been written
        self.assertFalse(job.cur_opctx)
        self.assertEqual(self.queue.GetNextUpdate(), (job, True))
        self.assertRaises(IndexError, self.queue.GetNextUpdate)
        self.assertEqual(self.lock_acq_prio, job.ops[self.curop].priority)
        self.assert_(job.ops[self.curop].exec_timestamp)

      if result:
        self.assertFalse(job.cur_opctx)
        break

      self.assertFalse(result)

      if self.curop == 0:
        self.assertEqual(job.ops[self.curop].start_timestamp,
                         job.start_timestamp)

      # The two outcomes are mutually exclusive: a finished opcode leaves the
      # job queued without an opcode context, a timed out acquisition leaves
      # it waiting for locks with the context kept
      if self.gave_lock:
        # Opcode finished, but job not yet done
        self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED)
      else:
        # Did not get locks
        self.assert_(job.cur_opctx)
        self.assertEqual(job.cur_opctx._timeout_strategy._fn,
                         self.timeout_strategy.NextAttempt)
        self.assertFalse(job.ops[self.curop].exec_timestamp)
        self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITLOCK)

        # If priority has changed since acquiring locks, the job must've been
        # updated
        if self.lock_acq_prio != job.ops[self.curop].priority:
          self.assertEqual(self.queue.GetNextUpdate(), (job, True))

      self.assertRaises(IndexError, self.queue.GetNextUpdate)

      self.assert_(job.start_timestamp)
      self.assertFalse(job.end_timestamp)

    self.assertEqual(self.curop, len(job.ops) - 1)
    self.assertEqual(self.job, job)
    self.assertEqual(self.opcounter.next(), len(job.ops))
    self.assert_(self.done_lock_before_blocking)

    self.assertRaises(IndexError, self.queue.GetNextUpdate)
    self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS)
    self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS])
    self.assertEqual(job.GetInfo(["opresult"]),
                     [[op.input.result for op in job.ops]])
    self.assertEqual(job.GetInfo(["opstatus"]),
                     [len(job.ops) * [constants.OP_STATUS_SUCCESS]])
    self.assert_(compat.all(op.start_timestamp and op.end_timestamp
                            for op in job.ops))

    # Finished jobs can't be processed any further
    self.assertRaises(errors.ProgrammerError,
                      jqueue._JobProcessor(self.queue, opexec, job))
# Only start the test program when this file is executed as a script, not
# when it is imported as a module
if __name__ == "__main__":
  testutils.GanetiTestProgram()