Commit c666e6aa authored by Petr Pudlak

When checking job death, check if its lock is the Luxi lock

If the job's lock is the Luxi lock, the call trying to acquire a shared
lock always succeeds, because the daemon already holds an exclusive lock
on it; as a result, the job is falsely reported as having died.
Signed-off-by: Petr Pudlak <pudlak@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parent cab9400a
......@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
module Ganeti.JQScheduler
( JQStatus
, jqLivelock
, emptyJQStatus
, initJQScheduler
, enqueueNewJobs
......
......@@ -560,8 +560,10 @@ waitForJob jid tmout = do
-- | Try to cancel a job that has already been handed over to execution,
-- by terminating the process.
cancelJob :: JobId -> IO (ErrorResult (Bool, String))
cancelJob jid = runResultT $ do
cancelJob :: Livelock -- ^ Luxi's livelock path
-> JobId -- ^ the job to cancel
-> IO (ErrorResult (Bool, String))
cancelJob luxiLivelock jid = runResultT $ do
-- we can't terminate the job if it's just being started, so
-- retry several times in such a case
result <- runMaybeT . msum . flip map [0..5 :: Int] $ \tryNo -> do
......@@ -573,7 +575,9 @@ cancelJob jid = runResultT $ do
qDir <- liftIO queueDir
(job, _) <- lift . mkResultT $ loadJobFromDisk qDir True jid
let jName = ("Job " ++) . show . fromJobId . qjId $ job
dead <- maybe (return False) (liftIO . isDead) (qjLivelock job)
dead <- maybe (return False) (liftIO . isDead)
. mfilter (/= luxiLivelock)
$ qjLivelock job
case qjProcessId job of
_ | dead ->
return (True, jName ++ " has been already dead")
......
......@@ -347,7 +347,7 @@ handleCall _ qstat cfg (CancelJob jid) = do
writeAndReplicateJob cfg qDir job'
Ok False -> do
logDebug $ jName ++ " not queued; trying to cancel directly"
fmap showJSON <$> cancelJob jid
fmap showJSON <$> cancelJob (jqLivelock qstat) jid
Bad s -> return . Ok . showJSON $ (False, s)
handleCall qlock _ cfg (ArchiveJob jid) = do
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment