Commit c666e6aa authored by Petr Pudlak
Browse files

When checking job death, check if its lock is the Luxi lock



In this case, the call trying to acquire a shared lock always succeeds,
because the daemon itself already holds the exclusive lock, and so the
check falsely reports that the job has died.
Signed-off-by: Petr Pudlak <pudlak@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parent cab9400a
......@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
module Ganeti.JQScheduler
( JQStatus
, jqLivelock
, emptyJQStatus
, initJQScheduler
, enqueueNewJobs
......
......@@ -560,8 +560,10 @@ waitForJob jid tmout = do
-- | Try to cancel a job that has already been handed over to execution,
-- by terminating the process.
cancelJob :: JobId -> IO (ErrorResult (Bool, String))
cancelJob jid = runResultT $ do
cancelJob :: Livelock -- ^ Luxi's livelock path
-> JobId -- ^ the job to cancel
-> IO (ErrorResult (Bool, String))
cancelJob luxiLivelock jid = runResultT $ do
-- we can't terminate the job if it's just being started, so
-- retry several times in such a case
result <- runMaybeT . msum . flip map [0..5 :: Int] $ \tryNo -> do
......@@ -573,7 +575,9 @@ cancelJob jid = runResultT $ do
qDir <- liftIO queueDir
(job, _) <- lift . mkResultT $ loadJobFromDisk qDir True jid
let jName = ("Job " ++) . show . fromJobId . qjId $ job
dead <- maybe (return False) (liftIO . isDead) (qjLivelock job)
dead <- maybe (return False) (liftIO . isDead)
. mfilter (/= luxiLivelock)
$ qjLivelock job
case qjProcessId job of
_ | dead ->
return (True, jName ++ " has been already dead")
......
......@@ -347,7 +347,7 @@ handleCall _ qstat cfg (CancelJob jid) = do
writeAndReplicateJob cfg qDir job'
Ok False -> do
logDebug $ jName ++ " not queued; trying to cancel directly"
fmap showJSON <$> cancelJob jid
fmap showJSON <$> cancelJob (jqLivelock qstat) jid
Bad s -> return . Ok . showJSON $ (False, s)
handleCall qlock _ cfg (ArchiveJob jid) = do
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment