Skip to content
Snippets Groups Projects
Commit 03cb89f0 authored by Iustin Pop's avatar Iustin Pop
Browse files

hbal: implement user-friendly termination requests

Currently, hbal will abort immediately when requested (^C, or SIGINT,
etc.). This is not nice, since then the already started jobs need to be
tracked manually.

This patch adds a signal handler for SIGINT and SIGTERM, which will, the
first time, simply record the shutdown request (and hbal will then exit
once all jobs in the current jobset finish), and at the second request,
will cause an immediate exit.
parent 5f715404
No related branches found
No related tags found
No related merge requests found
...@@ -29,9 +29,12 @@ import Control.Concurrent (threadDelay) ...@@ -29,9 +29,12 @@ import Control.Concurrent (threadDelay)
import Control.Exception (bracket) import Control.Exception (bracket)
import Data.List import Data.List
import Data.Maybe (isJust, fromJust) import Data.Maybe (isJust, fromJust)
import Data.IORef
import Monad import Monad
import System (exitWith, ExitCode(..)) import System (exitWith, ExitCode(..))
import System.IO import System.IO
import System.Posix.Process
import System.Posix.Signals
import qualified System import qualified System
import Text.Printf (printf, hPrintf) import Text.Printf (printf, hPrintf)
...@@ -152,10 +155,16 @@ checkJobsStatus = all (== JOB_STATUS_SUCCESS) ...@@ -152,10 +155,16 @@ checkJobsStatus = all (== JOB_STATUS_SUCCESS)
-- | Execute an entire jobset -- | Execute an entire jobset
execJobSet :: String -> Node.List execJobSet :: String -> Node.List
-> Instance.List -> [JobSet] -> IO () -> Instance.List -> IORef Int -> [JobSet] -> IO ()
execJobSet _ _ _ [] = return () execJobSet _ _ _ _ [] = return ()
execJobSet master nl il (js:jss) = do execJobSet master nl il cref alljss@(js:jss) = do
-- map from jobset (htools list of positions) to [[opcodes]] -- map from jobset (htools list of positions) to [[opcodes]]
cancel <- readIORef cref
when (cancel > 0) $ do
putStrLn ("Exiting early due to user request, " ++ show (length alljss) ++
" jobset(s) remaining.")
exitWith $ ExitFailure 1
let jobs = map (\(_, idx, move, _) -> let jobs = map (\(_, idx, move, _) ->
Cluster.iMoveToJob nl il idx move) js Cluster.iMoveToJob nl il idx move) js
let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
...@@ -174,12 +183,30 @@ execJobSet master nl il (js:jss) = do ...@@ -174,12 +183,30 @@ execJobSet master nl il (js:jss) = do
hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
return () return ()
Ok x -> if checkJobsStatus x Ok x -> if checkJobsStatus x
then execJobSet master nl il jss then execJobSet master nl il cref jss
else do else do
hPutStrLn stderr $ "Not all jobs completed successfully: " ++ hPutStrLn stderr $ "Not all jobs completed successfully: " ++
show x show x
hPutStrLn stderr "Aborting.") hPutStrLn stderr "Aborting.")
-- | Signal handler
handleSig :: IORef Int -> IO ()
handleSig cref = do
cnt <- atomicModifyIORef cref (\x -> let y = x + 1
in (y, y))
when (cnt > 1) $ do
putStrLn "Double cancel request, exiting now..."
exitImmediately $ ExitFailure 1
when (cnt > 0) $ putStrLn ("Cancel request registered, will exit at" ++
" the end of the current job set...")
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO ()
runJobSet master fin_nl il cmd_jobs = do
cref <- newIORef 0
mapM_ (\sig -> installHandler sig (Catch (handleSig cref)) Nothing)
[softwareTermination, keyboardSignal]
execJobSet master fin_nl il cref cmd_jobs
-- | Main function. -- | Main function.
main :: IO () main :: IO ()
main = do main = do
...@@ -346,4 +373,4 @@ main = do ...@@ -346,4 +373,4 @@ main = do
Nothing -> do Nothing -> do
hPutStrLn stderr "Execution of commands possible only on LUXI" hPutStrLn stderr "Execution of commands possible only on LUXI"
exitWith $ ExitFailure 1 exitWith $ ExitFailure 1
Just master -> execJobSet master fin_nl il cmd_jobs) Just master -> runJobSet master fin_nl il cmd_jobs)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment