Commit 5dad2589 authored by Iustin Pop's avatar Iustin Pop
Browse files

Rework/split hbal's main function



This is just moving code around. A subsequent patch will do a bit more
cleanup and changing the output.
Signed-off-by: default avatarIustin Pop <iustin@google.com>
Reviewed-by: default avatarAgata Murawska <agatamurawska@google.com>
parent 60de49c3
......@@ -139,6 +139,30 @@ formatOneline ini_cv plc_len fin_cv =
printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
(if fin_cv == 0 then 1 else ini_cv / fin_cv)
-- | Displays the cluster stats.
printStats :: Node.List -> Node.List -> IO ()
printStats ini_nl fin_nl = do
let ini_cs = Cluster.totalResources ini_nl
fin_cs = Cluster.totalResources fin_nl
printf "Original: mem=%d disk=%d\n"
(Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
printf "Final: mem=%d disk=%d\n"
(Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
-- | Saves the rebalance commands to a text file.
saveBalanceCommands :: Options -> String -> IO ()
saveBalanceCommands opts cmd_data = do
let out_path = fromJust $ optShowCmds opts
putStrLn ""
(if out_path == "-" then
printf "Commands to run to reach the above solution:\n%s"
(unlines . map (" " ++) .
filter (/= " check") .
lines $ cmd_data)
else do
writeFile out_path (shTemplate ++ cmd_data)
printf "The commands have been written to file '%s'\n" out_path)
-- | Polls a set of jobs at a fixed interval until all are finished
-- one way or another.
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
......@@ -159,7 +183,7 @@ checkJobsStatus = all (== JOB_STATUS_SUCCESS)
-- | Wrapper over execJobSet checking for early termination.
execWrapper :: String -> Node.List
-> Instance.List -> IORef Int -> [JobSet] -> IO Bool
-> Instance.List -> IORef Int -> [JobSet] -> IO Bool
execWrapper _ _ _ _ [] = return True
execWrapper master nl il cref alljss = do
cancel <- readIORef cref
......@@ -201,6 +225,22 @@ execJobSet master nl il cref (js:jss) = do
hPutStrLn stderr "Aborting."
return False)
-- | Executes the jobs, if possible and desired.
maybeExecJobs :: Options
-> [a]
-> Node.List
-> Instance.List
-> [JobSet]
-> IO Bool
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
if optExecJobs opts && not (null ord_plc)
then (case optLuxi opts of
Nothing -> do
hPutStrLn stderr "Execution of commands possible only on LUXI"
return False
Just master -> runJobSet master fin_nl il cmd_jobs)
else return True
-- | Signal handler for graceful termination.
hangleSigInt :: IORef Int -> IO ()
hangleSigInt cref = do
......@@ -224,23 +264,9 @@ runJobSet master fin_nl il cmd_jobs = do
[(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
execWrapper master fin_nl il cref cmd_jobs
-- | Main function.
main :: IO ()
main = do
cmd_args <- System.getArgs
(opts, args) <- parseOpts cmd_args "hbal" options
unless (null args) $ do
hPutStrLn stderr "Error: this program doesn't take any arguments."
exitWith $ ExitFailure 1
let oneline = optOneline opts
verbose = optVerbose opts
shownodes = optShowNodes opts
showinsts = optShowInsts opts
ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
-- | Set node properties based on command line options.
setNodesStatus :: Options -> Node.List -> IO Node.List
setNodesStatus opts fixed_nl = do
let offline_passed = optOffline opts
all_nodes = Container.elems fixed_nl
offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
......@@ -251,7 +277,6 @@ main = do
all_nodes
m_cpu = optMcpu opts
m_dsk = optMdsk opts
csf = commonSuffix fixed_nl ilf
when (not (null offline_wrong)) $ do
hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
......@@ -263,22 +288,12 @@ main = do
else n) fixed_nl
nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
nm
return nlf
when (not oneline && verbose > 1) $
putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
when (Container.size ilf == 0) $ do
(if oneline then putStrLn $ formatOneline 0 0 0
else printf "Cluster is empty, exiting.\n")
exitWith ExitSuccess
let split_insts = Cluster.findSplitInstances nlf ilf
unless (null split_insts) $ do
hPutStrLn stderr "Found instances belonging to multiple node groups:"
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
-- | Select the target node group.
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
-> IO (String, (Node.List, Instance.List))
selectGroup opts gl nlf ilf = do
let ngroups = Cluster.splitCluster nlf ilf
when (length ngroups > 1 && isNothing (optGroup opts)) $ do
hPutStrLn stderr "Found multiple node groups:"
......@@ -287,13 +302,7 @@ main = do
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
unless oneline $ printf "Loaded %d nodes, %d instances\n"
(Container.size nlf)
(Container.size ilf)
(gname, (nl, il)) <- case optGroup opts of
case optGroup opts of
Nothing -> do
let (gidx, cdata) = head ngroups
grp = Container.find gidx gl
......@@ -314,15 +323,40 @@ main = do
exitWith $ ExitFailure 1
Just cdata -> return (Group.name grp, cdata)
-- | Do a few checks on the cluster data.
checkCluster :: Bool -> Int -> Node.List -> Instance.List -> IO ()
checkCluster oneline verbose nl il = do
-- nothing to do on an empty cluster
when (Container.null il) $ do
(if oneline then putStrLn $ formatOneline 0 0 0
else printf "Cluster is empty, exiting.\n")
exitWith ExitSuccess
-- hbal doesn't currently handle split clusters
let split_insts = Cluster.findSplitInstances nl il
unless (null split_insts) $ do
hPutStrLn stderr "Found instances belonging to multiple node groups:"
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
unless oneline $ printf "Loaded %d nodes, %d instances\n"
(Container.size nl)
(Container.size il)
let csf = commonSuffix nl il
when (not (null csf) && not oneline && verbose > 1) $
printf "Note: Stripping common suffix of '%s' from names\n" csf
-- | Do a few checks on the selected group data.
checkGroup :: Bool -> Int -> String -> Node.List -> Instance.List -> IO ()
checkGroup oneline verbose gname nl il = do
unless oneline $ printf "Group size %d nodes, %d instances\n"
(Container.size nl)
(Container.size il)
putStrLn $ "Selected node group: " ++ gname
when (length csf > 0 && not oneline && verbose > 1) $
printf "Note: Stripping common suffix of '%s' from names\n" csf
let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
unless (oneline || verbose == 0) $ printf
"Initial check done: %d bad nodes, %d bad instances.\n"
......@@ -332,6 +366,48 @@ main = do
putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
\that the cluster will end N+1 happy."
-- | Check that we actually need to rebalance.
checkNeedRebalance :: Options -> Score -> IO ()
checkNeedRebalance opts ini_cv = do
let min_cv = optMinScore opts
oneline = optOneline opts
when (ini_cv < min_cv) $ do
(if oneline then
putStrLn $ formatOneline ini_cv 0 ini_cv
else printf "Cluster is already well balanced (initial score %.6g,\n\
\minimum score %.6g).\nNothing to do, exiting\n"
ini_cv min_cv)
exitWith ExitSuccess
-- | Main function.
main :: IO ()
main = do
cmd_args <- System.getArgs
(opts, args) <- parseOpts cmd_args "hbal" options
unless (null args) $ do
hPutStrLn stderr "Error: this program doesn't take any arguments."
exitWith $ ExitFailure 1
let oneline = optOneline opts
verbose = optVerbose opts
shownodes = optShowNodes opts
showinsts = optShowInsts opts
ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
when (not oneline && verbose > 1) $
putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
nlf <- setNodesStatus opts fixed_nl
checkCluster oneline verbose nlf ilf
maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
(gname, (nl, il)) <- selectGroup opts gl nlf ilf
checkGroup oneline verbose gname nl il
maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
......@@ -340,13 +416,7 @@ main = do
ini_tbl = Cluster.Table nl il ini_cv []
min_cv = optMinScore opts
when (ini_cv < min_cv) $ do
(if oneline then
putStrLn $ formatOneline ini_cv 0 ini_cv
else printf "Cluster is already well balanced (initial score %.6g,\n\
\minimum score %.6g).\nNothing to do, exiting\n"
ini_cv min_cv)
exitWith ExitSuccess
checkNeedRebalance opts ini_cv
unless oneline (if verbose > 2 then
printf "Initial coefficients: overall %.8f, %s\n"
......@@ -381,20 +451,9 @@ main = do
printf "Solution length=%d\n" (length ord_plc)
let cmd_jobs = Cluster.splitJobs cmd_strs
cmd_data = Cluster.formatCmds cmd_jobs
when (isJust $ optShowCmds opts) $
do
let out_path = fromJust $ optShowCmds opts
putStrLn ""
(if out_path == "-" then
printf "Commands to run to reach the above solution:\n%s"
(unlines . map (" " ++) .
filter (/= " check") .
lines $ cmd_data)
else do
writeFile out_path (shTemplate ++ cmd_data)
printf "The commands have been written to file '%s'\n" out_path)
saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
(ClusterData gl fin_nl fin_il ctags)
......@@ -403,22 +462,9 @@ main = do
maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
when (verbose > 3) $ do
let ini_cs = Cluster.totalResources nl
fin_cs = Cluster.totalResources fin_nl
printf "Original: mem=%d disk=%d\n"
(Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
printf "Final: mem=%d disk=%d\n"
(Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
when oneline $
putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
eval <-
if optExecJobs opts && not (null ord_plc)
then (case optLuxi opts of
Nothing -> do
hPutStrLn stderr "Execution of commands possible only on LUXI"
return False
Just master -> runJobSet master fin_nl il cmd_jobs)
else return True
when (verbose > 3) $ printStats nl fin_nl
when oneline $ putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
unless eval (exitWith (ExitFailure 1))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment