Skip to content
Snippets Groups Projects
Commit 646aa028 authored by Iustin Pop
Browse files

hbal: implement handling of multi-group clusters


On a single-group cluster, we proceed as before. On multi-group
clusters, we require selection of the desired group (currently via UUID
only).

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Balazs Lecz <leczb@google.com>
parent f4161783
No related branches found
No related tags found
No related merge requests found
...@@ -28,7 +28,7 @@ module Main (main) where ...@@ -28,7 +28,7 @@ module Main (main) where
import Control.Concurrent (threadDelay) import Control.Concurrent (threadDelay)
import Control.Exception (bracket) import Control.Exception (bracket)
import Data.List import Data.List
import Data.Maybe (isJust, fromJust) import Data.Maybe (isJust, isNothing, fromJust)
import Data.IORef import Data.IORef
import Monad import Monad
import System (exitWith, ExitCode(..)) import System (exitWith, ExitCode(..))
...@@ -66,6 +66,7 @@ options = ...@@ -66,6 +66,7 @@ options =
, oRapiMaster , oRapiMaster
, oLuxiSocket , oLuxiSocket
, oExecJobs , oExecJobs
, oGroup
, oMaxSolLength , oMaxSolLength
, oVerbose , oVerbose
, oQuiet , oQuiet
...@@ -225,7 +226,7 @@ main = do ...@@ -225,7 +226,7 @@ main = do
verbose = optVerbose opts verbose = optVerbose opts
shownodes = optShowNodes opts shownodes = optShowNodes opts
(fixed_nl, il, ctags) <- loadExternalData opts (fixed_nl, ilf, ctags) <- loadExternalData opts
let offline_names = optOffline opts let offline_names = optOffline opts
all_nodes = Container.elems fixed_nl all_nodes = Container.elems fixed_nl
...@@ -238,7 +239,7 @@ main = do ...@@ -238,7 +239,7 @@ main = do
all_nodes all_nodes
m_cpu = optMcpu opts m_cpu = optMcpu opts
m_dsk = optMdsk opts m_dsk = optMdsk opts
csf = commonSuffix fixed_nl il csf = commonSuffix fixed_nl ilf
when (length offline_wrong > 0) $ do when (length offline_wrong > 0) $ do
hPrintf stderr "Wrong node name(s) set as offline: %s\n" hPrintf stderr "Wrong node name(s) set as offline: %s\n"
...@@ -248,21 +249,52 @@ main = do ...@@ -248,21 +249,52 @@ main = do
let nm = Container.map (\n -> if Node.idx n `elem` offline_indices let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
then Node.setOffline n True then Node.setOffline n True
else n) fixed_nl else n) fixed_nl
nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
nm nm
when (not oneline && verbose > 1) $ when (not oneline && verbose > 1) $
putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
when (Container.size il == 0) $ do when (Container.size ilf == 0) $ do
(if oneline then putStrLn $ formatOneline 0 0 0 (if oneline then putStrLn $ formatOneline 0 0 0
else printf "Cluster is empty, exiting.\n") else printf "Cluster is empty, exiting.\n")
exitWith ExitSuccess exitWith ExitSuccess
let split_insts = Cluster.findSplitInstances nlf ilf
when (not . null $ split_insts) $ do
hPutStrLn stderr "Found instances belonging to multiple node groups:"
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
let ngroups = Cluster.splitCluster nlf ilf
when (length ngroups > 1 && isNothing (optGroup opts)) $ do
hPutStrLn stderr "Found multiple node groups:"
mapM_ (hPutStrLn stderr . (" " ++) . fst ) ngroups
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
unless oneline $ printf "Loaded %d nodes, %d instances\n" unless oneline $ printf "Loaded %d nodes, %d instances\n"
(Container.size nlf)
(Container.size ilf)
(guuid, (nl, il)) <- case optGroup opts of
Nothing -> return $ head ngroups
Just g -> case lookup g ngroups of
Nothing -> do
hPutStrLn stderr $ "Node group " ++ g ++
" not found. Node group list is:"
mapM_ (hPutStrLn stderr . (" " ++) . fst ) ngroups
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
Just cdata -> return (g, cdata)
unless oneline $ printf "Group size %d nodes, %d instances\n"
(Container.size nl) (Container.size nl)
(Container.size il) (Container.size il)
putStrLn $ "Selected node group: " ++ guuid
when (length csf > 0 && not oneline && verbose > 1) $ when (length csf > 0 && not oneline && verbose > 1) $
printf "Note: Stripping common suffix of '%s' from names\n" csf printf "Note: Stripping common suffix of '%s' from names\n" csf
......
...@@ -436,6 +436,11 @@ The options that can be passed to the program are as follows: ...@@ -436,6 +436,11 @@ The options that can be passed to the program are as follows:
number. For example, specifying *disk-ratio* as **0.25** means that number. For example, specifying *disk-ratio* as **0.25** means that
at least one quarter of disk space should be left free on nodes. at least one quarter of disk space should be left free on nodes.
-G *uuid*, --group=*uuid*
On a multi-group cluster, select this group for
processing. Without this option hbal will abort, since it cannot
balance multiple groups at the same time.
-v, --verbose -v, --verbose
Increase the output verbosity. Each usage of this option will Increase the output verbosity. Each usage of this option will
increase the verbosity (currently more than 2 doesn't make sense) increase the verbosity (currently more than 2 doesn't make sense)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment