From 646aa0284e833903ab23ca1edb6677dffdefca94 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Tue, 30 Nov 2010 15:42:14 +0000 Subject: [PATCH] hbal: implement handling of multi-group clusters On a single-group cluster, we proceed as before. On multi-group clusters, we require selection of the desired group (currently via UUID only). Signed-off-by: Iustin Pop <iustin@google.com> Reviewed-by: Balazs Lecz <leczb@google.com> --- hbal.hs | 44 ++++++++++++++++++++++++++++++++++++++------ man/hbal.rst | 5 +++++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/hbal.hs b/hbal.hs index 59edde484..f8ad8d065 100644 --- a/hbal.hs +++ b/hbal.hs @@ -28,7 +28,7 @@ module Main (main) where import Control.Concurrent (threadDelay) import Control.Exception (bracket) import Data.List -import Data.Maybe (isJust, fromJust) +import Data.Maybe (isJust, isNothing, fromJust) import Data.IORef import Monad import System (exitWith, ExitCode(..)) @@ -66,6 +66,7 @@ options = , oRapiMaster , oLuxiSocket , oExecJobs + , oGroup , oMaxSolLength , oVerbose , oQuiet @@ -225,7 +226,7 @@ main = do verbose = optVerbose opts shownodes = optShowNodes opts - (fixed_nl, il, ctags) <- loadExternalData opts + (fixed_nl, ilf, ctags) <- loadExternalData opts let offline_names = optOffline opts all_nodes = Container.elems fixed_nl @@ -238,7 +239,7 @@ main = do all_nodes m_cpu = optMcpu opts m_dsk = optMdsk opts - csf = commonSuffix fixed_nl il + csf = commonSuffix fixed_nl ilf when (length offline_wrong > 0) $ do hPrintf stderr "Wrong node name(s) set as offline: %s\n" @@ -248,21 +249,52 @@ main = do let nm = Container.map (\n -> if Node.idx n `elem` offline_indices then Node.setOffline n True else n) fixed_nl - nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) - nm + nlf = Container.map (flip Node.setMdsk m_dsk . 
flip Node.setMcpu m_cpu) + nm when (not oneline && verbose > 1) $ putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags - when (Container.size il == 0) $ do + when (Container.size ilf == 0) $ do (if oneline then putStrLn $ formatOneline 0 0 0 else printf "Cluster is empty, exiting.\n") exitWith ExitSuccess + let split_insts = Cluster.findSplitInstances nlf ilf + when (not . null $ split_insts) $ do + hPutStrLn stderr "Found instances belonging to multiple node groups:" + mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts + hPutStrLn stderr "Aborting." + exitWith $ ExitFailure 1 + + let ngroups = Cluster.splitCluster nlf ilf + when (length ngroups > 1 && isNothing (optGroup opts)) $ do + hPutStrLn stderr "Found multiple node groups:" + mapM_ (hPutStrLn stderr . (" " ++) . fst ) ngroups + hPutStrLn stderr "Aborting." + exitWith $ ExitFailure 1 + unless oneline $ printf "Loaded %d nodes, %d instances\n" + (Container.size nlf) + (Container.size ilf) + + (guuid, (nl, il)) <- case optGroup opts of + Nothing -> return $ head ngroups + Just g -> case lookup g ngroups of + Nothing -> do + hPutStrLn stderr $ "Node group " ++ g ++ + " not found. Node group list is:" + mapM_ (hPutStrLn stderr . (" " ++) . fst ) ngroups + hPutStrLn stderr "Aborting." + exitWith $ ExitFailure 1 + Just cdata -> return (g, cdata) + + unless oneline $ printf "Group size %d nodes, %d instances\n" (Container.size nl) (Container.size il) + putStrLn $ "Selected node group: " ++ guuid + when (length csf > 0 && not oneline && verbose > 1) $ printf "Note: Stripping common suffix of '%s' from names\n" csf diff --git a/man/hbal.rst b/man/hbal.rst index 8fe081cb6..7988aa3a1 100644 --- a/man/hbal.rst +++ b/man/hbal.rst @@ -436,6 +436,11 @@ The options that can be passed to the program are as follows: number. For example, specifying *disk-ratio* as **0.25** means that at least one quarter of disk space should be left free on nodes. 
+-G *uuid*, --group=*uuid* + On a multi-group cluster, select this group for + processing. Otherwise hbal will abort, since it cannot balance + multiple groups at the same time. + -v, --verbose Increase the output verbosity. Each usage of this option will increase the verbosity (currently more than 2 doesn't make sense) -- GitLab