From 646aa0284e833903ab23ca1edb6677dffdefca94 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 30 Nov 2010 15:42:14 +0000
Subject: [PATCH] hbal: implement handling of multi-group clusters

On a single-group cluster, we proceed as before. On multi-group
clusters, we require selection of the desired group (currently via UUID
only).

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Balazs Lecz <leczb@google.com>
---
 hbal.hs      | 44 ++++++++++++++++++++++++++++++++++++++------
 man/hbal.rst |  5 +++++
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/hbal.hs b/hbal.hs
index 59edde484..f8ad8d065 100644
--- a/hbal.hs
+++ b/hbal.hs
@@ -28,7 +28,7 @@ module Main (main) where
 import Control.Concurrent (threadDelay)
 import Control.Exception (bracket)
 import Data.List
-import Data.Maybe (isJust, fromJust)
+import Data.Maybe (isJust, isNothing, fromJust)
 import Data.IORef
 import Monad
 import System (exitWith, ExitCode(..))
@@ -66,6 +66,7 @@ options =
     , oRapiMaster
     , oLuxiSocket
     , oExecJobs
+    , oGroup
     , oMaxSolLength
     , oVerbose
     , oQuiet
@@ -225,7 +226,7 @@ main = do
       verbose = optVerbose opts
       shownodes = optShowNodes opts
 
-  (fixed_nl, il, ctags) <- loadExternalData opts
+  (fixed_nl, ilf, ctags) <- loadExternalData opts
 
   let offline_names = optOffline opts
       all_nodes = Container.elems fixed_nl
@@ -238,7 +239,7 @@ main = do
                                all_nodes
       m_cpu = optMcpu opts
       m_dsk = optMdsk opts
-      csf = commonSuffix fixed_nl il
+      csf = commonSuffix fixed_nl ilf
 
   when (length offline_wrong > 0) $ do
          hPrintf stderr "Wrong node name(s) set as offline: %s\n"
@@ -248,21 +249,52 @@ main = do
   let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
                                 then Node.setOffline n True
                                 else n) fixed_nl
-      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
-           nm
+      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
+            nm
 
   when (not oneline && verbose > 1) $
        putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
 
-  when (Container.size il == 0) $ do
+  when (Container.size ilf == 0) $ do
          (if oneline then putStrLn $ formatOneline 0 0 0
           else printf "Cluster is empty, exiting.\n")
          exitWith ExitSuccess
 
+  let split_insts = Cluster.findSplitInstances nlf ilf
+  when (not . null $ split_insts) $ do
+    hPutStrLn stderr "Found instances belonging to multiple node groups:"
+    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
+    hPutStrLn stderr "Aborting."
+    exitWith $ ExitFailure 1
+
+  let ngroups = Cluster.splitCluster nlf ilf
+  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
+    hPutStrLn stderr "Found multiple node groups:"
+    mapM_ (hPutStrLn stderr . ("  " ++) . fst ) ngroups
+    hPutStrLn stderr "Aborting."
+    exitWith $ ExitFailure 1
+
   unless oneline $ printf "Loaded %d nodes, %d instances\n"
+             (Container.size nlf)
+             (Container.size ilf)
+
+  (guuid, (nl, il)) <- case optGroup opts of
+    Nothing -> return $ head ngroups
+    Just g -> case lookup g ngroups of
+      Nothing -> do
+        hPutStrLn stderr $ "Node group " ++ g ++
+          " not found. Node group list is:"
+        mapM_ (hPutStrLn stderr . ("  " ++) . fst ) ngroups
+        hPutStrLn stderr "Aborting."
+        exitWith $ ExitFailure 1
+      Just cdata -> return (g, cdata)
+
+  unless oneline $ printf "Group size %d nodes, %d instances\n"
              (Container.size nl)
              (Container.size il)
 
+  putStrLn $ "Selected node group: " ++ guuid
+
   when (length csf > 0 && not oneline && verbose > 1) $
        printf "Note: Stripping common suffix of '%s' from names\n" csf
 
diff --git a/man/hbal.rst b/man/hbal.rst
index 8fe081cb6..7988aa3a1 100644
--- a/man/hbal.rst
+++ b/man/hbal.rst
@@ -436,6 +436,11 @@ The options that can be passed to the program are as follows:
   number. For example, specifying *disk-ratio* as **0.25** means that
   at least one quarter of disk space should be left free on nodes.
 
+-G *uuid*, --group=*uuid*
+  On a multi-group cluster, select this group for processing. If this
+  option is not given on such a cluster, hbal will abort, since it
+  cannot balance multiple groups at the same time.
+
 -v, --verbose
   Increase the output verbosity. Each usage of this option will
   increase the verbosity (currently more than 2 doesn't make sense)
-- 
GitLab