diff --git a/htools/Ganeti/HTools/Program/Hroller.hs b/htools/Ganeti/HTools/Program/Hroller.hs index 166973d259849cfb81b305b278c71d6940c8decd..25e096ba5c1ad6bbb32ecfcf6c50c7df32774f06 100644 --- a/htools/Ganeti/HTools/Program/Hroller.hs +++ b/htools/Ganeti/HTools/Program/Hroller.hs @@ -29,8 +29,21 @@ module Ganeti.HTools.Program.Hroller , arguments ) where +import Control.Monad +import Data.List +import Data.Ord + +import qualified Data.IntMap as IntMap + +import qualified Ganeti.HTools.Container as Container +import qualified Ganeti.HTools.Node as Node + import Ganeti.Common import Ganeti.HTools.CLI +import Ganeti.HTools.ExtLoader +import Ganeti.HTools.Graph +import Ganeti.HTools.Loader +import Ganeti.Utils -- | Options list and functions. options :: IO [OptType] @@ -52,6 +65,62 @@ options = do arguments :: [ArgCompletion] arguments = [] +-- | Gather statistics for the coloring algorithms. +-- Returns a summary of how each algorithm performed, fewest colors first, +-- noting whether each later entry tied with or lost to the previous one. +-- The still-empty summary (not a zero size) marks the first entry. +getStats :: [(String, ColorVertMap)] -> String +getStats colorings = snd . foldr helper (0,"") $ algBySize colorings + where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap + size cmap = show (IntMap.size cmap) ++ " " + grpsizes cmap = + "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")" + algBySize = sortBy (flip (comparing (IntMap.size.snd))) + helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String) + helper el (_, "") = ((IntMap.size.snd) el, algostat el) + helper el (old, str) + | old == elsize = (elsize, str ++ " TIE " ++ algostat el) + | otherwise = (elsize, str ++ " LOSE " ++ algostat el) + where elsize = (IntMap.size.snd) el + -- | Main function. main :: Options -> [String] -> IO () -main _ _ = return () +main opts args = do + unless (null args) $ exitErr "This program doesn't take any arguments." + + let verbose = optVerbose opts + + -- Load cluster data. 
The last two arguments, cluster tags and ipolicy, are + -- currently not used by this tool. + ini_cdata@(ClusterData _ fixed_nl ilf _ _) <- loadExternalData opts + + nlf <- setNodeStatus opts fixed_nl + + maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata + + -- TODO: only online nodes! + -- TODO: filter by node group + -- TODO: fail if instances are running (with option to warn only) + -- TODO: identify master node, and put it last + + nodeGraph <- case Node.mkNodeGraph nlf ilf of + Nothing -> exitErr "Cannot create node graph" + Just g -> return g + + when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph + + let colorAlgorithms = [ ("LF", colorLF) + , ("Dsatur", colorDsatur) + , ("Dcolor", colorDcolor) + ] + colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms + smallestColoring = + (snd . minimumBy (comparing (IntMap.size . snd))) colorings + idToName = Node.name . (`Container.find` nlf) + nodesbycoloring = map (map idToName) $ IntMap.elems smallestColoring + + when (verbose > 1) . putStrLn $ getStats colorings + + unless (optNoHeaders opts) $ + putStrLn "'Node Reboot Groups'" + mapM_ (putStrLn . commaJoin) nodesbycoloring diff --git a/man/hroller.rst b/man/hroller.rst index ee7aebca1ca8367d91f6e6f0fe2b830d1e04f66b..41d99a6be21c665bdd49c64da1bceed4d7d28f2f 100644 --- a/man/hroller.rst +++ b/man/hroller.rst @@ -35,6 +35,16 @@ hroller is a cluster maintenance reboot scheduler. It can calculate which set of nodes can be rebooted at the same time while avoiding having both primary and secondary nodes being rebooted at the same time. +ALGORITHM FOR CALCULATING OFFLINE REBOOT GROUPS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +hroller will view the nodes as vertices of an undirected graph, +connected by instances which have both a primary and a secondary node. +It will then color the graph using a few different heuristics, and +return the minimum-size color set found. 
Nodes with the same color don't +share an edge, and as such don't have an instance with both primary and +secondary node on them, so they are safe to be rebooted concurrently. + OPTIONS ------- @@ -44,7 +54,38 @@ check **htools(7)** and **hbal(1)**. BUGS ---- -The program does nothing. +The master node should always be the last node of the last group, or anyway +somehow easily identifiable. Right now this is not done. + +Offline nodes should be ignored. + +Filtering by nodegroup should be allowed. + +If instances are online the tool should refuse to do offline rolling +maintenances, unless explicitly requested. + +End-to-end shelltests should be provided. + +Online rolling maintenances (where instances need not be shut down, but +are migrated from node to node) are not supported yet. Hroller by design +should support them both with and without secondary node replacement. + +EXAMPLE +------- + +Note that these examples may not be for the latest version. + +Offline Rolling node reboot output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With the default options, the program shows one reboot group per line as +a comma-separated list. + + $ hroller + 'Node Reboot Groups' + node1.example.com,node3.example.com,node5.example.com + node8.example.com,node6.example.com,node2.example.com + node7.example.com,node4.example.com .. vim: set textwidth=72 : .. Local Variables: