Skip to content
Snippets Groups Projects
Commit 1213f9d6 authored by Agata Murawska's avatar Agata Murawska
Browse files

Human readable hcheck functionality


Full hcheck functionality is implemented in this patch. Hcheck runs
per-group checks, provides summary for the entire cluster and simulates
rebalance if required, the printing the modified metrics.
In this patch, only the human-readable output is available.

Signed-off-by: default avatarAgata Murawska <agatamurawska@google.com>
Reviewed-by: default avatarRené Nussbaumer <rn@google.com>
parent e6685c53
No related branches found
No related tags found
No related merge requests found
...@@ -26,10 +26,22 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA ...@@ -26,10 +26,22 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
module Ganeti.HTools.Program.Hcheck (main, options) where module Ganeti.HTools.Program.Hcheck (main, options) where
import Control.Monad import Control.Monad
import List (transpose)
import System.Exit import System.Exit
import System.IO import System.IO
import Text.Printf (printf)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Program.Hbal as Hbal
import Ganeti.HTools.CLI import Ganeti.HTools.CLI
import Ganeti.HTools.ExtLoader
import Ganeti.HTools.Loader
import Ganeti.HTools.Types
-- | Options list and functions. -- | Options list and functions.
options :: [OptType] options :: [OptType]
...@@ -60,9 +72,157 @@ options = ...@@ -60,9 +72,157 @@ options =
, oVerbose , oVerbose
] ]
-- | Check phase - are we before (initial) or after rebalance.
data Phase = Initial
| Rebalanced
-- | Prefix for machine readable names
htcPrefix :: String
htcPrefix = "HCHECK"
-- | Description of phases for human readable version.
phaseDescription :: Phase -> String
phaseDescription Initial = "initially"
phaseDescription Rebalanced = "after rebalancing"
-- | Data showed both per group and per cluster.
commonData :: [(String, String)]
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
, ("CONFLICT_TAGS", "Nodes with conflicting instances")
, ("OFFLINE_PRI", "Instances with primary on an offline node")
, ("OFFLINE_SEC", "Instances with seondary on an offline node")
]
-- | Data showed per group.
groupData :: [(String, String)]
groupData = commonData ++ [("SCORE", "Group score")]
-- | Data showed per cluster.
clusterData :: [(String, String)]
clusterData = commonData ++ [("NEED_REBALANCE", "Cluster is not healthy")]
-- | Print all the statistics on a group level.
printGroupStats :: Int -> Bool -> Phase -> Gdx -> [Int] -> Double -> IO ()
printGroupStats _ True _ _ _ _ = return ()
printGroupStats verbose False phase gidx stats score = do
let printstats = map (printf "%d") stats ++ [printf "%.8f" score] :: [String]
unless (verbose == 0) $ do
printf "\nStatistics for group %d %s\n"
gidx (phaseDescription phase) :: IO ()
mapM_ (\(a,b) -> printf " %s: %s\n" (snd a) b :: IO ())
(zip groupData printstats)
-- | Print all the statistics on a cluster (global) level.
printClusterStats :: Int -> Bool -> Phase -> [Int] -> IO (Bool)
printClusterStats _ True _ stats = do
let needrebal = sum stats
return $ needrebal > 0
printClusterStats verbose False phase stats = do
let needrebal = sum stats
printstats = map (printf "%d") stats :: [String]
unless (verbose == 0) $ do
printf "\nCluster statistics %s\n" (phaseDescription phase) :: IO ()
mapM_ (\(a,b) -> printf " %s: %s\n" (snd a) b :: IO ())
(zip clusterData (printstats ++ [show (needrebal>0)]))
return $ needrebal > 0
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
instances residing on offline nodes.
-}
perGroupChecks :: Int -> Bool -> Phase -> (Gdx, (Node.List, Instance.List))
-> IO ([Int])
perGroupChecks verbose machineread phase (gidx, (nl, il)) = do
let offnl = filter Node.offline (Container.elems nl)
n1violated = length $ fst $ Cluster.computeBadItems nl il
conflicttags = length $ filter (>0)
(map Node.conflictingPrimaries (Container.elems nl))
offline_pri = sum . map length $ map Node.pList offnl
offline_sec = length $ map Node.sList offnl
score = Cluster.compCV nl
groupstats = [ n1violated
, conflicttags
, offline_pri
, offline_sec
]
printGroupStats verbose machineread phase gidx groupstats score
return groupstats
-- | Use Hbal's iterateDepth to simulate group rebalance.
simulateRebalance :: Options ->
(Gdx, (Node.List, Instance.List)) ->
IO ( (Gdx, (Node.List, Instance.List)) )
simulateRebalance opts (gidx, (nl, il)) = do
let ini_cv = Cluster.compCV nl
ini_tbl = Cluster.Table nl il ini_cv []
min_cv = optMinScore opts
if (ini_cv < min_cv)
then return (gidx, (nl, il))
else do
let imlen = maximum . map (length . Instance.alias) $ Container.elems il
nmlen = maximum . map (length . Node.alias) $ Container.elems nl
(fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
(optMaxLength opts)
(optDiskMoves opts)
(optInstMoves opts)
nmlen imlen [] min_cv
(optMinGainLim opts) (optMinGain opts)
(optEvacMode opts)
let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
return (gidx, (fin_nl, fin_il))
-- | Prints the final @OK@ marker in machine readable output.
printFinalHTC :: Bool -> IO ()
printFinalHTC = printFinal htcPrefix
-- | Main function. -- | Main function.
main :: Options -> [String] -> IO () main :: Options -> [String] -> IO ()
main _ args = do main opts args = do
unless (null args) $ do unless (null args) $ do
hPutStrLn stderr "Error: this program doesn't take any arguments." hPutStrLn stderr "Error: this program doesn't take any arguments."
exitWith $ ExitFailure 1 exitWith $ ExitFailure 1
let verbose = optVerbose opts
machineread = optMachineReadable opts
nosimulation = optNoSimulation opts
(ClusterData _ fixed_nl ilf _ _) <- loadExternalData opts
nlf <- setNodeStatus opts fixed_nl
let splitinstances = Cluster.findSplitInstances nlf ilf
splitcluster = Cluster.splitCluster nlf ilf
groupsstats <- mapM (perGroupChecks verbose machineread Initial) splitcluster
let clusterstats = map sum (transpose groupsstats) :: [Int]
needrebalance <- printClusterStats verbose machineread Initial clusterstats
when nosimulation $ do
unless (verbose == 0 || machineread) $
printf "Running in no-simulation mode. Exiting.\n"
printFinalHTC machineread
exitWith ExitSuccess
when (length splitinstances > 0) $ do
unless (verbose == 0 || machineread) $
printf "Split instances found, simulation of re-balancing not possible\n"
exitWith $ ExitFailure 1
unless needrebalance $ do
unless (verbose == 0 || machineread) $
printf "No need to rebalance cluster, no problems found. Exiting.\n"
printFinalHTC machineread
exitWith ExitSuccess
rebalancedcluster <- mapM (simulateRebalance opts) splitcluster
newgroupstats <- mapM (perGroupChecks verbose machineread Rebalanced)
rebalancedcluster
let newclusterstats = map sum (transpose newgroupstats) :: [Int]
_ <- printClusterStats verbose machineread Rebalanced newclusterstats
printFinalHTC machineread
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment