From 44763b519ffda6e7ddb35b9d7c0b823fd7f9ccec Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Sun, 5 Jul 2009 18:56:10 +0300 Subject: [PATCH] Add computation of the failure reason in hspace This patch enhances hspace to report why the allocation sequence stopped, both in absolute error count and for the top reason. --- Ganeti/HTools/Types.hs | 1 + hspace.hs | 52 ++++++++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/Ganeti/HTools/Types.hs b/Ganeti/HTools/Types.hs index 3a494f7f7..f7f55f593 100644 --- a/Ganeti/HTools/Types.hs +++ b/Ganeti/HTools/Types.hs @@ -65,6 +65,7 @@ data FailMode = FailMem -- ^ Failed due to not enough RAM | FailDisk -- ^ Failed due to not enough disk | FailCPU -- ^ Failed due to not enough CPU capacity | FailN1 -- ^ Failed due to not passing N1 checks + deriving (Eq, Show) -- | Either-like data-type customized for our failure modes data OpResult a = OpFail FailMode -- ^ Failed operation diff --git a/hspace.hs b/hspace.hs index ec7df9c68..a6d458103 100644 --- a/hspace.hs +++ b/hspace.hs @@ -147,19 +147,26 @@ options = "show help" ] +concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)] +concatFailure flst reason = + let cval = lookup reason flst + in case cval of + Nothing -> (reason, 1):flst + Just val -> let plain = filter (\(x, _) -> x /= reason) flst + in (reason, val+1):plain + filterFails :: Cluster.AllocSolution - -> OpResult [(Node.List, Instance.Instance, [Node.Node])] + -> ([(FailMode, Int)], + [(Node.List, Instance.Instance, [Node.Node])]) filterFails sols = - let sols' = concat . map (\ (onl, i, nn) -> - case onl of - OpFail _ -> [] - OpGood gnl -> [(gnl, i, nn)] - ) $ sols - in - if null sols' then - OpFail FailN1 - else - return sols' + let (alst, blst) = unzip . map (\ (onl, i, nn) -> + case onl of + OpFail reason -> ([reason], []) + OpGood gnl -> ([], [(gnl, i, nn)]) + ) $ sols + aval = concat alst + bval = concat blst + in (foldl' concatFailure [] aval, bval) processResults :: [(Node.List, Instance.Instance, [Node.Node])] -> (Node.List, Instance.Instance, [Node.Node]) @@ -173,7 +180,7 @@ iterateDepth :: Node.List -> Instance.Instance -> Int -> [Instance.Instance] - -> (Node.List, [Instance.Instance]) + -> ([(FailMode, Int)], Node.List, [Instance.Instance]) iterateDepth nl il newinst nreq ixes = let depth = length ixes newname = (printf "new-%d" depth)::String @@ -181,17 +188,14 @@ iterateDepth nl il newinst nreq ixes = newi2 = Instance.setIdx (Instance.setName newinst newname) newidx sols = (Cluster.tryAlloc nl il newi2 nreq):: OpResult Cluster.AllocSolution - orig = (nl, ixes) in case sols of - OpFail _ -> orig + OpFail _ -> ([], nl, ixes) OpGood sols' -> - let - sols'' = filterFails sols' - in case sols'' of - OpFail _ -> orig - OpGood sols''' -> - let (xnl, xi, _) = processResults sols''' - in iterateDepth xnl il newinst nreq (xi:ixes) + let (errs, sols3) = filterFails sols' + in if null sols3 + then (errs, nl, ixes) + else let (xnl, xi, _) = processResults sols3 + in iterateDepth xnl il newinst nreq (xi:ixes) printStats :: String -> Cluster.CStats -> IO () printStats kind cs = do @@ -275,12 +279,13 @@ main = do newinst = Instance.create "new" (optIMem opts) (optIDsk opts) (optIVCPUs opts) "ADMIN_down" (-1) (-1) - let (fin_nl, ixes) = iterateDepth nl il newinst req_nodes [] + let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes [] allocs = length ixes fin_instances = num_instances + allocs fin_ixes = reverse ixes ix_namelen = maximum . map (length . Instance.name) $ fin_ixes fin_stats = Cluster.totalResources fin_nl + sreason = reverse $ sortBy (compare `on` snd) ereason printf "Final score: %.8f\n" (Cluster.compCV fin_nl) printf "Final instances: %d\n" (num_instances + allocs) @@ -288,6 +293,9 @@ main = do printf "Usage: %.5f\n" (((fromIntegral num_instances)::Double) / (fromIntegral fin_instances)) printf "Allocations: %d\n" allocs + putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason) + printf "Most likely fail reason: %s\n" (show . fst . head $ sreason) + when (verbose > 1) $ do putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s" ix_namelen (Instance.name i) -- GitLab