diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index c4dba4c86833a61ff162e345dbbe4413df52db35..7fa8ed8abdc51d7b4a0f3fc4d9eb327b401cbd39 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -51,6 +51,7 @@ module Ganeti.HTools.Cluster -- * IAllocator functions , tryAlloc , tryReloc + , collapseFailures ) where import Data.List @@ -73,7 +74,10 @@ type Score = Double type Placement = (Idx, Ndx, Ndx, Score) -- | Allocation\/relocation solution. -type AllocSolution = [OpResult (Node.List, Instance.Instance, [Node.Node])] +type AllocSolution = ([FailMode], Int, Maybe (Score, AllocElement)) + +-- | Allocation\/relocation element. +type AllocElement = (Node.List, Instance.Instance, [Node.Node]) -- | An instance move definition data IMove = Failover -- ^ Failover the instance (f) @@ -332,7 +336,7 @@ applyMove nl inst (FailoverAndReplace new_sdx) = -- | Tries to allocate an instance on one given node. allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node - -> OpResult (Node.List, Instance.Instance, [Node.Node]) + -> OpResult AllocElement allocateOnSingle nl inst p = let new_pdx = Node.idx p new_inst = Instance.setBoth inst new_pdx Node.noSecondary @@ -342,7 +346,7 @@ allocateOnSingle nl inst p = -- | Tries to allocate an instance on a given pair of nodes. allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node - -> OpResult (Node.List, Instance.Instance, [Node.Node]) + -> OpResult AllocElement allocateOnPair nl inst tgt_p tgt_s = let new_pdx = Node.idx tgt_p new_sdx = Node.idx tgt_s @@ -432,7 +436,34 @@ checkMove nodes_idx ini_tbl victims = else best_tbl --- * Alocation functions +-- * Allocation functions + +-- | Build failure stats out of a list of failures +collapseFailures :: [FailMode] -> FailStats +collapseFailures flst = + map (\k -> (k, length $ filter ((==) k) flst)) [minBound..maxBound] + +-- | Update current Allocation solution and failure stats with new +-- elements +concatAllocs :: AllocSolution -> OpResult AllocElement -> AllocSolution +concatAllocs (flst, succ, sols) (OpFail reason) = (reason:flst, succ, sols) + +concatAllocs (flst, succ, osols) (OpGood ns@(nl, _, _)) = + let nscore = compCV nl + -- Choose the old or new solution, based on the cluster score + nsols = case osols of + Nothing -> Just (nscore, ns) + Just (oscore, _) -> + if oscore < nscore + then osols + else Just (nscore, ns) + nsuc = succ + 1 + -- Note: we force evaluation of nsols here in order to keep the + -- memory profile low - we know that we will need nsols for sure + -- in the next cycle, so we force evaluation of nsols, since the + -- foldl' in the caller will only evaluate the tuple, but not the + -- *elements* of the tuple + in nsols `seq` nsuc `seq` (flst, nsuc, nsols) -- | Try to allocate an instance on the cluster. tryAlloc :: (Monad m) => @@ -445,12 +476,16 @@ tryAlloc nl _ inst 2 = let all_nodes = getOnline nl all_pairs = liftM2 (,) all_nodes all_nodes ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs - sols = map (uncurry $ allocateOnPair nl inst) ok_pairs + sols = foldl' (\cstate (p, s) -> + concatAllocs cstate $ allocateOnPair nl inst p s + ) ([], 0, Nothing) ok_pairs in return sols tryAlloc nl _ inst 1 = let all_nodes = getOnline nl - sols = map (allocateOnSingle nl inst) all_nodes + sols = foldl' (\cstate p -> + concatAllocs cstate $ allocateOnSingle nl inst p + ) ([], 0, Nothing) all_nodes in return sols tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \ @@ -462,7 +497,7 @@ tryReloc :: (Monad m) => Node.List -- ^ The node list -> Instance.List -- ^ The instance list -> Idx -- ^ The index of the instance to move - -> Int -- ^ The numver of nodes required + -> Int -- ^ The number of nodes required -> [Ndx] -- ^ Nodes which should not be used -> m AllocSolution -- ^ Solution list tryReloc nl il xid 1 ex_idx = @@ -471,10 +506,13 @@ tryReloc nl il xid 1 ex_idx = ex_idx' = Instance.pnode inst:ex_idx valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes valid_idxes = map Node.idx valid_nodes - sols1 = map (\x -> do - (mnl, i, _, _) <- applyMove nl inst (ReplaceSecondary x) - return (mnl, i, [Container.find x nl]) - ) valid_idxes + sols1 = foldl' (\cstate x -> + let elem = do + (mnl, i, _, _) <- + applyMove nl inst (ReplaceSecondary x) + return (mnl, i, [Container.find x mnl]) + in concatAllocs cstate elem + ) ([], 0, Nothing) valid_idxes in return sols1 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ diff --git a/Ganeti/HTools/Types.hs b/Ganeti/HTools/Types.hs index cb384e852b06633a3537d5df1e33f9f871e3f11d..4a009f449a20fd413c83a08e31a2f4678f928a71 100644 --- a/Ganeti/HTools/Types.hs +++ b/Ganeti/HTools/Types.hs @@ -30,6 +30,7 @@ module Ganeti.HTools.Types , Result(..) , Element(..) , FailMode(..) + , FailStats , OpResult(..) ) where @@ -67,6 +68,9 @@ data FailMode = FailMem -- ^ Failed due to not enough RAM | FailN1 -- ^ Failed due to not passing N1 checks deriving (Eq, Enum, Bounded, Show) +-- | List with failure statistics +type FailStats = [(FailMode, Int)] + -- | Either-like data-type customized for our failure modes data OpResult a = OpFail FailMode -- ^ Failed operation | OpGood a -- ^ Success operation diff --git a/hail.hs b/hail.hs index b412680d19201f907e221134254815e24962939a..0167d55ac7d970f62d0532391287816faf185898 100644 --- a/hail.hs +++ b/hail.hs @@ -37,7 +37,6 @@ import Text.Printf (printf) import qualified Ganeti.HTools.Cluster as Cluster import qualified Ganeti.HTools.Node as Node -import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.CLI as CLI import Ganeti.HTools.IAlloc import Ganeti.HTools.Types @@ -72,37 +71,21 @@ options = ] -filterFails :: (Monad m) => - [OpResult (Node.List, Instance.Instance, [Node.Node])] - -> m [(Node.List, [Node.Node])] -filterFails sols = - if null sols then fail "No nodes onto which to allocate at all" - else let sols' = concatMap (\ e -> - case e of - OpFail _ -> [] - OpGood (gnl, _, nn) -> [(gnl, nn)] - ) sols - in - if null sols' - then fail "No valid allocation solutions" - else return sols' - -processResults :: (Monad m) => [(Node.List, [Node.Node])] - -> m (String, [Node.Node]) -processResults sols = - let sols' = map (\(nl', ns) -> (Cluster.compCV nl', ns)) sols - sols'' = sortBy (compare `on` fst) sols' - (best, w) = head sols'' - (worst, l) = last sols'' - info = printf "Valid results: %d, best score: %.8f for node(s) %s, \ - \worst score: %.8f for node(s) %s" (length sols'') - best (intercalate "/" . map Node.name $ w) - worst (intercalate "/" . map Node.name $ l)::String - in return (info, w) +processResults :: (Monad m) => Cluster.AllocSolution -> m (String, [Node.Node]) +processResults (fstats, succ, sols) = + case sols of + Nothing -> fail "No valid allocation solutions" + Just (best, (_, _, w)) -> + let tfails = length fstats + info = printf "successes %d, failures %d,\ + \ best score: %.8f for node(s) %s" + succ tfails + best (intercalate "/" . map Node.name $ w)::String + in return (info, w) -- | Process a request and return new node lists processRequest :: Request - -> Result [OpResult (Node.List, Instance.Instance, [Node.Node])] + -> Result Cluster.AllocSolution processRequest request = let Request rqtype nl il _ = request in case rqtype of @@ -129,10 +112,11 @@ main = do Ok rq -> return rq let Request _ _ _ csf = request - sols = processRequest request >>= filterFails >>= processResults - let (ok, info, rn) = case sols of - Ok (info, sn) -> (True, "Request successful: " ++ info, - map ((++ csf) . Node.name) sn) - Bad s -> (False, "Request failed: " ++ s, []) + sols = processRequest request >>= processResults + let (ok, info, rn) = + case sols of + Ok (info, sn) -> (True, "Request successful: " ++ info, + map ((++ csf) . Node.name) sn) + Bad s -> (False, "Request failed: " ++ s, []) resp = formatResponse ok info rn putStrLn resp diff --git a/hspace.hs b/hspace.hs index 23b8acf009b1659a81ab9afa6036371b8ede8742..ce09d5ce0e232b6591d765200e25606b594b66fb 100644 --- a/hspace.hs +++ b/hspace.hs @@ -192,39 +192,6 @@ clusterData = [ ("MEM", printf "%.0f" . Cluster.cs_tmem) , ("CPU", printf "%.0f" . Cluster.cs_tcpu) ] --- | Build failure stats out of a list of failure reasons -concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)] -concatFailure flst reason = - let cval = lookup reason flst - in case cval of - Nothing -> (reason, 1):flst - Just val -> let plain = filter (\(x, _) -> x /= reason) flst - in (reason, val+1):plain - --- | Build list of failures and placements out of an list of possible --- | allocations -filterFails :: Cluster.AllocSolution - -> ([(FailMode, Int)], - [(Node.List, Instance.Instance, [Node.Node])]) -filterFails sols = - let (alst, blst) = unzip . map (\ e -> - case e of - OpFail reason -> ([reason], []) - OpGood (gnl, i, nn) -> - ([], [(gnl, i, nn)]) - ) $ sols - aval = concat alst - bval = concat blst - in (foldl' concatFailure [(x, 0) | x <- [minBound..maxBound]] aval, bval) - --- | Get the placement with best score out of a list of possible placements -processResults :: [(Node.List, Instance.Instance, [Node.Node])] - -> (Node.List, Instance.Instance, [Node.Node]) -processResults sols = - let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols - sols'' = sortBy (compare `on` fst) sols' - in snd $ head sols'' - -- | Recursively place instances on the cluster until we're out of space iterateDepth :: Node.List -> Instance.List @@ -241,12 +208,11 @@ iterateDepth nl il newinst nreq ixes = OpResult Cluster.AllocSolution in case sols of OpFail _ -> ([], nl, ixes) - OpGood sols' -> - let (errs, sols3) = filterFails sols' - in if null sols3 - then (errs, nl, ixes) - else let (xnl, xi, _) = processResults sols3 - in iterateDepth xnl il newinst nreq (xi:ixes) + OpGood (errs, _, sols3) -> + case sols3 of + Nothing -> (Cluster.collapseFailures errs, nl, ixes) + Just (_, (xnl, xi, _)) -> + iterateDepth xnl il newinst nreq $! (xi:ixes) -- | Function to print stats for a given phase printStats :: Phase -> Cluster.CStats -> [(String, String)]