diff --git a/Ganeti/HTools/CLI.hs b/Ganeti/HTools/CLI.hs index b977b6a854573f0bae6ff242f758779b63ccb176..696333aed6d001e13ce19ffd24f1f27e04574b81 100644 --- a/Ganeti/HTools/CLI.hs +++ b/Ganeti/HTools/CLI.hs @@ -59,6 +59,7 @@ module Ganeti.HTools.CLI , oDiskMoves , oDynuFile , oTieredSpec + , oExTags , oShowVer , oShowHelp ) where @@ -106,6 +107,7 @@ data Options = Options , optMdsk :: Double -- ^ Max disk usage ratio for nodes , optDiskMoves :: Bool -- ^ Allow disk moves , optDynuFile :: Maybe FilePath -- ^ Optional file with dynamic use data + , optExTags :: Maybe [String] -- ^ Tags to use for exclusion , optVerbose :: Int -- ^ Verbosity level , optShowVer :: Bool -- ^ Just show the program version , optShowHelp :: Bool -- ^ Just show the help @@ -138,6 +140,7 @@ defaultOptions = Options , optMdsk = -1 , optDiskMoves = True , optDynuFile = Nothing + , optExTags = Nothing , optVerbose = 1 , optShowVer = False , optShowHelp = False @@ -293,6 +296,11 @@ oDynuFile = Option "U" ["dynu-file"] (ReqArg (\ f opts -> Ok opts { optDynuFile = Just f }) "FILE") "Import dynamic utilisation data from the given FILE" +oExTags :: OptType +oExTags = Option "" ["exclusion-tags"] + (ReqArg (\ f opts -> Ok opts { optExTags = Just $ sepSplit ',' f }) + "TAG,...") "Enable instance exclusion based on given tag prefix" + oTieredSpec :: OptType oTieredSpec = Option "" ["tiered-alloc"] (ReqArg (\ inp opts -> do diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index 9a94d0b0a2cb42d569407cd14df60d52f45e1afe..ffdb8763c4a6789b243489c152ceae139efd4ba2 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -185,6 +185,7 @@ detailedCVNames = [ "free_mem_cv" , "mem_load_cv" , "disk_load_cv" , "net_load_cv" + , "pri_tags_score" ] -- | Compute the mem and disk covariance. @@ -225,9 +226,13 @@ compDetailedCV nl = DynUtil c2 m2 d2 n2 = Node.utilPool n in (c1/c2, m1/m2, d1/d2, n1/n2) ) nodes + -- metric: conflicting instance count + pri_tags_inst = sum $ map Node.conflictingPrimaries nodes + pri_tags_score = fromIntegral pri_tags_inst::Double in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv , varianceCoeff c_load, varianceCoeff m_load - , varianceCoeff d_load, varianceCoeff n_load] + , varianceCoeff d_load, varianceCoeff n_load + , pri_tags_score ] -- | Compute the /total/ variance. compCV :: Node.List -> Double diff --git a/Ganeti/HTools/ExtLoader.hs b/Ganeti/HTools/ExtLoader.hs index cc11b1c8bbda739ce7e0919fe0c7de68bdf6a67a..94c787bd08aff7ed0a9ec0c343cd1be9c2312dc0 100644 --- a/Ganeti/HTools/ExtLoader.hs +++ b/Ganeti/HTools/ExtLoader.hs @@ -83,7 +83,7 @@ parseUtilisation line = -- | External tool data loader from a variety of sources. 
loadExternalData :: Options - -> IO (Node.List, Instance.List, String) + -> IO (Node.List, Instance.List, [String], String) loadExternalData opts = do (env_node, env_inst) <- parseEnv () let nodef = if optNodeSet opts then optNodeFile opts @@ -98,6 +98,10 @@ loadExternalData opts = do setSim = isJust simdata setFiles = optNodeSet opts || optInstSet opts allSet = filter id [setRapi, setLuxi, setFiles] + exTags = case optExTags opts of + Nothing -> [] + Just etl -> map (++ ":") etl + when (length allSet > 1) $ do hPutStrLn stderr ("Error: Only one of the rapi, luxi, and data" ++ @@ -126,8 +130,8 @@ loadExternalData opts = do | setSim -> Simu.loadData $ fromJust simdata | otherwise -> wrapIO $ Text.loadData nodef instf - let ldresult = input_data >>= Loader.mergeData util_data' - (loaded_nl, il, csf) <- + let ldresult = input_data >>= Loader.mergeData util_data' exTags + (loaded_nl, il, tags, csf) <- (case ldresult of Ok x -> return x Bad s -> do @@ -140,4 +144,4 @@ loadExternalData opts = do hPutStrLn stderr "Warning: cluster has inconsistent data:" hPutStrLn stderr . unlines . map (printf " - %s") $ fix_msgs - return (fixed_nl, il, csf) + return (fixed_nl, il, tags, csf) diff --git a/Ganeti/HTools/IAlloc.hs b/Ganeti/HTools/IAlloc.hs index ef5d6608138ca6b2f059cb375bf387b596ffd053..819021c8aea8c32ed3bec6ded99b5bfc4acee778 100644 --- a/Ganeti/HTools/IAlloc.hs +++ b/Ganeti/HTools/IAlloc.hs @@ -52,8 +52,9 @@ parseBaseInstance n a = do disk <- fromObj "disk_space_total" a mem <- fromObj "memory" a vcpus <- fromObj "vcpus" a + tags <- fromObj "tags" a let running = "running" - return (n, Instance.create n mem disk vcpus running 0 0) + return (n, Instance.create n mem disk vcpus running tags 0 0) -- | Parses an instance as found in the cluster instance listg. parseInstance :: NameAssoc -- ^ The node name-to-index association list @@ -109,7 +110,9 @@ parseData body = do iobj <- mapM (\(x,y) -> asJSObject y >>= parseInstance ktn x . fromJSObject) idata let (kti, il) = assignIndices iobj - (map_n, map_i, csf) <- mergeData [] (nl, il) + -- cluster tags + ctags <- fromObj "cluster_tags" obj + (map_n, map_i, ptags, csf) <- mergeData [] [] (nl, il, ctags) req_nodes <- fromObj "required_nodes" request optype <- fromObj "type" request rqtype <- @@ -127,7 +130,7 @@ parseData body = do ex_idex <- mapM (Container.findByName map_n) ex_nodes' return $ Relocate ridx req_nodes (map Node.idx ex_idex) other -> fail ("Invalid request type '" ++ other ++ "'") - return $ Request rqtype map_n map_i csf + return $ Request rqtype map_n map_i ptags csf -- | Formats the response into a valid IAllocator response message. formatResponse :: Bool -- ^ Whether the request was successful diff --git a/Ganeti/HTools/Instance.hs b/Ganeti/HTools/Instance.hs index fe64d637cac67432c979f907befa3eb0ca3b2fb4..c3b532432e232242394b52ee13e9f1dd775ebf53 100644 --- a/Ganeti/HTools/Instance.hs +++ b/Ganeti/HTools/Instance.hs @@ -56,6 +56,7 @@ data Instance = Instance { name :: String -- ^ The instance name , sNode :: T.Ndx -- ^ Original secondary node , idx :: T.Idx -- ^ Internal index , util :: T.DynUtil -- ^ Dynamic resource usage + , tags :: [String] -- ^ List of instance tags } deriving (Show) instance T.Element Instance where @@ -86,8 +87,9 @@ type List = Container.Container Instance -- -- Some parameters are not initialized by function, and must be set -- later (via 'setIdx' for example). 
-create :: String -> Int -> Int -> Int -> String -> T.Ndx -> T.Ndx -> Instance -create name_init mem_init dsk_init vcpus_init run_init pn sn = +create :: String -> Int -> Int -> Int -> String + -> [String] -> T.Ndx -> T.Ndx -> Instance +create name_init mem_init dsk_init vcpus_init run_init tags_init pn sn = Instance { name = name_init , mem = mem_init , dsk = dsk_init @@ -98,6 +100,7 @@ create name_init mem_init dsk_init vcpus_init run_init pn sn = , sNode = sn , idx = -1 , util = T.baseUtil + , tags = tags_init } -- | Changes the index. diff --git a/Ganeti/HTools/Loader.hs b/Ganeti/HTools/Loader.hs index 8a351502b5c0e95ee636eea9cae3dbf69ff3c5fd..49e752f401b06dd3f3965a240d0ac7856df88574 100644 --- a/Ganeti/HTools/Loader.hs +++ b/Ganeti/HTools/Loader.hs @@ -48,6 +48,12 @@ import qualified Ganeti.HTools.Node as Node import Ganeti.HTools.Types +-- * Constants + +-- | The exclusion tag prefix +exTagsPrefix :: String +exTagsPrefix = "htools:iextags:" + -- * Types {-| The request type. @@ -63,7 +69,7 @@ data RqType deriving (Show) -- | A complete request, as received from Ganeti. -data Request = Request RqType Node.List Instance.List String +data Request = Request RqType Node.List Instance.List [String] String deriving (Show) -- * Functions @@ -114,6 +120,14 @@ fixNodes accu inst = in (sdx, snew):ac3 else ac2 +-- | Remove non-selected tags from the exclusion list +filterExTags :: [String] -> Instance.Instance -> Instance.Instance +filterExTags tl inst = + let old_tags = Instance.tags inst + new_tags = filter (\tag -> any (\extag -> isPrefixOf extag tag) tl) + old_tags + in inst { Instance.tags = new_tags } + -- | Compute the longest common suffix of a list of strings that -- | starts with a dot. longestDomain :: [String] -> String @@ -128,14 +142,20 @@ longestDomain (x:xs) = stripSuffix :: Int -> String -> String stripSuffix sflen name = take (length name - sflen) name +-- | Extracts the exclusion tags from the cluster configuration +extractExTags :: [String] -> [String] +extractExTags = + map (drop (length exTagsPrefix)) . + filter (isPrefixOf exTagsPrefix) + -- | Initializer function that loads the data from a node and instance -- list and massages it into the correct format. mergeData :: [(String, DynUtil)] -- ^ Instance utilisation data - -> (Node.AssocList, - Instance.AssocList) -- ^ Data from either Text.loadData - -- or Rapi.loadData - -> Result (Node.List, Instance.List, String) -mergeData um (nl, il) = + -> [String] -- ^ Exclusion tags + -> (Node.AssocList, Instance.AssocList, [String]) + -- ^ Data from backends + -> Result (Node.List, Instance.List, [String], String) +mergeData um extags (nl, il, tags) = let il2 = Container.fromAssocList il il3 = foldl' (\im (name, n_util) -> case Container.findByName im name of @@ -144,16 +164,18 @@ mergeData um (nl, il) = let new_i = inst { Instance.util = n_util } in Container.add (Instance.idx inst) new_i im ) il2 um - nl2 = foldl' fixNodes nl (Container.elems il3) + allextags = extags ++ extractExTags tags + il4 = Container.map (filterExTags allextags) il3 + nl2 = foldl' fixNodes nl (Container.elems il4) nl3 = Container.fromAssocList - (map (\ (k, v) -> (k, Node.buildPeers v il3)) nl2) + (map (\ (k, v) -> (k, Node.buildPeers v il4)) nl2) node_names = map (Node.name . snd) nl inst_names = map (Instance.name . 
snd) il common_suffix = longestDomain (node_names ++ inst_names) csl = length common_suffix snl = Container.map (\n -> setName n (stripSuffix csl $ nameOf n)) nl3 - sil = Container.map (\i -> setName i (stripSuffix csl $ nameOf i)) il3 - in Ok (snl, sil, common_suffix) + sil = Container.map (\i -> setName i (stripSuffix csl $ nameOf i)) il4 + in Ok (snl, sil, tags, common_suffix) -- | Checks the cluster data for consistency. checkData :: Node.List -> Instance.List diff --git a/Ganeti/HTools/Luxi.hs b/Ganeti/HTools/Luxi.hs index de83d6cb58a132b348826fc9fa778a8a9f526e1f..a547405979ca411db4e93585de5f563e0cdfa0db 100644 --- a/Ganeti/HTools/Luxi.hs +++ b/Ganeti/HTools/Luxi.hs @@ -38,7 +38,7 @@ import Ganeti.HTools.Loader import Ganeti.HTools.Types import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Instance as Instance -import Ganeti.HTools.Utils (fromJVal, annotateResult) +import Ganeti.HTools.Utils (fromJVal, annotateResult, tryFromObj, asJSObject) -- * Utility functions @@ -70,11 +70,15 @@ queryInstancesMsg = let nnames = JSArray [] fnames = ["name", "disk_usage", "be/memory", "be/vcpus", - "status", "pnode", "snodes"] + "status", "pnode", "snodes", "tags"] fields = JSArray $ map (JSString . toJSString) fnames use_locking = JSBool False in JSArray [nnames, fields, use_locking] +-- | The input data for cluster query +queryClusterInfoMsg :: JSValue +queryClusterInfoMsg = JSArray [] + -- | Wraper over callMethod doing node query. queryNodes :: L.Client -> IO (Result JSValue) queryNodes = L.callMethod L.QueryNodes queryNodesMsg @@ -83,6 +87,9 @@ queryNodes = L.callMethod L.QueryNodes queryNodesMsg queryInstances :: L.Client -> IO (Result JSValue) queryInstances = L.callMethod L.QueryInstances queryInstancesMsg +queryClusterInfo :: L.Client -> IO (Result JSValue) +queryClusterInfo = L.callMethod L.QueryClusterInfo queryClusterInfoMsg + -- | Parse a instance list in JSON format. getInstances :: NameAssoc -> JSValue @@ -93,7 +100,8 @@ getInstances ktn arr = toArray arr >>= mapM (parseInstance ktn) parseInstance :: [(String, Ndx)] -> JSValue -> Result (String, Instance.Instance) -parseInstance ktn (JSArray (name:disk:mem:vcpus:status:pnode:snodes:[])) = do +parseInstance ktn (JSArray [ name, disk, mem, vcpus + , status, pnode, snodes, tags ]) = do xname <- annotateResult "Parsing new instance" (fromJVal name) let convert v = annotateResult ("Instance '" ++ xname ++ "'") (fromJVal v) xdisk <- convert disk @@ -104,7 +112,9 @@ parseInstance ktn (JSArray (name:disk:mem:vcpus:status:pnode:snodes:[])) = do snode <- (if null xsnodes then return Node.noSecondary else lookupNode ktn xname (fromJSString $ head xsnodes)) xrunning <- convert status - let inst = Instance.create xname xmem xdisk xvcpus xrunning xpnode snode + xtags <- convert tags + let inst = Instance.create xname xmem xdisk xvcpus + xrunning xtags xpnode snode return (xname, inst) parseInstance _ v = fail ("Invalid instance query result: " ++ show v) @@ -115,8 +125,8 @@ getNodes arr = toArray arr >>= mapM parseNode -- | Construct a node from a JSON object. 
parseNode :: JSValue -> Result (String, Node.Node) -parseNode (JSArray - (name:mtotal:mnode:mfree:dtotal:dfree:ctotal:offline:drained:[])) +parseNode (JSArray [ name, mtotal, mnode, mfree, dtotal, dfree + , ctotal, offline, drained ]) = do xname <- annotateResult "Parsing new node" (fromJVal name) let convert v = annotateResult ("Node '" ++ xname ++ "'") (fromJVal v) @@ -137,11 +147,18 @@ parseNode (JSArray parseNode v = fail ("Invalid node query result: " ++ show v) +getClusterTags :: JSValue -> Result [String] +getClusterTags v = do + let errmsg = "Parsing cluster info" + obj <- annotateResult errmsg $ asJSObject v + tags <- tryFromObj errmsg (fromJSObject obj) "tag" + return tags + -- * Main loader functionality -- | Builds the cluster data from an URL. loadData :: String -- ^ Unix socket to use as source - -> IO (Result (Node.AssocList, Instance.AssocList)) + -> IO (Result (Node.AssocList, Instance.AssocList, [String])) loadData master = E.bracket (L.getClient master) @@ -149,10 +166,12 @@ loadData master = (\s -> do nodes <- queryNodes s instances <- queryInstances s + cinfo <- queryClusterInfo s return $ do -- Result monad node_data <- nodes >>= getNodes let (node_names, node_idx) = assignIndices node_data inst_data <- instances >>= getInstances node_names let (_, inst_idx) = assignIndices inst_data - return (node_idx, inst_idx) + ctags <- cinfo >>= getClusterTags + return (node_idx, inst_idx, ctags) ) diff --git a/Ganeti/HTools/Node.hs b/Ganeti/HTools/Node.hs index 296d8b1583f2570138afe12fb878d8557ba378b7..1e7edf3cd0fb39e40583d5716844a9be63bd1076 100644 --- a/Ganeti/HTools/Node.hs +++ b/Ganeti/HTools/Node.hs @@ -48,6 +48,7 @@ module Ganeti.HTools.Node , addSec -- * Stats , availDisk + , conflictingPrimaries -- * Formatting , defaultFields , showHeader @@ -59,6 +60,8 @@ module Ganeti.HTools.Node ) where import Data.List +import qualified Data.Map as Map +import qualified Data.Foldable as Foldable import Text.Printf (printf) import qualified Ganeti.HTools.Container as Container @@ -69,6 +72,9 @@ import qualified Ganeti.HTools.Types as T -- * Type declarations +-- | The tag map type +type TagMap = Map.Map String Int + -- | The node type. data Node = Node { name :: String -- ^ The node name @@ -102,6 +108,7 @@ data Node = Node -- computations , utilPool :: T.DynUtil -- ^ Total utilisation capacity , utilLoad :: T.DynUtil -- ^ Sum of instance utilisation + , pTags :: TagMap -- ^ Map of primary instance tags and their count } deriving (Show) instance T.Element Node where @@ -128,6 +135,39 @@ noLimit = -1 noLimitInt :: Int noLimitInt = -1 +-- * Helper functions + +-- | Add a tag to a tagmap +addTag :: TagMap -> String -> TagMap +addTag t s = Map.insertWith (+) s 1 t + +-- | Add multiple tags +addTags :: TagMap -> [String] -> TagMap +addTags = foldl' addTag + +-- | Adjust or delete a tag from a tagmap +delTag :: TagMap -> String -> TagMap +delTag t s = Map.update (\v -> if v > 1 + then Just (v-1) + else Nothing) + s t + +-- | Remove multiple tags +delTags :: TagMap -> [String] -> TagMap +delTags = foldl' delTag + +-- | Check if we can add a list of tags to a tagmap +rejectAddTags :: TagMap -> [String] -> Bool +rejectAddTags t = any (flip Map.member t) + +-- | Check how many primary instances have conflicting tags. The +-- algorithm to compute this is to sum the count of all tags, then +-- subtract the size of the tag map (since each tag has at least one, +-- non-conflicting instance); this is equivalent to summing the +-- values in the tag map minus one. 
+conflictingPrimaries :: Node -> Int +conflictingPrimaries (Node { pTags = t }) = Foldable.sum t - Map.size t + -- * Initialization functions -- | Create a new node. @@ -164,6 +204,7 @@ create name_init mem_t_init mem_n_init mem_f_init , hiCpu = noLimitInt , utilPool = T.baseUtil , utilLoad = T.zeroUtil + , pTags = Map.empty } -- | Changes the index. @@ -215,12 +256,13 @@ buildPeers t il = in t {peers=pmap, failN1 = new_failN1, rMem = new_rmem, pRem = new_prem} -- | Assigns an instance to a node as primary and update the used VCPU --- count and utilisation data. +-- count, utilisation data and tags map. setPri :: Node -> Instance.Instance -> Node setPri t inst = t { pList = Instance.idx inst:pList t , uCpu = new_count , pCpu = fromIntegral new_count / tCpu t , utilLoad = utilLoad t `T.addUtil` Instance.util inst + , pTags = addTags (pTags t) (Instance.tags inst) } where new_count = uCpu t + Instance.vcpus inst @@ -256,7 +298,8 @@ removePri t inst = new_load = utilLoad t `T.subUtil` Instance.util inst in t { pList = new_plist, fMem = new_mem, fDsk = new_dsk , failN1 = new_failn1, pMem = new_mp, pDsk = new_dp - , uCpu = new_ucpu, pCpu = new_rcpu, utilLoad = new_load } + , uCpu = new_ucpu, pCpu = new_rcpu, utilLoad = new_load + , pTags = delTags (pTags t) (Instance.tags inst) } -- | Removes a secondary instance. removeSec :: Node -> Instance.Instance -> Node @@ -295,16 +338,21 @@ addPri t inst = new_dp = fromIntegral new_dsk / tDsk t l_cpu = mCpu t new_load = utilLoad t `T.addUtil` Instance.util inst + inst_tags = Instance.tags inst + old_tags = pTags t in if new_mem <= 0 then T.OpFail T.FailMem else if new_dsk <= 0 || mDsk t > new_dp then T.OpFail T.FailDisk else if new_failn1 && not (failN1 t) then T.OpFail T.FailMem else if l_cpu >= 0 && l_cpu < new_pcpu then T.OpFail T.FailCPU + else if rejectAddTags old_tags inst_tags + then T.OpFail T.FailTags else let new_plist = iname:pList t new_mp = fromIntegral new_mem / tMem t r = t { pList = new_plist, fMem = new_mem, fDsk = new_dsk , failN1 = new_failn1, pMem = new_mp, pDsk = new_dp - , uCpu = new_ucpu, pCpu = new_pcpu, utilLoad = new_load } + , uCpu = new_ucpu, pCpu = new_pcpu, utilLoad = new_load + , pTags = addTags old_tags inst_tags } in T.OpGood r -- | Adds a secondary instance. @@ -374,6 +422,8 @@ showField t field = "mload" -> printf "%5.3f" uM "dload" -> printf "%5.3f" uD "nload" -> printf "%5.3f" uN + "ptags" -> intercalate "," . map (\(k, v) -> printf "%s=%d" k v) . + Map.toList $ pTags t _ -> printf "<unknown field>" where T.DynUtil { T.cpuWeight = uC, T.memWeight = uM, @@ -406,6 +456,7 @@ showHeader field = "mload" -> ("lMem", True) "dload" -> ("lDsk", True) "nload" -> ("lNet", True) + "ptags" -> ("PrimaryTags", False) _ -> ("<unknown field>", False) -- | String converter for the node list functionality. 
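As a quick aid for reviewing the Node.hs changes above, here is a minimal, self-contained Haskell sketch (illustrative only, not part of the patch) of how the new TagMap bookkeeping behaves; it mirrors addTags, rejectAddTags and conflictingPrimaries, but works on a bare TagMap instead of a full Node, and the tag values are invented.

-- Illustrative sketch of the TagMap helpers added to Node.hs above;
-- operates on a bare TagMap rather than a Node, tag values are invented.
import qualified Data.Map as Map
import Data.List (foldl')

type TagMap = Map.Map String Int

-- Count each tag occurrence, as addTags does for primary instances.
addTags :: TagMap -> [String] -> TagMap
addTags = foldl' (\t s -> Map.insertWith (+) s 1 t)

-- An allocation is rejected if any of the instance's tags is already
-- present on the node.
rejectAddTags :: TagMap -> [String] -> Bool
rejectAddTags t = any (`Map.member` t)

-- Sum of counts minus number of distinct tags: the first instance per
-- tag is conflict-free, every further one is a conflict.
conflictingPrimaries :: TagMap -> Int
conflictingPrimaries t = sum (Map.elems t) - Map.size t

main :: IO ()
main = do
  let m = addTags Map.empty ["service:dns", "service:dns", "service:web"]
  print (rejectAddTags m ["service:dns"])  -- True, would conflict
  print (rejectAddTags m ["service:ldap"]) -- False, no such tag yet
  print (conflictingPrimaries m)           -- 1 (the second "service:dns")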
diff --git a/Ganeti/HTools/Rapi.hs b/Ganeti/HTools/Rapi.hs index 6050f30965f4424e92b741cc8063b6b3647dccf9..3f60699074b692bd58148ab47f1dd3af054979c4 100644 --- a/Ganeti/HTools/Rapi.hs +++ b/Ganeti/HTools/Rapi.hs @@ -33,7 +33,7 @@ import Network.Curl.Types () import Network.Curl.Code import Data.List import Control.Monad -import Text.JSON (JSObject, JSValue, fromJSObject) +import Text.JSON (JSObject, JSValue, fromJSObject, decodeStrict) import Text.Printf (printf) import Ganeti.HTools.Utils @@ -88,7 +88,8 @@ parseInstance ktn a = do snode <- (if null snodes then return Node.noSecondary else readEitherString (head snodes) >>= lookupNode ktn name) running <- extract "status" a - let inst = Instance.create name mem disk vcpus running pnode snode + tags <- extract "tags" a + let inst = Instance.create name mem disk vcpus running tags pnode snode return (name, inst) -- | Construct a node from a JSON object. @@ -113,14 +114,16 @@ parseNode a = do -- | Builds the cluster data from an URL. loadData :: String -- ^ Cluster or URL to use as source - -> IO (Result (Node.AssocList, Instance.AssocList)) + -> IO (Result (Node.AssocList, Instance.AssocList, [String])) loadData master = do -- IO monad let url = formatHost master node_body <- getUrl $ printf "%s/2/nodes?bulk=1" url inst_body <- getUrl $ printf "%s/2/instances?bulk=1" url + tags_body <- getUrl $ printf "%s/2/tags" url return $ do -- Result monad node_data <- node_body >>= getNodes let (node_names, node_idx) = assignIndices node_data inst_data <- inst_body >>= getInstances node_names let (_, inst_idx) = assignIndices inst_data - return (node_idx, inst_idx) + tags_data <- tags_body >>= (fromJResult . decodeStrict) + return (node_idx, inst_idx, tags_data) diff --git a/Ganeti/HTools/Simu.hs b/Ganeti/HTools/Simu.hs index ee0e66a9d9376802cb2dccec2e0a481d6668e0c4..d8221687135fdc7056c0cbbd19a7ad49ebbf30db 100644 --- a/Ganeti/HTools/Simu.hs +++ b/Ganeti/HTools/Simu.hs @@ -52,7 +52,7 @@ parseDesc desc = -- | Builds the cluster data from node\/instance files. loadData :: String -- ^ Cluster description in text format - -> IO (Result (Node.AssocList, Instance.AssocList)) + -> IO (Result (Node.AssocList, Instance.AssocList, [String])) loadData ndata = -- IO monad, just for consistency with the other loaders return $ do (cnt, disk, mem, cpu) <- parseDesc ndata @@ -63,4 +63,4 @@ loadData ndata = -- IO monad, just for consistency with the other loaders (fromIntegral cpu) False in (idx, Node.setIdx n idx) ) [1..cnt] - return (nodes, []) + return (nodes, [], []) diff --git a/Ganeti/HTools/Text.hs b/Ganeti/HTools/Text.hs index 5fae5aa25b477f5084dee9befa9fa0a7ffc27619..efe8b48f1a5c7d97c9f95d705961f32eebc5415d 100644 --- a/Ganeti/HTools/Text.hs +++ b/Ganeti/HTools/Text.hs @@ -42,7 +42,7 @@ import qualified Ganeti.HTools.Instance as Instance -- | Load a node from a field list. loadNode :: (Monad m) => [String] -> m (String, Node.Node) -loadNode (name:tm:nm:fm:td:fd:tc:fo:[]) = do +loadNode [name, tm, nm, fm, td, fd, tc, fo] = do new_node <- if any (== "?") [tm,nm,fm,td,fd,tc] || fo == "Y" then return $ Node.create name 0 0 0 0 0 0 True @@ -60,7 +60,7 @@ loadNode s = fail $ "Invalid/incomplete node data: '" ++ show s ++ "'" -- | Load an instance from a field list. 
loadInst :: (Monad m) => [(String, Ndx)] -> [String] -> m (String, Instance.Instance) -loadInst ktn (name:mem:dsk:vcpus:status:pnode:snode:[]) = do +loadInst ktn [name, mem, dsk, vcpus, status, pnode, snode, tags] = do pidx <- lookupNode ktn name pnode sidx <- (if null snode then return Node.noSecondary else lookupNode ktn name snode) @@ -69,7 +69,8 @@ loadInst ktn (name:mem:dsk:vcpus:status:pnode:snode:[]) = do vvcpus <- tryRead name vcpus when (sidx == pidx) $ fail $ "Instance " ++ name ++ " has same primary and secondary node - " ++ pnode - let newinst = Instance.create name vmem vdsk vvcpus status pidx sidx + let vtags = sepSplit ',' tags + newinst = Instance.create name vmem vdsk vvcpus status vtags pidx sidx return (name, newinst) loadInst _ s = fail $ "Invalid/incomplete instance data: '" ++ show s ++ "'" @@ -90,7 +91,7 @@ loadTabular text_data convert_fn = do -- | Builds the cluster data from node\/instance files. loadData :: String -- ^ Node data in string format -> String -- ^ Instance data in string format - -> IO (Result (Node.AssocList, Instance.AssocList)) + -> IO (Result (Node.AssocList, Instance.AssocList, [String])) loadData nfile ifile = do -- IO monad ndata <- readFile nfile idata <- readFile ifile @@ -99,4 +100,4 @@ loadData nfile ifile = do -- IO monad (ktn, nl) <- loadTabular ndata loadNode {- instance file: name mem disk status pnode snode -} (_, il) <- loadTabular idata (loadInst ktn) - return (nl, il) + return (nl, il, []) diff --git a/Ganeti/HTools/Types.hs b/Ganeti/HTools/Types.hs index 1151cec8864d42b8f155b96618d36ee99a42ac04..fb6493e7b7531eccc8a742e36fa4ee2c786b1d40 100644 --- a/Ganeti/HTools/Types.hs +++ b/Ganeti/HTools/Types.hs @@ -147,6 +147,7 @@ data FailMode = FailMem -- ^ Failed due to not enough RAM | FailDisk -- ^ Failed due to not enough disk | FailCPU -- ^ Failed due to not enough CPU capacity | FailN1 -- ^ Failed due to not passing N1 checks + | FailTags -- ^ Failed due to tag exclusion deriving (Eq, Enum, Bounded, Show) -- | List with failure statistics diff --git a/Ganeti/HTools/Utils.hs b/Ganeti/HTools/Utils.hs index 9c1dff5bde83660b9e8b7cef4a535b40b252db6a..66498db7027065bddf321d157ce6d382cf828161 100644 --- a/Ganeti/HTools/Utils.hs +++ b/Ganeti/HTools/Utils.hs @@ -78,23 +78,18 @@ fst3 (a, _, _) = a -- * Mathematical functions --- Simple and slow statistical functions, please replace with better versions - --- | Mean value of a list. -meanValue :: Floating a => [a] -> a -meanValue lst = sum lst / fromIntegral (length lst) - --- | Standard deviation. -stdDev :: Floating a => [a] -> a -stdDev lst = - let mv = meanValue lst +-- Simple and slow statistical functions, please replace with better +-- versions + +-- | The covariance of the list +varianceCoeff :: [Double] -> Double +varianceCoeff lst = + let ll = fromIntegral (length lst)::Double -- length of list + mv = sum lst / ll -- mean value av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst - bv = sqrt (av / fromIntegral (length lst)) - in bv - --- | Coefficient of variation. 
-varianceCoeff :: Floating a => [a] -> a -varianceCoeff lst = stdDev lst / fromIntegral (length lst) + bv = sqrt (av / ll) -- stddev + cv = bv / ll -- covariance + in cv -- * JSON-related functions diff --git a/Ganeti/Luxi.hs b/Ganeti/Luxi.hs index 707341281bf8a4991655d4571f476ca3de8fa20d..0468a4df1b501762610845dbdddd37a214e70719 100644 --- a/Ganeti/Luxi.hs +++ b/Ganeti/Luxi.hs @@ -63,6 +63,7 @@ withTimeout secs descr action = do data LuxiOp = QueryInstances | QueryNodes | QueryJobs + | QueryClusterInfo | SubmitManyJobs -- | The serialisation of LuxiOps into strings in messages. @@ -70,6 +71,7 @@ strOfOp :: LuxiOp -> String strOfOp QueryNodes = "QueryNodes" strOfOp QueryInstances = "QueryInstances" strOfOp QueryJobs = "QueryJobs" +strOfOp QueryClusterInfo = "QueryClusterInfo" strOfOp SubmitManyJobs = "SubmitManyJobs" -- | The end-of-message separator. diff --git a/README b/README index d30e820b4b263da946068d6bf0b6e7321b88341e..d1cdfc59f43a30e7c455344be6a075b3410a7e02 100644 --- a/README +++ b/README @@ -5,15 +5,15 @@ These are some simple cluster tools for fixing common allocation problems on Ganeti 2.0 clusters. Note that these tools are most useful for bigger cluster sizes -(e.g. more than five or ten machines); at lower sizes, the -computations they do can also be done manually. +(e.g. more than five or ten machines); at lower sizes, the computations +they do can also be done manually. -Most of the tools revolve around the concept of keeping the cluster -N+1 compliant: this means that in case of failure of any node, the -instances affected can be failed over (via ``gnt-node failover`` or -``gnt-instance failover``) to their secondary node, and there is -enough memory reserved for this operation without needing to shutdown -other instances or rebalance the cluster. +Most of the tools revolve around the concept of keeping the cluster N+1 +compliant: this means that in case of failure of any node, the instances +affected can be failed over (via ``gnt-node failover`` or ``gnt-instance +failover``) to their secondary node, and there is enough memory reserved +for this operation without needing to shutdown other instances or +rebalance the cluster. **Quick start** (see the installation section for more details): @@ -41,11 +41,11 @@ IAllocator plugin ~~~~~~~~~~~~~~~~~ The ``hail`` iallocator plugin can be used for allocations of mirrored -and non-mirrored instances and for relocations of mirrored -instances. It needs to be installed in Ganeti's iallocator search -path—usually ``/usr/lib/ganeti/iallocators`` or -``/usr/local/lib/ganeti/iallocators``, and after that it can be used -via ganeti's ``--iallocator`` option (in various gnt-node/gnt-instance +and non-mirrored instances and for relocations of mirrored instances. It +needs to be installed in Ganeti's iallocator search path—usually +``/usr/lib/ganeti/iallocators`` or +``/usr/local/lib/ganeti/iallocators``, and after that it can be used via +ganeti's ``--iallocator`` option (in various gnt-node/gnt-instance commands). See the man page hail(1) for more details. Cluster capacity estimator @@ -61,19 +61,18 @@ Integration with Ganeti ----------------------- The ``hbal`` and ``hspace`` programs can either get their input from -text files, locally from the master daemon (when run on the master -node of a cluster), or remote from a cluster via RAPI. The "-L" -argument enables local collection (with an optional path to the unix -socket). For online collection via RAPI, the "-m" argument should -specify the cluster or master node name. 
Only ``hbal`` and ``hspace`` -use these arguments, ``hail`` uses the standard iallocator API and -thus doesn't need any special setup (just needs to be installed in the -right directory). +text files, locally from the master daemon (when run on the master node +of a cluster), or remote from a cluster via RAPI. The "-L" argument +enables local collection (with an optional path to the unix socket). For +online collection via RAPI, the "-m" argument should specify the cluster +or master node name. Only ``hbal`` and ``hspace`` use these arguments, +``hail`` uses the standard iallocator API and thus doesn't need any +special setup (just needs to be installed in the right directory). For generating the text files, a separate tool (``hscan``) is provided -to automate their gathering if RAPI is available, which is better -since it can extract more precise information. In case RAPI is not -usable for whatever reason, the following two commands should be run:: +to automate their gathering if RAPI is available, which is better since +it can extract more precise information. In case RAPI is not usable for +whatever reason, the following two commands should be run:: gnt-node list -oname,mtotal,mnode,mfree,dtotal,dfree,ctotal,offline \ --separator '|' --no-headers > nodes @@ -97,11 +96,11 @@ installed manually: - curl (http://hackage.haskell.org/package/curl) - network (http://hackage.haskell.org/package/network) -Once these are installed, just typing *make* in the top-level -directory should be enough. +Once these are installed, just typing *make* in the top-level directory +should be enough. -Only the ``hail`` program needs to be installed in a specific place, -the other tools are not location-dependent. +Only the ``hail`` program needs to be installed in a specific place, the +other tools are not location-dependent. For running the (admittedly small) unittest suite (via *make check*), the QuickCheck version 1 library is needed. @@ -109,6 +108,8 @@ the QuickCheck version 1 library is needed. Internal (implementation) documentation is available in the ``apidoc`` directory. +.. vim: set textwidth=72 : .. Local Variables: .. mode: rst +.. fill-column: 72 .. End: diff --git a/hail.1 b/hail.1 index 48c6d02045e8eb4b4082cc77f4829afa8f7599f7..c19fec2e233cf3cf7765e15967e574bb5b43de02 100644 --- a/hail.1 +++ b/hail.1 @@ -14,8 +14,8 @@ hail is a Ganeti IAllocator plugin that allows automatic instance placement and automatic instance secondary node replacement using the same algorithm as \fBhbal\fR(1). -The program takes input via a JSON-file containing current cluster -state and the request details, and output (on stdout) a JSON-formatted +The program takes input via a JSON\(hyfile containing current cluster +state and the request details, and output (on stdout) a JSON\(hyformatted response. In case of critical failures, the error message is printed on stderr and the exit code is changed to show failure. @@ -27,16 +27,32 @@ For relocations, we try to change the secondary node of the instance to all the valid other nodes; the node which results in the best cluster score is chosen. -For single-node allocations (non-mirrored instances), again we select -the node which, when chosen as the primary node, gives the best score. +For single\(hynode allocations (non\(hymirrored instances), again we +select the node which, when chosen as the primary node, gives the best +score. 
-For dual-node allocations (mirrored instances), we chose the best -pair; this is the only choice where the algoritm is non-trivial +For dual\(hynode allocations (mirrored instances), we choose the best +pair; this is the only choice where the algorithm is non\(hytrivial with regard to cluster size. For all choices, the cluster scoring is identical to the hbal algorithm. +.SH CONFIGURATION + +For the tag-exclusion configuration (see the manpage of hbal for more +details), the list of which instance tags to consider as exclusion +tags will be read from the cluster tags, configured as follows: + +- get all cluster tags starting with \fBhtools:iextags:\fR + +- use their suffix as the prefix for exclusion tags + +For example, given a cluster tag like \fBhtools:iextags:service\fR, +all instance tags of the form \fBservice:X\fR will be considered as +exclusion tags, meaning that (e.g.) two instances which both have a +tag \fBservice:foo\fR will not be placed on the same primary node. + .SH EXIT STATUS The exist status of the command will be zero, unless for some reason diff --git a/hail.hs b/hail.hs index 74c2ac2799cf667f0f86b5cca6a4f3da987689a4..dde30a0d9f58a4966c2abd08fec8aaf6adf9b776 100644 --- a/hail.hs +++ b/hail.hs @@ -27,6 +27,7 @@ module Main (main) where import Data.List import Data.Function +import Data.Maybe (isJust, fromJust) import Monad import System import System.IO @@ -44,7 +45,7 @@ import Ganeti.HTools.Loader (RqType(..), Request(..)) -- | Options list and functions options :: [OptType] -options = [oShowVer, oShowHelp] +options = [oPrintNodes, oShowVer, oShowHelp] processResults :: (Monad m) => Cluster.AllocSolution -> m (String, [Node.Node]) processResults (fstats, successes, sols) = @@ -62,7 +63,7 @@ processResults (fstats, successes, sols) = processRequest :: Request -> Result Cluster.AllocSolution processRequest request = - let Request rqtype nl il _ = request + let Request rqtype nl il _ _ = request in case rqtype of Allocate xi reqn -> Cluster.tryAlloc nl il xi reqn Relocate idx reqn exnodes -> Cluster.tryReloc nl il idx reqn exnodes @@ -71,13 +72,14 @@ processRequest request = main :: IO () main = do cmd_args <- System.getArgs - (_, args) <- parseOpts cmd_args "hail" options + (opts, args) <- parseOpts cmd_args "hail" options when (null args) $ do hPutStrLn stderr "Error: this program needs an input file." exitWith $ ExitFailure 1 let input_file = head args + shownodes = optShowNodes opts input_data <- readFile input_file request <- case (parseData input_data) of @@ -86,8 +88,13 @@ main = do exitWith $ ExitFailure 1 Ok rq -> return rq - let Request _ _ _ csf = request - sols = processRequest request >>= processResults + let Request _ nl _ _ csf = request + + when (isJust shownodes) $ do + hPutStrLn stderr "Initial cluster status:" + hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes) + + let sols = processRequest request >>= processResults let (ok, info, rn) = case sols of Ok (ginfo, sn) -> (True, "Request successful: " ++ ginfo, diff --git a/hbal.1 b/hbal.1 index 52622fcc02d5a258acf71138d0ed89eebd2d89f7..432f081394c5bf86722107cf7edd3a8f3681e022 100644 --- a/hbal.1 +++ b/hbal.1 @@ -45,11 +45,11 @@ cluster (nodes with their total and free disk, memory, etc.) and instance placement and computes a series of steps designed to bring the cluster into a better state. -The algorithm to do so is designed to be stable (i.e. it will give you -the same results when restarting it from the middle of the solution) -and reasonably fast.
It is not, however, designed to be a perfect -algorithm - it is possible to make it go into a corner from which it -can find no improvement, because it only look one "step" ahead. +The algorithm used is designed to be stable (i.e. it will give you the +same results when restarting it from the middle of the solution) and +reasonably fast. It is not, however, designed to be a perfect +algorithm \(em it is possible to make it go into a corner from which +it can find no improvement, because it looks only one "step" ahead. By default, the program will show the solution incrementally as it is computed, in a somewhat cryptic format; for getting the actual Ganeti @@ -88,6 +88,30 @@ exhaustive search over both candidate primary and secondary nodes, and is O(n*n) in the number of nodes. Furthermore, it doesn't seems to give better scores but will result in more disk replacements. +.SS PLACEMENT RESTRICTIONS + +At each step, we prevent an instance move if it would cause: + +.RS 4 +.TP 3 +\(em +a node to go into N+1 failure state +.TP +\(em +an instance to move onto an offline node (offline nodes are either +read from the cluster or declared with \fI-O\fR) +.TP +\(em +an exclusion-tag based conflict (exclusion tags are read from the +cluster and/or defined via the \fI--exclusion-tags\fR option) +.TP +\(em +a max vcpu/pcpu ratio to be exceeded (configured via \fI--max-cpu\fR) +.TP +\(em +min disk free percentage to go below the configured limit (configured +via \fI--min-disk\fR) + .SS CLUSTER SCORING As said before, the algorithm tries to minimise the cluster score at @@ -176,21 +200,32 @@ wrong calculations. For this reason, the algorithm subtracts the memory size of down instances from the free node memory of their primary node, in effect simulating the startup of such instances. -.SS OTHER POSSIBLE METRICS +.SS EXCLUSION TAGS + +The exclusion tags mechanism is designed to prevent instances which run +the same workload (e.g. two DNS servers) from landing on the same node, +which would make the respective node a SPOF for the given service. + +It works by tagging instances with certain tags and then building +exclusion maps based on these. Which tags are actually used is +configured either via the command line (option \fI--exclusion-tags\fR) +or via adding them to the cluster tags: -It would be desirable to add more metrics to the algorithm, especially -dynamically-computed metrics, such as: -.RS 4 -.TP 3 -\(em -CPU usage of instances .TP -\(em -Disk IO usage +.B --exclusion-tags=a,b +This will make all instance tags of the form \fIa:*\fR, \fIb:*\fR be +considered for the exclusion map + .TP -\(em -Network IO -.RE +cluster tags \fBhtools:iextags:a\fR, \fBhtools:iextags:b\fR +This will make instance tags \fIa:*\fR, \fIb:*\fR be considered for +the exclusion map. More precisely, the suffix of cluster tags starting +with \fBhtools:iextags:\fR will become the prefix of the exclusion +tags. + +.P +Both the above forms mean that two instances both having (e.g.) the +tag \fIa:foo\fR or \fIb:bar\fR won't end up on the same node. .SH OPTIONS The options that can be passed to the program are as follows: @@ -203,7 +238,7 @@ Note that the moves list will be split into independent steps, called "jobsets", but only for visual inspection, not for actually parallelisation. It is not possible to parallelise these directly when executed via "gnt-instance" commands, since a compound command -(e.g. failover and replace-disks) must be executed serially. Parallel +(e.g. failover and replace\-disks) must be executed serially.
Parallel execution is only possible when using the Luxi backend and the \fI-L\fR option. @@ -218,13 +253,13 @@ Prints the before and after node status, in a format designed to allow the user to understand the node's most important parameters. It is possible to customise the listed information by passing a -comma-separated list of field names to this option (the field list is +comma\(hyseparated list of field names to this option (the field list is currently undocumented). By default, the node list will contain these informations: .RS .TP .B F -a character denoting the status of the node, with '-' meaning an +a character denoting the status of the node, with '\-' meaning an offline node, '*' meaning N+1 failure and blank meaning a good node .TP .B Name @@ -298,7 +333,7 @@ node status, but it can help in understanding instance moves. .TP .B -o, --oneline -Only shows a one-line output from the program, designed for the case +Only shows a one\(hyline output from the program, designed for the case when one wants to look at multiple clusters at once and check their status. @@ -337,7 +372,7 @@ these nodes will not be included in the score calculation (except for the percentage of instances on offline nodes) .RE Note that hbal will also mark as offline any nodes which are reported -by RAPI as such, or that have "?" in file-based input in any numeric +by RAPI as such, or that have "?" in file\(hybased input in any numeric fields. .RE @@ -362,8 +397,8 @@ empirically). .TP .BI "--no-disk-moves" -This parameter prevents hbal from using disk move (i.e. "gnt-instance -replace-disks") operations. This will result in a much quicker +This parameter prevents hbal from using disk move (i.e. "gnt\-instance +replace\-disks") operations. This will result in a much quicker balancing, but of course the improvements are limited. It is up to the user to decide when to use one or another. @@ -404,9 +439,9 @@ how to customize the default value via the environment). Collect data not from files but directly from the .I cluster given as an argument via RAPI. If the argument doesn't contain a colon -(:), then it is converted into a fully-built URL via prepending +(:), then it is converted into a fully\(hybuilt URL via prepending https:// and appending the default RAPI port, otherwise it's -considered a fully-specified URL and is used as-is. +considered a fully\(hyspecified URL and is used as\(hyis. .TP .BI "-L[" path "]" @@ -414,7 +449,7 @@ Collect data not from files but directly from the master daemon, which is to be contacted via the luxi (an internal Ganeti protocol). An optional \fIpath\fR argument is interpreted as the path to the unix socket on which the master daemon listens; otherwise, the default path -used by ganeti when installed with "--localstatedir=/var" is used. +used by ganeti when installed with \fI--localstatedir=/var\fR is used. .TP .B "-X" @@ -434,12 +469,12 @@ automate the execution of the balancing. .TP .BI "--max-cpu " cpu-ratio -The maximum virtual-to-physical cpu ratio, as a floating point number -between zero and one. For example, specifying \fIcpu-ratio\fR as -\fB2.5\fR means that, for a 4-cpu machine, a maximum of 10 virtual -cpus should be allowed to be in use for primary instances. A value of -one doesn't make sense though, as that means no disk space can be used -on it. +The maximum virtual\(hyto\(hyphysical cpu ratio, as a floating point +number between zero and one. 
For example, specifying \fIcpu-ratio\fR +as \fB2.5\fR means that, for a 4\(hycpu machine, a maximum of 10 +virtual cpus should be allowed to be in use for primary instances. A +value of one doesn't make sense though, as that means no disk space +can be used on it. .TP .BI "--min-disk " disk-ratio @@ -483,7 +518,7 @@ with cryptic errors messages in this case. The algorithm is not perfect. The output format is not easily scriptable, and the program should -feed moves directly into Ganeti (either via RAPI or via a gnt-debug +feed moves directly into Ganeti (either via RAPI or via a gnt\-debug input file). .SH EXAMPLE @@ -586,46 +621,46 @@ Cluster score improved from 0.52329131 to 0.00252594 Commands to run to reach the above solution: echo step 1 - echo gnt-instance migrate instance14 - echo gnt-instance replace-disks -n node16 instance14 - echo gnt-instance migrate instance14 + echo gnt\-instance migrate instance14 + echo gnt\-instance replace\-disks \-n node16 instance14 + echo gnt\-instance migrate instance14 echo step 2 - echo gnt-instance migrate instance54 - echo gnt-instance replace-disks -n node16 instance54 - echo gnt-instance migrate instance54 + echo gnt\-instance migrate instance54 + echo gnt\-instance replace\-disks \-n node16 instance54 + echo gnt\-instance migrate instance54 echo step 3 - echo gnt-instance migrate instance4 - echo gnt-instance replace-disks -n node16 instance4 + echo gnt\-instance migrate instance4 + echo gnt\-instance replace\-disks \-n node16 instance4 echo step 4 - echo gnt-instance replace-disks -n node2 instance48 - echo gnt-instance migrate instance48 + echo gnt\-instance replace\-disks \-n node2 instance48 + echo gnt\-instance migrate instance48 echo step 5 - echo gnt-instance replace-disks -n node16 instance93 - echo gnt-instance migrate instance93 + echo gnt\-instance replace\-disks \-n node16 instance93 + echo gnt\-instance migrate instance93 echo step 6 - echo gnt-instance replace-disks -n node2 instance89 - echo gnt-instance migrate instance89 + echo gnt\-instance replace\-disks \-n node2 instance89 + echo gnt\-instance migrate instance89 echo step 7 - echo gnt-instance replace-disks -n node16 instance5 - echo gnt-instance migrate instance5 + echo gnt\-instance replace\-disks \-n node16 instance5 + echo gnt\-instance migrate instance5 echo step 8 - echo gnt-instance migrate instance94 - echo gnt-instance replace-disks -n node16 instance94 + echo gnt\-instance migrate instance94 + echo gnt\-instance replace\-disks \-n node16 instance94 echo step 9 - echo gnt-instance migrate instance44 - echo gnt-instance replace-disks -n node15 instance44 + echo gnt\-instance migrate instance44 + echo gnt\-instance replace\-disks \-n node15 instance44 echo step 10 - echo gnt-instance replace-disks -n node16 instance62 + echo gnt\-instance replace\-disks \-n node16 instance62 echo step 11 - echo gnt-instance replace-disks -n node16 instance13 + echo gnt\-instance replace\-disks \-n node16 instance13 echo step 12 - echo gnt-instance replace-disks -n node7 instance19 + echo gnt\-instance replace\-disks \-n node7 instance19 echo step 13 - echo gnt-instance replace-disks -n node1 instance43 + echo gnt\-instance replace\-disks \-n node1 instance43 echo step 14 - echo gnt-instance replace-disks -n node4 instance1 + echo gnt\-instance replace\-disks \-n node4 instance1 echo step 15 - echo gnt-instance replace-disks -n node17 instance58 + echo gnt\-instance replace\-disks \-n node17 instance58 Final cluster status: N1 Name t_mem f_mem r_mem t_dsk f_dsk pri sec p_fmem p_fdsk 
@@ -659,7 +694,7 @@ the command list to reach the final solution. In the initial listing, we see which nodes are not N+1 compliant. The algorithm is stable as long as each step above is fully completed, -e.g. in step 8, both the migrate and the replace-disks are +e.g. in step 8, both the migrate and the replace\-disks are done. Otherwise, if only the migrate is done, the input data is changed in a way that the program will output a different solution list (but hopefully will end in the same state). diff --git a/hbal.hs b/hbal.hs index 1ad686e89b24019bc59f070f75ac04ece5b56632..1c3f9fe49459f8b13ed30a091f519c14e1e08a94 100644 --- a/hbal.hs +++ b/hbal.hs @@ -73,6 +73,7 @@ options = , oMinDisk , oDiskMoves , oDynuFile + , oExTags , oShowVer , oShowHelp ] @@ -186,7 +187,7 @@ main = do verbose = optVerbose opts shownodes = optShowNodes opts - (fixed_nl, il, csf) <- loadExternalData opts + (fixed_nl, il, ctags, csf) <- loadExternalData opts let offline_names = optOffline opts all_nodes = Container.elems fixed_nl @@ -209,6 +210,9 @@ main = do nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) nm + when (not oneline && verbose > 1) $ + putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags + when (Container.size il == 0) $ do (if oneline then putStrLn $ formatOneline 0 0 0 else printf "Cluster is empty, exiting.\n") diff --git a/hscan.1 b/hscan.1 index 72cca91f5f14481618df3de7448152d0533de341..82a2dc446ce3c44f5d15c9286b3138cd4fa756d0 100644 --- a/hscan.1 +++ b/hscan.1 @@ -16,7 +16,7 @@ hscan \- Scan clusters via RAPI and save node/instance data hscan is a tool for scanning clusters via RAPI and saving their data in the input format used by .BR hbal "(1) and " hspace "(1)." -It will also show a one-line score for each cluster scanned or, if +It will also show a one\(hyline score for each cluster scanned or, if desired, the cluster state as show by the \fB-p\fR option to the other tools. @@ -27,7 +27,7 @@ via the \fB-i\fR and \fB-n\fR options. In case the cluster name contains slashes (as it can happen when the cluster is a fully-specified URL), these will be replaced with underscores. -The one-line output for each cluster will show the following: +The one\(hyline output for each cluster will show the following: .RS .TP .B Name @@ -43,7 +43,7 @@ The number of instances in the cluster The number of nodes failing N+1 .TP .B BInst -The number of instances living on N+1-failed nodes +The number of instances living on N+1\(hyfailed nodes .TP .B t_mem Total memory in the cluster @@ -75,7 +75,7 @@ The options that can be passed to the program are as follows: .TP .B -p, --print-nodes -Prints the node status for each cluster after the cluster's one-line +Prints the node status for each cluster after the cluster's one\(hyline status display, in a format designed to allow the user to understand the node's most important parameters. For details, see the man page for \fBhbal\fR(1). @@ -100,7 +100,7 @@ data). The program does not check its input data for consistency, and aborts with cryptic errors messages in this case. -The RAPI collection doesn't deal with non-\fBdrbd\fR instances, and +The RAPI collection doesn't deal with non\(hy\fBdrbd\fR instances, and chokes on input data which has such instances. .SH EXAMPLE @@ -111,8 +111,8 @@ chokes on input data which has such instances. 
Name Nodes Inst BNode BInst t_mem f_mem t_disk f_disk Score cluster1 2 2 0 0 1008 652 255 253 0.24404762 .RB "$ " "ls -l cluster1.*" --rw-r--r-- 1 root root 163 2009-03-23 07:26 cluster1.instances --rw-r--r-- 1 root root 90 2009-03-23 07:26 cluster1.nodes +\-rw\-r\-\-r\-\- 1 root root 163 2009\-03\-23 07:26 cluster1.instances +\-rw\-r\-\-r\-\- 1 root root 90 2009\-03\-23 07:26 cluster1.nodes .fi .in diff --git a/hscan.hs b/hscan.hs index 66b6e4fd3561a1e855a6b4750ad27a7e7841f321..0469eaeed706b7653fb87f7d0b83e7ca98ec4756 100644 --- a/hscan.hs +++ b/hscan.hs @@ -81,10 +81,10 @@ serializeInstance csf nl inst = then "" else Container.nameOf nl sidx ++ csf) in - printf "%s|%d|%d|%d|%s|%s|%s" + printf "%s|%d|%d|%d|%s|%s|%s|%s" iname (Instance.mem inst) (Instance.dsk inst) (Instance.vcpus inst) (Instance.runSt inst) - pnode snode + pnode snode (intercalate "," (Instance.tags inst)) -- | Generate instance file data from instance objects serializeInstances :: String -> Node.List -> Instance.List -> String @@ -137,12 +137,12 @@ main = do printf "%-*s " nlen name hFlush stdout input_data <- Rapi.loadData name - let ldresult = input_data >>= Loader.mergeData [] + let ldresult = input_data >>= Loader.mergeData [] [] (case ldresult of Bad err -> printf "\nError: failed to load data. \ \Details:\n%s\n" err Ok x -> do - let (nl, il, csf) = x + let (nl, il, _, csf) = x (_, fix_nl) = Loader.checkData nl il putStrLn $ printCluster fix_nl il when (isJust shownodes) $ diff --git a/hspace.1 b/hspace.1 index 075f01dc4c8f609f4c48209388a52ee5b62ad7e9..ae3ac6e9e20fa9e5ff6ad416176bbcb625cdb503 100644 --- a/hspace.1 +++ b/hspace.1 @@ -50,7 +50,7 @@ iallocator plugin. The output of the program is designed to interpreted as a shell fragment (or parsed as a \fIkey=value\fR file). Options which extend -the output (e.g. -p, -v) will output the additional information on +the output (e.g. \-p, \-v) will output the additional information on stderr (such that the stdout is still parseable). The following keys are available in the output of the script (all @@ -97,7 +97,7 @@ RAM). .TP .I INI_MEM_OVERHEAD, FIN_MEM_OVERHEAD -The initial and final memory overhead - memory used for the node +The initial and final memory overhead \(em memory used for the node itself and unacounted memory (e.g. due to hypervisor overhead). .TP @@ -124,7 +124,7 @@ virtual instance CPUs divided by the total physical CPU count. .TP .I INI_MNODE_MEM_AVAIL, FIN_MNODE_MEM_AVAIL -The initial and final maximum per-node available memory. This is not +The initial and final maximum per\(hynode available memory. This is not very useful as a metric but can give an impression of the status of the nodes; as an example, this value restricts the maximum instance size that can be still created on the cluster. @@ -137,7 +137,7 @@ Like the above but for disk. .I TSPEC If the tiered allocation mode has been enabled, this parameter holds the pairs of specifications and counts of instances that can be -created in this mode. The value of the key is a space-separated list +created in this mode. The value of the key is a space\(hyseparated list of values; each value is of the form \fImemory,disk,vcpu=count\fR where the memory, disk and vcpu are the values for the current spec, and count is how many instances of this spec can be created. A @@ -203,12 +203,12 @@ The number of VCPUs of the instances to be placed (defaults to 1). .TP .BI "--max-cpu " cpu-ratio -The maximum virtual-to-physical cpu ratio, as a floating point number -between zero and one. 
For example, specifying \fIcpu-ratio\fR as -\fB2.5\fR means that, for a 4-cpu machine, a maximum of 10 virtual -cpus should be allowed to be in use for primary instances. A value of -one doesn't make sense though, as that means no disk space can be used -on it. +The maximum virtual\(hyto\(hyphysical cpu ratio, as a floating point +number between zero and one. For example, specifying \fIcpu-ratio\fR +as \fB2.5\fR means that, for a 4\(hycpu machine, a maximum of 10 +virtual cpus should be allowed to be in use for primary instances. A +value of one doesn't make sense though, as that means no disk space +can be used on it. .TP .BI "--min-disk " disk-ratio @@ -222,13 +222,13 @@ Prints the before and after node status, in a format designed to allow the user to understand the node's most important parameters. It is possible to customise the listed information by passing a -comma-separated list of field names to this option (the field list is +comma\(hyseparated list of field names to this option (the field list is currently undocumented). By default, the node list will contain these informations: .RS .TP .B F -a character denoting the status of the node, with '-' meaning an +a character denoting the status of the node, with '\-' meaning an offline node, '*' meaning N+1 failure and blank meaning a good node .TP .B Name @@ -301,7 +301,7 @@ This option (which can be given multiple times) will mark nodes as being \fIoffline\fR, and instances won't be placed on these nodes. Note that hspace will also mark as offline any nodes which are -reported by RAPI as such, or that have "?" in file-based input in any +reported by RAPI as such, or that have "?" in file\(hybased input in any numeric fields. .RE @@ -322,9 +322,9 @@ how to customize the default value via the environment). Collect data not from files but directly from the .I cluster given as an argument via RAPI. If the argument doesn't contain a colon -(:), then it is converted into a fully-built URL via prepending +(:), then it is converted into a fully\(hybuilt URL via prepending https:// and appending the default RAPI port, otherwise it's -considered a fully-specified URL and is used as-is. +considered a fully\(hyspecified URL and is used as\(hyis. .TP .BI "-L[" path "]" @@ -332,13 +332,13 @@ Collect data not from files but directly from the master daemon, which is to be contacted via the luxi (an internal Ganeti protocol). An optional \fIpath\fR argument is interpreted as the path to the unix socket on which the master daemon listens; otherwise, the default path -used by ganeti when installed with "--localstatedir=/var" is used. +used by ganeti when installed with \fI--localstatedir=/var\fR is used. .TP .BI "--simulate " description Instead of using actual data, build an empty cluster given a node -description. The \fIdescription\fR parameter must be a comma-separated -list of four elements, describing in order: +description. The \fIdescription\fR parameter must be a +comma\(hyseparated list of four elements, describing in order: .RS @@ -358,7 +358,7 @@ the cpu core count for the nodes .RE An example description would be \fB20,102400,16384,4\fR describing a -20-node cluster where each node has 100GiB of disk space, 16GiB of +20\(hynode cluster where each node has 100GiB of disk space, 16GiB of memory and 4 CPU cores. Note that all nodes must have the same specs currently. @@ -366,12 +366,12 @@ currently. 
.TP .BI "--tiered-alloc " spec -Beside the standard, fixed-size allocation, also do a tiered +Beside the standard, fixed\(hysize allocation, also do a tiered allocation scheme where the algorithm starts from the given specification and allocates until there is no more space; then it decreases the specification and tries the allocation again. The decrease is done on the matric that last failed during allocation. The -specification given is similar to the "--simulate" option and it +specification given is similar to the \fI--simulate\fR option and it holds: .RS diff --git a/hspace.hs b/hspace.hs index 6f0c771dbe20fc2607b92c7e671e8097c79d08a2..20818a51d891e584b8510d0c46c525322ae05bd8 100644 --- a/hspace.hs +++ b/hspace.hs @@ -223,7 +223,7 @@ main = do ispec = optISpec opts shownodes = optShowNodes opts - (fixed_nl, il, csf) <- loadExternalData opts + (fixed_nl, il, _, csf) <- loadExternalData opts printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn ispec)) specData printKeys [ ("SPEC_RQN", printf "%d" (optINodes opts)) ] @@ -284,7 +284,7 @@ main = do -- utility functions let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx) - (rspecCpu spx) "ADMIN_down" (-1) (-1) + (rspecCpu spx) "ADMIN_down" [] (-1) (-1) exitifbad val = (case val of Bad s -> do hPrintf stderr "Failure: %s\n" s