diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index 1cfd48b2ad401837d3a93318483d9fbbdc8ba9b7..a0f876796c6829a111beeed066870997d0e4c71c 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -250,16 +250,16 @@ compDetailedCV nl = mem_l = map Node.pMem nodes dsk_l = map Node.pDsk nodes -- metric: memory covariance - mem_cv = varianceCoeff mem_l + mem_cv = stdDev mem_l -- metric: disk covariance - dsk_cv = varianceCoeff dsk_l + dsk_cv = stdDev dsk_l -- metric: count of instances living on N1 failing nodes n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) + length (Node.pList n)) . filter Node.failN1 $ nodes :: Double res_l = map Node.pRem nodes -- metric: reserved memory covariance - res_cv = varianceCoeff res_l + res_cv = stdDev res_l -- offline instances metrics offline_ipri = sum . map (length . Node.pList) $ offline offline_isec = sum . map (length . Node.sList) $ offline @@ -271,7 +271,7 @@ compDetailedCV nl = off_pri_score = fromIntegral offline_ipri::Double cpu_l = map Node.pCpu nodes -- metric: covariance of vcpu/pcpu ratio - cpu_cv = varianceCoeff cpu_l + cpu_cv = stdDev cpu_l -- metrics: covariance of cpu, memory, disk and network load (c_load, m_load, d_load, n_load) = unzip4 $ map (\n -> @@ -283,8 +283,7 @@ compDetailedCV nl = pri_tags_inst = sum $ map Node.conflictingPrimaries nodes pri_tags_score = fromIntegral pri_tags_inst::Double in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv - , varianceCoeff c_load, varianceCoeff m_load - , varianceCoeff d_load, varianceCoeff n_load + , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load , pri_tags_score ] -- | Compute the /total/ variance. diff --git a/Ganeti/HTools/Utils.hs b/Ganeti/HTools/Utils.hs index 84e8152ee94fcaae1a126bf71c49a3b6001eadd8..45b9d5e522026cdf1448f09f517ce912b16b3102 100644 --- a/Ganeti/HTools/Utils.hs +++ b/Ganeti/HTools/Utils.hs @@ -27,7 +27,7 @@ module Ganeti.HTools.Utils , debugFn , debugXy , sepSplit - , varianceCoeff + , stdDev , commaJoin , readEitherString , loadJSArray @@ -88,9 +88,9 @@ sepSplit sep s -- Simple and slow statistical functions, please replace with better -- versions --- | Our modified standard deviation function (not, it's not the variance) -varianceCoeff :: [Double] -> Double -varianceCoeff lst = +-- | Standard deviation function +stdDev :: [Double] -> Double +stdDev lst = -- first, calculate the list length and sum lst in a single step, -- for performance reasons let (ll', sx) = foldl' (\(rl, rs) e -> @@ -100,9 +100,7 @@ varianceCoeff lst = ll = fromIntegral ll'::Double mv = sx / ll av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst - bv = sqrt (av / ll) -- stddev - cv = bv / ll -- standard deviation divided by list length - in cv + in sqrt (av / ll) -- stddev -- * JSON-related functions diff --git a/NEWS b/NEWS index e014cf3cd72ceaa709dc7e0725fc25e8894fa0de..1783ea11319f03681f29e8b6af7d6c40f29a6def 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,23 @@ Ganeti-htools release notes =========================== +Version 0.2.8 (Thu, 23 Dec 2010) +-------------------------------- + +A bug fix release: + +- fixed balancing function for big clusters, which will improve corner + cases where hbal didn't see any solution even though the cluster was + obviously not well balanced +- fixed exit code of hbal in case of (Luxi) job errors +- changed the signal handling in hbal in order to make hbal control + easier: instead of synchronising on the count of signals, make SIGINT + cause graceful termination, and SIGTERM an immediate one +- increased the tag exclusion weight so that it has greater importance + during the balancing +- slight improvement to the speed of balancing via algorithm tweaks + + Version 0.2.7 (Thu, 07 Oct 2010) -------------------------------- diff --git a/hbal.hs b/hbal.hs index a83b7fc36a2f354399fbddc7036f3396d8888f11..455a751ccd978d5aa51e09ad1248acefd2f7060b 100644 --- a/hbal.hs +++ b/hbal.hs @@ -155,18 +155,25 @@ waitForJobs client jids = do checkJobsStatus :: [JobStatus] -> Bool checkJobsStatus = all (== JOB_STATUS_SUCCESS) +-- | Wrapper over execJobSet checking for early termination +execWrapper :: String -> Node.List + -> Instance.List -> IORef Int -> [JobSet] -> IO Bool +execWrapper _ _ _ _ [] = return True +execWrapper master nl il cref alljss = do + cancel <- readIORef cref + (if cancel > 0 + then do + hPrintf stderr "Exiting early due to user request, %d\ + \ jobset(s) remaining." (length alljss)::IO () + return False + else execJobSet master nl il cref alljss) + -- | Execute an entire jobset execJobSet :: String -> Node.List - -> Instance.List -> IORef Int -> [JobSet] -> IO () -execJobSet _ _ _ _ [] = return () -execJobSet master nl il cref alljss@(js:jss) = do + -> Instance.List -> IORef Int -> [JobSet] -> IO Bool +execJobSet _ _ _ _ [] = return True +execJobSet master nl il cref (js:jss) = do -- map from jobset (htools list of positions) to [[opcodes]] - cancel <- readIORef cref - when (cancel > 0) $ do - putStrLn ("Exiting early due to user request, " ++ show (length alljss) ++ - " jobset(s) remaining.") - exitWith $ ExitFailure 1 - let jobs = map (\(_, idx, move, _) -> Cluster.iMoveToJob nl il idx move) js let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js @@ -183,13 +190,14 @@ execJobSet master nl il cref alljss@(js:jss) = do (case jrs of Bad x -> do hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x - return () + return False Ok x -> if checkJobsStatus x - then execJobSet master nl il cref jss + then execWrapper master nl il cref jss else do hPutStrLn stderr $ "Not all jobs completed successfully: " ++ show x - hPutStrLn stderr "Aborting.") + hPutStrLn stderr "Aborting." + return False) -- | Signal handler for graceful termination hangleSigInt :: IORef Int -> IO () @@ -206,12 +214,12 @@ hangleSigTerm cref = do putStrLn "Double cancel request, exiting now..." exitImmediately $ ExitFailure 2 -runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO () +runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool runJobSet master fin_nl il cmd_jobs = do cref <- newIORef 0 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing) [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)] - execJobSet master fin_nl il cref cmd_jobs + execWrapper master fin_nl il cref cmd_jobs -- | Main function. main :: IO () @@ -416,9 +424,12 @@ main = do when oneline $ putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv - when (optExecJobs opts && not (null ord_plc)) - (case optLuxi opts of + eval <- + if optExecJobs opts && not (null ord_plc) + then (case optLuxi opts of Nothing -> do hPutStrLn stderr "Execution of commands possible only on LUXI" - exitWith $ ExitFailure 1 + return False Just master -> runJobSet master fin_nl il cmd_jobs) + else return True + when (not eval) (exitWith (ExitFailure 1))