Commit 50211c86 authored by Iustin Pop
Browse files

Merge branch 'stable-0.2'

* devel-0.2:
  Update NEWS file for 0.2.8 release
  hbal: return meaningful exit code for job failures
  Change the balancing function
parents 949397c8 d7f18640
......@@ -250,16 +250,16 @@ compDetailedCV nl =
mem_l = map Node.pMem nodes
dsk_l = map Node.pDsk nodes
-- metric: memory covariance
mem_cv = varianceCoeff mem_l
mem_cv = stdDev mem_l
-- metric: disk covariance
dsk_cv = varianceCoeff dsk_l
dsk_cv = stdDev dsk_l
-- metric: count of instances living on N1 failing nodes
n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
length (Node.pList n)) .
filter Node.failN1 $ nodes :: Double
res_l = map Node.pRem nodes
-- metric: reserved memory covariance
res_cv = varianceCoeff res_l
res_cv = stdDev res_l
-- offline instances metrics
offline_ipri = sum . map (length . Node.pList) $ offline
offline_isec = sum . map (length . Node.sList) $ offline
......@@ -271,7 +271,7 @@ compDetailedCV nl =
off_pri_score = fromIntegral offline_ipri::Double
cpu_l = map Node.pCpu nodes
-- metric: covariance of vcpu/pcpu ratio
cpu_cv = varianceCoeff cpu_l
cpu_cv = stdDev cpu_l
-- metrics: covariance of cpu, memory, disk and network load
(c_load, m_load, d_load, n_load) = unzip4 $
map (\n ->
......@@ -283,8 +283,7 @@ compDetailedCV nl =
pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
pri_tags_score = fromIntegral pri_tags_inst::Double
in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
, varianceCoeff c_load, varianceCoeff m_load
, varianceCoeff d_load, varianceCoeff n_load
, stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
, pri_tags_score ]
-- | Compute the /total/ variance.
......
......@@ -27,7 +27,7 @@ module Ganeti.HTools.Utils
, debugFn
, debugXy
, sepSplit
, varianceCoeff
, stdDev
, commaJoin
, readEitherString
, loadJSArray
......@@ -88,9 +88,9 @@ sepSplit sep s
-- Simple and slow statistical functions, please replace with better
-- versions
-- | Our modified standard deviation function (no, it's not the variance)
varianceCoeff :: [Double] -> Double
varianceCoeff lst =
-- | Standard deviation function
stdDev :: [Double] -> Double
stdDev lst =
-- first, calculate the list length and sum lst in a single step,
-- for performance reasons
let (ll', sx) = foldl' (\(rl, rs) e ->
......@@ -100,9 +100,7 @@ varianceCoeff lst =
ll = fromIntegral ll'::Double
mv = sx / ll
av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst
bv = sqrt (av / ll) -- stddev
cv = bv / ll -- standard deviation divided by list length
in cv
in sqrt (av / ll) -- stddev
-- * JSON-related functions
......
......@@ -2,6 +2,23 @@ Ganeti-htools release notes
===========================
Version 0.2.8 (Thu, 23 Dec 2010)
--------------------------------
A bug fix release:
- fixed balancing function for big clusters, which will improve corner
cases where hbal didn't see any solution even though the cluster was
obviously not well balanced
- fixed exit code of hbal in case of (Luxi) job errors
- changed the signal handling in hbal in order to make hbal control
easier: instead of synchronising on the count of signals, make SIGINT
cause graceful termination, and SIGTERM an immediate one
- increased the tag exclusion weight so that it has greater importance
during the balancing
- slight improvement to the speed of balancing via algorithm tweaks
Version 0.2.7 (Thu, 07 Oct 2010)
--------------------------------
......
......@@ -155,18 +155,25 @@ waitForJobs client jids = do
checkJobsStatus :: [JobStatus] -> Bool
-- True exactly when no status in the list differs from
-- JOB_STATUS_SUCCESS; an empty list therefore counts as success.
checkJobsStatus statuses = not (any (/= JOB_STATUS_SUCCESS) statuses)
-- | Wrapper over execJobSet checking for early termination.
--
-- Before handing the remaining jobsets to 'execJobSet', the counter
-- stored in @cref@ is read. If it is positive, a message stating how
-- many jobsets are still pending is written to stderr and 'False' is
-- returned without executing anything further. Otherwise the result of
-- 'execJobSet' on the remaining jobsets is returned. An empty jobset
-- list yields 'True' immediately, without reading the counter.
execWrapper :: String -> Node.List
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
execWrapper _ _ _ _ [] = return True
execWrapper master nl il cref alljss = do
  cancel <- readIORef cref
  if cancel > 0
    then do
      -- hPrintf does not append a line terminator on its own, so the
      -- message must end with an explicit newline to keep stderr tidy.
      hPrintf stderr "Exiting early due to user request, %d\
                     \ jobset(s) remaining.\n" (length alljss) :: IO ()
      return False
    else execJobSet master nl il cref alljss
-- | Execute an entire jobset
execJobSet :: String -> Node.List
-> Instance.List -> IORef Int -> [JobSet] -> IO ()
execJobSet _ _ _ _ [] = return ()
execJobSet master nl il cref alljss@(js:jss) = do
-> Instance.List -> IORef Int -> [JobSet] -> IO Bool
execJobSet _ _ _ _ [] = return True
execJobSet master nl il cref (js:jss) = do
-- map from jobset (htools list of positions) to [[opcodes]]
cancel <- readIORef cref
when (cancel > 0) $ do
putStrLn ("Exiting early due to user request, " ++ show (length alljss) ++
" jobset(s) remaining.")
exitWith $ ExitFailure 1
let jobs = map (\(_, idx, move, _) ->
Cluster.iMoveToJob nl il idx move) js
let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
......@@ -183,13 +190,14 @@ execJobSet master nl il cref alljss@(js:jss) = do
(case jrs of
Bad x -> do
hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
return ()
return False
Ok x -> if checkJobsStatus x
then execJobSet master nl il cref jss
then execWrapper master nl il cref jss
else do
hPutStrLn stderr $ "Not all jobs completed successfully: " ++
show x
hPutStrLn stderr "Aborting.")
hPutStrLn stderr "Aborting."
return False)
-- | Signal handler for graceful termination
hangleSigInt :: IORef Int -> IO ()
......@@ -206,12 +214,12 @@ hangleSigTerm cref = do
putStrLn "Double cancel request, exiting now..."
exitImmediately $ ExitFailure 2
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO ()
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
runJobSet master fin_nl il cmd_jobs = do
cref <- newIORef 0
mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
[(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
execJobSet master fin_nl il cref cmd_jobs
execWrapper master fin_nl il cref cmd_jobs
-- | Main function.
main :: IO ()
......@@ -416,9 +424,12 @@ main = do
when oneline $
putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
when (optExecJobs opts && not (null ord_plc))
(case optLuxi opts of
eval <-
if optExecJobs opts && not (null ord_plc)
then (case optLuxi opts of
Nothing -> do
hPutStrLn stderr "Execution of commands possible only on LUXI"
exitWith $ ExitFailure 1
return False
Just master -> runJobSet master fin_nl il cmd_jobs)
else return True
when (not eval) (exitWith (ExitFailure 1))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment