From 4715711d8e527cb13ef5f23b72626ac03f04f9a9 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Thu, 23 Dec 2010 11:04:05 +0100
Subject: [PATCH] Change the balancing function

Currently the balancing function is a modified version of the standard
deviation (stddev divided by the list length), kept for historical
reasons. While this works fine for small clusters, on big clusters it
makes the balancing effect too "weak", and in some cases it refuses to
balance some clusters correctly. It also makes the balancing behaviour
dependent on the cluster size, which is a big no-no.

Therefore we revert to the normal version of the standard deviation,
and we also rename the function to reflect what it does. The new
version correctly balances some corner cases that the previous version
didn't, and passes the current balancing unit tests.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Adeodato Simo <dato@google.com>
---
 Ganeti/HTools/Cluster.hs | 11 +++++------
 Ganeti/HTools/Utils.hs   | 12 +++++-------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs
index f5a956b7d..3324f8e7e 100644
--- a/Ganeti/HTools/Cluster.hs
+++ b/Ganeti/HTools/Cluster.hs
@@ -230,16 +230,16 @@ compDetailedCV nl =
         mem_l = map Node.pMem nodes
         dsk_l = map Node.pDsk nodes
         -- metric: memory covariance
-        mem_cv = varianceCoeff mem_l
+        mem_cv = stdDev mem_l
         -- metric: disk covariance
-        dsk_cv = varianceCoeff dsk_l
+        dsk_cv = stdDev dsk_l
         -- metric: count of instances living on N1 failing nodes
         n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
                                                    length (Node.pList n)) .
                    filter Node.failN1 $ nodes :: Double
         res_l = map Node.pRem nodes
         -- metric: reserved memory covariance
-        res_cv = varianceCoeff res_l
+        res_cv = stdDev res_l
         -- offline instances metrics
         offline_ipri = sum . map (length . Node.pList) $ offline
         offline_isec = sum . map (length . Node.sList) $ offline
@@ -251,7 +251,7 @@ compDetailedCV nl =
         off_pri_score = fromIntegral offline_ipri::Double
         cpu_l = map Node.pCpu nodes
         -- metric: covariance of vcpu/pcpu ratio
-        cpu_cv = varianceCoeff cpu_l
+        cpu_cv = stdDev cpu_l
         -- metrics: covariance of cpu, memory, disk and network load
         (c_load, m_load, d_load, n_load) = unzip4 $
             map (\n ->
@@ -263,8 +263,7 @@ compDetailedCV nl =
         pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
         pri_tags_score = fromIntegral pri_tags_inst::Double
     in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
-       , varianceCoeff c_load, varianceCoeff m_load
-       , varianceCoeff d_load, varianceCoeff n_load
+       , stdDev c_load, stdDev m_load, stdDev d_load, stdDev n_load
        , pri_tags_score ]

 -- | Compute the /total/ variance.
diff --git a/Ganeti/HTools/Utils.hs b/Ganeti/HTools/Utils.hs
index 8354dc2c4..7bc4ead7e 100644
--- a/Ganeti/HTools/Utils.hs
+++ b/Ganeti/HTools/Utils.hs
@@ -27,7 +27,7 @@ module Ganeti.HTools.Utils
     , debugFn
     , debugXy
     , sepSplit
-    , varianceCoeff
+    , stdDev
     , commaJoin
     , readEitherString
     , loadJSArray
@@ -87,9 +87,9 @@ sepSplit sep s
 -- Simple and slow statistical functions, please replace with better
 -- versions

--- | Our modified standard deviation function (not, it's not the variance)
-varianceCoeff :: [Double] -> Double
-varianceCoeff lst =
+-- | Standard deviation function
+stdDev :: [Double] -> Double
+stdDev lst =
   -- first, calculate the list length and sum lst in a single step,
   -- for performance reasons
   let (ll', sx) = foldl' (\(rl, rs) e ->
@@ -99,9 +99,7 @@ varianceCoeff lst =
       ll = fromIntegral ll'::Double
       mv = sx / ll
      av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst
-      bv = sqrt (av / ll) -- stddev
-      cv = bv / ll -- standard deviation divided by list length
-  in cv
+  in sqrt (av / ll) -- stddev

 -- * JSON-related functions
--
GitLab
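
To see why the extra division by the list length made the balancing "weak"
on big clusters, here is a minimal standalone sketch, not part of the patch
itself: varianceCoeffOld below is a reconstruction of the removed metric,
and the node utilisation figures are made up purely for illustration. It
scores the same imbalance (one node at 90% memory use among nodes at 10%)
on a 4-node and on a 40-node cluster:

    -- balance_demo.hs -- run with: runghc balance_demo.hs
    import Data.List (foldl')
    import Text.Printf (printf)

    -- | Plain population standard deviation, as introduced by the patch.
    stdDev :: [Double] -> Double
    stdDev lst =
      let ll = fromIntegral (length lst) :: Double
          mv = sum lst / ll
          av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst
      in sqrt (av / ll)

    -- | Reconstruction of the removed metric: stddev divided by list length.
    varianceCoeffOld :: [Double] -> Double
    varianceCoeffOld lst = stdDev lst / fromIntegral (length lst)

    main :: IO ()
    main = do
      -- one "hot" node at 0.9 among otherwise identical nodes at 0.1
      let small = 0.9 : replicate 3 0.1   -- 4-node cluster
          big   = 0.9 : replicate 39 0.1  -- 40-node cluster
      printf "4 nodes:  old=%.5f  new=%.5f\n"
             (varianceCoeffOld small) (stdDev small)
      printf "40 nodes: old=%.5f  new=%.5f\n"
             (varianceCoeffOld big) (stdDev big)

This prints approximately:

    4 nodes:  old=0.08660  new=0.34641
    40 nodes: old=0.00312  new=0.12490

The extra 1/length factor makes the old score drop roughly 28-fold between
the two clusters, while the plain stddev drops less than 3-fold. On a big
cluster the same hot node therefore barely registered in the old metric,
and the balancer could refuse moves that would clearly improve the cluster;
removing the size-dependent factor is exactly what the patch does.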