Hbal.hs 14.7 KB
Newer Older
1
{-| Cluster rebalancer.
Iustin Pop's avatar
Iustin Pop committed
2 3 4

-}

Iustin Pop's avatar
Iustin Pop committed
5 6
{-

7
Copyright (C) 2009, 2010, 2011 Google Inc.
Iustin Pop's avatar
Iustin Pop committed
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

26
module Ganeti.HTools.Program.Hbal (main) where
Iustin Pop's avatar
Iustin Pop committed
27

28 29
import Control.Concurrent (threadDelay)
import Control.Exception (bracket)
Iustin Pop's avatar
Iustin Pop committed
30
import Control.Monad
Iustin Pop's avatar
Iustin Pop committed
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34 35
import System.Environment (getArgs)
import System.Exit
Iustin Pop's avatar
Iustin Pop committed
36
import System.IO
37 38
import System.Posix.Process
import System.Posix.Signals
Iustin Pop's avatar
Iustin Pop committed
39

40
import Text.Printf (printf, hPrintf)
Iustin Pop's avatar
Iustin Pop committed
41

42 43
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
Iustin Pop's avatar
Iustin Pop committed
44
import qualified Ganeti.HTools.Group as Group
45
import qualified Ganeti.HTools.Node as Node
46
import qualified Ganeti.HTools.Instance as Instance
Iustin Pop's avatar
Iustin Pop committed
47

48
import Ganeti.HTools.CLI
49
import Ganeti.HTools.ExtLoader
50
import Ganeti.HTools.Utils
51
import Ganeti.HTools.Types
52
import Ganeti.HTools.Loader
Iustin Pop's avatar
Iustin Pop committed
53

54 55 56
import qualified Ganeti.Luxi as L
import Ganeti.Jobs

Iustin Pop's avatar
Iustin Pop committed
57
-- | Options list and functions.
58
options :: [OptType]
Iustin Pop's avatar
Iustin Pop committed
59
options =
Iustin Pop's avatar
Iustin Pop committed
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
  [ oPrintNodes
  , oPrintInsts
  , oPrintCommands
  , oDataFile
  , oEvacMode
  , oRapiMaster
  , oLuxiSocket
  , oExecJobs
  , oGroup
  , oMaxSolLength
  , oVerbose
  , oQuiet
  , oOfflineNode
  , oMinScore
  , oMaxCpu
  , oMinDisk
  , oMinGain
  , oMinGainLim
  , oDiskMoves
  , oSelInst
  , oInstMoves
  , oDynuFile
  , oExTags
  , oExInst
  , oSaveCluster
  , oShowVer
  , oShowHelp
  ]
Iustin Pop's avatar
Iustin Pop committed
88

Iustin Pop's avatar
Iustin Pop committed
89 90 91 92 93 94
{- | Start computing the solution at the given depth and recurse until
we find a valid solution or we exceed the maximum depth.

-}
iterateDepth :: Cluster.Table    -- ^ The starting table
             -> Int              -- ^ Remaining length
95
             -> Bool             -- ^ Allow disk moves
96
             -> Bool             -- ^ Allow instance moves
Iustin Pop's avatar
Iustin Pop committed
97 98
             -> Int              -- ^ Max node name len
             -> Int              -- ^ Max instance name len
99
             -> [MoveJob]        -- ^ Current command list
100
             -> Score            -- ^ Score at which to stop
101 102
             -> Score            -- ^ Min gain limit
             -> Score            -- ^ Min score gain
Iustin Pop's avatar
Iustin Pop committed
103
             -> Bool             -- ^ Enable evacuation mode
104 105
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
                                              -- and commands
106
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107
             cmd_strs min_score mg_limit min_gain evac_mode =
Iustin Pop's avatar
Iustin Pop committed
108 109 110
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
      m_fin_tbl = if allowed_next
111 112
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
                         evac_mode mg_limit min_gain
113
                    else Nothing
Iustin Pop's avatar
Iustin Pop committed
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
  in case m_fin_tbl of
       Just fin_tbl ->
         do
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
               fin_plc_len = length fin_plc
               cur_plc@(idx, _, _, move, _) = head fin_plc
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
                                  nmlen imlen cur_plc fin_plc_len
               afn = Cluster.involvedNodes ini_il cur_plc
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
           putStrLn sol_line
           hFlush stdout
           iterateDepth fin_tbl max_rounds disk_moves inst_moves
                        nmlen imlen upd_cmd_strs min_score
                        mg_limit min_gain evac_mode
       Nothing -> return (ini_tbl, cmd_strs)
Iustin Pop's avatar
Iustin Pop committed
130

Iustin Pop's avatar
Iustin Pop committed
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
-- | Displays the cluster stats.
printStats :: Node.List -> Node.List -> IO ()
printStats ini_nl fin_nl = do
  let ini_cs = Cluster.totalResources ini_nl
      fin_cs = Cluster.totalResources fin_nl
  printf "Original: mem=%d disk=%d\n"
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
  printf "Final:    mem=%d disk=%d\n"
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)

-- | Saves the rebalance commands to a text file.
saveBalanceCommands :: Options -> String -> IO ()
saveBalanceCommands opts cmd_data = do
  let out_path = fromJust $ optShowCmds opts
  putStrLn ""
  (if out_path == "-" then
       printf "Commands to run to reach the above solution:\n%s"
                  (unlines . map ("  " ++) .
                   filter (/= "  check") .
                   lines $ cmd_data)
   else do
     writeFile out_path (shTemplate ++ cmd_data)
     printf "The commands have been written to file '%s'\n" out_path)

155
-- | Polls a set of jobs at a fixed interval until all are finished
Iustin Pop's avatar
Iustin Pop committed
156
-- one way or another.
157 158 159 160 161
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
waitForJobs client jids = do
  sts <- L.queryJobsStatus client jids
  case sts of
    Bad x -> return $ Bad x
Iustin Pop's avatar
Iustin Pop committed
162
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
163 164 165 166 167 168
            then do
              -- TODO: replace hardcoded value with a better thing
              threadDelay (1000000 * 15)
              waitForJobs client jids
            else return $ Ok s

Iustin Pop's avatar
Iustin Pop committed
169
-- | Check that a set of job statuses is all success.
170
checkJobsStatus :: [JobStatus] -> Bool
Iustin Pop's avatar
Iustin Pop committed
171
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
172

Iustin Pop's avatar
Iustin Pop committed
173
-- | Wrapper over execJobSet checking for early termination.
174
execWrapper :: String -> Node.List
Iustin Pop's avatar
Iustin Pop committed
175
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176 177 178 179 180 181 182 183 184 185
execWrapper _      _  _  _    [] = return True
execWrapper master nl il cref alljss = do
  cancel <- readIORef cref
  (if cancel > 0
   then do
     hPrintf stderr "Exiting early due to user request, %d\
                    \ jobset(s) remaining." (length alljss)::IO ()
     return False
   else execJobSet master nl il cref alljss)

Iustin Pop's avatar
Iustin Pop committed
186
-- | Execute an entire jobset.
187
execJobSet :: String -> Node.List
188 189 190
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
execJobSet _      _  _  _    [] = return True
execJobSet master nl il cref (js:jss) = do
191 192
  -- map from jobset (htools list of positions) to [[opcodes]]
  let jobs = map (\(_, idx, move, _) ->
193
                      Cluster.iMoveToJob nl il idx move) js
194 195 196 197
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
  jrs <- bracket (L.getClient master) L.closeClient
         (\client -> do
198
            jids <- L.submitManyJobs client jobs
199 200 201 202 203 204 205 206 207
            case jids of
              Bad x -> return $ Bad x
              Ok x -> do
                putStrLn $ "Got job IDs " ++ commaJoin x
                waitForJobs client x
         )
  (case jrs of
     Bad x -> do
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
208
       return False
209
     Ok x -> if checkJobsStatus x
210
             then execWrapper master nl il cref jss
211 212 213
             else do
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
                         show x
214 215
               hPutStrLn stderr "Aborting."
               return False)
216

Iustin Pop's avatar
Iustin Pop committed
217 218 219 220 221 222 223 224
-- | Executes the jobs, if possible and desired.
maybeExecJobs :: Options
              -> [a]
              -> Node.List
              -> Instance.List
              -> [JobSet]
              -> IO Bool
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
Iustin Pop's avatar
Iustin Pop committed
225
  if optExecJobs opts && not (null ord_plc)
Iustin Pop's avatar
Iustin Pop committed
226 227 228 229 230 231 232
    then (case optLuxi opts of
            Nothing -> do
              hPutStrLn stderr "Execution of commands possible only on LUXI"
              return False
            Just master -> runJobSet master fin_nl il cmd_jobs)
    else return True

Iustin Pop's avatar
Iustin Pop committed
233
-- | Signal handler for graceful termination.
Iustin Pop's avatar
Iustin Pop committed
234 235 236 237 238 239
hangleSigInt :: IORef Int -> IO ()
hangleSigInt cref = do
  writeIORef cref 1
  putStrLn ("Cancel request registered, will exit at" ++
            " the end of the current job set...")

Iustin Pop's avatar
Iustin Pop committed
240
-- | Signal handler for immediate termination.
Iustin Pop's avatar
Iustin Pop committed
241 242 243 244 245 246
hangleSigTerm :: IORef Int -> IO ()
hangleSigTerm cref = do
  -- update the cref to 2, just for consistency
  writeIORef cref 2
  putStrLn "Double cancel request, exiting now..."
  exitImmediately $ ExitFailure 2
247

Iustin Pop's avatar
Iustin Pop committed
248
-- | Runs a job set with handling of signals.
249
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
250 251
runJobSet master fin_nl il cmd_jobs = do
  cref <- newIORef 0
Iustin Pop's avatar
Iustin Pop committed
252 253
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
254
  execWrapper master fin_nl il cref cmd_jobs
255

Iustin Pop's avatar
Iustin Pop committed
256 257 258 259
-- | Select the target node group.
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
            -> IO (String, (Node.List, Instance.List))
selectGroup opts gl nlf ilf = do
260 261 262
  let ngroups = Cluster.splitCluster nlf ilf
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
    hPutStrLn stderr "Found multiple node groups:"
263
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
264
           flip Container.find gl . fst) ngroups
265 266 267
    hPutStrLn stderr "Aborting."
    exitWith $ ExitFailure 1

Iustin Pop's avatar
Iustin Pop committed
268
  case optGroup opts of
Iustin Pop's avatar
Iustin Pop committed
269
    Nothing -> do
Iustin Pop's avatar
Iustin Pop committed
270 271 272
      let (gidx, cdata) = head ngroups
          grp = Container.find gidx gl
      return (Group.name grp, cdata)
Iustin Pop's avatar
Iustin Pop committed
273
    Just g -> case Container.findByName gl g of
274 275 276
      Nothing -> do
        hPutStrLn stderr $ "Node group " ++ g ++
          " not found. Node group list is:"
Iustin Pop's avatar
Iustin Pop committed
277
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
278 279
        hPutStrLn stderr "Aborting."
        exitWith $ ExitFailure 1
Iustin Pop's avatar
Iustin Pop committed
280 281 282
      Just grp ->
          case lookup (Group.idx grp) ngroups of
            Nothing -> do
Iustin Pop's avatar
Iustin Pop committed
283 284 285
              -- This will only happen if there are no nodes assigned
              -- to this group
              return (Group.name grp, (Container.empty, Container.empty))
Iustin Pop's avatar
Iustin Pop committed
286
            Just cdata -> return (Group.name grp, cdata)
287

Iustin Pop's avatar
Iustin Pop committed
288
-- | Do a few checks on the cluster data.
289 290
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
checkCluster verbose nl il = do
Iustin Pop's avatar
Iustin Pop committed
291 292
  -- nothing to do on an empty cluster
  when (Container.null il) $ do
293
         printf "Cluster is empty, exiting.\n"::IO ()
Iustin Pop's avatar
Iustin Pop committed
294 295 296 297 298 299 300 301 302 303
         exitWith ExitSuccess

  -- hbal doesn't currently handle split clusters
  let split_insts = Cluster.findSplitInstances nl il
  unless (null split_insts) $ do
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
    hPutStrLn stderr "Aborting."
    exitWith $ ExitFailure 1

304
  printf "Loaded %d nodes, %d instances\n"
Iustin Pop's avatar
Iustin Pop committed
305
             (Container.size nl)
306
             (Container.size il)::IO ()
Iustin Pop's avatar
Iustin Pop committed
307 308

  let csf = commonSuffix nl il
309
  when (not (null csf) && verbose > 1) $
Iustin Pop's avatar
Iustin Pop committed
310 311 312
       printf "Note: Stripping common suffix of '%s' from names\n" csf

-- | Do a few checks on the selected group data.
313 314 315
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
checkGroup verbose gname nl il = do
  printf "Group size %d nodes, %d instances\n"
Iustin Pop's avatar
Iustin Pop committed
316
             (Container.size nl)
317
             (Container.size il)::IO ()
318

Iustin Pop's avatar
Iustin Pop committed
319
  putStrLn $ "Selected node group: " ++ gname
320

Iustin Pop's avatar
Iustin Pop committed
321
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
322
  unless (verbose == 0) $ printf
Iustin Pop's avatar
Iustin Pop committed
323
             "Initial check done: %d bad nodes, %d bad instances.\n"
Iustin Pop's avatar
Iustin Pop committed
324 325
             (length bad_nodes) (length bad_instances)

Iustin Pop's avatar
Iustin Pop committed
326
  when (length bad_nodes > 0) $
327 328
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
                  \that the cluster will end N+1 happy."
Iustin Pop's avatar
Iustin Pop committed
329

Iustin Pop's avatar
Iustin Pop committed
330 331 332 333 334
-- | Check that we actually need to rebalance.
checkNeedRebalance :: Options -> Score -> IO ()
checkNeedRebalance opts ini_cv = do
  let min_cv = optMinScore opts
  when (ini_cv < min_cv) $ do
335 336 337
         printf "Cluster is already well balanced (initial score %.6g,\n\
                \minimum score %.6g).\nNothing to do, exiting\n"
                ini_cv min_cv:: IO ()
Iustin Pop's avatar
Iustin Pop committed
338 339 340 341 342
         exitWith ExitSuccess

-- | Main function.
main :: IO ()
main = do
343
  cmd_args <- getArgs
Iustin Pop's avatar
Iustin Pop committed
344 345 346 347 348 349
  (opts, args) <- parseOpts cmd_args "hbal" options

  unless (null args) $ do
         hPutStrLn stderr "Error: this program doesn't take any arguments."
         exitWith $ ExitFailure 1

350
  let verbose = optVerbose opts
Iustin Pop's avatar
Iustin Pop committed
351 352 353 354 355
      shownodes = optShowNodes opts
      showinsts = optShowInsts opts

  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts

356
  when (verbose > 1) $
Iustin Pop's avatar
Iustin Pop committed
357 358
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags

Iustin Pop's avatar
Iustin Pop committed
359
  nlf <- setNodeStatus opts fixed_nl
360
  checkCluster verbose nlf ilf
Iustin Pop's avatar
Iustin Pop committed
361 362 363 364 365

  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata

  (gname, (nl, il)) <- selectGroup opts gl nlf ilf

366
  checkGroup verbose gname nl il
Iustin Pop's avatar
Iustin Pop committed
367

368
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
369

370
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
Iustin Pop's avatar
Iustin Pop committed
371 372 373

  let ini_cv = Cluster.compCV nl
      ini_tbl = Cluster.Table nl il ini_cv []
Iustin Pop's avatar
Iustin Pop committed
374 375
      min_cv = optMinScore opts

Iustin Pop's avatar
Iustin Pop committed
376
  checkNeedRebalance opts ini_cv
Iustin Pop's avatar
Iustin Pop committed
377

378 379 380 381
  (if verbose > 2
   then printf "Initial coefficients: overall %.8f, %s\n"
        ini_cv (Cluster.printStats nl)::IO ()
   else printf "Initial score: %.8f\n" ini_cv)
Iustin Pop's avatar
Iustin Pop committed
382

383
  putStrLn "Trying to minimize the CV..."
384 385
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
386 387

  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
388
                         (optDiskMoves opts)
389
                         (optInstMoves opts)
390
                         nmlen imlen [] min_cv
391 392
                         (optMinGainLim opts) (optMinGain opts)
                         (optEvacMode opts)
393
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
Iustin Pop's avatar
Iustin Pop committed
394
      ord_plc = reverse fin_plc
Iustin Pop's avatar
Iustin Pop committed
395 396 397 398 399 400 401 402
      sol_msg = case () of
                  _ | null fin_plc -> printf "No solution found\n"
                    | verbose > 2 ->
                        printf "Final coefficients:   overall %.8f, %s\n"
                        fin_cv (Cluster.printStats fin_nl)
                    | otherwise ->
                        printf "Cluster score improved from %.8f to %.8f\n"
                        ini_cv fin_cv ::String
Iustin Pop's avatar
Iustin Pop committed
403

404
  putStr sol_msg
405

406
  unless (verbose == 0) $
407
         printf "Solution length=%d\n" (length ord_plc)
Iustin Pop's avatar
Iustin Pop committed
408

409
  let cmd_jobs = Cluster.splitJobs cmd_strs
410 411

  when (isJust $ optShowCmds opts) $
Iustin Pop's avatar
Iustin Pop committed
412
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
413

414 415
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
                (ClusterData gl fin_nl fin_il ctags)
416

417
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
418

419 420
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)

Iustin Pop's avatar
Iustin Pop committed
421 422 423
  when (verbose > 3) $ printStats nl fin_nl

  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
424
  unless eval (exitWith (ExitFailure 1))