{-| Implementation of cluster-wide logic.

This module holds all pure cluster-logic; I\/O related functionality
goes into the "Main" module for the individual binaries.

-}

{-

Copyright (C) 2009 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

module Ganeti.HTools.Cluster
    (
     -- * Types
      Placement
    , AllocSolution
    , Table(..)
    , Score
    , IMove(..)
    , CStats(..)
    -- * Generic functions
    , totalResources
    -- * First phase functions
    , computeBadItems
    -- * Second phase functions
    , printSolution
    , printSolutionLine
    , formatCmds
    , printNodes
    -- * Balacing functions
    , applyMove
    , checkMove
    , compCV
    , printStats
    -- * IAllocator functions
    , allocateOnSingle
    , allocateOnPair
    , tryAlloc
    , tryReloc
    ) where

import Data.List
import Text.Printf (printf)
import Data.Function
import Control.Monad

import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
import Ganeti.HTools.Types
import Ganeti.HTools.Utils

-- * Types

-- | A separate name for the cluster score type.
type Score = Double

-- | The description of an instance placement.
type Placement = (Idx, Ndx, Ndx, Score)

-- | Allocation\/relocation solution.
type AllocSolution = [(OpResult Node.List, Instance.Instance, [Node.Node])]

-- | An instance move definition
data IMove = Failover                -- ^ Failover the instance (f)
           | ReplacePrimary Ndx      -- ^ Replace primary (f, r:np, f)
           | ReplaceSecondary Ndx    -- ^ Replace secondary (r:ns)
           | ReplaceAndFailover Ndx  -- ^ Replace secondary, failover (r:np, f)
           | FailoverAndReplace Ndx  -- ^ Failover, replace secondary (f, r:ns)
             deriving (Show)

-- | The complete state for the balancing solution
data Table = Table Node.List Instance.List Score [Placement]
             deriving (Show)

data CStats = CStats { cs_fmem :: Int    -- ^ Cluster free mem
                     , cs_fdsk :: Int    -- ^ Cluster free disk
                     , cs_amem :: Int    -- ^ Cluster allocatable mem
                     , cs_adsk :: Int    -- ^ Cluster allocatable disk
                     , cs_acpu :: Int    -- ^ Cluster allocatable cpus
                     , cs_mmem :: Int    -- ^ Max node allocatable mem
                     , cs_mdsk :: Int    -- ^ Max node allocatable disk
                     , cs_mcpu :: Int    -- ^ Max node allocatable cpu
                     , cs_imem :: Int    -- ^ Instance used mem
                     , cs_idsk :: Int    -- ^ Instance used disk
                     , cs_icpu :: Int    -- ^ Instance used cpu
                     , cs_tmem :: Double -- ^ Cluster total mem
                     , cs_tdsk :: Double -- ^ Cluster total disk
                     , cs_tcpu :: Double -- ^ Cluster total cpus
                     , cs_xmem :: Int    -- ^ Unnacounted for mem
                     , cs_nmem :: Int    -- ^ Node own memory
                     }

-- * Utility functions

-- | Verifies the N+1 status and return the affected nodes.
verifyN1 :: [Node.Node] -> [Node.Node]
verifyN1 = filter Node.failN1

{-| Computes the pair of bad nodes and instances.

The bad node list is computed via a simple 'verifyN1' check, and the
bad instance list is the list of primary and secondary instances of
those nodes.

-}
computeBadItems :: Node.List -> Instance.List ->
                   ([Node.Node], [Instance.Instance])
computeBadItems nl il =
  let bad_nodes = verifyN1 $ getOnline nl
      bad_instances = map (\idx -> Container.find idx il) .
                      sort . nub $
                      concatMap (\ n -> Node.slist n ++ Node.plist n) bad_nodes
  in
    (bad_nodes, bad_instances)

emptyCStats :: CStats
emptyCStats = CStats { cs_fmem = 0
                     , cs_fdsk = 0
                     , cs_amem = 0
                     , cs_adsk = 0
                     , cs_acpu = 0
                     , cs_mmem = 0
                     , cs_mdsk = 0
                     , cs_mcpu = 0
                     , cs_imem = 0
                     , cs_idsk = 0
                     , cs_icpu = 0
                     , cs_tmem = 0
                     , cs_tdsk = 0
                     , cs_tcpu = 0
                     , cs_xmem = 0
                     , cs_nmem = 0
                     }

updateCStats :: CStats -> Node.Node -> CStats
updateCStats cs node =
    let CStats { cs_fmem = x_fmem, cs_fdsk = x_fdsk,
                 cs_amem = x_amem, cs_acpu = x_acpu, cs_adsk = x_adsk,
                 cs_mmem = x_mmem, cs_mdsk = x_mdsk, cs_mcpu = x_mcpu,
                 cs_imem = x_imem, cs_idsk = x_idsk, cs_icpu = x_icpu,
                 cs_tmem = x_tmem, cs_tdsk = x_tdsk, cs_tcpu = x_tcpu,
                 cs_xmem = x_xmem, cs_nmem = x_nmem
               }
            = cs
        inc_amem = Node.f_mem node - Node.r_mem node
        inc_amem' = if inc_amem > 0 then inc_amem else 0
        inc_adsk = Node.availDisk node
        inc_imem = truncate (Node.t_mem node) - Node.n_mem node
                   - Node.x_mem node - Node.f_mem node
        inc_icpu = Node.u_cpu node
        inc_idsk = truncate (Node.t_dsk node) - Node.f_dsk node

    in CStats { cs_fmem = x_fmem + Node.f_mem node
              , cs_fdsk = x_fdsk + Node.f_dsk node
              , cs_amem = x_amem + inc_amem'
              , cs_adsk = x_adsk + inc_adsk
              , cs_acpu = x_acpu
              , cs_mmem = max x_mmem inc_amem'
              , cs_mdsk = max x_mdsk inc_adsk
              , cs_mcpu = x_mcpu
              , cs_imem = x_imem + inc_imem
              , cs_idsk = x_idsk + inc_idsk
              , cs_icpu = x_icpu + inc_icpu
              , cs_tmem = x_tmem + Node.t_mem node
              , cs_tdsk = x_tdsk + Node.t_dsk node
              , cs_tcpu = x_tcpu + Node.t_cpu node
              , cs_xmem = x_xmem + Node.x_mem node
              , cs_nmem = x_nmem + Node.n_mem node
              }

-- | Compute the total free disk and memory in the cluster.
totalResources :: Node.List -> CStats
totalResources = foldl' updateCStats emptyCStats . Container.elems

-- | Compute the mem and disk covariance.
compDetailedCV :: Node.List -> (Double, Double, Double, Double, Double, Double)
compDetailedCV nl =
    let
        all_nodes = Container.elems nl
        (offline, nodes) = partition Node.offline all_nodes
        mem_l = map Node.p_mem nodes
        dsk_l = map Node.p_dsk nodes
        mem_cv = varianceCoeff mem_l
        dsk_cv = varianceCoeff dsk_l
        n1_l = length $ filter Node.failN1 nodes
        n1_score = fromIntegral n1_l /
                   fromIntegral (length nodes)::Double
        res_l = map Node.p_rem nodes
        res_cv = varianceCoeff res_l
        offline_inst = sum . map (\n -> (length . Node.plist $ n) +
                                        (length . Node.slist $ n)) $ offline
        online_inst = sum . map (\n -> (length . Node.plist $ n) +
                                       (length . Node.slist $ n)) $ nodes
        off_score = if offline_inst == 0
                    then 0::Double
                    else fromIntegral offline_inst /
                         fromIntegral (offline_inst + online_inst)::Double
        cpu_l = map Node.p_cpu nodes
        cpu_cv = varianceCoeff cpu_l
    in (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv)

-- | Compute the /total/ variance.
compCV :: Node.List -> Double
compCV nl =
    let (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) =
            compDetailedCV nl
    in mem_cv + dsk_cv + n1_score + res_cv + off_score + cpu_cv

-- | Compute online nodes from a Node.List
getOnline :: Node.List -> [Node.Node]
getOnline = filter (not . Node.offline) . Container.elems

-- * hbal functions

-- | Compute best table. Note that the ordering of the arguments is important.
compareTables :: Table -> Table -> Table
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
    if a_cv > b_cv then b else a

-- | Applies an instance move to a given node list and instance.
applyMove :: Node.List -> Instance.Instance
          -> IMove -> (OpResult Node.List, Instance.Instance, Ndx, Ndx)
-- Failover (f)
applyMove nl inst Failover =
    let old_pdx = Instance.pnode inst
        old_sdx = Instance.snode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        new_nl = do -- Maybe monad
          new_p <- Node.addPri int_s inst
          new_s <- Node.addSec int_p inst old_sdx
          return $ Container.addTwo old_pdx new_s old_sdx new_p nl
    in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)

-- Replace the primary (f:, r:np, f)
applyMove nl inst (ReplacePrimary new_pdx) =
    let old_pdx = Instance.pnode inst
        old_sdx = Instance.snode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_pdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        new_nl = do -- Maybe monad
          -- check that the current secondary can host the instance
          -- during the migration
          tmp_s <- Node.addPri int_s inst
          let tmp_s' = Node.removePri tmp_s inst
          new_p <- Node.addPri tgt_n inst
          new_s <- Node.addSec tmp_s' inst new_pdx
          return . Container.add new_pdx new_p $
                 Container.addTwo old_pdx int_p old_sdx new_s nl
    in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)

-- Replace the secondary (r:ns)
applyMove nl inst (ReplaceSecondary new_sdx) =
    let old_pdx = Instance.pnode inst
        old_sdx = Instance.snode inst
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_sdx nl
        int_s = Node.removeSec old_s inst
        new_nl = Node.addSec tgt_n inst old_pdx >>=
                 \new_s -> return $ Container.addTwo new_sdx
                           new_s old_sdx int_s nl
    in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)

-- Replace the secondary and failover (r:np, f)
applyMove nl inst (ReplaceAndFailover new_pdx) =
    let old_pdx = Instance.pnode inst
        old_sdx = Instance.snode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_pdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        new_nl = do -- Maybe monad
          new_p <- Node.addPri tgt_n inst
          new_s <- Node.addSec int_p inst new_pdx
          return . Container.add new_pdx new_p $
                 Container.addTwo old_pdx new_s old_sdx int_s nl
    in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)

-- Failver and replace the secondary (f, r:ns)
applyMove nl inst (FailoverAndReplace new_sdx) =
    let old_pdx = Instance.pnode inst
        old_sdx = Instance.snode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_sdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        new_nl = do -- Maybe monad
          new_p <- Node.addPri int_s inst
          new_s <- Node.addSec tgt_n inst old_sdx
          return . Container.add new_sdx new_s $
                 Container.addTwo old_sdx new_p old_pdx int_p nl
    in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)

-- | Tries to allocate an instance on one given node.
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
                 -> (OpResult Node.List, Instance.Instance)
allocateOnSingle nl inst p =
    let new_pdx = Node.idx p
        new_nl = Node.addPri p inst >>= \new_p ->
                 return $ Container.add new_pdx new_p nl
    in (new_nl, Instance.setBoth inst new_pdx Node.noSecondary)

-- | Tries to allocate an instance on a given pair of nodes.
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
               -> (OpResult Node.List, Instance.Instance)
allocateOnPair nl inst tgt_p tgt_s =
    let new_pdx = Node.idx tgt_p
        new_sdx = Node.idx tgt_s
        new_nl = do -- Maybe monad
          new_p <- Node.addPri tgt_p inst
          new_s <- Node.addSec tgt_s inst new_pdx
          return $ Container.addTwo new_pdx new_p new_sdx new_s nl
    in (new_nl, Instance.setBoth inst new_pdx new_sdx)

-- | Tries to perform an instance move and returns the best table
-- between the original one and the new one.
checkSingleStep :: Table -- ^ The original table
                -> Instance.Instance -- ^ The instance to move
                -> Table -- ^ The current best table
                -> IMove -- ^ The move to apply
                -> Table -- ^ The final best table
checkSingleStep ini_tbl target cur_tbl move =
    let
        Table ini_nl ini_il _ ini_plc = ini_tbl
        (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
    in
      case tmp_nl of
        OpFail _ -> cur_tbl
        OpGood upd_nl ->
            let tgt_idx = Instance.idx target
                upd_cvar = compCV upd_nl
                upd_il = Container.add tgt_idx new_inst ini_il
                upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
            in
              compareTables cur_tbl upd_tbl

-- | Given the status of the current secondary as a valid new node
-- and the current candidate target node,
-- generate the possible moves for a instance.
possibleMoves :: Bool -> Ndx -> [IMove]
possibleMoves True tdx =
    [ReplaceSecondary tdx,
     ReplaceAndFailover tdx,
     ReplacePrimary tdx,
     FailoverAndReplace tdx]

possibleMoves False tdx =
    [ReplaceSecondary tdx,
     ReplaceAndFailover tdx]

-- | Compute the best move for a given instance.
checkInstanceMove :: [Ndx]             -- Allowed target node indices
                  -> Table             -- Original table
                  -> Instance.Instance -- Instance to move
                  -> Table             -- Best new table for this instance
checkInstanceMove nodes_idx ini_tbl target =
    let
        opdx = Instance.pnode target
        osdx = Instance.snode target
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
        use_secondary = elem osdx nodes_idx
        aft_failover = if use_secondary -- if allowed to failover
                       then checkSingleStep ini_tbl target ini_tbl Failover
                       else ini_tbl
        all_moves = concatMap (possibleMoves use_secondary) nodes
    in
      -- iterate over the possible nodes for this instance
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves

-- | Compute the best next move.
checkMove :: [Ndx]               -- ^ Allowed target node indices
          -> Table               -- ^ The current solution
          -> [Instance.Instance] -- ^ List of instances still to move
          -> Table               -- ^ The new solution
checkMove nodes_idx ini_tbl victims =
    let Table _ _ _ ini_plc = ini_tbl
        -- iterate over all instances, computing the best move
        best_tbl =
            foldl'
            (\ step_tbl elem ->
                 if Instance.snode elem == Node.noSecondary then step_tbl
                    else compareTables step_tbl $
                         checkInstanceMove nodes_idx ini_tbl elem)
            ini_tbl victims
        Table _ _ _ best_plc = best_tbl
    in
      if length best_plc == length ini_plc then -- no advancement
          ini_tbl
      else
          best_tbl

-- * Alocation functions

-- | Try to allocate an instance on the cluster.
tryAlloc :: (Monad m) =>
            Node.List         -- ^ The node list
         -> Instance.List     -- ^ The instance list
         -> Instance.Instance -- ^ The instance to allocate
         -> Int               -- ^ Required number of nodes
         -> m AllocSolution   -- ^ Possible solution list
tryAlloc nl _ inst 2 =
    let all_nodes = getOnline nl
        all_pairs = liftM2 (,) all_nodes all_nodes
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
        sols = map (\(p, s) -> let (mnl, i) = allocateOnPair nl inst p s
                               in (mnl, i, [p, s]))
               ok_pairs
    in return sols

tryAlloc nl _ inst 1 =
    let all_nodes = getOnline nl
        sols = map (\p -> let (mnl, i) = allocateOnSingle nl inst p
                          in (mnl, i, [p]))
               all_nodes
    in return sols

tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \
                             \destinations required (" ++ show reqn ++
                                               "), only two supported"

-- | Try to allocate an instance on the cluster.
tryReloc :: (Monad m) =>
            Node.List       -- ^ The node list
         -> Instance.List   -- ^ The instance list
         -> Idx             -- ^ The index of the instance to move
         -> Int             -- ^ The numver of nodes required
         -> [Ndx]           -- ^ Nodes which should not be used
         -> m AllocSolution -- ^ Solution list
tryReloc nl il xid 1 ex_idx =
    let all_nodes = getOnline nl
        inst = Container.find xid il
        ex_idx' = Instance.pnode inst:ex_idx
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
        valid_idxes = map Node.idx valid_nodes
        sols1 = map (\x -> let (mnl, i, _, _) =
                                   applyMove nl inst (ReplaceSecondary x)
                           in (mnl, i, [Container.find x nl])
                     ) valid_idxes
    in return sols1

tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
                                \destinations required (" ++ show reqn ++
                                                  "), only one supported"

-- * Formatting functions

-- | Given the original and final nodes, computes the relocation description.
computeMoves :: String -- ^ The instance name
             -> String -- ^ Original primary
             -> String -- ^ Original secondary
             -> String -- ^ New primary
             -> String -- ^ New secondary
             -> (String, [String])
                -- ^ Tuple of moves and commands list; moves is containing
                -- either @/f/@ for failover or @/r:name/@ for replace
                -- secondary, while the command list holds gnt-instance
                -- commands (without that prefix), e.g \"@failover instance1@\"
computeMoves i a b c d
    -- same primary
    | c == a =
        if d == b
        then {- Same sec??! -} ("-", [])
        else {- Change of secondary -}
            (printf "r:%s" d, [rep d])
    -- failover and ...
    | c == b =
        if d == a
        then {- that's all -} ("f", [mig])
        else (printf "f r:%s" d, [mig, rep d])
    -- ... and keep primary as secondary
    | d == a =
        (printf "r:%s f" c, [rep c, mig])
    -- ... keep same secondary
    | d == b =
        (printf "f r:%s f" c, [mig, rep c, mig])
    -- nothing in common -
    | otherwise =
        (printf "r:%s f r:%s" c d, [rep c, mig, rep d])
    where mig = printf "migrate -f %s" i::String
          rep n = printf "replace-disks -n %s %s" n i

-- | Converts a placement to string format.
printSolutionLine :: Node.List     -- ^ The node list
                  -> Instance.List -- ^ The instance list
                  -> Int           -- ^ Maximum node name length
                  -> Int           -- ^ Maximum instance name length
                  -> Placement     -- ^ The current placement
                  -> Int           -- ^ The index of the placement in
                                   -- the solution
                  -> (String, [String])
printSolutionLine nl il nmlen imlen plc pos =
    let
        pmlen = (2*nmlen + 1)
        (i, p, s, c) = plc
        inst = Container.find i il
        inam = Instance.name inst
        npri = Container.nameOf nl p
        nsec = Container.nameOf nl s
        opri = Container.nameOf nl $ Instance.pnode inst
        osec = Container.nameOf nl $ Instance.snode inst
        (moves, cmds) =  computeMoves inam opri osec npri nsec
        ostr = printf "%s:%s" opri osec::String
        nstr = printf "%s:%s" npri nsec::String
    in
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
       pos imlen inam pmlen ostr
       pmlen nstr c moves,
       cmds)

-- | Given a list of commands, prefix them with @gnt-instance@ and
-- also beautify the display a little.
formatCmds :: [[String]] -> String
formatCmds =
    unlines .
    concatMap (\(a, b) ->
               printf "echo step %d" (a::Int):
               printf "check":
               map ("gnt-instance " ++) b
              ) .
    zip [1..]

-- | Converts a solution to string format.
printSolution :: Node.List
              -> Instance.List
              -> [Placement]
              -> ([String], [[String]])
printSolution nl il sol =
    let
        nmlen = Container.maxNameLen nl
        imlen = Container.maxNameLen il
    in
      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]

-- | Print the node list.
printNodes :: Node.List -> String
printNodes nl =
    let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
        m_name = maximum . map (length . Node.name) $ snl
        helper = Node.list m_name
        header = printf
                 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %4s %4s \
                 \%3s %3s %6s %6s %5s"
                 " F" m_name "Name"
                 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
                 "t_dsk" "f_dsk" "pcpu" "vcpu"
                 "pri" "sec" "p_fmem" "p_fdsk" "r_cpu"::String
    in unlines (header:map helper snl)

-- | Shows statistics for a given node list.
printStats :: Node.List -> String
printStats nl =
    let (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) =
            compDetailedCV nl
    in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, \
              \uf=%.3f, r_cpu=%.3f"
       mem_cv res_cv dsk_cv n1_score off_score cpu_cv