Commit 2e605c0f authored by Klaus Aehlig's avatar Klaus Aehlig

Make harep identify itself in the reason trail

As harep is a tool submitting jobs, make it identify itself
in the reason trail. Also allow giving a reason for calling
harep to be able to pass on further information, which can
also be used in job filtering.
Signed-off-by: default avatarKlaus Aehlig <aehlig@google.com>
Reviewed-by: default avatarHelga Velroyen <helgav@google.com>
parent de7e1a43
......@@ -40,6 +40,7 @@ module Ganeti.HTools.Program.Harep
, options) where
import Control.Exception (bracket)
import Control.Lens (over)
import Control.Monad
import Data.Function
import Data.List
......@@ -51,8 +52,11 @@ import qualified Data.Map as Map
import Ganeti.BasicTypes
import Ganeti.Common
import Ganeti.Errors
import Ganeti.JQueue (currentTimestamp, reasonTrailTimestamp)
import Ganeti.JQueue.Objects (Timestamp)
import Ganeti.Jobs
import Ganeti.OpCodes
import Ganeti.OpCodes.Lens (metaParamsL, opReasonL)
import Ganeti.OpParams
import Ganeti.Types
import Ganeti.Utils
......@@ -69,6 +73,9 @@ import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
import Ganeti.Version (version)
-- | Options list and functions.
options :: IO [OptType]
options = do
......@@ -76,11 +83,22 @@ options = do
return
[ luxi
, oJobDelay
, oReason
]
arguments :: [ArgCompletion]
arguments = []
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
-- about what generated the opcode.
annotateOpCode :: Maybe String -> Timestamp -> OpCode -> MetaOpCode
annotateOpCode reason ts =
over (metaParamsL . opReasonL)
(++ [( "harep", fromMaybe ("harep " ++ version ++ " called") reason
, reasonTrailTimestamp ts)])
. setOpComment ("automated repairs by harep " ++ version)
. wrapOpCode
data InstanceData = InstanceData { arInstance :: Instance.Instance
, arState :: AutoRepairStatus
, tagsToRemove :: [String]
......@@ -211,8 +229,8 @@ arStatusCmp instData arData =
"programming error: ArNeedsRepair found as an initial state"
-- | Query jobs of a pending repair, returning the new instance data.
processPending :: L.Client -> InstanceData -> IO InstanceData
processPending client instData =
processPending :: Options -> L.Client -> InstanceData -> IO InstanceData
processPending opts client instData =
case arState instData of
(ArPendingRepair arData) -> do
sts <- L.queryJobsStatus client $ arJobs arData
......@@ -228,7 +246,7 @@ processPending client instData =
destSt = arStateName arState'
putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++
show destSt)
commitChange client instData'
commitChange opts client instData'
where
instData' =
instData { arState = arState'
......@@ -264,13 +282,15 @@ updateTag arData =
-- 'AutoRepairData', add its tag to the instance object. Additionally, if
-- /tagsToRemove/ is not empty, remove those tags from the instance object. The
-- returned /InstanceData/ object always has an empty /tagsToRemove/.
commitChange :: L.Client -> InstanceData -> IO InstanceData
commitChange client instData = do
commitChange :: Options -> L.Client -> InstanceData -> IO InstanceData
commitChange opts client instData = do
now <- currentTimestamp
let iname = Instance.name $ arInstance instData
arData = getArData $ arState instData
rmTags = tagsToRemove instData
execJobsWaitOk' opcodes = do
res <- execJobsWaitOk [map wrapOpCode opcodes] client
res <- execJobsWaitOk
[map (annotateOpCode (optReason opts) now) opcodes] client
case res of
Ok _ -> return ()
Bad e -> exitErr e
......@@ -382,12 +402,13 @@ detectBroken nl inst =
-- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt.
-- | Perform the suggested repair on an instance if its policy allows it.
doRepair :: L.Client -- ^ The Luxi client
doRepair :: Options
-> L.Client -- ^ The Luxi client
-> Double -- ^ Delay to insert before the first repair opcode
-> InstanceData -- ^ The instance data
-> (AutoRepairType, [OpCode]) -- ^ The repair job to perform
-> IO InstanceData -- ^ The updated instance data
doRepair client delay instData (rtype, opcodes) =
doRepair opts client delay instData (rtype, opcodes) =
let inst = arInstance instData
ipol = Instance.arPolicy inst
iname = Instance.name inst
......@@ -407,8 +428,9 @@ doRepair client delay instData (rtype, opcodes) =
putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++
iname ++ " because only repairs up to " ++ show maxtype ++
" are allowed")
commitChange client instData' -- Adds "enoperm" result label.
commitChange opts client instData' -- Adds "enoperm" result label.
else do
now <- currentTimestamp
putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname)
-- After submitting the job, we must write an autorepair:pending tag,
......@@ -445,7 +467,8 @@ doRepair client delay instData (rtype, opcodes) =
uuid <- newUUID
time <- getClockTime
jids <- submitJobs [map wrapOpCode opcodes'] client
jids <- submitJobs [map (annotateOpCode (optReason opts) now) opcodes']
client
case jids of
Bad e -> exitErr e
......@@ -456,7 +479,7 @@ doRepair client delay instData (rtype, opcodes) =
, tagsToRemove = delCurTag instData
}
in
commitChange client instData' -- Adds "pending" label.
commitChange opts client instData' -- Adds "pending" label.
otherSt -> do
putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++
......@@ -480,7 +503,7 @@ main opts args = do
-- First step: check all pending repairs, see if they are completed.
iniData' <- bracket (L.getLuxiClient master) L.closeClient $
forM iniData . processPending
forM iniData . processPending opts
-- Second step: detect any problems.
let repairs = map (detectBroken nl . arInstance) iniData'
......@@ -489,7 +512,7 @@ main opts args = do
let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r
jobDelay = optJobDelay opts
repairHealthy c i = case arState i of
ArHealthy _ -> doRepair c jobDelay i
ArHealthy _ -> doRepair opts c jobDelay i
_ -> const (return i)
repairDone <- bracket (L.getLuxiClient master) L.closeClient $
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment