Commit 1c05226b authored by Niklas Hambuechen
Browse files

Implement job filtering



This implements the operational part of the design doc
  "Filtering of jobs for the Ganeti job queue"
(design-optables.rst).

It includes
- respecting filter rules when jobs are scheduled
- cancelling running jobs rejected by filters
- re-running the scheduler when filter rules are changed
- handling of the filter actions ACCEPT, CONTINUE, PAUSE, REJECT
  and RATE_LIMIT
- implementation of the "jobid", "opcode" and "reason" predicates
Signed-off-by: Niklas Hambuechen <niklash@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>
parent 5ebc6675
...@@ -858,6 +858,7 @@ HS_LIB_SRCS = \ ...@@ -858,6 +858,7 @@ HS_LIB_SRCS = \
src/Ganeti/Hs2Py/GenOpCodes.hs \ src/Ganeti/Hs2Py/GenOpCodes.hs \
src/Ganeti/Hs2Py/OpDoc.hs \ src/Ganeti/Hs2Py/OpDoc.hs \
src/Ganeti/JQScheduler.hs \ src/Ganeti/JQScheduler.hs \
src/Ganeti/JQScheduler/Filtering.hs \
src/Ganeti/JQScheduler/ReasonRateLimiting.hs \ src/Ganeti/JQScheduler/ReasonRateLimiting.hs \
src/Ganeti/JQScheduler/Types.hs \ src/Ganeti/JQScheduler/Types.hs \
src/Ganeti/JQueue.hs \ src/Ganeti/JQueue.hs \
......
...@@ -39,6 +39,7 @@ module Ganeti.JQScheduler ...@@ -39,6 +39,7 @@ module Ganeti.JQScheduler
, jqForkLock , jqForkLock
, emptyJQStatus , emptyJQStatus
, selectJobsToRun , selectJobsToRun
, scheduleSomeJobs
, initJQScheduler , initJQScheduler
, enqueueNewJobs , enqueueNewJobs
, dequeueJob , dequeueJob
...@@ -46,7 +47,7 @@ module Ganeti.JQScheduler ...@@ -46,7 +47,7 @@ module Ganeti.JQScheduler
, cleanupIfDead , cleanupIfDead
) where ) where
import Control.Applicative (liftA2) import Control.Applicative (liftA2, (<$>))
import Control.Arrow import Control.Arrow
import Control.Concurrent import Control.Concurrent
import Control.Exception import Control.Exception
...@@ -57,16 +58,20 @@ import Data.Functor ((<$)) ...@@ -57,16 +58,20 @@ import Data.Functor ((<$))
import Data.IORef import Data.IORef
import Data.List import Data.List
import Data.Maybe import Data.Maybe
import qualified Data.Map as Map
import Data.Ord (comparing) import Data.Ord (comparing)
import Data.Set (Set)
import qualified Data.Set as S import qualified Data.Set as S
import System.INotify import System.INotify
import Ganeti.BasicTypes import Ganeti.BasicTypes
import Ganeti.Constants as C import Ganeti.Constants as C
import Ganeti.Errors import Ganeti.Errors
import Ganeti.JQScheduler.Filtering (applyingFilter, jobFiltering)
import Ganeti.JQScheduler.Types import Ganeti.JQScheduler.Types
import Ganeti.JQScheduler.ReasonRateLimiting (reasonRateLimit) import Ganeti.JQScheduler.ReasonRateLimiting (reasonRateLimit)
import Ganeti.JQueue as JQ import Ganeti.JQueue as JQ
import Ganeti.JSON (fromContainer)
import Ganeti.Lens hiding (chosen) import Ganeti.Lens hiding (chosen)
import Ganeti.Logging import Ganeti.Logging
import Ganeti.Objects import Ganeti.Objects
...@@ -299,11 +304,13 @@ jobEligible queue jWS = ...@@ -299,11 +304,13 @@ jobEligible queue jWS =
-- pure function doing the scheduling. -- pure function doing the scheduling.
selectJobsToRun :: Int -- ^ How many jobs are allowed to run at the selectJobsToRun :: Int -- ^ How many jobs are allowed to run at the
-- same time. -- same time.
-> Set FilterRule -- ^ Filter rules to respect for scheduling
-> Queue -> Queue
-> (Queue, [JobWithStat]) -> (Queue, [JobWithStat])
selectJobsToRun count queue = selectJobsToRun count filters queue =
let n = count - length (qRunning queue) - length (qManipulated queue) let n = count - length (qRunning queue) - length (qManipulated queue)
chosen = take n chosen = take n
. jobFiltering queue filters
. reasonRateLimit queue . reasonRateLimit queue
. sortBy (comparing (calcJobPriority . jJob)) . sortBy (comparing (calcJobPriority . jJob))
. filter (jobEligible queue) . filter (jobEligible queue)
...@@ -346,6 +353,44 @@ failJobs cfg qstate jobs = do ...@@ -346,6 +353,44 @@ failJobs cfg qstate jobs = do
mapM_ (runResultT . failAndSaveJobWithStat) jobs mapM_ (runResultT . failAndSaveJobWithStat) jobs
logDebug $ "Failed jobs " ++ sjobs logDebug $ "Failed jobs " ++ sjobs
-- | Cancels every enqueued job whose applying filter rule has the
-- `Reject` action.
--
-- Each such job is first taken out of the queue with `dequeueJob`. If that
-- succeeds, the job is marked as cancelled and written out; if the job was
-- not queued any more, a direct `cancelJob` is attempted instead.
cancelRejectedJobs :: JQStatus -> ConfigData -> Set FilterRule -> IO ()
cancelRejectedJobs qstate cfg filters = do
  queue <- readIORef (jqJobs qstate)

  -- Pair every enqueued job with the REJECT rule that applies to it.
  let rejected =
        [ (job, rule)
        | job <- map jJob (qEnqueued queue)
        , Just rule <- [applyingFilter filters job]
        , frAction rule == Reject
        ]

  qDir <- queueDir

  forM_ rejected $ \(job, rule) -> do
    let jid    = qjId job
        jidStr = show (fromJobId jid)
    logDebug $ "Cancelling job " ++ jidStr
               ++ " because it was REJECTed by filter rule " ++ frUuid rule

    -- First dequeue, then cancel.
    dequeued <- dequeueJob qstate jid
    case dequeued of
      -- Passing a nonexistent job ID is an error here.
      Bad msg -> logError msg

      Ok False -> do
        logDebug $ "Job " ++ jidStr
                   ++ " not queued; trying to cancel directly"
        _ <- cancelJob False (jqLivelock qstate) jid -- sigTERM-kill only
        return ()

      Ok True -> do
        now <- currentTimestamp
        written <- runResultT
                     $ writeAndReplicateJob cfg qDir (cancelQueuedJob now job)
        case written of
          Bad err -> logError $
            "Failed to write config when cancelling job: " ++ err
          Ok _ -> return ()
-- | Schedule jobs to be run. This is the IO wrapper around the -- | Schedule jobs to be run. This is the IO wrapper around the
-- pure `selectJobsToRun`. -- pure `selectJobsToRun`.
scheduleSomeJobs :: JQStatus -> IO () scheduleSomeJobs :: JQStatus -> IO ()
...@@ -356,9 +401,15 @@ scheduleSomeJobs qstate = do ...@@ -356,9 +401,15 @@ scheduleSomeJobs qstate = do
let msg = "Configuration unavailable: " ++ err let msg = "Configuration unavailable: " ++ err
logError msg logError msg
Ok cfg -> do Ok cfg -> do
let filters = S.fromList . Map.elems . fromContainer $ configFilters cfg
-- Check if jobs are rejected by a REJECT filter, and cancel them.
cancelRejectedJobs qstate cfg filters
-- Select the jobs to run. -- Select the jobs to run.
count <- getMaxRunningJobs qstate count <- getMaxRunningJobs qstate
chosen <- atomicModifyIORef (jqJobs qstate) (selectJobsToRun count) chosen <- atomicModifyIORef (jqJobs qstate)
(selectJobsToRun count filters)
let jobs = map jJob chosen let jobs = map jJob chosen
unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin
$ map (show . fromJobId . qjId) jobs $ map (show . fromJobId . qjId) jobs
......
{-# LANGUAGE TupleSections, NamedFieldPuns, ScopedTypeVariables, RankNTypes,
GADTs #-}
{-| Filtering of jobs for the Ganeti job queue.
-}
{-
Copyright (C) 2014 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-}
module Ganeti.JQScheduler.Filtering
( applyingFilter
, jobFiltering
) where
import Data.List
import Data.Maybe
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Text.JSON as J
import Ganeti.BasicTypes
import Ganeti.Errors
import Ganeti.Lens hiding (chosen)
import Ganeti.JQScheduler.Types
import Ganeti.JQueue (QueuedJob(..))
import Ganeti.JQueue.Lens
import Ganeti.JSON
import Ganeti.Objects (FilterRule(..), FilterAction(..), FilterPredicate(..),
filterRuleOrder)
import Ganeti.OpCodes (OpCode)
import Ganeti.OpCodes.Lens
import Ganeti.Query.Language
import Ganeti.Query.Filter (evaluateFilterM, evaluateFilterJSON, Comparator,
FilterOp(..), toCompFun)
import Ganeti.SlotMap
import Ganeti.Types (JobId(..), ReasonElem)
-- | Looks up a field in the JSON rendering of an `OpCode`, addressed by a
-- dotted path (for example @"a.b.c"@). A failed lookup is reported as a
-- `ParameterError`.
accessOpCodeField :: OpCode -> String -> ErrorResult J.JSValue
accessOpCodeField opc path =
  case nestedAccessByKeyDotted path (J.showJSON opc) of
    J.Error msg -> Bad (ParameterError msg)
    J.Ok value  -> Ok value
-- | Collects the `OpCode` of every valid op code input found in the
-- job's ops.
opCodesOf :: QueuedJob -> [OpCode]
opCodesOf job = job ^.. opCodeFold
  where
    opCodeFold = qjOpsL . traverse . qoInputL . validOpCodeL . metaOpCodeL
-- | Collects every `ReasonElem` attached to the valid op code inputs of
-- the job's ops.
reasonsOf :: QueuedJob -> [ReasonElem]
reasonsOf job = job ^.. reasonFold
  where
    reasonFold = qjOpsL . traverse . qoInputL . validOpCodeL
               . metaParamsL . opReasonL . traverse
-- | A restricted form of `evaluateFilterM` that only understands
-- `Comparator` operations.
--
-- The callback receives the comparison function, the field and the value to
-- compare against. It should answer @Just True@ / @Just False@ according to
-- the outcome of the comparison, or `Nothing` when the comparison itself is
-- invalid (e.g. it refers to a field that doesn't exist).
--
-- Every other filter language operation is answered with `Nothing`, and a
-- `Nothing` overall result is reported as `False`.
evaluateFilterComparator :: (Ord field)
                         => Filter field
                         -> (Comparator -> field -> FilterValue -> Maybe Bool)
                         -> Bool
evaluateFilterComparator fil opFun =
  let -- The handler is deliberately kept as a lambda passed straight to
      -- `evaluateFilterM`, so it is checked against the type expected there.
      verdict =
        evaluateFilterM
          (\op -> case op of
                    Comp cmp -> opFun (toCompFun cmp)
                    _        -> \_ _ -> Nothing) -- non-comparisons
          fil
  in fromMaybe False verdict
-- | Decides whether a single `FilterPredicate` holds for a job.
matchPredicate :: QueuedJob
               -> JobId -- ^ the watermark to compare against
                        --   if the predicate references it
               -> FilterPredicate
               -> Bool
-- Job ID predicate: only the field "id" is known. It can be compared with a
-- number, or with the watermark via the quoted string "watermark".
matchPredicate job watermark (FPJobId fil) =
  evaluateFilterComparator fil $ \cmp field val -> case field of
    "id" -> case val of
      NumericValue n           -> Just (jobIdNum `cmp` n)
      QuotedString "watermark" -> Just (jobId `cmp` watermark)
      QuotedString _           -> Nothing
    _    -> Nothing
  where
    jobId    = qjId job
    jobIdNum = fromIntegral (fromJobId jobId)

-- OpCode predicate: holds if the filter evaluates to true on the JSON fields
-- of at least one of the job's op codes. Any error counts as "no match".
matchPredicate job _ (FPOpCode fil) = any opMatches (opCodesOf job)
  where
    opMatches opc =
      genericResult (const False) id
        (traverse (accessOpCodeField opc) fil >>= evaluateFilterJSON)

-- Reason predicate: holds if at least one reason trail entry of the job
-- satisfies the filter on its "source", "reason" or "timestamp" component.
matchPredicate job _ (FPReason fil) = any reasonMatches (reasonsOf job)
  where
    reasonMatches (src, rsn, ts) =
      evaluateFilterComparator fil $ \cmp field val -> case field of
        "source"    -> Just (QuotedString src `cmp` val)
        "reason"    -> Just (QuotedString rsn `cmp` val)
        "timestamp" -> Just (NumericValue ts `cmp` val)
        _           -> Nothing
-- | A filter rule matches a job when every one of its predicates holds for
-- that job; the rule's own watermark is handed to each predicate check.
matches :: QueuedJob -> FilterRule -> Bool
matches job rule =
  all (matchPredicate job (frWatermark rule)) (frPredicates rule)
-- | Lists the filter rules in the processing order demanded by the spec,
-- i.e. sorted by `filterRuleOrder`.
orderFilters :: Set FilterRule -> [FilterRule]
orderFilters filters = sortBy filterRuleOrder (Set.toList filters)
-- | Determines the /applying/ filter of a job: walking the rules in
-- `orderFilters` order, the first rule whose predicates all match the job
-- and whose action is something other than `Continue`.
applyingFilter :: Set FilterRule -> QueuedJob -> Maybe FilterRule
applyingFilter filters job = find applies (orderFilters filters)
  where
    -- Matching rules with a `Continue` action are skipped over.
    applies rule = matches job rule && Continue /= frAction rule
-- | SlotMap for filter rule rate limiting, having `FilterRule` UUIDs as keys.
--
-- We would prefer `FilterRule` itself as the key type, but that has no `Ord`
-- instance (yet).
-- NOTE(review): the scheduler builds a @Set FilterRule@ with @S.fromList@,
-- which needs an `Ord` instance, so this remark may be outdated; confirm.
type RateLimitSlotMap = SlotMap String
-- | The state threaded through the jobs while the filter chain is applied
-- to them one after another.
data FilterChainState = FilterChainState
  { rateLimitSlotMap :: RateLimitSlotMap
    -- ^ rate limiting slots (limits and occupied counts), keyed by the
    -- UUID of the rate limiting filter rule
  } deriving (Eq, Ord, Show)
-- | Tries to book the given `CountMap` into the state's rate limiting
-- `SlotMap`. Yields the updated state if there are enough free slots, and
-- `Nothing` otherwise.
tryFitSlots :: FilterChainState -> CountMap String -> Maybe FilterChainState
tryFitSlots st countMap
  | slots `hasSlotsFor` countMap =
      Just st { rateLimitSlotMap = slots `occupySlots` countMap }
  | otherwise = Nothing
  where
    slots = rateLimitSlotMap st
-- | Computes, for a job queue and a set of filters, how many rate limiting
-- slots each `RateLimit` filter offers and how many of them are already
-- taken by the running and manipulated jobs of the queue.
queueRateLimitSlotMap :: Queue -> Set FilterRule -> RateLimitSlotMap
queueRateLimitSlotMap queue filters = limits `occupySlots` taken
  where
    -- One entry per rate limiting filter, carrying its limit and an
    -- occupied count of 0 (limits only).
    limits =
      Map.fromList
        [ (uuid, Slot 0 n)
        | FilterRule{ frUuid = uuid, frAction = RateLimit n }
            <- Set.toList filters
        ]

    -- Jobs that currently hold slots.
    activeJobs = qRunning queue ++ qManipulated queue

    -- Number of slots taken per rate limiting filter (counts only).
    -- A job takes a slot of a RateLimit filter if that filter is the
    -- applying filter of the job.
    taken =
      Map.fromListWith (+)
        [ (frUuid rule, 1)
        | Just rule@FilterRule{ frAction = RateLimit _ }
            <- map (applyingFilter filters . jJob) activeJobs
        ]
-- | Implements job filtering as specified in `doc/design-optables.rst`.
--
-- The jobs are visited in the given order. For each job, the filter that
-- *applies* (see `applyingFilter`: the first one of which all predicates
-- match) decides whether the job is kept:
--
-- * no applying filter, `Accept` or `Continue`: the job is kept;
--
-- * `Pause` or `Reject`: the job is dropped;
--
-- * `RateLimit`: the job is kept only if one more slot of that filter is
--   free, in which case the slot is marked as taken for the following jobs.
--
-- The initial `FilterChainState` is currently not cached across
-- `selectJobsToRun` invocations because the number of running jobs is
-- typically small (< 100).
jobFiltering :: Queue -> Set FilterRule -> [JobWithStat] -> [JobWithStat]
jobFiltering queue filters jobs =
  catMaybes . snd $ mapAccumL decide initialState jobs
  where
    -- Slots already taken by the jobs currently in the queue's running and
    -- manipulated lists.
    initialState = FilterChainState
      { rateLimitSlotMap = queueRateLimitSlotMap queue filters }

    decide :: FilterChainState
           -> JobWithStat
           -> (FilterChainState, Maybe JobWithStat)
    decide st job =
      let admit = (st, Just job)
          skip  = (st, Nothing)
      in case applyingFilter filters (jJob job) of
           Nothing   -> admit -- no filter applies, accept job
           Just rule -> case frAction rule of
             Accept      -> admit
             Continue    -> admit
             Pause       -> skip
             Reject      -> skip
             RateLimit _ -> -- A matching job takes 1 slot.
               maybe skip (\st' -> (st', Just job))
                 $ tryFitSlots st (Map.fromList [(frUuid rule, 1)])
...@@ -348,6 +348,9 @@ handleCall _ status _ (ReplaceFilter mUuid priority predicates action ...@@ -348,6 +348,9 @@ handleCall _ status _ (ReplaceFilter mUuid priority predicates action
. (configFiltersL . alterContainerL uuid .~ Just rule) . (configFiltersL . alterContainerL uuid .~ Just rule)
$ lockedCfg $ lockedCfg
-- Filters were changed, run job scheduler.
liftIO $ scheduleSomeJobs status
-- Return UUID of added/replaced filter. -- Return UUID of added/replaced filter.
return $ showJSON uuid return $ showJSON uuid
...@@ -365,6 +368,9 @@ handleCall _ status cfg (DeleteFilter uuid) = runResultT $ do ...@@ -365,6 +368,9 @@ handleCall _ status cfg (DeleteFilter uuid) = runResultT $ do
. (configFiltersL . alterContainerL uuid .~ Nothing) . (configFiltersL . alterContainerL uuid .~ Nothing)
$ lockedCfg $ lockedCfg
-- Filters were changed, run job scheduler.
liftIO $ scheduleSomeJobs status
return JSNull return JSNull
handleCall _ _ cfg (QueryNetworks names fields lock) = handleCall _ _ cfg (QueryNetworks names fields lock) =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment