diff --git a/Makefile.am b/Makefile.am index bd5b64ba71a42cfe6b3b472849189e51ebed592e..e82057c48c8446d1bd10b15d8d340ac9b96ee2a7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -975,6 +975,7 @@ HS_LIB_SRCS = \ src/Ganeti/Logging/WriterLog.hs \ src/Ganeti/Luxi.hs \ src/Ganeti/MaintD/Autorepairs.hs \ + src/Ganeti/MaintD/MemoryState.hs \ src/Ganeti/MaintD/Server.hs \ src/Ganeti/MaintD/Utils.hs \ src/Ganeti/Network.hs \ diff --git a/src/Ganeti/MaintD/MemoryState.hs b/src/Ganeti/MaintD/MemoryState.hs new file mode 100644 index 0000000000000000000000000000000000000000..7fd351a6f143f4a065e81cc81dfaa3dff7184648 --- /dev/null +++ b/src/Ganeti/MaintD/MemoryState.hs @@ -0,0 +1,92 @@ +{-| Memory copy of the state of the maintenance daemon. + +While the autoritative state of the maintenance daemon is +stored in the configuration, the daemon keeps a copy of some +values at run time, so that they can easily be exposed over +HTTP. + +This module also provides functions for the mirrored information +to update both, the authoritative state and the in-memory copy. + +-} + +{- + +Copyright (C) 2015 Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-} + +module Ganeti.MaintD.MemoryState + ( MemoryState(..) + , emptyMemoryState + , getJobs + , clearJobs + , appendJobs + ) where + +import Control.Monad.IO.Class (liftIO) +import Data.IORef (IORef, atomicModifyIORef) + +import Ganeti.BasicTypes (ResultT, withErrorT) +import Ganeti.Types (JobId) +import Ganeti.Utils (ordNub) +import Ganeti.WConfd.Client ( runNewWConfdClient, maintenanceJobs, runModifyRpc + , clearMaintdJobs, appendMaintdJobs ) + +-- | In-memory copy of parts of the state of the maintenance +-- daemon. +data MemoryState = MemoryState + { msJobs :: [ JobId ] + } + +-- | Inital state of the in-memory copy. All parts will be updated +-- before use, after one round at the latest this copy is up to date. +emptyMemoryState :: MemoryState +emptyMemoryState = MemoryState { + msJobs = [] + } + +-- | Get the list of jobs from the authoritative copy, and update the +-- in-memory copy as well. +getJobs :: IORef MemoryState -> ResultT String IO [JobId] +getJobs memstate = do + jobs <- withErrorT show $ runNewWConfdClient maintenanceJobs + liftIO . atomicModifyIORef memstate $ \ s -> (s { msJobs = jobs }, ()) + return jobs + +-- | Reset the list of active jobs. +clearJobs :: IORef MemoryState -> IO () +clearJobs memstate = do + runModifyRpc clearMaintdJobs + atomicModifyIORef memstate $ \ s -> ( s { msJobs = [] }, ()) + +-- | Append jobs to the list of active jobs, if not present already +appendJobs :: IORef MemoryState -> [JobId] -> IO () +appendJobs memstate jobs = do + runModifyRpc $ appendMaintdJobs jobs + atomicModifyIORef memstate + $ \ s -> ( s { msJobs = ordNub $ msJobs s ++ jobs }, ()) diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs index 4ab74b930a90dcb946b499ea4a22271af2fb2b39..c68c7e3928bfd3cbaa632035868445883927147c 100644 --- a/src/Ganeti/MaintD/Server.hs +++ b/src/Ganeti/MaintD/Server.hs @@ -43,14 +43,17 @@ module Ganeti.MaintD.Server import Control.Applicative ((<|>)) import Control.Concurrent (forkIO) -import Control.Monad (forever, void, unless, when) +import Control.Exception.Lifted (bracket) +import Control.Monad (forever, void, unless, when, liftM) import Control.Monad.IO.Class (liftIO) +import Data.IORef (IORef, newIORef, readIORef) import qualified Data.Set as Set -import Snap.Core (Snap, method, Method(GET), ifTop) +import Snap.Core (Snap, method, Method(GET), ifTop, dir, route) import Snap.Http.Server (httpServe) import Snap.Http.Server.Config (Config) import System.IO.Error (tryIOError) import System.Time (getClockTime) +import qualified Text.JSON as J import Ganeti.BasicTypes ( GenericResult(..), ResultT, runResultT, mkResultT , withErrorT, isBad) @@ -61,8 +64,11 @@ import Ganeti.Daemon.Utils (handleMasterVerificationOptions) import qualified Ganeti.HTools.Backend.Luxi as Luxi import qualified Ganeti.HTools.Container as Container import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData) +import Ganeti.Jobs (waitForJobs) import Ganeti.Logging.Lifted +import qualified Ganeti.Luxi as L import Ganeti.MaintD.Autorepairs (harepTasks) +import Ganeti.MaintD.MemoryState import qualified Ganeti.Path as Path import Ganeti.Runtime (GanetiDaemon(GanetiMaintd)) import Ganeti.Types (JobId(..)) @@ -109,23 +115,43 @@ loadClusterData = do return $ cdata { cdNodes = nl } -- | Perform one round of maintenance -maintenance :: ResultT String IO () -maintenance = do +maintenance :: IORef MemoryState -> ResultT String IO () +maintenance memstate = do delay <- withErrorT show $ runNewWConfdClient maintenanceRoundDelay liftIO $ threadDelaySeconds delay + oldjobs <- getJobs memstate + logDebug $ "Jobs submitted in the last round: " + ++ show (map fromJobId oldjobs) + luxiSocket <- liftIO Path.defaultQuerySocket + bracket (mkResultT . liftM (either (Bad . show) Ok) + . tryIOError $ L.getLuxiClient luxiSocket) + (liftIO . L.closeClient) + $ void . mkResultT . waitForJobs oldjobs + liftIO $ clearJobs memstate logDebug "New round of maintenance started" cData <- loadClusterData let il = cdInstances cData nl = cdNodes cData nidxs = Set.fromList $ Container.keys nl (nidxs', jobs) <- harepTasks (nl, il) nidxs + liftIO $ appendJobs memstate jobs logDebug $ "Unaffected nodes " ++ show (Set.toList nidxs') ++ ", jobs submitted " ++ show (map fromJobId jobs) +-- | Expose a part of the memory state +exposeState :: J.JSON a => (MemoryState -> a) -> IORef MemoryState -> Snap () +exposeState selector ref = do + state <- liftIO $ readIORef ref + plainJSON $ selector state + -- | The information to serve via HTTP -httpInterface :: Snap () -httpInterface = ifTop (method GET $ plainJSON [1 :: Int]) - <|> error404 +httpInterface :: IORef MemoryState -> Snap () +httpInterface memstate = + ifTop (method GET $ plainJSON [1 :: Int]) + <|> dir "1" (ifTop (plainJSON J.JSNull) + <|> route [ ("jobs", exposeState msJobs memstate) + ]) + <|> error404 -- | Check function for luxid. checkMain :: CheckFn CheckResult @@ -138,10 +164,11 @@ prepMain opts _ = httpConfFromOpts GanetiMaintd opts -- | Main function. main :: MainFn CheckResult PrepResult main _ _ httpConf = do + memstate <- newIORef emptyMemoryState void . forkIO . forever $ do - res <- runResultT maintenance + res <- runResultT $ maintenance memstate logDebug $ "Maintenance round done, result is " ++ show res when (isBad res) $ do logInfo "Backing off after a round with internal errors" threadDelaySeconds C.maintdDefaultRoundDelay - httpServe httpConf httpInterface + httpServe httpConf $ httpInterface memstate