From aee68bb17ad80cae024cc9e196a28c55aa84b34e Mon Sep 17 00:00:00 2001
From: Klaus Aehlig <aehlig@google.com>
Date: Tue, 14 Jul 2015 15:43:24 +0200
Subject: [PATCH] Make maintd wait for its jobs and also expose them

As described in the design, the maintenance daemon will only start
a new round once all jobs from the old round are finished. Add this
functionality now. As the list of jobs of the current round is also
relevant information, we expose it over HTTP. In order to do so, we
keep an in-memory copy of the job list (while the authoritative copy
is still kept in the configuration, as per our design).

Signed-off-by: Klaus Aehlig <aehlig@google.com>
Reviewed-by: Petr Pudlak <pudlak@google.com>
---
 Makefile.am                      |  1 +
 src/Ganeti/MaintD/MemoryState.hs | 92 ++++++++++++++++++++++++++++++++
 src/Ganeti/MaintD/Server.hs      | 45 ++++++++++++----
 3 files changed, 129 insertions(+), 9 deletions(-)
 create mode 100644 src/Ganeti/MaintD/MemoryState.hs

diff --git a/Makefile.am b/Makefile.am
index bd5b64ba7..e82057c48 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -975,6 +975,7 @@ HS_LIB_SRCS = \
 	src/Ganeti/Logging/WriterLog.hs \
 	src/Ganeti/Luxi.hs \
 	src/Ganeti/MaintD/Autorepairs.hs \
+        src/Ganeti/MaintD/MemoryState.hs \
 	src/Ganeti/MaintD/Server.hs \
 	src/Ganeti/MaintD/Utils.hs \
 	src/Ganeti/Network.hs \
diff --git a/src/Ganeti/MaintD/MemoryState.hs b/src/Ganeti/MaintD/MemoryState.hs
new file mode 100644
index 000000000..7fd351a6f
--- /dev/null
+++ b/src/Ganeti/MaintD/MemoryState.hs
@@ -0,0 +1,92 @@
+{-| Memory copy of the state of the maintenance daemon.
+
+While the autoritative state of the maintenance daemon is
+stored in the configuration, the daemon keeps a copy of some
+values at run time, so that they can easily be exposed over
+HTTP.
+
+This module also provides functions for the mirrored information
+to update both, the authoritative state and the in-memory copy.
+
+-}
+
+{-
+
+Copyright (C) 2015 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.MaintD.MemoryState
+  ( MemoryState(..)
+  , emptyMemoryState
+  , getJobs
+  , clearJobs
+  , appendJobs
+  ) where
+
+import Control.Monad.IO.Class (liftIO)
+import Data.IORef (IORef, atomicModifyIORef)
+
+import Ganeti.BasicTypes (ResultT, withErrorT)
+import Ganeti.Types (JobId)
+import Ganeti.Utils (ordNub)
+import Ganeti.WConfd.Client ( runNewWConfdClient, maintenanceJobs, runModifyRpc
+                            , clearMaintdJobs, appendMaintdJobs )
+
+-- | In-memory copy of parts of the state of the maintenance
+-- daemon.
+data MemoryState = MemoryState
+  { msJobs :: [ JobId ]
+  }
+
+-- | Inital state of the in-memory copy. All parts will be updated
+-- before use, after one round at the latest this copy is up to date.
+emptyMemoryState :: MemoryState
+emptyMemoryState = MemoryState {
+                     msJobs = []
+                   }
+
+-- | Get the list of jobs from the authoritative copy, and update the
+-- in-memory copy as well.
+getJobs :: IORef MemoryState -> ResultT String IO [JobId]
+getJobs memstate = do
+  jobs <- withErrorT show $ runNewWConfdClient maintenanceJobs
+  liftIO . atomicModifyIORef memstate $ \ s -> (s { msJobs = jobs }, ())
+  return jobs
+
+-- | Reset the list of active jobs.
+clearJobs :: IORef MemoryState -> IO ()
+clearJobs memstate = do
+  runModifyRpc clearMaintdJobs
+  atomicModifyIORef memstate $ \ s -> ( s { msJobs = [] }, ())
+
+-- | Append jobs to the list of active jobs, if not present already
+appendJobs :: IORef MemoryState -> [JobId] -> IO ()
+appendJobs memstate jobs = do
+  runModifyRpc $ appendMaintdJobs jobs
+  atomicModifyIORef memstate
+    $ \ s -> ( s { msJobs = ordNub $ msJobs s ++ jobs }, ())
diff --git a/src/Ganeti/MaintD/Server.hs b/src/Ganeti/MaintD/Server.hs
index 4ab74b930..c68c7e392 100644
--- a/src/Ganeti/MaintD/Server.hs
+++ b/src/Ganeti/MaintD/Server.hs
@@ -43,14 +43,17 @@ module Ganeti.MaintD.Server
 
 import Control.Applicative ((<|>))
 import Control.Concurrent (forkIO)
-import Control.Monad (forever, void, unless, when)
+import Control.Exception.Lifted (bracket)
+import Control.Monad (forever, void, unless, when, liftM)
 import Control.Monad.IO.Class (liftIO)
+import Data.IORef (IORef, newIORef, readIORef)
 import qualified Data.Set as Set
-import Snap.Core (Snap, method, Method(GET), ifTop)
+import Snap.Core (Snap, method, Method(GET), ifTop, dir, route)
 import Snap.Http.Server (httpServe)
 import Snap.Http.Server.Config (Config)
 import System.IO.Error (tryIOError)
 import System.Time (getClockTime)
+import qualified Text.JSON as J
 
 import Ganeti.BasicTypes ( GenericResult(..), ResultT, runResultT, mkResultT
                          , withErrorT, isBad)
@@ -61,8 +64,11 @@ import Ganeti.Daemon.Utils (handleMasterVerificationOptions)
 import qualified Ganeti.HTools.Backend.Luxi as Luxi
 import qualified Ganeti.HTools.Container as Container
 import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData)
+import Ganeti.Jobs (waitForJobs)
 import Ganeti.Logging.Lifted
+import qualified Ganeti.Luxi as L
 import Ganeti.MaintD.Autorepairs (harepTasks)
+import Ganeti.MaintD.MemoryState
 import qualified Ganeti.Path as Path
 import Ganeti.Runtime (GanetiDaemon(GanetiMaintd))
 import Ganeti.Types (JobId(..))
@@ -109,23 +115,43 @@ loadClusterData = do
   return $ cdata { cdNodes = nl }
 
 -- | Perform one round of maintenance
-maintenance :: ResultT String IO ()
-maintenance = do
+maintenance :: IORef MemoryState -> ResultT String IO ()
+maintenance memstate = do
   delay <- withErrorT show $ runNewWConfdClient maintenanceRoundDelay
   liftIO $ threadDelaySeconds delay
+  oldjobs <- getJobs memstate
+  logDebug $ "Jobs submitted in the last round: "
+             ++ show (map fromJobId oldjobs)
+  luxiSocket <- liftIO Path.defaultQuerySocket
+  bracket (mkResultT . liftM (either (Bad . show) Ok)
+            . tryIOError $ L.getLuxiClient luxiSocket)
+          (liftIO . L.closeClient)
+          $ void . mkResultT . waitForJobs oldjobs
+  liftIO $ clearJobs memstate
   logDebug "New round of maintenance started"
   cData <- loadClusterData
   let il = cdInstances cData
       nl = cdNodes cData
       nidxs = Set.fromList $ Container.keys nl
   (nidxs', jobs) <- harepTasks (nl, il) nidxs
+  liftIO $ appendJobs memstate jobs
   logDebug $ "Unaffected nodes " ++ show (Set.toList nidxs')
              ++ ", jobs submitted " ++ show (map fromJobId jobs)
 
+-- | Expose a part of the memory state
+exposeState :: J.JSON a => (MemoryState -> a) -> IORef MemoryState -> Snap ()
+exposeState selector ref = do
+  state <- liftIO $ readIORef ref
+  plainJSON $ selector state
+
 -- | The information to serve via HTTP
-httpInterface :: Snap ()
-httpInterface = ifTop (method GET $ plainJSON [1 :: Int])
-                <|> error404
+httpInterface :: IORef MemoryState -> Snap ()
+httpInterface memstate =
+  ifTop (method GET $ plainJSON [1 :: Int])
+  <|> dir "1" (ifTop (plainJSON J.JSNull)
+               <|> route [ ("jobs", exposeState msJobs memstate)
+                         ])
+  <|> error404
 
 -- | Check function for luxid.
 checkMain :: CheckFn CheckResult
@@ -138,10 +164,11 @@ prepMain opts _ = httpConfFromOpts GanetiMaintd opts
 -- | Main function.
 main :: MainFn CheckResult PrepResult
 main _ _ httpConf = do
+  memstate <- newIORef emptyMemoryState
   void . forkIO . forever $ do
-    res <- runResultT maintenance
+    res <- runResultT $ maintenance memstate
     logDebug $ "Maintenance round done, result is " ++ show res
     when (isBad res) $ do
       logInfo "Backing off after a round with internal errors"
       threadDelaySeconds C.maintdDefaultRoundDelay
-  httpServe httpConf httpInterface
+  httpServe httpConf $ httpInterface memstate
-- 
GitLab