Server.hs 6.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
{-# LANGUAGE OverloadedStrings #-}

{-| Implementation of the Ganeti maintenenace server.

-}

{-

Copyright (C) 2015 Google Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-}

module Ganeti.MaintD.Server
  ( options
  , main
  , checkMain
  , prepMain
  ) where

import Control.Applicative ((<|>))
45
import Control.Concurrent (forkIO)
46 47
import Control.Exception.Lifted (bracket)
import Control.Monad (forever, void, unless, when, liftM)
48
import Control.Monad.IO.Class (liftIO)
49
import Data.IORef (IORef, newIORef, readIORef)
50
import qualified Data.Set as Set
51
import Snap.Core (Snap, method, Method(GET), ifTop, dir, route)
52 53
import Snap.Http.Server (httpServe)
import Snap.Http.Server.Config (Config)
54 55
import System.IO.Error (tryIOError)
import System.Time (getClockTime)
56
import qualified Text.JSON as J
57

58 59
import Ganeti.BasicTypes ( GenericResult(..), ResultT, runResultT, mkResultT
                         , withErrorT, isBad)
60 61 62 63
import qualified Ganeti.Constants as C
import Ganeti.Daemon ( OptType, CheckFn, PrepFn, MainFn, oDebug
                     , oNoVoting, oYesDoIt, oPort, oBindAddress, oNoDaemonize)
import Ganeti.Daemon.Utils (handleMasterVerificationOptions)
64 65 66
import qualified Ganeti.HTools.Backend.Luxi as Luxi
import qualified Ganeti.HTools.Container as Container
import Ganeti.HTools.Loader (ClusterData(..), mergeData, checkData)
67
import Ganeti.Jobs (waitForJobs)
68
import Ganeti.Logging.Lifted
69
import qualified Ganeti.Luxi as L
70
import Ganeti.MaintD.Autorepairs (harepTasks)
71
import Ganeti.MaintD.MemoryState
72
import qualified Ganeti.Path as Path
73
import Ganeti.Runtime (GanetiDaemon(GanetiMaintd))
74
import Ganeti.Types (JobId(..))
75
import Ganeti.Utils (threadDelaySeconds)
76
import Ganeti.Utils.Http (httpConfFromOpts, plainJSON, error404)
77
import Ganeti.WConfd.Client (runNewWConfdClient, maintenanceRoundDelay)
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95

-- | Options list and functions.
options :: [OptType]
options =
  [ oNoDaemonize
  , oDebug
  , oPort C.defaultMaintdPort
  , oBindAddress
  , oNoVoting
  , oYesDoIt
  ]

-- | Type alias for checkMain results.
type CheckResult = ()

-- | Type alias for prepMain results
type PrepResult = Config Snap ()

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
-- | Load cluster data
--
-- At the moment, only the static data is fetched via luxi;
-- once we support load-based balancing in maintd as well,
-- we also need to query the MonDs for the load data.
loadClusterData :: ResultT String IO ClusterData
loadClusterData = do
  now <- liftIO getClockTime
  socket <- liftIO Path.defaultQuerySocket
  either_inp <-  liftIO . tryIOError $ Luxi.loadData socket
  input_data <- mkResultT $ case either_inp of
                  Left e -> do
                    let msg = show e
                    logNotice $ "Couldn't read data from luxid: " ++ msg
                    return $ Bad msg
                  Right r -> return r
  cdata <- mkResultT . return $ mergeData [] [] [] [] now input_data
  let (msgs, nl) = checkData (cdNodes cdata) (cdInstances cdata)
  unless (null msgs) . logDebug $ "Cluster data inconsistencies: " ++ show msgs
  return $ cdata { cdNodes = nl }

-- | Perform one round of maintenance
118 119
maintenance :: IORef MemoryState -> ResultT String IO ()
maintenance memstate = do
120 121
  delay <- withErrorT show $ runNewWConfdClient maintenanceRoundDelay
  liftIO $ threadDelaySeconds delay
122 123 124 125 126 127 128 129 130
  oldjobs <- getJobs memstate
  logDebug $ "Jobs submitted in the last round: "
             ++ show (map fromJobId oldjobs)
  luxiSocket <- liftIO Path.defaultQuerySocket
  bracket (mkResultT . liftM (either (Bad . show) Ok)
            . tryIOError $ L.getLuxiClient luxiSocket)
          (liftIO . L.closeClient)
          $ void . mkResultT . waitForJobs oldjobs
  liftIO $ clearJobs memstate
131 132 133 134 135 136
  logDebug "New round of maintenance started"
  cData <- loadClusterData
  let il = cdInstances cData
      nl = cdNodes cData
      nidxs = Set.fromList $ Container.keys nl
  (nidxs', jobs) <- harepTasks (nl, il) nidxs
137
  liftIO $ appendJobs memstate jobs
138 139 140
  logDebug $ "Unaffected nodes " ++ show (Set.toList nidxs')
             ++ ", jobs submitted " ++ show (map fromJobId jobs)

141 142 143 144 145 146
-- | Expose a part of the memory state
exposeState :: J.JSON a => (MemoryState -> a) -> IORef MemoryState -> Snap ()
exposeState selector ref = do
  state <- liftIO $ readIORef ref
  plainJSON $ selector state

147
-- | The information to serve via HTTP
148 149 150 151 152 153 154
httpInterface :: IORef MemoryState -> Snap ()
httpInterface memstate =
  ifTop (method GET $ plainJSON [1 :: Int])
  <|> dir "1" (ifTop (plainJSON J.JSNull)
               <|> route [ ("jobs", exposeState msJobs memstate)
                         ])
  <|> error404
155 156 157 158 159 160 161 162 163 164 165

-- | Check function for luxid.
checkMain :: CheckFn CheckResult
checkMain = handleMasterVerificationOptions

-- | Prepare function for luxid.
prepMain :: PrepFn CheckResult PrepResult
prepMain opts _ = httpConfFromOpts GanetiMaintd opts

-- | Main function.
main :: MainFn CheckResult PrepResult
166
main _ _ httpConf = do
167
  memstate <- newIORef emptyMemoryState
168
  void . forkIO . forever $ do
169
    res <- runResultT $ maintenance memstate
170
    logDebug $ "Maintenance round done, result is " ++ show res
171 172 173
    when (isBad res) $ do
      logInfo "Backing off after a round with internal errors"
      threadDelaySeconds C.maintdDefaultRoundDelay
174
  httpServe httpConf $ httpInterface memstate