Commit 521136df authored by Klaus Aehlig's avatar Klaus Aehlig

Make luxid verify master status at startup

As luxid now starts jobs, make it verify that it
is running on the master node by carrying out the
Ganeti voting process.
Signed-off-by: default avatarKlaus Aehlig <aehlig@google.com>
Reviewed-by: default avatarPetr Pudlak <pudlak@google.com>
parent 4fa74266
......@@ -32,18 +32,23 @@ module Ganeti.Query.Server
import Control.Applicative
import Control.Concurrent
import Control.Exception
import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void)
import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void,
unless)
import Control.Monad.IO.Class
import Control.Monad.Trans (lift)
import Control.Monad.Trans.Maybe
import Data.Bits (bitSize)
import Data.Either (rights)
import qualified Data.Foldable as F
import qualified Data.Set as Set (toList)
import Data.IORef
import Data.List (partition)
import Data.Maybe (fromMaybe)
import qualified Text.JSON as J
import Text.JSON (encode, showJSON, JSValue(..))
import System.Info (arch)
import System.Directory
import System.Exit (ExitCode(..))
import System.Posix.Signals as P
import qualified Ganeti.Constants as C
......@@ -445,9 +450,65 @@ activateMasterIP = runResultT $ do
liftIO $ logDebug "finished activating master IP address"
return ()
-- | Gather votes from all nodes and verify that we we are
-- the master. Return True if the voting is won, False if
-- not enough
verifyMasterVotes :: IO (Result Bool)
verifyMasterVotes = runResultT $ do
liftIO $ logDebug "Gathering votes for the master node"
myName <- liftIO getFQDN
liftIO . logDebug $ "My hostname is " ++ myName
conf_file <- liftIO Path.clusterConfFile
config <- mkResultT $ Config.loadConfig conf_file
let nodes = F.toList $ configNodes config
votes <- liftIO . executeRpcCall nodes $ RpcCallMasterNodeName
let (missing, valid) = partition (isLeft . snd) votes
noDataNodes = map (nodeName . fst) missing
validVotes = map rpcResultMasterNodeNameMaster . rights $ map snd valid
inFavor = length $ filter (== myName) validVotes
voters = length nodes
unknown = length missing
liftIO . unless (null noDataNodes) . logWarning
. (++) "No voting RPC result from " $ show noDataNodes
liftIO . logDebug . (++) "Valid votes: " $ show validVotes
if 2 * inFavor > voters
then return True
else if 2 * (inFavor + unknown) > voters
then return False
else fail $ "Voting cannot be won by " ++ myName
++ ", valid votes of " ++ show voters
++ " are " ++ show validVotes
-- | Verify, by voting, that this node is the master. Bad if we're not.
-- Allow the given number of retries to wait for not available nodes.
verifyMaster :: Int -> IO (Result ())
verifyMaster retries = runResultT $ do
won <- mkResultT verifyMasterVotes
unless won $
if retries <= 0
then fail "Couldn't gather voting results of enough nodes"
else do
liftIO $ logDebug "Voting not final due to missing votes."
liftIO . threadDelay $ C.masterVotingRetryIntervall * 1000000
mkResultT $ verifyMaster (retries - 1)
-- | Check function for luxid.
checkMain :: CheckFn ()
checkMain _ = return $ Right ()
checkMain opts =
if optNoVoting opts
then if optYesDoIt opts
then return $ Right ()
else do
logError "The no-voting option is dangerous and cannot be\
\ given without providing yes-do-it as well."
return . Left $ ExitFailure C.exitFailure
else do
masterStatus <- verifyMaster C.masterVotingRetries
case masterStatus of
Bad s -> do
logError $ "Failed to verify master status: " ++ s
return . Left $ ExitFailure C.exitFailure
Ok _ -> return $ Right ()
-- | Prepare function for luxid.
prepMain :: PrepFn () PrepResult
......
......@@ -36,6 +36,8 @@ options =
, oNoUserChecks
, oDebug
, oSyslogUsage
, oNoVoting
, oYesDoIt
]
-- | Main function.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment