Commit db147305 authored by Tom Limoncelli, committed by Michael Hanselmann
Browse files

ganeti-watcher should attempt to fix ganeti-rapi



Update ganeti-watcher so that it tests the master's RAPI port with a
simple test (in this case GetVersion). If it fails, make one attempt
at restarting ganeti-rapi and retest.

- daemons/ganeti-watcher: Test rapi and make one attempt at restarting it.
- lib/utils.py: add StopDaemon() function.
Signed-off-by: Tom Limoncelli <tlim@google.com>
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
parent ff18f526
......@@ -47,8 +47,11 @@ from ganeti import luxi
from ganeti import ssconf
from ganeti import bdev
from ganeti import hypervisor
from ganeti import rapi
from ganeti.confd import client as confd_client
import ganeti.rapi.client # pylint: disable-msg=W0611
MAXTRIES = 5
BAD_STATES = ['ERROR_down']
......@@ -595,6 +598,34 @@ def OpenStateFile(path):
return os.fdopen(statefile_fd, "w+")
def IsRapiResponding(hostname):
"""Connects to RAPI port and does a simple test.
Connects to RAPI port of hostname and does a simple test. At this time, the
test is GetVersion.
@type hostname: string
@param hostname: hostname of the node to connect to.
@rtype: bool
@return: Whether RAPI is working properly
"""
ssl_config = rapi.client.CertAuthorityVerify(constants.RAPI_CERT_FILE)
rapi_client = \
rapi.client.GanetiRapiClient(hostname,
config_ssl_verification=ssl_config)
try:
master_version = rapi_client.GetVersion()
except rapi.client.CertificateError, err:
logging.warning("RAPI Error: CertificateError (%s)", err)
return False
except rapi.client.GanetiApiError, err:
logging.warning("RAPI Error: GanetiApiError (%s)", err)
return False
logging.debug("RAPI Result: master_version is %s", master_version)
return master_version == constants.RAPI_VERSION
def ParseOptions():
"""Parse the command line options.
......@@ -668,6 +699,18 @@ def main():
# we are on master now
utils.EnsureDaemon(constants.RAPI)
# If RAPI isn't responding to queries, try one restart.
logging.debug("Attempting to talk with RAPI.")
if not IsRapiResponding(constants.LOCALHOST_IP_ADDRESS):
logging.warning("Couldn't get answer from Ganeti RAPI daemon."
" Restarting Ganeti RAPI.")
utils.StopDaemon(constants.RAPI)
utils.EnsureDaemon(constants.RAPI)
logging.debug("Second attempt to talk with RAPI")
if not IsRapiResponding(constants.LOCALHOST_IP_ADDRESS):
logging.fatal("RAPI is not responding. Please investigate.")
logging.debug("Successfully talked to RAPI.")
try:
watcher = Watcher(options, notepad)
except errors.ConfigurationError:
......
......@@ -2261,6 +2261,19 @@ def EnsureDaemon(name):
return True
def StopDaemon(name):
    """Stop a daemon through the daemon utility script.

    Runs C{constants.DAEMON_UTIL} with the arguments C{stop} and the daemon
    name. A failing command is reported through the error log, including
    the failure reason and the command output.

    @param name: name of the daemon to stop, passed verbatim to the command
    @rtype: bool
    @return: True if the stop command succeeded, False if it failed

    """
    stop_cmd = [constants.DAEMON_UTIL, "stop", name]
    outcome = RunCmd(stop_cmd)

    if not outcome.failed:
        return True

    logging.error("Can't stop daemon '%s', failure %s, output: %s",
                  name, outcome.fail_reason, outcome.output)
    return False
def WritePidFile(name):
"""Write the current process pidfile.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment