diff --git a/daemons/ganeti-watcher b/daemons/ganeti-watcher index 1f82db8b998ac2cde96da01751dc6d149a533cfe..b0d924d435dbef03970489a69fdb39b8057e8330 100755 --- a/daemons/ganeti-watcher +++ b/daemons/ganeti-watcher @@ -47,8 +47,11 @@ from ganeti import luxi from ganeti import ssconf from ganeti import bdev from ganeti import hypervisor +from ganeti import rapi from ganeti.confd import client as confd_client +import ganeti.rapi.client # pylint: disable-msg=W0611 + MAXTRIES = 5 BAD_STATES = ['ERROR_down'] @@ -595,6 +598,34 @@ def OpenStateFile(path): return os.fdopen(statefile_fd, "w+") +def IsRapiResponding(hostname): + """Connects to RAPI port and does a simple test. + + Connects to RAPI port of hostname and does a simple test. At this time, the + test is GetVersion. + + @type hostname: string + @param hostname: hostname of the node to connect to. + @rtype: bool + @return: Whether RAPI is working properly + + """ + ssl_config = rapi.client.CertAuthorityVerify(constants.RAPI_CERT_FILE) + rapi_client = \ + rapi.client.GanetiRapiClient(hostname, + config_ssl_verification=ssl_config) + try: + master_version = rapi_client.GetVersion() + except rapi.client.CertificateError, err: + logging.warning("RAPI Error: CertificateError (%s)", err) + return False + except rapi.client.GanetiApiError, err: + logging.warning("RAPI Error: GanetiApiError (%s)", err) + return False + logging.debug("RAPI Result: master_version is %s", master_version) + return master_version == constants.RAPI_VERSION + + def ParseOptions(): """Parse the command line options. @@ -668,6 +699,18 @@ def main(): # we are on master now utils.EnsureDaemon(constants.RAPI) + # If RAPI isn't responding to queries, try one restart. + logging.debug("Attempting to talk with RAPI.") + if not IsRapiResponding(constants.LOCALHOST_IP_ADDRESS): + logging.warning("Couldn't get answer from Ganeti RAPI daemon." 
def StopDaemon(name):
  """Stop a daemon through the daemon utility.

  Runs C{constants.DAEMON_UTIL} with the arguments C{stop} and C{name}.

  @param name: daemon name passed to the utility
  @rtype: bool
  @return: True if the command did not fail; False otherwise, in which
      case the failure reason and command output are logged as an error

  """
  stop_cmd = [constants.DAEMON_UTIL, "stop", name]
  outcome = RunCmd(stop_cmd)

  if not outcome.failed:
    return True

  logging.error("Can't stop daemon '%s', failure %s, output: %s",
                name, outcome.fail_reason, outcome.output)
  return False