From 22d3e1840e271da6ad925fbe6d4560b6d196edf7 Mon Sep 17 00:00:00 2001 From: Guido Trotter <ultrotter@google.com> Date: Fri, 28 Aug 2009 15:03:39 +0300 Subject: [PATCH] Confd: don't fail if the config doesn't load Rather than quitting, we'll just continue to poll the config at a slow rate, hoping that sooner or later we'll get it back. This also allows confd to work on non-MC nodes and to transition smoothly from MC to non-MC, in addition to coping with a few temporary breakages. Signed-off-by: Guido Trotter <ultrotter@google.com> Reviewed-by: Iustin Pop <iustin@google.com> --- daemons/ganeti-confd | 53 ++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/daemons/ganeti-confd b/daemons/ganeti-confd index cf56aa8f2..558d82a26 100755 --- a/daemons/ganeti-confd +++ b/daemons/ganeti-confd @@ -235,7 +235,7 @@ class ConfdConfigurationReloader(object): try: self.inotify_handler.enable() except errors.InotifyError: - raise errors.ConfdFatalError(err) + self.polling = True try: reloaded = self.processor.reader.Reload() @@ -244,14 +244,13 @@ class ConfdConfigurationReloader(object): else: logging.debug("Skipped double config reload") except errors.ConfigurationError: - # transform a ConfigurationError in a fatal error, that will cause confd - # to quit. - raise errors.ConfdFatalError(err) + self.DisableConfd() + self.inotify_handler.disable() + return # Reset the timer. If we're polling it will go to the polling rate, if # we're not it will delay it again to its base safe timeout. 
- self._DisableTimer() - self._EnableTimer() + self._ResetTimer() def _DisableTimer(self): if self.timer_handle is not None: @@ -268,17 +267,27 @@ class ConfdConfigurationReloader(object): self.timer_handle = self.mainloop.scheduler.enter( timeout, 1, self.OnTimer, []) + def _ResetTimer(self): + self._DisableTimer() + self._EnableTimer() + def OnTimer(self): """Function called when the timer fires """ self.timer_handle = None + reloaded = False + was_disabled = False try: - reloaded = self.processor.reader.Reload() + if self.processor.reader is None: + was_disabled = True + self.EnableConfd() + reloaded = True + else: + reloaded = self.processor.reader.Reload() except errors.ConfigurationError: - # transform a ConfigurationError in a fatal error, that will cause confd - # to quit. - raise errors.ConfdFatalError(err) + self.DisableConfd() + return if self.polling and reloaded: logging.info("Reloaded ganeti config") @@ -286,19 +295,37 @@ class ConfdConfigurationReloader(object): # We have reloaded the config files, but received no inotify event. 
If # an event is pending though, we just happen to have timed out before # receiving it, so this is not a problem, and we shouldn't alert - if not self.notifier.check_events(): + if not self.notifier.check_events() and not was_disabled: logging.warning("Config file reload at timeout (inotify failure)") elif self.polling: # We're polling, but we haven't reloaded the config: # Going back to inotify mode logging.debug("Moving from polling mode to inotify mode") self.polling = False - self.inotify_handler.enable() + try: + self.inotify_handler.enable() + except errors.InotifyError: + self.polling = True else: logging.debug("Performed configuration check") self._EnableTimer() + def DisableConfd(self): + """Puts confd in non-serving mode + + """ + logging.warning("Confd is being disabled") + self.processor.Disable() + self.polling = False + self._ResetTimer() + + def EnableConfd(self): + self.processor.Enable() + logging.warning("Confd is being enabled") + self.polling = True + self._ResetTimer() + def CheckConfd(options, args): """Initial checks whether to run exit with a failure. @@ -310,8 +337,6 @@ def CheckConfd(options, args): print >> sys.stderr, "Need HMAC key %s to run" % constants.HMAC_CLUSTER_KEY sys.exit(constants.EXIT_FAILURE) - ssconf.CheckMasterCandidate(options.debug) - def ExecConfd(options, args): """Main confd function, executed with PID file held -- GitLab