From 9ca87fb31173b18119fb39c18dc3668e78a4054a Mon Sep 17 00:00:00 2001 From: Michael Hanselmann <hansmi@google.com> Date: Tue, 26 Jul 2011 14:14:18 +0200 Subject: [PATCH] watcher: Split node maintenance into separate module The node maintenance class is standalone. Signed-off-by: Michael Hanselmann <hansmi@google.com> Reviewed-by: Iustin Pop <iustin@google.com> --- Makefile.am | 3 +- lib/watcher/__init__.py | 124 +------------------------------- lib/watcher/nodemaint.py | 148 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 122 deletions(-) create mode 100644 lib/watcher/nodemaint.py diff --git a/Makefile.am b/Makefile.am index bec651235..3ec9b2279 100644 --- a/Makefile.am +++ b/Makefile.am @@ -244,7 +244,8 @@ impexpd_PYTHON = \ lib/impexpd/__init__.py watcher_PYTHON = \ - lib/watcher/__init__.py + lib/watcher/__init__.py \ + lib/watcher/nodemaint.py server_PYTHON = \ lib/server/__init__.py \ diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py index 0e6206366..9f6b2c952 100644 --- a/lib/watcher/__init__.py +++ b/lib/watcher/__init__.py @@ -27,10 +27,6 @@ by a node reboot. Run from cron or similar. """ -# pylint: disable-msg=C0103,W0142 - -# C0103: Invalid name ganeti-watcher - import os import os.path import sys @@ -46,14 +42,11 @@ from ganeti import errors from ganeti import opcodes from ganeti import cli from ganeti import luxi -from ganeti import ssconf -from ganeti import bdev -from ganeti import hypervisor from ganeti import rapi -from ganeti.confd import client as confd_client from ganeti import netutils import ganeti.rapi.client # pylint: disable-msg=W0611 +import ganeti.watcher.nodemaint # pylint: disable-msg=W0611 MAXTRIES = 5 @@ -125,117 +118,6 @@ def RunWatcherHooks(): runresult.output) -class NodeMaintenance(object): - """Talks to confd daemons and possible shutdown instances/drbd devices. - - """ - def __init__(self): - self.store_cb = confd_client.StoreResultCallback() - self.filter_cb = confd_client.ConfdFilterCallback(self.store_cb) - self.confd_client = confd_client.GetConfdClient(self.filter_cb) - - @staticmethod - def ShouldRun(): - """Checks whether node maintenance should run. - - """ - try: - return ssconf.SimpleStore().GetMaintainNodeHealth() - except errors.ConfigurationError, err: - logging.error("Configuration error, not activating node maintenance: %s", - err) - return False - - @staticmethod - def GetRunningInstances(): - """Compute list of hypervisor/running instances. - - """ - hyp_list = ssconf.SimpleStore().GetHypervisorList() - results = [] - for hv_name in hyp_list: - try: - hv = hypervisor.GetHypervisor(hv_name) - ilist = hv.ListInstances() - results.extend([(iname, hv_name) for iname in ilist]) - except: # pylint: disable-msg=W0702 - logging.error("Error while listing instances for hypervisor %s", - hv_name, exc_info=True) - return results - - @staticmethod - def GetUsedDRBDs(): - """Get list of used DRBD minors. - - """ - return bdev.DRBD8.GetUsedDevs().keys() - - @classmethod - def DoMaintenance(cls, role): - """Maintain the instance list. - - """ - if role == constants.CONFD_NODE_ROLE_OFFLINE: - inst_running = cls.GetRunningInstances() - cls.ShutdownInstances(inst_running) - drbd_running = cls.GetUsedDRBDs() - cls.ShutdownDRBD(drbd_running) - else: - logging.debug("Not doing anything for role %s", role) - - @staticmethod - def ShutdownInstances(inst_running): - """Shutdown running instances. - - """ - names_running = set([i[0] for i in inst_running]) - if names_running: - logging.info("Following instances should not be running," - " shutting them down: %s", utils.CommaJoin(names_running)) - # this dictionary will collapse duplicate instance names (only - # xen pvm/vhm) into a single key, which is fine - i2h = dict(inst_running) - for name in names_running: - hv_name = i2h[name] - hv = hypervisor.GetHypervisor(hv_name) - hv.StopInstance(None, force=True, name=name) - - @staticmethod - def ShutdownDRBD(drbd_running): - """Shutdown active DRBD devices. - - """ - if drbd_running: - logging.info("Following DRBD minors should not be active," - " shutting them down: %s", utils.CommaJoin(drbd_running)) - for minor in drbd_running: - # pylint: disable-msg=W0212 - # using the private method as is, pending enhancements to the DRBD - # interface - bdev.DRBD8._ShutdownAll(minor) - - def Exec(self): - """Check node status versus cluster desired state. - - """ - my_name = netutils.Hostname.GetSysName() - req = confd_client.ConfdClientRequest(type= - constants.CONFD_REQ_NODE_ROLE_BYNAME, - query=my_name) - self.confd_client.SendRequest(req, async=False, coverage=-1) - timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt) - if not timed_out: - # should have a valid response - status, result = self.store_cb.GetResponse(req.rsalt) - assert status, "Missing result but received replies" - if not self.filter_cb.consistent[req.rsalt]: - logging.warning("Inconsistent replies, not doing anything") - return - self.DoMaintenance(result.server_reply.answer) - else: - logging.warning("Confd query timed out, cannot do maintenance actions") - - class WatcherState(object): """Interface to a state file recording restart attempts. @@ -756,8 +638,8 @@ def Main(): RunWatcherHooks() # run node maintenance in all cases, even if master, so that old # masters can be properly cleaned up too - if NodeMaintenance.ShouldRun(): - NodeMaintenance().Exec() + if nodemaint.NodeMaintenance.ShouldRun(): # pylint: disable-msg=E0602 + nodemaint.NodeMaintenance().Exec() # pylint: disable-msg=E0602 notepad = WatcherState(statefile) try: diff --git a/lib/watcher/nodemaint.py b/lib/watcher/nodemaint.py new file mode 100644 index 000000000..2cf5a4b23 --- /dev/null +++ b/lib/watcher/nodemaint.py @@ -0,0 +1,148 @@ +# +# + +# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + + +"""Module doing node maintenance for Ganeti watcher. + +""" + +import logging + +from ganeti import bdev +from ganeti import constants +from ganeti import errors +from ganeti import hypervisor +from ganeti import netutils +from ganeti import ssconf +from ganeti import utils +from ganeti import confd + +import ganeti.confd.client # pylint: disable-msg=W0611 + + +class NodeMaintenance(object): + """Talks to confd daemons and possible shutdown instances/drbd devices. + + """ + def __init__(self): + self.store_cb = confd.client.StoreResultCallback() + self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb) + self.confd_client = confd.client.GetConfdClient(self.filter_cb) + + @staticmethod + def ShouldRun(): + """Checks whether node maintenance should run. + + """ + try: + return ssconf.SimpleStore().GetMaintainNodeHealth() + except errors.ConfigurationError, err: + logging.error("Configuration error, not activating node maintenance: %s", + err) + return False + + @staticmethod + def GetRunningInstances(): + """Compute list of hypervisor/running instances. + + """ + hyp_list = ssconf.SimpleStore().GetHypervisorList() + results = [] + for hv_name in hyp_list: + try: + hv = hypervisor.GetHypervisor(hv_name) + ilist = hv.ListInstances() + results.extend([(iname, hv_name) for iname in ilist]) + except: # pylint: disable-msg=W0702 + logging.error("Error while listing instances for hypervisor %s", + hv_name, exc_info=True) + return results + + @staticmethod + def GetUsedDRBDs(): + """Get list of used DRBD minors. + + """ + return bdev.DRBD8.GetUsedDevs().keys() + + @classmethod + def DoMaintenance(cls, role): + """Maintain the instance list. + + """ + if role == constants.CONFD_NODE_ROLE_OFFLINE: + inst_running = cls.GetRunningInstances() + cls.ShutdownInstances(inst_running) + drbd_running = cls.GetUsedDRBDs() + cls.ShutdownDRBD(drbd_running) + else: + logging.debug("Not doing anything for role %s", role) + + @staticmethod + def ShutdownInstances(inst_running): + """Shutdown running instances. + + """ + names_running = set([i[0] for i in inst_running]) + if names_running: + logging.info("Following instances should not be running," + " shutting them down: %s", utils.CommaJoin(names_running)) + # this dictionary will collapse duplicate instance names (only + # xen pvm/vhm) into a single key, which is fine + i2h = dict(inst_running) + for name in names_running: + hv_name = i2h[name] + hv = hypervisor.GetHypervisor(hv_name) + hv.StopInstance(None, force=True, name=name) + + @staticmethod + def ShutdownDRBD(drbd_running): + """Shutdown active DRBD devices. + + """ + if drbd_running: + logging.info("Following DRBD minors should not be active," + " shutting them down: %s", utils.CommaJoin(drbd_running)) + for minor in drbd_running: + # pylint: disable-msg=W0212 + # using the private method as is, pending enhancements to the DRBD + # interface + bdev.DRBD8._ShutdownAll(minor) + + def Exec(self): + """Check node status versus cluster desired state. + + """ + my_name = netutils.Hostname.GetSysName() + req = confd.client.ConfdClientRequest(type= + constants.CONFD_REQ_NODE_ROLE_BYNAME, + query=my_name) + self.confd_client.SendRequest(req, async=False, coverage=-1) + timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt) + if not timed_out: + # should have a valid response + status, result = self.store_cb.GetResponse(req.rsalt) + assert status, "Missing result but received replies" + if not self.filter_cb.consistent[req.rsalt]: + logging.warning("Inconsistent replies, not doing anything") + return + self.DoMaintenance(result.server_reply.answer) + else: + logging.warning("Confd query timed out, cannot do maintenance actions") -- GitLab