diff --git a/Makefile.am b/Makefile.am index d477a44ec73bf03633b89edd6c1ba94bf1ff79b4..cd41776ee8388f393645fc623fa8278377c8d14a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -356,7 +356,6 @@ docrst = \ doc/design-linuxha.rst \ doc/design-multi-reloc.rst \ doc/design-network.rst \ - doc/design-node-state-cache.rst \ doc/design-oob.rst \ doc/design-ovf-support.rst \ doc/design-partitioned.rst \ diff --git a/doc/design-draft.rst b/doc/design-draft.rst index a4a2acc68c179cb0d3d47e5c1820a0461d219753..d22f8618f8f1343c586e8447bb4175bfaa74878b 100644 --- a/doc/design-draft.rst +++ b/doc/design-draft.rst @@ -9,7 +9,6 @@ Design document drafts design-http-server.rst design-impexp2.rst design-network.rst - design-node-state-cache.rst design-resource-model.rst design-virtual-clusters.rst design-query-splitting.rst diff --git a/doc/design-node-state-cache.rst b/doc/design-node-state-cache.rst deleted file mode 100644 index 28218ef09e0f65052e5e10012cee8900bb73e6cb..0000000000000000000000000000000000000000 --- a/doc/design-node-state-cache.rst +++ /dev/null @@ -1,146 +0,0 @@ -================ -Node State Cache -================ - -.. contents:: :depth: 4 - -This is a design doc about the optimization of machine info retrieval. - - -Current State -============= - -Currently every RPC call is quite expensive as a TCP handshake has to be -made as well as SSL negotiation. This especially is visible when getting -node and instance info over and over again. - -This data, however, is quite easy to cache but needs some changes to how -we retrieve data in the RPC as this is spread over several RPC calls -and are hard to unify. - - -Proposed changes -================ - -To overcome this situation with multiple information retrieval calls we -introduce one single RPC call to get all the info in a organized manner, -for easy store in the cache. - -As of now we have 3 different information RPC calls: - -- ``call_node_info``: To retrieve disk and hyper-visor information -- ``call_instance_info``: To retrieve hyper-visor information for one - instance -- ``call_all_instance_info``: To retrieve hyper-visor information for - all instances - -Not to mention that ``call_all_instance_info`` and -``call_instance_info`` return different information in the dict. - -To unify the data and organize them we introduce a new RPC call -``call_node_snapshot`` doing all of the above in one go. Which -data we want to know will be specified about a dict of request -types: CACHE_REQ_HV, CACHE_REQ_DISKINFO, CACHE_REQ_BOOTID - -As this cache is representing the state of a given node we use the -name of a node as the key to retrieve the data from the cache. A -name-space separation of node and instance data is not possible at the -current point. This is due to the fact that some of the node hyper-visor -information like free memory is correlating with instances running. - -An example of how the data for a node in the cache looks like:: - - { - constants.CACHE_REQ_HV: { - constants.HT_XEN_PVM: { - _NODE_DATA: { - "memory_total": 32763, - "memory_free": 9159, - "memory_dom0": 1024, - "cpu_total": 4, - "cpu_sockets": 2 - }, - _INSTANCES_DATA: { - "inst1": { - "memory": 4096, - "state": "-b----", - "time": 102399.3, - "vcpus": 1 - }, - "inst2": { - "memory": 4096, - "state": "-b----", - "time": 12280.0, - "vcpus": 3 - } - } - } - }, - constants.CACHE_REQ_DISKINFO: { - "xenvg": { - "vg_size": 1048576, - "vg_free": 491520 - }, - } - constants.CACHE_REQ_BOOTID: "0dd0983c-913d-4ce6-ad94-0eceb77b69f9" - } - -This way we get easy to organize information which can simply be arranged in -the cache. - -The 3 RPC calls mentioned above will remain for compatibility reason but -will be simple wrappers around this RPC call. - - -Cache invalidation ------------------- - -The cache is invalidated at every RPC call which is not proven to not -modify the state of a given node. This is to avoid inconsistency between -cache and actual node state. - -There are some corner cases which invalidates the whole cache at once as -they usually affect other nodes states too: - - - migrate/failover - - import/export - -A request will be served from the cache if and only if it can be -fulfilled entirely from it (i.e. all the CACHE_REQ_* entries are already -present). Otherwise, we will invalidate the cache and actually do the -remote call. - -In addition, every cache entry will have a TTL of about 10 minutes which -should be enough to accommodate most use cases. - -We also allow an option to the calls to bypass the cache completely and -do a force remote call. However, this will invalidate the present -entries and populate the cache with the new retrieved values. - - -Additional cache population ---------------------------- - -Besides of the commands which calls above RPC calls, a full cache -population can also be done by a separate new op-code run by -``ganeti-watcher`` periodically. This op-code will be used instead of -the old ones. - - -Possible regressions -==================== - -As we change from getting "one hyper-visor information" to "get all we -know about this hyper-visor"-style we have a regression in time of -execution. The execution time is about 1.8x more in process execution -time. However, this does not include the latency and negotiation time -needed for each separate RPC call. Also if we hit the cache all 3 costs -will be 0. The only time taken is to look up the info in the cache and -the deserialization of the data. Which takes down the time from today -~300ms to ~100ms. - -.. vim: set textwidth=72 : -.. Local Variables: -.. mode: rst -.. fill-column: 72 -.. End: diff --git a/lib/cache.py b/lib/cache.py deleted file mode 100644 index ae1c49779d4545faf8da1b14aa0729796d0d580b..0000000000000000000000000000000000000000 --- a/lib/cache.py +++ /dev/null @@ -1,235 +0,0 @@ -# -# - -# Copyright (C) 2011 Google Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. - - -"""This module implements caching.""" - - -import time - -from ganeti import locking -from ganeti import serializer - - -TIMESTAMP = "timestamp" -TTL = "ttl" -VALUE = "value" - - -class CacheBase: - """This is the base class for all caches. - - """ - def __init__(self): - """Base init method. - - """ - - def Store(self, key, value, ttl=0): - """Stores key with value in the cache. - - @param key: The key to associate this cached value - @param value: The value to cache - @param ttl: TTL in seconds after when this entry is considered outdated - @returns: L{True} on success, L{False} on failure - - """ - raise NotImplementedError - - def GetMulti(self, keys): - """Retrieve multiple values from the cache. - - @param keys: The keys to retrieve - @returns: The list of values - - """ - raise NotImplementedError - - def Get(self, key): - """Retrieve the value from the cache. - - @param key: The key to retrieve - @returns: The value or L{None} if not found - - """ - raise NotImplementedError - - def Invalidate(self, keys): - """Invalidate given keys. - - @param keys: The list of keys to invalidate - @returns: L{True} on success, L{False} otherwise - - """ - raise NotImplementedError - - def Flush(self): - """Invalidates all of the keys and flushes the cache. - - """ - raise NotImplementedError - - def ResetState(self): - """Used to reset the state of the cache. - - This can be used to reinstantiate connection or any other state refresh - - """ - - def Cleanup(self): - """Cleanup the cache from expired entries. - - """ - - -class SimpleCache(CacheBase): - """Implements a very simple, dict base cache. - - """ - CLEANUP_ROUND = 1800 - _LOCK = "lock" - - def __init__(self, _time_fn=time.time): - """Initialize this class. - - @param _time_fn: Function used to return time (unittest only) - - """ - CacheBase.__init__(self) - - self._time_fn = _time_fn - - self.cache = {} - self.lock = locking.SharedLock("SimpleCache") - self.last_cleanup = self._time_fn() - - def _UnlockedCleanup(self): - """Does cleanup of the cache. - - """ - check_time = self._time_fn() - if (self.last_cleanup + self.CLEANUP_ROUND) <= check_time: - keys = [] - for key, value in self.cache.items(): - if not value[TTL]: - continue - - expired = value[TIMESTAMP] + value[TTL] - if expired < check_time: - keys.append(key) - self._UnlockedInvalidate(keys) - self.last_cleanup = check_time - - @locking.ssynchronized(_LOCK) - def Cleanup(self): - """Cleanup our cache. - - """ - self._UnlockedCleanup() - - @locking.ssynchronized(_LOCK) - def Store(self, key, value, ttl=0): - """Stores a value at key in the cache. - - See L{CacheBase.Store} for parameter description - - """ - assert ttl >= 0 - self._UnlockedCleanup() - val = serializer.Dump(value) - cache_val = { - TIMESTAMP: self._time_fn(), - TTL: ttl, - VALUE: val - } - self.cache[key] = cache_val - return True - - @locking.ssynchronized(_LOCK, shared=1) - def GetMulti(self, keys): - """Retrieve the values of keys from cache. - - See L{CacheBase.GetMulti} for parameter description - - """ - return [self._ExtractValue(key) for key in keys] - - @locking.ssynchronized(_LOCK, shared=1) - def Get(self, key): - """Retrieve the value of key from cache. - - See L{CacheBase.Get} for parameter description - - """ - return self._ExtractValue(key) - - @locking.ssynchronized(_LOCK) - def Invalidate(self, keys): - """Invalidates value for keys in cache. - - See L{CacheBase.Invalidate} for parameter description - - """ - return self._UnlockedInvalidate(keys) - - @locking.ssynchronized(_LOCK) - def Flush(self): - """Invalidates all keys and values in cache. - - See L{CacheBase.Flush} for parameter description - - """ - self.cache.clear() - self.last_cleanup = self._time_fn() - - def _UnlockedInvalidate(self, keys): - """Invalidate keys in cache. - - This is the unlocked version, see L{Invalidate} for parameter description - - """ - for key in keys: - self.cache.pop(key, None) - - return True - - def _ExtractValue(self, key): - """Extracts just the value for a key. - - This method is taking care if the value did not expire ans returns it - - @param key: The key to look for - @returns: The value if key is not expired, L{None} otherwise - - """ - try: - cache_val = self.cache[key] - except KeyError: - return None - else: - if cache_val[TTL] == 0: - return serializer.Load(cache_val[VALUE]) - else: - expired = cache_val[TIMESTAMP] + cache_val[TTL] - - if self._time_fn() <= expired: - return serializer.Load(cache_val[VALUE]) - else: - return None diff --git a/test/ganeti.cache_unittest.py b/test/ganeti.cache_unittest.py deleted file mode 100755 index d9dffb197e508a532417ad04c68bcfd3f1075678..0000000000000000000000000000000000000000 --- a/test/ganeti.cache_unittest.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/python -# - -# Copyright (C) 2011 Google Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. - -"""Script for testing ganeti.cache""" - -import testutils -import unittest - -from ganeti import cache - - -class ReturnStub: - def __init__(self, values): - self.values = values - - def __call__(self): - assert self.values - return self.values.pop(0) - - -class SimpleCacheTest(unittest.TestCase): - def setUp(self): - self.cache = cache.SimpleCache() - - def testNoKey(self): - self.assertEqual(self.cache.GetMulti(["i-dont-exist", "neither-do-i", "no"]), - [None, None, None]) - - def testCache(self): - value = 0xc0ffee - self.assert_(self.cache.Store("i-exist", value)) - self.assertEqual(self.cache.GetMulti(["i-exist"]), [value]) - - def testMixed(self): - value = 0xb4dc0de - self.assert_(self.cache.Store("i-exist", value)) - self.assertEqual(self.cache.GetMulti(["i-exist", "i-dont"]), [value, None]) - - def testTtl(self): - my_times = ReturnStub([0, 1, 1, 2, 3, 5]) - ttl_cache = cache.SimpleCache(_time_fn=my_times) - self.assert_(ttl_cache.Store("test-expire", 0xdeadbeef, ttl=2)) - - # At this point time will return 2, 1 (start) + 2 (ttl) = 3, still valid - self.assertEqual(ttl_cache.Get("test-expire"), 0xdeadbeef) - - # At this point time will return 3, 1 (start) + 2 (ttl) = 3, still valid - self.assertEqual(ttl_cache.Get("test-expire"), 0xdeadbeef) - - # We are at 5, < 3, invalid - self.assertEqual(ttl_cache.Get("test-expire"), None) - self.assertFalse(my_times.values) - - def testCleanup(self): - my_times = ReturnStub([0, 1, 1, 2, 2, 3, 3, 5, 5, - 21 + cache.SimpleCache.CLEANUP_ROUND, - 34 + cache.SimpleCache.CLEANUP_ROUND, - 55 + cache.SimpleCache.CLEANUP_ROUND * 2, - 89 + cache.SimpleCache.CLEANUP_ROUND * 3]) - # Index 0 - ttl_cache = cache.SimpleCache(_time_fn=my_times) - # Index 1, 2 - self.assert_(ttl_cache.Store("foobar", 0x1dea, ttl=6)) - # Index 3, 4 - self.assert_(ttl_cache.Store("baz", 0xc0dea55, ttl=11)) - # Index 6, 7 - self.assert_(ttl_cache.Store("long-foobar", "pretty long", - ttl=(22 + cache.SimpleCache.CLEANUP_ROUND))) - # Index 7, 8 - self.assert_(ttl_cache.Store("foobazbar", "alive forever")) - - self.assertEqual(set(ttl_cache.cache.keys()), - set(["foobar", "baz", "long-foobar", "foobazbar"])) - ttl_cache.Cleanup() - self.assertEqual(set(ttl_cache.cache.keys()), - set(["long-foobar", "foobazbar"])) - ttl_cache.Cleanup() - self.assertEqual(set(ttl_cache.cache.keys()), - set(["long-foobar", "foobazbar"])) - ttl_cache.Cleanup() - self.assertEqual(set(ttl_cache.cache.keys()), set(["foobazbar"])) - ttl_cache.Cleanup() - self.assertEqual(set(ttl_cache.cache.keys()), set(["foobazbar"])) - - -if __name__ == "__main__": - testutils.GanetiTestProgram()