From a182a3ed71ac3fc17f265b9d6372d77e026dc054 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann <hansmi@google.com> Date: Wed, 21 Dec 2011 15:29:32 +0100 Subject: [PATCH] serializer: Remove JSON indentation and dict key sorting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Serializing to JSON using “simplejson” is significantly slower when indentation and/or sorting of dictionary keys is used. In simplejson 1.x the difference isn't that big, but with simplejson 2.x the difference can be up to a factor of 7.5. The reason is that the latter no longer uses C functions when sorting or indentation is used. With this patch we revert everything to simplejson's defaults, which should provide us with the best performance available. Signed-off-by: Michael Hanselmann <hansmi@google.com> Reviewed-by: Iustin Pop <iustin@google.com> --- daemons/import-export | 2 +- lib/hypervisor/hv_kvm.py | 5 ++- lib/jqueue.py | 2 +- lib/luxi.py | 2 +- lib/rpc.py | 3 +- lib/serializer.py | 40 +++-------------------- lib/server/noded.py | 2 +- lib/server/rapi.py | 2 +- test/ganeti.hypervisor.hv_kvm_unittest.py | 2 +- test/ganeti.serializer_unittest.py | 7 ++-- 10 files changed, 17 insertions(+), 50 deletions(-) diff --git a/daemons/import-export b/daemons/import-export index 86034608d..0163dff87 100755 --- a/daemons/import-export +++ b/daemons/import-export @@ -200,7 +200,7 @@ class StatusFile: self._data.mtime = time.time() utils.WriteFile(self._path, - data=serializer.DumpJson(self._data.ToDict(), indent=True), + data=serializer.DumpJson(self._data.ToDict()), mode=0400) diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py index c680cf421..6520132c1 100644 --- a/lib/hypervisor/hv_kvm.py +++ b/lib/hypervisor/hv_kvm.py @@ -185,9 +185,8 @@ class QmpMessage: return QmpMessage(data) def __str__(self): - # The protocol expects the JSON object to be sent as a single - # line, hence the need for indent=False. 
- return serializer.DumpJson(self.data, indent=False) + # The protocol expects the JSON object to be sent as a single line. + return serializer.DumpJson(self.data) def __eq__(self, other): # When comparing two QmpMessages, we are interested in comparing diff --git a/lib/jqueue.py b/lib/jqueue.py index 19d369f47..e030558df 100644 --- a/lib/jqueue.py +++ b/lib/jqueue.py @@ -2248,7 +2248,7 @@ class JobQueue(object): assert job.writable, "Can't update read-only job" filename = self._GetJobPath(job.id) - data = serializer.DumpJson(job.Serialize(), indent=False) + data = serializer.DumpJson(job.Serialize()) logging.debug("Writing job %s to %s", job.id, filename) self._UpdateJobQueueFile(filename, data, replicate) diff --git a/lib/luxi.py b/lib/luxi.py index 503b5ecf0..bfebbc1e5 100644 --- a/lib/luxi.py +++ b/lib/luxi.py @@ -343,7 +343,7 @@ def FormatRequest(method, args, version=None): request[KEY_VERSION] = version # Serialize the request - return serializer.DumpJson(request, indent=False) + return serializer.DumpJson(request) def CallLuxiMethod(transport_cb, method, args, version=None): diff --git a/lib/rpc.py b/lib/rpc.py index 0bedb3ceb..898224c11 100644 --- a/lib/rpc.py +++ b/lib/rpc.py @@ -442,8 +442,7 @@ class _RpcClientBase: read_timeout = timeout body = serializer.DumpJson(map(self._encoder, - zip(map(compat.snd, argdefs), args)), - indent=False) + zip(map(compat.snd, argdefs), args))) result = self._proc(node_list, procedure, body, read_timeout=read_timeout) diff --git a/lib/serializer.py b/lib/serializer.py index 0a411b17e..cbc11fa65 100644 --- a/lib/serializer.py +++ b/lib/serializer.py @@ -42,49 +42,19 @@ from ganeti import errors from ganeti import utils -_JSON_INDENT = 2 - _RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE) -def _GetJsonDumpers(_encoder_class=simplejson.JSONEncoder): - """Returns two JSON functions to serialize data. 
- - @rtype: (callable, callable) - @return: The function to generate a compact form of JSON and another one to - generate a more readable, indented form of JSON (if supported) - - """ - plain_encoder = _encoder_class(sort_keys=True) - - # Check whether the simplejson module supports indentation - try: - indent_encoder = _encoder_class(indent=_JSON_INDENT, sort_keys=True) - except TypeError: - # Indentation not supported - indent_encoder = plain_encoder - - return (plain_encoder.encode, indent_encoder.encode) - - -(_DumpJson, _DumpJsonIndent) = _GetJsonDumpers() - - -def DumpJson(data, indent=True): +def DumpJson(data): """Serialize a given object. @param data: the data to serialize - @param indent: whether to indent output (depends on simplejson version) - @return: the string representation of data """ - if indent: - fn = _DumpJsonIndent - else: - fn = _DumpJson + encoded = simplejson.dumps(data) - txt = _RE_EOLSP.sub("", fn(data)) + txt = _RE_EOLSP.sub("", encoded) if not txt.endswith("\n"): txt += "\n" @@ -112,7 +82,7 @@ def DumpSignedJson(data, key, salt=None, key_selector=None): @return: the string representation of data signed by the hmac key """ - txt = DumpJson(data, indent=False) + txt = DumpJson(data) if salt is None: salt = "" signed_dict = { @@ -127,7 +97,7 @@ def DumpSignedJson(data, key, salt=None, key_selector=None): signed_dict["hmac"] = utils.Sha1Hmac(key, txt, salt=salt + key_selector) - return DumpJson(signed_dict, indent=False) + return DumpJson(signed_dict) def LoadSignedJson(txt, key): diff --git a/lib/server/noded.py b/lib/server/noded.py index 46cebcfdc..b8f848cba 100644 --- a/lib/server/noded.py +++ b/lib/server/noded.py @@ -169,7 +169,7 @@ class NodeHttpServer(http.server.HttpServer): logging.exception("Error in RPC call") result = (False, "Error while executing backend function: %s" % str(err)) - return serializer.DumpJson(result, indent=False) + return serializer.DumpJson(result) # the new block devices -------------------------- diff 
--git a/lib/server/rapi.py b/lib/server/rapi.py index 6a8a76aee..f6fe2853c 100644 --- a/lib/server/rapi.py +++ b/lib/server/rapi.py @@ -79,7 +79,7 @@ class JsonErrorRequestExecutor(http.server.HttpServerRequestExecutor): @return: the body of the message """ - return serializer.DumpJson(values, indent=True) + return serializer.DumpJson(values) class RemoteApiHttpServer(http.auth.HttpServerRequestAuthentication, diff --git a/test/ganeti.hypervisor.hv_kvm_unittest.py b/test/ganeti.hypervisor.hv_kvm_unittest.py index 14ab34546..465f3dbce 100755 --- a/test/ganeti.hypervisor.hv_kvm_unittest.py +++ b/test/ganeti.hypervisor.hv_kvm_unittest.py @@ -101,7 +101,7 @@ class QmpStub(threading.Thread): conn.close() def encode_string(self, message): - return (serializer.DumpJson(message, indent=False) + + return (serializer.DumpJson(message) + hv_kvm.QmpConnection._MESSAGE_END_TOKEN) diff --git a/test/ganeti.serializer_unittest.py b/test/ganeti.serializer_unittest.py index a4f78371b..46aafc275 100755 --- a/test/ganeti.serializer_unittest.py +++ b/test/ganeti.serializer_unittest.py @@ -52,10 +52,9 @@ class TestSerializer(testutils.GanetiTestCase): ] def _TestSerializer(self, dump_fn, load_fn): - for indent in [True, False]: - for data in self._TESTDATA: - self.failUnless(dump_fn(data, indent=indent).endswith("\n")) - self.assertEqualValues(load_fn(dump_fn(data, indent=indent)), data) + for data in self._TESTDATA: + self.failUnless(dump_fn(data).endswith("\n")) + self.assertEqualValues(load_fn(dump_fn(data)), data) def testGeneric(self): self._TestSerializer(serializer.Dump, serializer.Load) -- GitLab