From d392fa349561a113223bad4c922176964fd78817 Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Tue, 19 May 2009 18:36:36 +0200 Subject: [PATCH] Fix the SafeEncoding behaviour Currently we have bad behaviour in SafeEncode: - binary strings are actually not handled correctly (ahem) - the encoding is not stable, due to use of string_escape For this reason, we replace the use of string_escape with part of the code of string escape (PyString_Repr in Objects/stringobject.c); we don't escape backslashes or single quotes, since that is that makes it nonstable. Furthermore, we only use the encode('ascii', ...) for unicode inputs. The patch also adds unittests for the function that test basic behaviour. Signed-off-by: Iustin Pop <iustin@google.com> Reviewed-by: Guido Trotter <ultrotter@google.com> --- lib/utils.py | 30 +++++++++++++++++++++++------- test/ganeti.utils_unittest.py | 22 +++++++++++++++++++++- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/lib/utils.py b/lib/utils.py index e876e2c3e..ac781fb62 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -1816,11 +1816,13 @@ def SafeEncode(text): """Return a 'safe' version of a source string. This function mangles the input string and returns a version that - should be safe to disply/encode as ASCII. To this end, we first + should be safe to display/encode as ASCII. To this end, we first convert it to ASCII using the 'backslashreplace' encoding which - should get rid of any non-ASCII chars, and then we again encode it - via 'string_escape' which converts '\n' into '\\n' so that log - messages remain one-line. + should get rid of any non-ASCII chars, and then we process it + through a loop copied from the string repr sources in the python; we + don't use string_escape anymore since that escape single quotes and + backslashes too, and that is too much; and that escaping is not + stable, i.e. string_escape(string_escape(x)) != string_escape(x). @type text: str or unicode @param text: input data @@ -1828,9 +1830,23 @@ def SafeEncode(text): @return: a safe version of text """ - text = text.encode('ascii', 'backslashreplace') - text = text.encode('string_escape') - return text + if isinstance(text, unicode): + # onli if unicode; if str already, we handle it below + text = text.encode('ascii', 'backslashreplace') + resu = "" + for char in text: + c = ord(char) + if char == '\t': + resu += r'\t' + elif char == '\n': + resu += r'\n' + elif char == '\r': + resu += r'\'r' + elif c < 32 or c >= 127: # non-printable + resu += "\\x%02x" % (c & 0xff) + else: + resu += char + return resu def CommaJoin(names): diff --git a/test/ganeti.utils_unittest.py b/test/ganeti.utils_unittest.py index 1c2992c3d..ef3d626e4 100755 --- a/test/ganeti.utils_unittest.py +++ b/test/ganeti.utils_unittest.py @@ -33,6 +33,7 @@ import socket import shutil import re import select +import string import ganeti import testutils @@ -44,7 +45,7 @@ from ganeti.utils import IsProcessAlive, RunCmd, \ ParseUnit, AddAuthorizedKey, RemoveAuthorizedKey, \ ShellQuote, ShellQuoteArgs, TcpPing, ListVisibleFiles, \ SetEtcHostsEntry, RemoveEtcHostsEntry, FirstFree, OwnIpAddress, \ - TailFile, ForceDictType + TailFile, ForceDictType, SafeEncode from ganeti.errors import LockError, UnitParseError, GenericError, \ ProgrammerError @@ -969,5 +970,24 @@ class TestForceDictType(unittest.TestCase): self.assertRaises(errors.TypeEnforcementError, self._fdt, {'d': '4 L'}) +class TestSafeEncode(unittest.TestCase): + """Test case for SafeEncode""" + + def testAscii(self): + for txt in [string.digits, string.letters, string.punctuation]: + self.failUnlessEqual(txt, SafeEncode(txt)) + + def testDoubleEncode(self): + for i in range(255): + txt = SafeEncode(chr(i)) + self.failUnlessEqual(txt, SafeEncode(txt)) + + def testUnicode(self): + # 1024 is high enough to catch non-direct ASCII mappings + for i in range(1024): + txt = SafeEncode(unichr(i)) + self.failUnlessEqual(txt, SafeEncode(txt)) + + if __name__ == '__main__': unittest.main() -- GitLab