Commit d392fa34 authored by Iustin Pop's avatar Iustin Pop
Browse files

Fix the SafeEncoding behaviour



Currently we have bad behaviour in SafeEncode:
  - binary strings are actually not handled correctly (ahem)
  - the encoding is not stable, due to use of string_escape

For this reason, we replace the use of string_escape with part of the
code of string escape (PyString_Repr in Objects/stringobject.c); we
don't escape backslashes or single quotes, since that is that makes it
nonstable. Furthermore, we only use the encode('ascii', ...) for unicode
inputs.

The patch also adds unittests for the function that test basic
behaviour.
Signed-off-by: default avatarIustin Pop <iustin@google.com>
Reviewed-by: default avatarGuido Trotter <ultrotter@google.com>
parent 835528af
......@@ -1816,11 +1816,13 @@ def SafeEncode(text):
"""Return a 'safe' version of a source string.
This function mangles the input string and returns a version that
should be safe to disply/encode as ASCII. To this end, we first
should be safe to display/encode as ASCII. To this end, we first
convert it to ASCII using the 'backslashreplace' encoding which
should get rid of any non-ASCII chars, and then we again encode it
via 'string_escape' which converts '\n' into '\\n' so that log
messages remain one-line.
should get rid of any non-ASCII chars, and then we process it
through a loop copied from the string repr sources in the python; we
don't use string_escape anymore since that escape single quotes and
backslashes too, and that is too much; and that escaping is not
stable, i.e. string_escape(string_escape(x)) != string_escape(x).
@type text: str or unicode
@param text: input data
......@@ -1828,9 +1830,23 @@ def SafeEncode(text):
@return: a safe version of text
"""
text = text.encode('ascii', 'backslashreplace')
text = text.encode('string_escape')
return text
if isinstance(text, unicode):
# onli if unicode; if str already, we handle it below
text = text.encode('ascii', 'backslashreplace')
resu = ""
for char in text:
c = ord(char)
if char == '\t':
resu += r'\t'
elif char == '\n':
resu += r'\n'
elif char == '\r':
resu += r'\'r'
elif c < 32 or c >= 127: # non-printable
resu += "\\x%02x" % (c & 0xff)
else:
resu += char
return resu
def CommaJoin(names):
......
......@@ -33,6 +33,7 @@ import socket
import shutil
import re
import select
import string
import ganeti
import testutils
......@@ -44,7 +45,7 @@ from ganeti.utils import IsProcessAlive, RunCmd, \
ParseUnit, AddAuthorizedKey, RemoveAuthorizedKey, \
ShellQuote, ShellQuoteArgs, TcpPing, ListVisibleFiles, \
SetEtcHostsEntry, RemoveEtcHostsEntry, FirstFree, OwnIpAddress, \
TailFile, ForceDictType
TailFile, ForceDictType, SafeEncode
from ganeti.errors import LockError, UnitParseError, GenericError, \
ProgrammerError
......@@ -969,5 +970,24 @@ class TestForceDictType(unittest.TestCase):
self.assertRaises(errors.TypeEnforcementError, self._fdt, {'d': '4 L'})
class TestSafeEncode(unittest.TestCase):
"""Test case for SafeEncode"""
def testAscii(self):
for txt in [string.digits, string.letters, string.punctuation]:
self.failUnlessEqual(txt, SafeEncode(txt))
def testDoubleEncode(self):
for i in range(255):
txt = SafeEncode(chr(i))
self.failUnlessEqual(txt, SafeEncode(txt))
def testUnicode(self):
# 1024 is high enough to catch non-direct ASCII mappings
for i in range(1024):
txt = SafeEncode(unichr(i))
self.failUnlessEqual(txt, SafeEncode(txt))
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment