From d392fa349561a113223bad4c922176964fd78817 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 19 May 2009 18:36:36 +0200
Subject: [PATCH] Fix the SafeEncoding behaviour

Currently we have bad behaviour in SafeEncode:
  - binary strings are actually not handled correctly (ahem)
  - the encoding is not stable, due to use of string_escape

For this reason, we replace the use of string_escape with part of the
code of string escape (PyString_Repr in Objects/stringobject.c); we
don't escape backslashes or single quotes, since that is that makes it
nonstable. Furthermore, we only use the encode('ascii', ...) for unicode
inputs.

The patch also adds unittests for the function that test basic
behaviour.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
---
 lib/utils.py                  | 30 +++++++++++++++++++++++-------
 test/ganeti.utils_unittest.py | 22 +++++++++++++++++++++-
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/lib/utils.py b/lib/utils.py
index e876e2c3e..ac781fb62 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1816,11 +1816,13 @@ def SafeEncode(text):
   """Return a 'safe' version of a source string.
 
   This function mangles the input string and returns a version that
-  should be safe to disply/encode as ASCII. To this end, we first
+  should be safe to display/encode as ASCII. To this end, we first
   convert it to ASCII using the 'backslashreplace' encoding which
-  should get rid of any non-ASCII chars, and then we again encode it
-  via 'string_escape' which converts '\n' into '\\n' so that log
-  messages remain one-line.
+  should get rid of any non-ASCII chars, and then we process it
+  through a loop copied from the string repr sources in the python; we
+  don't use string_escape anymore since that escape single quotes and
+  backslashes too, and that is too much; and that escaping is not
+  stable, i.e. string_escape(string_escape(x)) != string_escape(x).
 
   @type text: str or unicode
   @param text: input data
@@ -1828,9 +1830,23 @@ def SafeEncode(text):
   @return: a safe version of text
 
   """
-  text = text.encode('ascii', 'backslashreplace')
-  text = text.encode('string_escape')
-  return text
+  if isinstance(text, unicode):
+    # onli if unicode; if str already, we handle it below
+    text = text.encode('ascii', 'backslashreplace')
+  resu = ""
+  for char in text:
+    c = ord(char)
+    if char  == '\t':
+      resu += r'\t'
+    elif char == '\n':
+      resu += r'\n'
+    elif char == '\r':
+      resu += r'\'r'
+    elif c < 32 or c >= 127: # non-printable
+      resu += "\\x%02x" % (c & 0xff)
+    else:
+      resu += char
+  return resu
 
 
 def CommaJoin(names):
diff --git a/test/ganeti.utils_unittest.py b/test/ganeti.utils_unittest.py
index 1c2992c3d..ef3d626e4 100755
--- a/test/ganeti.utils_unittest.py
+++ b/test/ganeti.utils_unittest.py
@@ -33,6 +33,7 @@ import socket
 import shutil
 import re
 import select
+import string
 
 import ganeti
 import testutils
@@ -44,7 +45,7 @@ from ganeti.utils import IsProcessAlive, RunCmd, \
      ParseUnit, AddAuthorizedKey, RemoveAuthorizedKey, \
      ShellQuote, ShellQuoteArgs, TcpPing, ListVisibleFiles, \
      SetEtcHostsEntry, RemoveEtcHostsEntry, FirstFree, OwnIpAddress, \
-     TailFile, ForceDictType
+     TailFile, ForceDictType, SafeEncode
 
 from ganeti.errors import LockError, UnitParseError, GenericError, \
      ProgrammerError
@@ -969,5 +970,24 @@ class TestForceDictType(unittest.TestCase):
     self.assertRaises(errors.TypeEnforcementError, self._fdt, {'d': '4 L'})
 
 
+class TestSafeEncode(unittest.TestCase):
+  """Test case for SafeEncode"""
+
+  def testAscii(self):
+    for txt in [string.digits, string.letters, string.punctuation]:
+      self.failUnlessEqual(txt, SafeEncode(txt))
+
+  def testDoubleEncode(self):
+    for i in range(255):
+      txt = SafeEncode(chr(i))
+      self.failUnlessEqual(txt, SafeEncode(txt))
+
+  def testUnicode(self):
+    # 1024 is high enough to catch non-direct ASCII mappings
+    for i in range(1024):
+      txt = SafeEncode(unichr(i))
+      self.failUnlessEqual(txt, SafeEncode(txt))
+
+
 if __name__ == '__main__':
   unittest.main()
-- 
GitLab