From 5b69bc7ca790156ee4422fa1611c05ad6d5ebc47 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Wed, 13 Jan 2010 16:56:42 +0100
Subject: [PATCH] Add an UnescapeAndSplit function

In many cases where we accept (usually from the command line) a list of
parameters, we rule out the use of the separator as a component of any of
the elements.

This patch adds a new function that can split strings of the form
'a,b\,c,d' into ['a', 'b,c', 'd'], with proper un-escaping of
double-backslashes.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
---
 lib/utils.py                  | 42 +++++++++++++++++++++++++++++++++++
 test/ganeti.utils_unittest.py | 34 +++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/lib/utils.py b/lib/utils.py
index 1abec0ef2..6e3340a77 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1897,6 +1897,48 @@ def SafeEncode(text):
   return resu
 
 
+def UnescapeAndSplit(text, sep=","):
+  """Split and unescape a string based on a given separator.
+
+  This function splits a string based on a separator where the
+  separator itself can be escaped in order to be part of an
+  element. The escaping rules are (assuming comma being the
+  separator):
+    - a plain , separates the elements
+    - a sequence \\\\, (double backslash plus comma) is handled as a
+      backslash plus a separator comma
+    - a sequence \\, (backslash plus comma) is handled as a
+      non-separator comma
+
+  @type text: string
+  @param text: the string to split
+  @type sep: string
+  @param sep: the separator
+  @rtype: list
+  @return: a list of strings
+
+  """
+  # we split the list by sep (with no escaping at this stage)
+  slist = text.split(sep)
+  # next, we revisit the elements and if any of them ended with an odd
+  # number of backslashes, then we join it with the next (if there is one)
+  rlist = []
+  while slist:
+    e1 = slist.pop(0)
+    if e1.endswith("\\"):
+      num_b = len(e1) - len(e1.rstrip("\\"))
+      if num_b % 2 == 1 and slist:
+        e2 = slist.pop(0)
+        # push the merged element back for revisiting, since e2 may itself
+        # end with an escaped separator; backslashes are reduced below
+        slist.insert(0, e1 + sep + e2)
+        continue
+    rlist.append(e1)
+  # finally, replace backslash-something with something
+  rlist = [re.sub(r"\\(.)", r"\1", v) for v in rlist]
+  return rlist
+
+
 def CommaJoin(names):
   """Nicely join a set of identifiers.
 
diff --git a/test/ganeti.utils_unittest.py b/test/ganeti.utils_unittest.py
index fe7464de5..f0763550d 100755
--- a/test/ganeti.utils_unittest.py
+++ b/test/ganeti.utils_unittest.py
@@ -45,7 +45,8 @@ from ganeti.utils import IsProcessAlive, RunCmd, \
      ParseUnit, AddAuthorizedKey, RemoveAuthorizedKey, \
      ShellQuote, ShellQuoteArgs, TcpPing, ListVisibleFiles, \
      SetEtcHostsEntry, RemoveEtcHostsEntry, FirstFree, OwnIpAddress, \
-     TailFile, ForceDictType, SafeEncode, IsNormAbsPath, FormatTime
+     TailFile, ForceDictType, SafeEncode, IsNormAbsPath, FormatTime, \
+     UnescapeAndSplit
 
 from ganeti.errors import LockError, UnitParseError, GenericError, \
      ProgrammerError
@@ -1053,5 +1054,36 @@ class TestFormatTime(unittest.TestCase):
     FormatTime(int(time.time()))
 
 
+class TestUnescapeAndSplit(unittest.TestCase):
+  """Tests for UnescapeAndSplit with plain and escaped separators"""
+
+  def setUp(self):
+    # test several separators; "+" and "." are special in regexps
+    self._seps = [",", "+", "."]
+
+  def testSimple(self):
+    a = ["a", "b", "c", "d"]
+    for sep in self._seps:
+      self.failUnlessEqual(UnescapeAndSplit(sep.join(a), sep=sep), a)
+
+  def testEscape(self):
+    for sep in self._seps:
+      a = ["a", "b\\" + sep + "c", "d"]
+      b = ["a", "b" + sep + "c", "d"]
+      self.failUnlessEqual(UnescapeAndSplit(sep.join(a), sep=sep), b)
+
+  def testDoubleEscape(self):
+    for sep in self._seps:
+      a = ["a", "b\\\\", "c", "d"]
+      b = ["a", "b\\", "c", "d"]
+      self.failUnlessEqual(UnescapeAndSplit(sep.join(a), sep=sep), b)
+
+  def testThreeEscape(self):
+    for sep in self._seps:
+      a = ["a", "b\\\\\\" + sep + "c", "d"]
+      b = ["a", "b\\" + sep + "c", "d"]
+      self.failUnlessEqual(UnescapeAndSplit(sep.join(a), sep=sep), b)
+
+
 if __name__ == '__main__':
   testutils.GanetiTestProgram()
-- 
GitLab