def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator, where the
  separator itself can be escaped with a backslash in order to be part
  of an element instead of delimiting it. The escaping rules are
  (assuming comma being the separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  Several escaped separators may appear in the same element (for
  example a\,b\,c yields the single element a,b,c). A dangling
  backslash at the very end of the text has nothing to escape and is
  kept as is.

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator; must be non-empty (an empty separator
      makes C{str.split} raise C{ValueError}) and is expected not to
      end with a backslash
  @rtype: list
  @return: a list of strings

  """
  rlist = []
  # raw pieces of the element currently being assembled; it holds more
  # than one piece only while the previous pieces ended with an escaped
  # separator
  parts = []
  # we split the text by sep (with no escaping at this stage) and then
  # glue back together the pieces that were split at an escaped separator
  for piece in text.split(sep):
    parts.append(piece)
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 0:
      # an even number of trailing backslashes means the separator that
      # followed this piece was a real one, so the element is complete;
      # all the backslashes remain and are reduced in the final step
      rlist.append(sep.join(parts))
      parts = []
    # otherwise the separator that followed was escaped: keep collecting
  if parts:
    # the text ended with an odd number of backslashes; there is nothing
    # left to join with, so emit what we have instead of failing
    rlist.append(sep.join(parts))
  # finally, replace backslash-something with something (note that "."
  # does not match a newline, so backslash-newline is left untouched)
  return [re.sub(r"\\(.)", r"\1", v) for v in rlist]
class TestUnescapeAndSplit(unittest.TestCase):
  """Testing case for UnescapeAndSplit"""

  def setUp(self):
    # testing more than one separator for regexp safety ("+" and "." are
    # regular expression metacharacters)
    self._seps = [",", "+", "."]

  def testSimple(self):
    """Plain separators split the text into its elements."""
    a = ["a", "b", "c", "d"]
    for sep in self._seps:
      self.assertEqual(UnescapeAndSplit(sep.join(a), sep=sep), a)

  def testEscape(self):
    """A backslash-escaped separator stays inside its element."""
    for sep in self._seps:
      a = ["a", "b\\" + sep + "c", "d"]
      b = ["a", "b" + sep + "c", "d"]
      self.assertEqual(UnescapeAndSplit(sep.join(a), sep=sep), b)

  def testDoubleEscape(self):
    """A double backslash is a literal backslash; the separator still splits."""
    for sep in self._seps:
      a = ["a", "b\\\\", "c", "d"]
      b = ["a", "b\\", "c", "d"]
      self.assertEqual(UnescapeAndSplit(sep.join(a), sep=sep), b)

  def testThreeEscape(self):
    """Three backslashes: a literal backslash plus an escaped separator."""
    for sep in self._seps:
      a = ["a", "b\\\\\\" + sep + "c", "d"]
      b = ["a", "b\\" + sep + "c", "d"]
      self.assertEqual(UnescapeAndSplit(sep.join(a), sep=sep), b)