From 153533f3e88434f3207999bd8c049a9921fb6804 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann <hansmi@google.com> Date: Fri, 17 Dec 2010 15:05:57 +0100 Subject: [PATCH] utils.NiceSort: Use sorted(), add keyfunc, unittests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes utils.NiceSort to use the built-in βsorted()β and gets rid of the intermediate list. Instead of wrapping the items ourselves, a key function is used. The caller can specify another key function (useful to sort objects by their name, e.g. βutils.NiceSort(instances, key=operator.attrgetter("name"))β. Unittests are provided. Signed-off-by: Michael Hanselmann <hansmi@google.com> Reviewed-by: Iustin Pop <iustin@google.com> --- lib/utils.py | 54 ++++++++------ test/ganeti.utils_unittest.py | 136 ++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 21 deletions(-) diff --git a/lib/utils.py b/lib/utils.py index 359c44302..bef80576d 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -109,6 +109,9 @@ _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") #: ASN1 time regexp _ASN1_TIME_REGEX = re.compile(r"^(\d+)([-+]\d\d)(\d\d)$") +_SORTER_RE = re.compile("^%s(.*)$" % (8 * "(\D+|\d+)?")) +_SORTER_DIGIT = re.compile("^\d+$") + class RunResult(object): """Holds the result of running external programs. @@ -1279,7 +1282,25 @@ def BridgeExists(bridge): return os.path.isdir("/sys/class/net/%s/bridge" % bridge) -def NiceSort(name_list): +def _NiceSortTryInt(val): + """Attempts to convert a string to an integer. + + """ + if val and _SORTER_DIGIT.match(val): + return int(val) + else: + return val + + +def _NiceSortKey(value): + """Extract key for sorting. + + """ + return [_NiceSortTryInt(grp) + for grp in _SORTER_RE.match(value).groups()] + + +def NiceSort(values, key=None): """Sort a list of strings based on digit and non-digit groupings. Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function @@ -1290,30 +1311,21 @@ def NiceSort(name_list): or no-digits. Only the first eight such groups are considered, and after that we just use what's left of the string. - @type name_list: list - @param name_list: the names to be sorted + @type values: list + @param values: the names to be sorted + @type key: callable or None + @param key: function of one argument to extract a comparison key from each + list element, must return string @rtype: list @return: a copy of the name list sorted with our algorithm """ - _SORTER_BASE = "(\D+|\d+)" - _SORTER_FULL = "^%s%s?%s?%s?%s?%s?%s?%s?.*$" % (_SORTER_BASE, _SORTER_BASE, - _SORTER_BASE, _SORTER_BASE, - _SORTER_BASE, _SORTER_BASE, - _SORTER_BASE, _SORTER_BASE) - _SORTER_RE = re.compile(_SORTER_FULL) - _SORTER_NODIGIT = re.compile("^\D*$") - def _TryInt(val): - """Attempts to convert a variable to integer.""" - if val is None or _SORTER_NODIGIT.match(val): - return val - rval = int(val) - return rval - - to_sort = [([_TryInt(grp) for grp in _SORTER_RE.match(name).groups()], name) - for name in name_list] - to_sort.sort() - return [tup[1] for tup in to_sort] + if key is None: + keyfunc = _NiceSortKey + else: + keyfunc = lambda value: _NiceSortKey(key(value)) + + return sorted(values, key=keyfunc) def TryConvert(fn, val): diff --git a/test/ganeti.utils_unittest.py b/test/ganeti.utils_unittest.py index d01a00557..eaa183929 100755 --- a/test/ganeti.utils_unittest.py +++ b/test/ganeti.utils_unittest.py @@ -38,6 +38,8 @@ import time import unittest import warnings import OpenSSL +import random +import operator from cStringIO import StringIO import testutils @@ -2643,5 +2645,139 @@ class TestNormalizeAndValidateMac(unittest.TestCase): self.assertEqual(utils.NormalizeAndValidateMac(mac), mac.lower()) +class TestNiceSort(unittest.TestCase): + def test(self): + self.assertEqual(utils.NiceSort([]), []) + self.assertEqual(utils.NiceSort(["foo"]), ["foo"]) + self.assertEqual(utils.NiceSort(["bar", ""]), ["", "bar"]) + self.assertEqual(utils.NiceSort([",", "."]), [",", "."]) + self.assertEqual(utils.NiceSort(["0.1", "0.2"]), ["0.1", "0.2"]) + self.assertEqual(utils.NiceSort(["0;099", "0,099", "0.1", "0.2"]), + ["0,099", "0.1", "0.2", "0;099"]) + + data = ["a0", "a1", "a99", "a20", "a2", "b10", "b70", "b00", "0000"] + self.assertEqual(utils.NiceSort(data), + ["0000", "a0", "a1", "a2", "a20", "a99", + "b00", "b10", "b70"]) + + data = ["a0-0", "a1-0", "a99-10", "a20-3", "a0-4", "a99-3", "a09-2", + "Z", "a9-1", "A", "b"] + self.assertEqual(utils.NiceSort(data), + ["A", "Z", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", + "a20-3", "a99-3", "a99-10", "b"]) + self.assertEqual(utils.NiceSort(data, key=str.lower), + ["A", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", + "a20-3", "a99-3", "a99-10", "b", "Z"]) + self.assertEqual(utils.NiceSort(data, key=str.upper), + ["A", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", + "a20-3", "a99-3", "a99-10", "b", "Z"]) + + def testLargeA(self): + data = [ + "Eegah9ei", "xij88brTulHYAv8IEOyU", "3jTwJPtrXOY22bwL2YoW", + "Z8Ljf1Pf5eBfNg171wJR", "WvNJd91OoXvLzdEiEXa6", "uHXAyYYftCSG1o7qcCqe", + "xpIUJeVT1Rp", "KOt7vn1dWXi", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH", + "cPRi0lM7HLnSuWA2G9", "KVQqLPDjcPjf8T3oyzjcOsfkb", + "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx", + ] + self.assertEqual(utils.NiceSort(data), [ + "3jTwJPtrXOY22bwL2YoW", "Eegah9ei", "KOt7vn1dWXi", + "KVQqLPDjcPjf8T3oyzjcOsfkb", "WvNJd91OoXvLzdEiEXa6", + "Z8Ljf1Pf5eBfNg171wJR", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH", + "cPRi0lM7HLnSuWA2G9", "guKJkXnkULealVC8CyF1xefym", + "pqF8dkU5B1cMnyZuREaSOADYx", "uHXAyYYftCSG1o7qcCqe", + "xij88brTulHYAv8IEOyU", "xpIUJeVT1Rp" + ]) + + def testLargeB(self): + data = [ + "inst-0.0.0.0-0.0.0.0", + "inst-0.1.0.0-0.0.0.0", + "inst-0.2.0.0-0.0.0.0", + "inst-0.2.1.0-0.0.0.0", + "inst-0.2.2.0-0.0.0.0", + "inst-0.2.2.0-0.0.0.9", + "inst-0.2.2.0-0.0.3.9", + "inst-0.2.2.0-0.2.0.9", + "inst-0.2.2.0-0.9.0.9", + "inst-0.20.2.0-0.0.0.0", + "inst-0.20.2.0-0.9.0.9", + "inst-10.020.2.0-0.9.0.10", + "inst-15.020.2.0-0.9.1.00", + "inst-100.020.2.0-0.9.0.9", + + # Only the last group, not converted to a number anymore, differs + "inst-100.020.2.0a999", + "inst-100.020.2.0b000", + "inst-100.020.2.0c10", + "inst-100.020.2.0c101", + "inst-100.020.2.0c2", + "inst-100.020.2.0c20", + "inst-100.020.2.0c3", + "inst-100.020.2.0c39123", + ] + + rnd = random.Random(16205) + for _ in range(10): + testdata = data[:] + rnd.shuffle(testdata) + assert testdata != data + self.assertEqual(utils.NiceSort(testdata), data) + + class _CallCount: + def __init__(self, fn): + self.count = 0 + self.fn = fn + + def __call__(self, *args): + self.count += 1 + return self.fn(*args) + + def testKeyfuncA(self): + # Generate some random numbers + rnd = random.Random(21131) + numbers = [rnd.randint(0, 10000) for _ in range(999)] + assert numbers != sorted(numbers) + + # Convert to hex + data = [hex(i) for i in numbers] + datacopy = data[:] + + keyfn = self._CallCount(lambda value: str(int(value, 16))) + + # Sort with key function converting hex to decimal + result = utils.NiceSort(data, key=keyfn) + + self.assertEqual([hex(i) for i in sorted(numbers)], result) + self.assertEqual(data, datacopy, msg="Input data was modified in NiceSort") + self.assertEqual(keyfn.count, len(numbers), + msg="Key function was not called once per value") + + class _TestData: + def __init__(self, name, value): + self.name = name + self.value = value + + def testKeyfuncB(self): + rnd = random.Random(27396) + data = [] + for i in range(123): + v1 = rnd.randint(0, 5) + v2 = rnd.randint(0, 5) + data.append(self._TestData("inst-%s-%s-%s" % (v1, v2, i), + (v1, v2, i))) + rnd.shuffle(data) + assert data != sorted(data, key=operator.attrgetter("name")) + + keyfn = self._CallCount(operator.attrgetter("name")) + + # Sort by name + result = utils.NiceSort(data, key=keyfn) + + self.assertEqual(result, sorted(data, key=operator.attrgetter("value"))) + self.assertEqual(keyfn.count, len(data), + msg="Key function was not called once per value") + + if __name__ == '__main__': testutils.GanetiTestProgram() -- GitLab