From 153533f3e88434f3207999bd8c049a9921fb6804 Mon Sep 17 00:00:00 2001
From: Michael Hanselmann <hansmi@google.com>
Date: Fri, 17 Dec 2010 15:05:57 +0100
Subject: [PATCH] utils.NiceSort: Use sorted(), add keyfunc, unittests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch changes utils.NiceSort to use the built-in β€œsorted()” and
gets rid of the intermediate list. Instead of wrapping the items
ourselves, a key function is used. The caller can specify another key
function (useful to sort objects by their name, e.g.
β€œutils.NiceSort(instances, key=operator.attrgetter("name"))”.

Unittests are provided.

Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
---
 lib/utils.py                  |  54 ++++++++------
 test/ganeti.utils_unittest.py | 136 ++++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 21 deletions(-)

diff --git a/lib/utils.py b/lib/utils.py
index 359c44302..bef80576d 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -109,6 +109,9 @@ _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")
 #: ASN1 time regexp
 _ASN1_TIME_REGEX = re.compile(r"^(\d+)([-+]\d\d)(\d\d)$")
 
+_SORTER_RE = re.compile("^%s(.*)$" % (8 * "(\D+|\d+)?"))
+_SORTER_DIGIT = re.compile("^\d+$")
+
 
 class RunResult(object):
   """Holds the result of running external programs.
@@ -1279,7 +1282,25 @@ def BridgeExists(bridge):
   return os.path.isdir("/sys/class/net/%s/bridge" % bridge)
 
 
-def NiceSort(name_list):
+def _NiceSortTryInt(val):
+  """Attempts to convert a string to an integer.
+
+  """
+  if val and _SORTER_DIGIT.match(val):
+    return int(val)
+  else:
+    return val
+
+
+def _NiceSortKey(value):
+  """Extract key for sorting.
+
+  """
+  return [_NiceSortTryInt(grp)
+          for grp in _SORTER_RE.match(value).groups()]
+
+
+def NiceSort(values, key=None):
   """Sort a list of strings based on digit and non-digit groupings.
 
   Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function
@@ -1290,30 +1311,21 @@ def NiceSort(name_list):
   or no-digits. Only the first eight such groups are considered, and
   after that we just use what's left of the string.
 
-  @type name_list: list
-  @param name_list: the names to be sorted
+  @type values: list
+  @param values: the names to be sorted
+  @type key: callable or None
+  @param key: function of one argument to extract a comparison key from each
+    list element, must return string
   @rtype: list
   @return: a copy of the name list sorted with our algorithm
 
   """
-  _SORTER_BASE = "(\D+|\d+)"
-  _SORTER_FULL = "^%s%s?%s?%s?%s?%s?%s?%s?.*$" % (_SORTER_BASE, _SORTER_BASE,
-                                                  _SORTER_BASE, _SORTER_BASE,
-                                                  _SORTER_BASE, _SORTER_BASE,
-                                                  _SORTER_BASE, _SORTER_BASE)
-  _SORTER_RE = re.compile(_SORTER_FULL)
-  _SORTER_NODIGIT = re.compile("^\D*$")
-  def _TryInt(val):
-    """Attempts to convert a variable to integer."""
-    if val is None or _SORTER_NODIGIT.match(val):
-      return val
-    rval = int(val)
-    return rval
-
-  to_sort = [([_TryInt(grp) for grp in _SORTER_RE.match(name).groups()], name)
-             for name in name_list]
-  to_sort.sort()
-  return [tup[1] for tup in to_sort]
+  if key is None:
+    keyfunc = _NiceSortKey
+  else:
+    keyfunc = lambda value: _NiceSortKey(key(value))
+
+  return sorted(values, key=keyfunc)
 
 
 def TryConvert(fn, val):
diff --git a/test/ganeti.utils_unittest.py b/test/ganeti.utils_unittest.py
index d01a00557..eaa183929 100755
--- a/test/ganeti.utils_unittest.py
+++ b/test/ganeti.utils_unittest.py
@@ -38,6 +38,8 @@ import time
 import unittest
 import warnings
 import OpenSSL
+import random
+import operator
 from cStringIO import StringIO
 
 import testutils
@@ -2643,5 +2645,139 @@ class TestNormalizeAndValidateMac(unittest.TestCase):
       self.assertEqual(utils.NormalizeAndValidateMac(mac), mac.lower())
 
 
+class TestNiceSort(unittest.TestCase):
+  def test(self):
+    self.assertEqual(utils.NiceSort([]), [])
+    self.assertEqual(utils.NiceSort(["foo"]), ["foo"])
+    self.assertEqual(utils.NiceSort(["bar", ""]), ["", "bar"])
+    self.assertEqual(utils.NiceSort([",", "."]), [",", "."])
+    self.assertEqual(utils.NiceSort(["0.1", "0.2"]), ["0.1", "0.2"])
+    self.assertEqual(utils.NiceSort(["0;099", "0,099", "0.1", "0.2"]),
+                     ["0,099", "0.1", "0.2", "0;099"])
+
+    data = ["a0", "a1", "a99", "a20", "a2", "b10", "b70", "b00", "0000"]
+    self.assertEqual(utils.NiceSort(data),
+                     ["0000", "a0", "a1", "a2", "a20", "a99",
+                      "b00", "b10", "b70"])
+
+    data = ["a0-0", "a1-0", "a99-10", "a20-3", "a0-4", "a99-3", "a09-2",
+            "Z", "a9-1", "A", "b"]
+    self.assertEqual(utils.NiceSort(data),
+                     ["A", "Z", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2",
+                      "a20-3", "a99-3", "a99-10", "b"])
+    self.assertEqual(utils.NiceSort(data, key=str.lower),
+                     ["A", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2",
+                      "a20-3", "a99-3", "a99-10", "b", "Z"])
+    self.assertEqual(utils.NiceSort(data, key=str.upper),
+                     ["A", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2",
+                      "a20-3", "a99-3", "a99-10", "b", "Z"])
+
+  def testLargeA(self):
+    data = [
+      "Eegah9ei", "xij88brTulHYAv8IEOyU", "3jTwJPtrXOY22bwL2YoW",
+      "Z8Ljf1Pf5eBfNg171wJR", "WvNJd91OoXvLzdEiEXa6", "uHXAyYYftCSG1o7qcCqe",
+      "xpIUJeVT1Rp", "KOt7vn1dWXi", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH",
+      "cPRi0lM7HLnSuWA2G9", "KVQqLPDjcPjf8T3oyzjcOsfkb",
+      "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx",
+      ]
+    self.assertEqual(utils.NiceSort(data), [
+      "3jTwJPtrXOY22bwL2YoW", "Eegah9ei", "KOt7vn1dWXi",
+      "KVQqLPDjcPjf8T3oyzjcOsfkb", "WvNJd91OoXvLzdEiEXa6",
+      "Z8Ljf1Pf5eBfNg171wJR", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH",
+      "cPRi0lM7HLnSuWA2G9", "guKJkXnkULealVC8CyF1xefym",
+      "pqF8dkU5B1cMnyZuREaSOADYx", "uHXAyYYftCSG1o7qcCqe",
+      "xij88brTulHYAv8IEOyU", "xpIUJeVT1Rp"
+      ])
+
+  def testLargeB(self):
+    data = [
+      "inst-0.0.0.0-0.0.0.0",
+      "inst-0.1.0.0-0.0.0.0",
+      "inst-0.2.0.0-0.0.0.0",
+      "inst-0.2.1.0-0.0.0.0",
+      "inst-0.2.2.0-0.0.0.0",
+      "inst-0.2.2.0-0.0.0.9",
+      "inst-0.2.2.0-0.0.3.9",
+      "inst-0.2.2.0-0.2.0.9",
+      "inst-0.2.2.0-0.9.0.9",
+      "inst-0.20.2.0-0.0.0.0",
+      "inst-0.20.2.0-0.9.0.9",
+      "inst-10.020.2.0-0.9.0.10",
+      "inst-15.020.2.0-0.9.1.00",
+      "inst-100.020.2.0-0.9.0.9",
+
+      # Only the last group, not converted to a number anymore, differs
+      "inst-100.020.2.0a999",
+      "inst-100.020.2.0b000",
+      "inst-100.020.2.0c10",
+      "inst-100.020.2.0c101",
+      "inst-100.020.2.0c2",
+      "inst-100.020.2.0c20",
+      "inst-100.020.2.0c3",
+      "inst-100.020.2.0c39123",
+      ]
+
+    rnd = random.Random(16205)
+    for _ in range(10):
+      testdata = data[:]
+      rnd.shuffle(testdata)
+      assert testdata != data
+      self.assertEqual(utils.NiceSort(testdata), data)
+
+  class _CallCount:
+    def __init__(self, fn):
+      self.count = 0
+      self.fn = fn
+
+    def __call__(self, *args):
+      self.count += 1
+      return self.fn(*args)
+
+  def testKeyfuncA(self):
+    # Generate some random numbers
+    rnd = random.Random(21131)
+    numbers = [rnd.randint(0, 10000) for _ in range(999)]
+    assert numbers != sorted(numbers)
+
+    # Convert to hex
+    data = [hex(i) for i in numbers]
+    datacopy = data[:]
+
+    keyfn = self._CallCount(lambda value: str(int(value, 16)))
+
+    # Sort with key function converting hex to decimal
+    result = utils.NiceSort(data, key=keyfn)
+
+    self.assertEqual([hex(i) for i in sorted(numbers)], result)
+    self.assertEqual(data, datacopy, msg="Input data was modified in NiceSort")
+    self.assertEqual(keyfn.count, len(numbers),
+                     msg="Key function was not called once per value")
+
+  class _TestData:
+    def __init__(self, name, value):
+      self.name = name
+      self.value = value
+
+  def testKeyfuncB(self):
+    rnd = random.Random(27396)
+    data = []
+    for i in range(123):
+      v1 = rnd.randint(0, 5)
+      v2 = rnd.randint(0, 5)
+      data.append(self._TestData("inst-%s-%s-%s" % (v1, v2, i),
+                                 (v1, v2, i)))
+    rnd.shuffle(data)
+    assert data != sorted(data, key=operator.attrgetter("name"))
+
+    keyfn = self._CallCount(operator.attrgetter("name"))
+
+    # Sort by name
+    result = utils.NiceSort(data, key=keyfn)
+
+    self.assertEqual(result, sorted(data, key=operator.attrgetter("value")))
+    self.assertEqual(keyfn.count, len(data),
+                     msg="Key function was not called once per value")
+
+
 if __name__ == '__main__':
   testutils.GanetiTestProgram()
-- 
GitLab