From 9f5a3645d21d084d9df71b0553e1bd1c48129747 Mon Sep 17 00:00:00 2001
From: Michael Hanselmann <hansmi@google.com>
Date: Wed, 1 Dec 2010 20:41:47 +0100
Subject: [PATCH] utils: Add function to find duplicates in sequence

Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Adeodato Simo <dato@google.com>
---
 lib/utils.py                  | 23 +++++++++++++++++++++++
 test/ganeti.utils_unittest.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/lib/utils.py b/lib/utils.py
index 4c95cbd02..359c44302 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -2196,6 +2196,29 @@ def UniqueSequence(seq):
   return [i for i in seq if i not in seen and not seen.add(i)]
 
 
+def FindDuplicates(seq):
+  """Identifies elements occurring more than once in a sequence.
+
+  Each duplicate is reported once; element order is not preserved.
+
+  @type seq: sequence
+  @param seq: Iterable of hashable source elements (iterated once)
+  @rtype: list
+  @return: List of duplicate elements from seq
+
+  """
+  dup = set()
+  seen = set()
+
+  for item in seq:
+    if item in seen:  # second or later occurrence
+      dup.add(item)
+    else:
+      seen.add(item)
+
+  return list(dup)
+
+
 def NormalizeAndValidateMac(mac):
   """Normalizes and check if a MAC address is valid.
 
diff --git a/test/ganeti.utils_unittest.py b/test/ganeti.utils_unittest.py
index 9faf3bf31..d01a00557 100755
--- a/test/ganeti.utils_unittest.py
+++ b/test/ganeti.utils_unittest.py
@@ -1337,6 +1337,37 @@ class TestUniqueSequence(unittest.TestCase):
     self._test(["a", "b", "a"], ["a", "b"])
 
 
+class TestFindDuplicates(unittest.TestCase):
+  """Test case for FindDuplicates on lists, strings and generators"""
+
+  def _Test(self, seq, expected):
+    result = utils.FindDuplicates(seq)
+    self.assertEqual(result, utils.UniqueSequence(result))  # no repeats
+    self.assertEqual(set(result), set(expected))  # order is ignored
+
+  def test(self):
+    self._Test([], [])
+    self._Test([1, 2, 3], [])
+    self._Test([9, 8, 8, 0, 5, 1, 7, 0, 6, 7], [8, 0, 7])
+    for exp in [[1, 2, 3], [3, 2, 1]]:
+      self._Test([1, 1, 2, 2, 3, 3], exp)
+
+    self._Test(["A", "a", "B"], [])  # comparison is case-sensitive
+    self._Test(["a", "A", "a", "B"], ["a"])
+    self._Test("Hello World out there!", ["e", " ", "o", "r", "t", "l"])
+
+    self._Test(self._Gen(False), [])  # generators are accepted too
+    self._Test(self._Gen(True), range(1, 10))
+
+  @staticmethod
+  def _Gen(dup):
+    for i in range(10):
+      yield i
+      if dup:
+        for _ in range(i):  # repeat i another i times; 0 stays unique
+          yield i
+
+
 class TestFirstFree(unittest.TestCase):
   """Test case for the FirstFree function"""
 
-- 
GitLab