algo.py 2.73 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#
#

# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Utility functions with algorithms.

"""

import re


_SORTER_RE = re.compile("^%s(.*)$" % (8 * "(\D+|\d+)?"))
_SORTER_DIGIT = re.compile("^\d+$")


def UniqueSequence(seq):
  """Returns a list with unique elements.

  Element order is preserved.

  @type seq: sequence
  @param seq: the sequence with the source elements
  @rtype: list
  @return: list of unique elements from seq

  """
  seen = set()
  return [i for i in seq if i not in seen and not seen.add(i)]


def FindDuplicates(seq):
  """Identifies duplicates in a list.

  Does not preserve element order.

  @type seq: sequence
  @param seq: Sequence with source elements
  @rtype: list
  @return: List of duplicate elements from seq

  """
  dup = set()
  seen = set()

  for item in seq:
    if item in seen:
      dup.add(item)
    else:
      seen.add(item)

  return list(dup)


def _NiceSortTryInt(val):
  """Attempts to convert a string to an integer.

  """
  if val and _SORTER_DIGIT.match(val):
    return int(val)
  else:
    return val


def _NiceSortKey(value):
  """Extract key for sorting.

  """
  return [_NiceSortTryInt(grp)
          for grp in _SORTER_RE.match(value).groups()]


def NiceSort(values, key=None):
  """Sort a list of strings based on digit and non-digit groupings.

  Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function
  will sort the list in the logical order C{['a1', 'a2', 'a10',
  'a11']}.

  The sort algorithm breaks each name in groups of either only-digits
  or no-digits. Only the first eight such groups are considered, and
  after that we just use what's left of the string.

  @type values: list
  @param values: the names to be sorted
  @type key: callable or None
  @param key: function of one argument to extract a comparison key from each
    list element, must return string
  @rtype: list
  @return: a copy of the name list sorted with our algorithm

  """
  if key is None:
    keyfunc = _NiceSortKey
  else:
    keyfunc = lambda value: _NiceSortKey(key(value))

  return sorted(values, key=keyfunc)