From 04edfc993eadb9b069dfef83c3fa3a7ff31ae0d9 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Thu, 4 Oct 2012 23:32:50 +0100
Subject: [PATCH] Add an Utils.NiceSort() equivalent

This patch adds a NiceSort equivalent and the corresponding unittest
(partially copied from Python unittest). The difference between the
Python version and this one is that this implementation doesn't use
regular expressions, and as such it doesn't have the 8-groups
limitation.

The key-based version is separate from the non-key one (since we don't
have default arguments in Haskell), and is tested less in its absolute
properties but only that it is identical to the non-key version under
some transformations (the non-key version is much more tested).

This will be needed later in query name sorting.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
---
 htest/Test/Ganeti/Utils.hs | 86 ++++++++++++++++++++++++++++++++++++++
 htools/Ganeti/Utils.hs     | 45 +++++++++++++++++++-
 2 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/htest/Test/Ganeti/Utils.hs b/htest/Test/Ganeti/Utils.hs
index 98ea53dd1..35d7a5575 100644
--- a/htest/Test/Ganeti/Utils.hs
+++ b/htest/Test/Ganeti/Utils.hs
@@ -29,7 +29,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 module Test.Ganeti.Utils (testUtils) where
 
 import Test.QuickCheck
+import Test.HUnit
 
+import Data.List
 import qualified Text.JSON as J
 
 import Test.Ganeti.TestHelper
@@ -119,6 +121,85 @@ prop_parseUnit (NonNegative n) =
         n_gb = n_mb * 1000
         n_tb = n_gb * 1000
 
+{-# ANN case_niceSort_static "HLint: ignore Use camelCase" #-}
+
+case_niceSort_static :: Assertion
+case_niceSort_static = do
+  assertEqual "empty list" [] $ niceSort []
+  assertEqual "punctuation" [",", "."] $ niceSort [",", "."]
+  assertEqual "decimal numbers" ["0.1", "0.2"] $ niceSort ["0.1", "0.2"]
+  assertEqual "various numbers" ["0,099", "0.1", "0.2", "0;099"] $
+              niceSort ["0;099", "0,099", "0.1", "0.2"]
+
+  assertEqual "simple concat" ["0000", "a0", "a1", "a2", "a20", "a99",
+                               "b00", "b10", "b70"] $
+    niceSort ["a0", "a1", "a99", "a20", "a2", "b10", "b70", "b00", "0000"]
+
+  assertEqual "ranges" ["A", "Z", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2",
+                      "a20-3", "a99-3", "a99-10", "b"] $
+    niceSort ["a0-0", "a1-0", "a99-10", "a20-3", "a0-4", "a99-3", "a09-2",
+              "Z", "a9-1", "A", "b"]
+
+  assertEqual "large"
+    ["3jTwJPtrXOY22bwL2YoW", "Eegah9ei", "KOt7vn1dWXi",
+     "KVQqLPDjcPjf8T3oyzjcOsfkb", "WvNJd91OoXvLzdEiEXa6",
+     "Z8Ljf1Pf5eBfNg171wJR", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH",
+     "cPRi0lM7HLnSuWA2G9", "guKJkXnkULealVC8CyF1xefym",
+     "pqF8dkU5B1cMnyZuREaSOADYx", "uHXAyYYftCSG1o7qcCqe",
+     "xij88brTulHYAv8IEOyU", "xpIUJeVT1Rp"] $
+    niceSort ["Eegah9ei", "xij88brTulHYAv8IEOyU", "3jTwJPtrXOY22bwL2YoW",
+             "Z8Ljf1Pf5eBfNg171wJR", "WvNJd91OoXvLzdEiEXa6",
+             "uHXAyYYftCSG1o7qcCqe", "xpIUJeVT1Rp", "KOt7vn1dWXi",
+             "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH",
+             "cPRi0lM7HLnSuWA2G9", "KVQqLPDjcPjf8T3oyzjcOsfkb",
+             "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx"]
+
+-- | Tests single-string behaviour of 'niceSort'. Last test is special
+-- in the sense that /0/ is before any other non-empty string (except
+-- itself, etc.).
+prop_niceSort_single :: Property
+prop_niceSort_single =
+  forAll getName $ \name ->
+  conjoin
+  [ printTestCase "single string" $ [name] ==? niceSort [name]
+  , printTestCase "single plus empty" $ ["", name] ==? niceSort [name, ""]
+  , printTestCase "single plus 0-digit" $ ["0", name] ==? niceSort [name, "0"]
+  ]
+
+-- | Tests some generic 'niceSort' properties. Note that the last test
+-- must add a non-digit prefix; a digit one might change ordering.
+prop_niceSort_generic :: Property
+prop_niceSort_generic =
+  forAll (resize 20 arbitrary) $ \names ->
+  let n_sorted = niceSort names in
+  conjoin [ printTestCase "length" $ length names ==? length n_sorted
+          , printTestCase "same strings" $ sort names ==? sort n_sorted
+          , printTestCase "idempotence" $ n_sorted ==? niceSort n_sorted
+          , printTestCase "static prefix" $ n_sorted ==?
+              map tail (niceSort $ map (" "++) names)
+          ]
+
+-- | Tests that niceSorting numbers is identical to actual sorting
+-- them (in numeric form).
+prop_niceSort_numbers :: Property
+prop_niceSort_numbers =
+  forAll (listOf (arbitrary::Gen (NonNegative Int))) $ \numbers ->
+  map show (sort numbers) ==? niceSort (map show numbers)
+
+-- | Tests that 'niceSort' and 'niceSortKey' are equivalent.
+prop_niceSortKey_equiv :: Property
+prop_niceSortKey_equiv =
+  forAll (resize 20 arbitrary) $ \names ->
+  forAll (vectorOf (length names) (arbitrary::Gen Int)) $ \numbers ->
+  let n_sorted = niceSort names in
+  conjoin
+  [ printTestCase "key id" $ n_sorted ==? niceSortKey id names
+  , printTestCase "key rev" $ niceSort (map reverse names) ==?
+                              map reverse (niceSortKey reverse names)
+  , printTestCase "key snd" $ n_sorted ==? map snd (niceSortKey snd $
+                                                    zip numbers names)
+  ]
+
 -- | Test list for the Utils module.
 testSuite "Utils"
             [ 'prop_commaJoinSplit
@@ -129,4 +210,9 @@ testSuite "Utils"
             , 'prop_select_undefd
             , 'prop_select_undefv
             , 'prop_parseUnit
+            , 'case_niceSort_static
+            , 'prop_niceSort_single
+            , 'prop_niceSort_generic
+            , 'prop_niceSort_numbers
+            , 'prop_niceSortKey_equiv
             ]
diff --git a/htools/Ganeti/Utils.hs b/htools/Ganeti/Utils.hs
index 708755e70..8c1b0ce72 100644
--- a/htools/Ganeti/Utils.hs
+++ b/htools/Ganeti/Utils.hs
@@ -37,13 +37,16 @@ module Ganeti.Utils
   , printTable
   , parseUnit
   , plural
+  , niceSort
+  , niceSortKey
   , exitIfBad
   , exitErr
   , exitWhen
   , exitUnless
   ) where
 
-import Data.Char (toUpper, isAlphaNum)
+import Data.Char (toUpper, isAlphaNum, isDigit)
+import Data.Function (on)
 import Data.List
 
 import Debug.Trace
@@ -230,3 +233,43 @@ exitWhen False _  = return ()
 -- if true, the opposite of 'exitWhen'.
 exitUnless :: Bool -> String -> IO ()
 exitUnless cond = exitWhen (not cond)
+
+-- | Helper for 'niceSort'. Computes the key element for a given string.
+extractKey :: [Either Integer String]  -- ^ Current (partial) key, reversed
+           -> String                   -- ^ Remaining string
+           -> ([Either Integer String], String)
+extractKey ek [] = (reverse ek, [])
+extractKey ek xs@(x:_) =
+  let (span_fn, conv_fn) = if isDigit x
+                             then (isDigit, Left . read)
+                             else (not . isDigit, Right)
+      (k, rest) = span span_fn xs
+  in extractKey (conv_fn k:ek) rest
+
+{-| Sort a list of strings based on digit and non-digit groupings.
+
+Given a list of names @['a1', 'a10', 'a11', 'a2']@ this function
+will sort the list in the logical order @['a1', 'a2', 'a10', 'a11']@.
+
+The sort algorithm breaks each name in groups of either only-digits or
+no-digits, and sorts based on each group.
+
+Internally, this is not implemented via regexes (like the Python
+version), but via actual splitting of the string in sequences of
+either digits or everything else, and converting the digit sequences
+in /Left Integer/ and the non-digit ones in /Right String/, at which
+point sorting becomes trivial due to the built-in 'Either' ordering;
+we only need one extra step of dropping the key at the end.
+
+-}
+niceSort :: [String] -> [String]
+niceSort = map snd . sort . map (\s -> (fst $ extractKey [] s, s))
+
+-- | Key-version of 'niceSort'. We use 'sortBy' and @compare `on` fst@
+-- since we don't want to add an ordering constraint on the /a/ type,
+-- hence the need to only compare the first element of the /(key, a)/
+-- tuple.
+niceSortKey :: (a -> String) -> [a] -> [a]
+niceSortKey keyfn =
+  map snd . sortBy (compare `on` fst) .
+  map (\s -> (fst . extractKey [] $ keyfn s, s))
-- 
GitLab