From 0160693107ed86a78a7d3bc770c9ccfec18e84d9 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Mon, 27 Aug 2012 17:56:41 +0200
Subject: [PATCH] Implement compilation of regexes at creation time

This means that the verification of the correctness of the regex is
done once, at the deserialisation/creation time, as in the Python
code. To do this, we have to change the FilterRegex type from an alias
to String to a more complex data type, and we have to create manual
read/show/eq instance (phew!).

Unittests are added which test these instances, since it's the first time I do
this manually.

An additional improvement is that we now check that regex-pcre has
been compiled correctly, per the documentation (otherwise we get
runtime errors).

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
---
 htools/Ganeti/HTools/QC.hs      | 12 +++++++-
 htools/Ganeti/Query/Filter.hs   |  7 +++--
 htools/Ganeti/Query/Language.hs | 53 +++++++++++++++++++++++++++++++--
 3 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/htools/Ganeti/HTools/QC.hs b/htools/Ganeti/HTools/QC.hs
index f460d15bc..2654d4e7d 100644
--- a/htools/Ganeti/HTools/QC.hs
+++ b/htools/Ganeti/HTools/QC.hs
@@ -605,7 +605,7 @@ genFilter' 0 =
         , Qlang.GTFilter       <$> getName <*> value
         , Qlang.LEFilter       <$> getName <*> value
         , Qlang.GEFilter       <$> getName <*> value
-        , Qlang.RegexpFilter   <$> getName <*> getName
+        , Qlang.RegexpFilter   <$> getName <*> arbitrary
         , Qlang.ContainsFilter <$> getName <*> value
         ]
     where value = oneof [ Qlang.QuotedString <$> getName
@@ -623,6 +623,9 @@ genFilter' n = do
 instance Arbitrary Qlang.ItemType where
   arbitrary = elements [minBound..maxBound]
 
+instance Arbitrary Qlang.FilterRegex where
+  arbitrary = getName >>= Qlang.mkRegex -- a name should be a good regex
+
 -- * Actual tests
 
 -- ** Utils tests
@@ -2089,8 +2092,15 @@ prop_Qlang_Serialisation =
   forAll genFilter $ \flt ->
   J.readJSON (J.showJSON flt) ==? J.Ok flt
 
+prop_Qlang_FilterRegex_instances :: Qlang.FilterRegex -> Property
+prop_Qlang_FilterRegex_instances rex =
+  printTestCase "failed JSON encoding"
+    (J.readJSON (J.showJSON rex) ==? J.Ok rex) .&&.
+  printTestCase "failed read/show instances" (read (show rex) ==? rex)
+
 testSuite "Qlang"
   [ 'prop_Qlang_Serialisation
+  , 'prop_Qlang_FilterRegex_instances
   ]
 
 
diff --git a/htools/Ganeti/Query/Filter.hs b/htools/Ganeti/Query/Filter.hs
index 2f662a469..de2a1d476 100644
--- a/htools/Ganeti/Query/Filter.hs
+++ b/htools/Ganeti/Query/Filter.hs
@@ -54,7 +54,7 @@ import qualified Data.Map as Map
 import Data.Traversable (traverse)
 import Text.JSON (JSValue(..), fromJSString)
 import Text.JSON.Pretty (pp_value)
-import Text.Regex.PCRE ((=~))
+import qualified Text.Regex.PCRE as PCRE
 
 import Ganeti.BasicTypes
 import Ganeti.Objects
@@ -115,8 +115,9 @@ binOpFilter _ expr actual =
       show (pp_value actual) ++ " with '" ++ show expr ++ "'"
 
 -- | Implements the 'RegexpFilter' matching.
-regexpFilter :: String -> JSValue -> Result Bool
-regexpFilter re (JSString val) = Ok $ (fromJSString val) =~ re
+regexpFilter :: FilterRegex -> JSValue -> Result Bool
+regexpFilter re (JSString val) =
+  Ok $ PCRE.match (compiledRegex re) (fromJSString val)
 regexpFilter _ x =
   Bad $ "Invalid field value used in regexp matching,\
         \ expecting string but got '" ++ show (pp_value x) ++ "'"
diff --git a/htools/Ganeti/Query/Language.hs b/htools/Ganeti/Query/Language.hs
index dc8629ba1..2d0e55ed3 100644
--- a/htools/Ganeti/Query/Language.hs
+++ b/htools/Ganeti/Query/Language.hs
@@ -29,6 +29,10 @@ module Ganeti.Query.Language
     ( Filter(..)
     , FilterField
     , FilterValue(..)
+    , FilterRegex -- note: we don't export the constructor, must use helpers
+    , mkRegex
+    , stringRegex
+    , compiledRegex
     , Fields
     , Query(..)
     , QueryResult(..)
@@ -53,6 +57,7 @@ import Data.Ratio (numerator, denominator)
 import Text.JSON.Pretty (pp_value)
 import Text.JSON.Types
 import Text.JSON
+import qualified Text.Regex.PCRE as PCRE
 
 import qualified Ganeti.Constants as C
 import Ganeti.THH
@@ -123,7 +128,7 @@ data Filter a
     | GTFilter       a FilterValue  -- ^ > <field> <value>
     | LEFilter       a FilterValue  -- ^ <= <field> <value>
     | GEFilter       a FilterValue  -- ^ >= <field> <value>
-    | RegexpFilter   a FilterRegexp -- ^ =~ <field> <regexp>
+    | RegexpFilter   a FilterRegex  -- ^ =~ <field> <regexp>
     | ContainsFilter a FilterValue  -- ^ =[] <list-field> <value>
       deriving (Show, Read, Eq)
 
@@ -270,8 +275,50 @@ instance JSON FilterValue where
   showJSON = showFilterValue
   readJSON = readFilterValue
 
--- | Regexp to apply to the filter value, for filteriong purposes.
-type FilterRegexp = String
+-- | Regexp to apply to the filter value, for filtering purposes. It
+-- holds both the string format, and the \"compiled\" format, so that
+-- we don't re-compile the regex at each match attempt.
+data FilterRegex = FilterRegex
+  { stringRegex   :: String      -- ^ The string version of the regex
+  , compiledRegex :: PCRE.Regex  -- ^ The compiled regex
+  }
+
+-- | Builder for 'FilterRegex'. We always attempt to compile the
+-- regular expression on the initialisation of the data structure;
+-- this might fail, if the RE is not well-formed.
+mkRegex :: (Monad m) => String -> m FilterRegex
+mkRegex str = do
+  compiled <- case PCRE.getVersion of
+                Nothing -> fail "regex-pcre library compiled without\
+                                \ libpcre, regex functionality not available"
+                _ -> PCRE.makeRegexM str
+  return $ FilterRegex str compiled
+
+-- | 'Show' instance: we show the constructor plus the string version
+-- of the regex.
+instance Show FilterRegex where
+  show (FilterRegex re _) = "mkRegex " ++ show re
+
+-- | 'Read' instance: we manually read \"mkRegex\" followed by a
+-- string, and build the 'FilterRegex' using that.
+instance Read FilterRegex where
+  readsPrec _ str = do
+    ("mkRegex", s') <- lex str
+    (re, s'') <- reads s'
+    filterre <- mkRegex re
+    return (filterre, s'')
+
+-- | 'Eq' instance: we only compare the string versions of the regexes.
+instance Eq FilterRegex where
+  (FilterRegex re1 _) == (FilterRegex re2 _) = re1 == re2
+
+-- | 'JSON' instance: like for show and read instances, we work only
+-- with the string component.
+instance JSON FilterRegex where
+  showJSON (FilterRegex re _) = showJSON re
+  readJSON s = do
+    re <- readJSON s
+    mkRegex re
 
 -- | Name of a field.
 type FieldName = String
-- 
GitLab