From 8a65c02b3963cadb22d0d7ec67e769e5ac8b2e17 Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Sun, 26 Aug 2012 20:22:44 +0200
Subject: [PATCH] Add filtering support in Query

This adds basic infrastructure for filtering (fully functional except,
as usual, for runtime data), and then uses it for node queries.

Since the filtering exports regex matching as an external
functionality, we have to use a regex library. There are many flavours
of these in Haskell (see
http://www.haskell.org/haskellwiki/Regular_expressions), but since we
want to be as compatible as we can with Python's, we use the
regex-pcre one, which is a wrapper to the widely used 'pcre' library.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Agata Murawska <agatamurawska@google.com>
---
 INSTALL                       |   9 +-
 Makefile.am                   |   1 +
 htools/Ganeti/Query/Filter.hs | 172 ++++++++++++++++++++++++++++++++++
 htools/Ganeti/Query/Query.hs  |  37 +++++++-
 4 files changed, 213 insertions(+), 6 deletions(-)
 create mode 100644 htools/Ganeti/Query/Filter.hs

diff --git a/INSTALL b/INSTALL
index 4ced783e7..70f70563a 100644
--- a/INSTALL
+++ b/INSTALL
@@ -134,8 +134,10 @@ just going to run Ganeti). More specifically:
 - `bytestring <http://hackage.haskell.org/package/bytestring>`_ and
   `utf8-string <http://hackage.haskell.org/package/utf8-string>`_
   libraries; these usually come with the GHC compiler
+- `regex-pcre <http://hackage.haskell.org/package/regex-pcre>`_,
+  bindings for the ``pcre`` library
 
-All of these are also available as package in Debian/Ubuntu::
+Some of these are also available as package in Debian/Ubuntu::
 
   $ apt-get install ghc6 libghc6-json-dev libghc6-network-dev \
                     libghc6-parallel-dev libghc6-curl-dev
@@ -152,7 +154,8 @@ Note that more recent version have switched to GHC 7.x and the packages
 were renamed::
 
   $ apt-get install ghc libghc-json-dev libghc-network-dev \
-                    libghc-parallel-dev libghc-curl-dev
+                    libghc-parallel-dev libghc-curl-dev \
+                    libghc-regex-pcre-dev
 
 If using a distribution which does not provide them, first install
 the Haskell platform. You can also install ``cabal`` manualy::
@@ -162,7 +165,7 @@ the Haskell platform. You can also install ``cabal`` manualy::
 Then install the additional libraries via
 ``cabal``::
 
-  $ cabal install json network parallel curl
+  $ cabal install json network parallel curl regex-pcre
 
 The compilation of the htools components is automatically enabled when
 the compiler and the requisite libraries are found. You can use the
diff --git a/Makefile.am b/Makefile.am
index ee420b7d8..6c8bcc78e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -418,6 +418,7 @@ HS_LIB_SRCS = \
 	htools/Ganeti/Rpc.hs \
 	htools/Ganeti/Qlang.hs \
 	htools/Ganeti/Query/Common.hs \
+	htools/Ganeti/Query/Filter.hs \
 	htools/Ganeti/Query/Node.hs \
 	htools/Ganeti/Query/Query.hs \
 	htools/Ganeti/Query/Types.hs \
diff --git a/htools/Ganeti/Query/Filter.hs b/htools/Ganeti/Query/Filter.hs
new file mode 100644
index 000000000..7e8c1e184
--- /dev/null
+++ b/htools/Ganeti/Query/Filter.hs
@@ -0,0 +1,172 @@
+{-# LANGUAGE Rank2Types #-}
+
+{-| Implementation of the Ganeti Query2 filterning.
+
+The filtering of results should be done in two phases.
+
+In the first phase, before contacting any remote nodes for runtime
+data, the filtering should be executed with 'Nothing' for the runtime
+context. This will make all non-runtime filters filter correctly,
+whereas all runtime filters will respond successfully. As described in
+the Python version too, this makes for example /Or/ filters very
+inefficient if they contain runtime fields.
+
+Once this first filtering phase has been done, we hopefully eliminated
+some remote nodes out of the list of candidates, we run the remote
+data gathering, and we evaluate the filter again, this time with a
+'Just' runtime context. This will make all filters work correctly.
+
+Note that the second run will re-evaluate the config/simple fields,
+without caching; this is not perfect, but we consider config accesses
+very cheap (and the configuration snapshot we have won't change
+between the two runs, hence we will not get inconsistent results).
+
+-}
+
+{-
+
+Copyright (C) 2012 Google Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+-}
+
+module Ganeti.Query.Filter
+  ( compileFilter
+  , evaluateFilter
+  ) where
+
+import Control.Applicative
+import qualified Data.Map as Map
+import Data.Traversable (traverse)
+import Text.JSON (JSValue(..), fromJSString)
+import Text.JSON.Pretty (pp_value)
+import Text.Regex.PCRE ((=~))
+
+import Ganeti.BasicTypes
+import Ganeti.Objects
+import Ganeti.Qlang
+import Ganeti.Query.Types
+import Ganeti.HTools.JSON
+
+-- | Compiles a filter based on field names to one based on getters.
+compileFilter :: FieldMap a b
+              -> Filter FilterField
+              -> Result (Filter (FieldGetter a b))
+compileFilter fm =
+  traverse (\field -> maybe (Bad $ "Can't find field named '" ++ field ++ "'")
+                      (Ok . snd) (field `Map.lookup` fm))
+
+-- | Wraps a getter, filter pair. If the getter is 'FieldRuntime' but
+-- we don't have a runtime context, we skip the filtering, returning
+-- \"pass\". Otherwise, we pass the actual value to the filter.
+wrapGetter :: ConfigData
+           -> Maybe b
+           -> a
+           -> FieldGetter a b
+           -> (JSValue -> Result Bool)
+           -> Result Bool
+wrapGetter cfg b a getter faction =
+  case tryGetter cfg b a getter of
+    Nothing -> Ok True -- runtime missing, accepting the value
+    Just v ->
+      case v of
+        ResultEntry RSNormal (Just fval) -> faction fval
+        ResultEntry RSNormal Nothing ->
+          Bad "Internal error: Getter returned RSNormal/Nothing"
+        _ -> Ok True -- filter has no data to work, accepting it
+
+-- | Helper to evaluate a filter getter (and the value it generates) in
+-- a boolean context.
+trueFilter :: JSValue -> Result Bool
+trueFilter (JSBool x) = Ok x
+trueFilter v = Bad $ "Unexpected value '" ++ show (pp_value v) ++
+               "' in boolean context"
+
+-- | A type synonim for a rank-2 comparator function. This is used so
+-- that we can pass the usual '<=', '>', '==' functions to 'binOpFilter'
+-- and for them to be used in multiple contexts.
+type Comparator = (Eq a, Ord a) => a -> a -> Bool
+
+-- | Helper to evaluate a filder getter (and the value it generates)
+-- in a boolean context. Note the order of arguments is reversed from
+-- the filter definitions (due to the call chain), make sure to
+-- compare in the reverse order too!.
+binOpFilter :: Comparator -> FilterValue -> JSValue -> Result Bool
+binOpFilter comp (QuotedString y) (JSString x) =
+  Ok $ fromJSString x `comp` y
+binOpFilter comp (NumericValue y) (JSRational _ x) =
+  Ok $ x `comp` fromIntegral y
+binOpFilter _ expr actual =
+  Bad $ "Invalid types in comparison, trying to compare " ++
+      show (pp_value actual) ++ " with '" ++ show expr ++ "'"
+
+-- | Implements the 'RegexpFilter' matching.
+regexpFilter :: String -> JSValue -> Result Bool
+regexpFilter re (JSString val) = Ok $ (fromJSString val) =~ re
+regexpFilter _ x =
+  Bad $ "Invalid field value used in regexp matching,\
+        \ expecting string but got '" ++ show (pp_value x) ++ "'"
+
+-- | Implements the 'ContainsFilter' matching.
+containsFilter :: FilterValue -> JSValue -> Result Bool
+-- note: the next two implementations are the same, but we have to
+-- repeat them due to the encapsulation done by FilterValue
+containsFilter (QuotedString val) lst = do
+  lst' <- fromJVal lst
+  return $ val `elem` lst'
+containsFilter (NumericValue val) lst = do
+  lst' <- fromJVal lst
+  return $ val `elem` lst'
+
+-- | Verifies if a given item passes a filter. The runtime context
+-- might be missing, in which case most of the filters will consider
+-- this as passing the filter.
+evaluateFilter :: ConfigData -> Maybe b -> a
+               -> Filter (FieldGetter a b)
+               -> Result Bool
+evaluateFilter _ _  _ EmptyFilter = Ok True
+evaluateFilter c mb a (AndFilter flts) =
+  all id <$> mapM (evaluateFilter c mb a) flts
+evaluateFilter c mb a (OrFilter flts)  =
+  any id <$> mapM (evaluateFilter c mb a) flts
+evaluateFilter c mb a (NotFilter flt)  =
+  not <$> evaluateFilter c mb a flt
+evaluateFilter c mb a (TrueFilter getter)  =
+  wrapGetter c mb a getter trueFilter
+evaluateFilter c mb a (EQFilter getter val) =
+  wrapGetter c mb a getter (binOpFilter (==) val)
+evaluateFilter c mb a (LTFilter getter val) =
+  wrapGetter c mb a getter (binOpFilter (<) val)
+evaluateFilter c mb a (LEFilter getter val) =
+  wrapGetter c mb a getter (binOpFilter (<=) val)
+evaluateFilter c mb a (GTFilter getter val) =
+  wrapGetter c mb a getter (binOpFilter (>) val)
+evaluateFilter c mb a (GEFilter getter val) =
+  wrapGetter c mb a getter (binOpFilter (>=) val)
+evaluateFilter c mb a (RegexpFilter getter re) =
+  wrapGetter c mb a getter (regexpFilter re)
+evaluateFilter c mb a (ContainsFilter getter val) =
+  wrapGetter c mb a getter (containsFilter val)
+
+-- | Runs a getter with potentially missing runtime context.
+tryGetter :: ConfigData -> Maybe b -> a -> FieldGetter a b -> Maybe ResultEntry
+tryGetter _   _ item (FieldSimple getter)  = Just $ getter item
+tryGetter cfg _ item (FieldConfig getter)  = Just $ getter cfg item
+tryGetter _  rt item (FieldRuntime getter) =
+  maybe Nothing (\rt' -> Just $ getter rt' item) rt
+tryGetter _   _ _    FieldUnknown          = Just $
+                                             ResultEntry RSUnknown Nothing
diff --git a/htools/Ganeti/Query/Query.hs b/htools/Ganeti/Query/Query.hs
index 071e6e5a5..65544c7e3 100644
--- a/htools/Ganeti/Query/Query.hs
+++ b/htools/Ganeti/Query/Query.hs
@@ -23,11 +23,34 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 
 -}
 
+{-
+
+TODO: problems with the current model:
+
+1. There's nothing preventing a result such as ResultEntry RSNormal
+Nothing, or ResultEntry RSNoData (Just ...); ideally, we would
+separate the the RSNormal and other types; we would need a new data
+type for this, though, with JSON encoding/decoding
+
+2. We don't have a way to 'bind' a FieldDefinition's field type
+(e.q. QFTBool) with the actual value that is returned from a
+FieldGetter. This means that the various getter functions can return
+divergent types for the same field when evaluated against multiple
+items. This is bad; it only works today because we 'hide' everything
+behind JSValue, but is not nice at all. We should probably remove the
+separation between FieldDefinition and the FieldGetter, and introduce
+a new abstract data type, similar to QFT*, that contains the values
+too.
+
+-}
+
 module Ganeti.Query.Query
+
     ( query
     , queryFields
     ) where
 
+import Control.Monad (filterM)
 import Data.Maybe (fromMaybe)
 import qualified Data.Map as Map
 
@@ -35,6 +58,7 @@ import Ganeti.BasicTypes
 import Ganeti.HTools.JSON
 import Ganeti.Qlang
 import Ganeti.Query.Common
+import Ganeti.Query.Filter
 import Ganeti.Query.Types
 import Ganeti.Query.Node
 import Ganeti.Objects
@@ -70,12 +94,19 @@ query :: ConfigData   -- ^ The current configuration
       -> Query        -- ^ The query (item, fields, filter)
       -> IO (Result QueryResult) -- ^ Result
 
-query cfg (Query QRNode fields _) = return $ do
+query cfg (Query QRNode fields qfilter) = return $ do
+  cfilter <- compileFilter nodeFieldsMap qfilter
   let selected = getSelectedFields nodeFieldsMap fields
       (fdefs, fgetters) = unzip selected
       nodes = Map.elems . fromContainer $ configNodes cfg
-      fdata = map (\node -> map (execGetter cfg NodeRuntime node) fgetters)
-              nodes
+  -- runs first pass of the filter, without a runtime context; this
+  -- will limit the nodes that we'll contact for runtime data
+  fnodes <- filterM (\n -> evaluateFilter cfg Nothing n cfilter)
+            nodes
+  -- here we would run the runtime data gathering, then filter again
+  -- the nodes, based on existing runtime data
+  let fdata = map (\node -> map (execGetter cfg NodeRuntime node) fgetters)
+              fnodes
   return QueryResult { qresFields = fdefs, qresData = fdata }
 
 query _ (Query qkind _ _) =
-- 
GitLab