Commit 713d40d1 authored by Klaus Aehlig's avatar Klaus Aehlig

Move Statistics data type to a separate submodule of Utils

To avoid further cluttering the already overly long Utils.hs
move the Statistics data type with its related functions into
a separate submodule. This also provides a better encapsulation
of this type where the Constructors are not exported.
Signed-off-by: default avatarKlaus Aehlig <aehlig@google.com>
Reviewed-by: default avatarPetr Pudlak <pudlak@google.com>
parent c79d19e8
......@@ -157,7 +157,8 @@ HS_DIRS = \
test/hs/Test/Ganeti/Hypervisor/Xen \
test/hs/Test/Ganeti/Locking \
test/hs/Test/Ganeti/Query \
test/hs/Test/Ganeti/THH
test/hs/Test/Ganeti/THH \
test/hs/Test/Ganeti/Utils
# Haskell directories without the roots (src, test/hs)
HS_DIRS_NOROOT = $(filter-out src,$(filter-out test/hs,$(HS_DIRS)))
......@@ -865,6 +866,7 @@ HS_LIB_SRCS = \
src/Ganeti/Utils/MonadPlus.hs \
src/Ganeti/Utils/MultiMap.hs \
src/Ganeti/Utils/MVarLock.hs \
src/Ganeti/Utils/Statistics.hs \
src/Ganeti/VCluster.hs \
src/Ganeti/WConfd/ConfigState.hs \
src/Ganeti/WConfd/ConfigWriter.hs \
......@@ -930,7 +932,9 @@ HS_TEST_SRCS = \
test/hs/Test/Ganeti/TestHTools.hs \
test/hs/Test/Ganeti/TestHelper.hs \
test/hs/Test/Ganeti/Types.hs \
test/hs/Test/Ganeti/Utils.hs
test/hs/Test/Ganeti/Utils.hs \
test/hs/Test/Ganeti/Utils/Statistics.hs
HS_LIBTEST_SRCS = $(HS_LIB_SRCS) $(HS_TEST_SRCS)
......
......@@ -95,6 +95,7 @@ import Ganeti.HTools.Types
import Ganeti.Compat
import qualified Ganeti.OpCodes as OpCodes
import Ganeti.Utils
import Ganeti.Utils.Statistics
import Ganeti.Types (EvacMode(..), mkNonEmpty)
-- * Types
......
{-# LANGUAGE FlexibleContexts, BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-| Utility functions. -}
......@@ -29,11 +29,6 @@ module Ganeti.Utils
, debugXy
, sepSplit
, findFirst
, Statistics
, getSumStatistics
, getStdDevStatistics
, getStatisticValue
, updateStatistics
, stdDev
, if'
, select
......@@ -194,48 +189,6 @@ stdDev lst =
av = foldl' (\accu em -> let d = em - mv in accu + d * d) 0.0 lst
in sqrt (av / ll) -- stddev
-- | Abstract type of statistical accumulations. They behave as if the given
-- statistics were computed on the list of values, but they allow a potentially
-- more efficient update of a given value.
data Statistics = SumStatistics Double
| StdDevStatistics Double Double Double deriving Show
-- count, sum, and not the sum of squares---instead the
-- computed variance for better precission.
-- | Get a statistics that sums up the values.
getSumStatistics :: [Double] -> Statistics
getSumStatistics = SumStatistics . sum
-- | Get a statistics for the standard deviation.
getStdDevStatistics :: [Double] -> Statistics
getStdDevStatistics xs =
let (nt, st) = foldl' (\(n, s) x ->
let !n' = n + 1
!s' = s + x
in (n', s'))
(0, 0) xs
mean = st / nt
nvar = foldl' (\v x -> let d = x - mean in v + d * d) 0 xs
in StdDevStatistics nt st (nvar / nt)
-- | Obtain the value of a statistics.
getStatisticValue :: Statistics -> Double
getStatisticValue (SumStatistics s) = s
getStatisticValue (StdDevStatistics _ _ var) = sqrt var
-- | In a given statistics replace on value by another. This
-- will only give meaningful results, if the original value
-- was actually part of the statistics.
updateStatistics :: Statistics -> (Double, Double) -> Statistics
updateStatistics (SumStatistics s) (x, y) = SumStatistics $ s + (y - x)
updateStatistics (StdDevStatistics n s var) (x, y) =
let !ds = y - x
!dss = y * y - x * x
!dnnvar = n * dss - (2 * s + ds) * ds
!s' = s + ds
!var' = max 0 $ var + dnnvar / (n * n)
in StdDevStatistics n s' var'
-- * Logical functions
-- Avoid syntactic sugar and enhance readability. These functions are proposed
......
{-# LANGUAGE BangPatterns #-}
{-| Utility functions for statistical accumulation. -}
{-
Copyright (C) 2014 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
-}
module Ganeti.Utils.Statistics
( Statistics
, getSumStatistics
, getStdDevStatistics
, getStatisticValue
, updateStatistics
) where
import Data.List (foldl')
-- | Abstract type of statistical accumulations. They behave as if the given
-- statistics were computed on the list of values, but they allow a potentially
-- more efficient update of a given value.
data Statistics = SumStatistics Double
| StdDevStatistics Double Double Double deriving Show
-- count, sum, and not the sum of squares---instead the
-- computed variance for better precission.
-- | Get a statistics that sums up the values.
getSumStatistics :: [Double] -> Statistics
getSumStatistics = SumStatistics . sum
-- | Get a statistics for the standard deviation.
getStdDevStatistics :: [Double] -> Statistics
getStdDevStatistics xs =
let (nt, st) = foldl' (\(n, s) x ->
let !n' = n + 1
!s' = s + x
in (n', s'))
(0, 0) xs
mean = st / nt
nvar = foldl' (\v x -> let d = x - mean in v + d * d) 0 xs
in StdDevStatistics nt st (nvar / nt)
-- | Obtain the value of a statistics.
getStatisticValue :: Statistics -> Double
getStatisticValue (SumStatistics s) = s
getStatisticValue (StdDevStatistics _ _ var) = sqrt var
-- | In a given statistics replace on value by another. This
-- will only give meaningful results, if the original value
-- was actually part of the statistics.
updateStatistics :: Statistics -> (Double, Double) -> Statistics
updateStatistics (SumStatistics s) (x, y) = SumStatistics $ s + (y - x)
updateStatistics (StdDevStatistics n s var) (x, y) =
let !ds = y - x
!dss = y * y - x * x
!dnnvar = n * dss - (2 * s + ds) * ds
!s' = s + ds
!var' = max 0 $ var + dnnvar / (n * n)
in StdDevStatistics n s' var'
......@@ -341,24 +341,6 @@ prop_splitRecombineEithers es =
(splitleft, splitright, trail) = splitEithers es
emptylist = []::[Int]
-- | Test the update function for standard deviations against the naive
-- implementation.
prop_stddev_update :: Property
prop_stddev_update =
forAll (choose (0, 6) >>= flip vectorOf (choose (0, 1))) $ \xs ->
forAll (choose (0, 1)) $ \a ->
forAll (choose (0, 1)) $ \b ->
forAll (choose (1, 6) >>= flip vectorOf (choose (0, 1))) $ \ys ->
let original = xs ++ [a] ++ ys
modified = xs ++ [b] ++ ys
with_update = getStatisticValue
$ updateStatistics (getStdDevStatistics original) (a,b)
direct = stdDev modified
in printTestCase ("Value computed by update " ++ show with_update
++ " differs too much from correct value " ++ show direct)
(abs (with_update - direct) < 1e-12)
-- | Test list for the Utils module.
testSuite "Utils"
[ 'prop_commaJoinSplit
, 'prop_commaSplitJoin
......@@ -385,5 +367,4 @@ testSuite "Utils"
, 'prop_chompPrefix_empty_string
, 'prop_chompPrefix_nothing
, 'prop_splitRecombineEithers
, 'prop_stddev_update
]
{-# LANGUAGE TemplateHaskell #-}
{-| Unit tests for Ganeti statistics utils.
-}
{-
Copyright (C) 2014 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
-}
module Test.Ganeti.Utils.Statistics (testUtils_Statistics) where
import Test.QuickCheck
import Test.Ganeti.TestHelper
import Ganeti.Utils (stdDev)
import Ganeti.Utils.Statistics
-- | Test the update function for standard deviations against the naive
-- implementation.
prop_stddev_update :: Property
prop_stddev_update =
forAll (choose (0, 6) >>= flip vectorOf (choose (0, 1))) $ \xs ->
forAll (choose (0, 1)) $ \a ->
forAll (choose (0, 1)) $ \b ->
forAll (choose (1, 6) >>= flip vectorOf (choose (0, 1))) $ \ys ->
let original = xs ++ [a] ++ ys
modified = xs ++ [b] ++ ys
with_update = getStatisticValue
$ updateStatistics (getStdDevStatistics original) (a,b)
direct = stdDev modified
in printTestCase ("Value computed by update " ++ show with_update
++ " differs too much from correct value " ++ show direct)
(abs (with_update - direct) < 1e-12)
testSuite "Utils/Statistics"
[ 'prop_stddev_update
]
......@@ -80,6 +80,7 @@ import Test.Ganeti.THH
import Test.Ganeti.THH.Types
import Test.Ganeti.Types
import Test.Ganeti.Utils
import Test.Ganeti.Utils.Statistics
-- | Our default test options, overring the built-in test-framework
-- ones (but not the supplied command line parameters).
......@@ -145,6 +146,7 @@ allTests =
, testTHH_Types
, testTypes
, testUtils
, testUtils_Statistics
]
-- | Main function. Note we don't use defaultMain since we want to
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment