From 693342ad3b37d546bbe5123128790c8f7877ec9f Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 4 Jan 2011 16:02:07 +0100
Subject: [PATCH] hscan: fix long-standing bug with node memory data

There is a long-standing bug in hscan, due to the serialization
workflow. Basically, hscan loads data from the remote cluster(s),
processes it via Loader.mergeData and Loader.checkData, then
serializes it. The Loader function will update the node memory values
as if the down instances were running, and then hscan saves the
_modified_ values.

When another program (e.g. hbal) loads the text files, it will
re-apply the Loader functions, in effect subtracting the instance
memory twice, which of course gives a wrong cluster view.

To fix this bug, we slightly change the internal workflow in hscan, so
that we serialize the unmodified cluster data, and only use the
modified data for displaying the cluster state (and for verifying
that, yes, we can load the data).

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Balazs Lecz <leczb@google.com>
---
 hscan.hs | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/hscan.hs b/hscan.hs
index b2a4a9f7d..c14846f21 100644
--- a/hscan.hs
+++ b/hscan.hs
@@ -6,7 +6,7 @@
 
 {-
 
-Copyright (C) 2009, 2010 Google Inc.
+Copyright (C) 2009, 2010, 2011 Google Inc.
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -89,9 +89,9 @@ fixSlash = map (\x -> if x == '/' then '_' else x)
 
 
 -- | Generates serialized data from loader input.
-processData :: Result ClusterData -> Result ClusterData
+processData :: ClusterData -> Result ClusterData
 processData input_data = do
-  cdata@(ClusterData _ nl il _) <- input_data >>= mergeData [] [] []
+  cdata@(ClusterData _ nl il _) <- mergeData [] [] [] input_data
   let (_, fix_nl) = checkData nl il
   return cdata { cdNodes = fix_nl }
 
@@ -106,7 +106,20 @@ writeData _ name _ (Bad err) =
   return False
 
 writeData nlen name opts (Ok cdata) = do
-  let (ClusterData _ nl il _) = cdata
+  let fixdata = processData cdata
+  case fixdata of
+    Bad err -> printf "\nError for %s: failed to process data. Details:\n%s\n"
+               name err >> return False
+    Ok processed -> writeDataInner nlen name opts cdata processed
+
+writeDataInner :: Int
+               -> String
+               -> Options
+               -> ClusterData
+               -> ClusterData
+               -> IO Bool
+writeDataInner nlen name opts cdata fixdata = do
+  let (ClusterData _ nl il _) = fixdata
   printf "%-*s " nlen name :: IO ()
   hFlush stdout
   let shownodes = optShowNodes opts
@@ -139,14 +152,14 @@ main = do
          let lsock = fromMaybe defaultLuxiSocket (optLuxi opts)
          let name = local
          input_data <- Luxi.loadData lsock
-         result <- writeData nlen name opts (processData input_data)
+         result <- writeData nlen name opts input_data
          when (not result) $ exitWith $ ExitFailure 2
 
 #ifndef NO_CURL
   results <- mapM (\ name ->
                     do
                       input_data <- Rapi.loadData name
-                      writeData nlen name opts (processData input_data)
+                      writeData nlen name opts input_data
                   ) clusters
   when (not $ all id results) $ exitWith (ExitFailure 2)
 #else
-- 
GitLab