From 693342ad3b37d546bbe5123128790c8f7877ec9f Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Tue, 4 Jan 2011 16:02:07 +0100 Subject: [PATCH] hscan: fix long-standing bug with node memory data There is a long-standing bug in hscan, due to the serialization workflow. Basically, hscan loads data from the remote cluster(s), processes it via Loader.mergeData and Loader.checkData, then serializes it. The Loader functions will update the node memory values as if the down instances were running, and then hscan saves the _modified_ values. When another program (e.g. hbal) loads the text files, it will re-apply the Loader functions, in effect subtracting the instance memory twice, which of course offers a wrong cluster view. To fix this bug, we slightly change the internal workflow in hscan, so that we serialize the unmodified cluster data, and only use the modified one for the display of cluster state (and verification that yes, we can load the data). Signed-off-by: Iustin Pop <iustin@google.com> Reviewed-by: Balazs Lecz <leczb@google.com> --- hscan.hs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/hscan.hs b/hscan.hs index b2a4a9f7d..c14846f21 100644 --- a/hscan.hs +++ b/hscan.hs @@ -6,7 +6,7 @@ {- -Copyright (C) 2009, 2010 Google Inc. +Copyright (C) 2009, 2010, 2011 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -89,9 +89,9 @@ fixSlash = map (\x -> if x == '/' then '_' else x) -- | Generates serialized data from loader input. 
-processData :: Result ClusterData -> Result ClusterData +processData :: ClusterData -> Result ClusterData processData input_data = do - cdata@(ClusterData _ nl il _) <- input_data >>= mergeData [] [] [] + cdata@(ClusterData _ nl il _) <- mergeData [] [] [] input_data let (_, fix_nl) = checkData nl il return cdata { cdNodes = fix_nl } @@ -106,7 +106,20 @@ writeData _ name _ (Bad err) = return False writeData nlen name opts (Ok cdata) = do - let (ClusterData _ nl il _) = cdata + let fixdata = processData cdata + case fixdata of + Bad err -> printf "\nError for %s: failed to process data. Details:\n%s\n" + name err >> return False + Ok processed -> writeDataInner nlen name opts cdata processed + +writeDataInner :: Int + -> String + -> Options + -> ClusterData + -> ClusterData + -> IO Bool +writeDataInner nlen name opts cdata fixdata = do + let (ClusterData _ nl il _) = fixdata printf "%-*s " nlen name :: IO () hFlush stdout let shownodes = optShowNodes opts @@ -139,14 +152,14 @@ main = do let lsock = fromMaybe defaultLuxiSocket (optLuxi opts) let name = local input_data <- Luxi.loadData lsock - result <- writeData nlen name opts (processData input_data) + result <- writeData nlen name opts input_data when (not result) $ exitWith $ ExitFailure 2 #ifndef NO_CURL results <- mapM (\ name -> do input_data <- Rapi.loadData name - writeData nlen name opts (processData input_data) + writeData nlen name opts input_data ) clusters when (not $ all id results) $ exitWith (ExitFailure 2) #else -- GitLab