From b544cfe0c31aa6ee7602d2855824fc2c67b6ffbe Mon Sep 17 00:00:00 2001 From: Iustin Pop <iustin@google.com> Date: Tue, 13 May 2008 07:32:58 +0000 Subject: [PATCH] Reduce chance of ssh failures in verify cluster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cluster verify builds a sorted list of nodes and passes that to all the nodes (in parallel) for ssh checks. This means that for a cluster with N nodes, there will be approximately N simultaneous connections to the first node, then to the second node, etc. This, coupled with the ssh daemon's “MaxStartups” parameter, can create false alarms about ssh connectivity. This patch randomizes the node list in the backend (therefore, each node should have its own order of ssh-ing to the other nodes) and the chance of these alarms should be reduced. Reviewed-by: ultrotter --- lib/backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/backend.py b/lib/backend.py index 634d85cd5..401710c72 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -30,6 +30,7 @@ import stat import errno import re import subprocess +import random from ganeti import logger from ganeti import errors @@ -200,6 +201,7 @@ def VerifyNode(what): if 'nodelist' in what: result['nodelist'] = {} + random.shuffle(what['nodelist']) for node in what['nodelist']: success, message = _GetSshRunner().VerifyNodeHostname(node) if not success: -- GitLab