From b544cfe0c31aa6ee7602d2855824fc2c67b6ffbe Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 13 May 2008 07:32:58 +0000
Subject: [PATCH] Reduce chance of ssh failures in verify cluster
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cluster verify builds a sorted list of nodes and passes that to all
the nodes (in parallel) for ssh checks. This means that for a cluster
with N nodes, there will be approximately N simultaneous connections to
the first node, then to the second node, etc. This, coupled with the
ssh daemon's “MaxStartups” parameter, can create false alarms about ssh
connectivity.

This patch randomizes the node list in the backend (therefore, each node
should have its own order of ssh-ing to the other nodes) and the chance
of these alarms should be reduced.

Reviewed-by: ultrotter
---
 lib/backend.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/backend.py b/lib/backend.py
index 634d85cd5..401710c72 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -30,6 +30,7 @@ import stat
 import errno
 import re
 import subprocess
+import random
 
 from ganeti import logger
 from ganeti import errors
@@ -200,6 +201,7 @@ def VerifyNode(what):
 
   if 'nodelist' in what:
     result['nodelist'] = {}
+    random.shuffle(what['nodelist'])
     for node in what['nodelist']:
       success, message = _GetSshRunner().VerifyNodeHostname(node)
       if not success:
-- 
GitLab