Commit a5da38fa authored by Michael Hanselmann's avatar Michael Hanselmann

bootstrap: Wait for SSH daemon to become reachable

In some situations after restarting the SSH daemon by
“prepare-node-join” it's not immediately reachable. Like with the master
and node daemons, waiting for it to become reachable instead of failing
hard can help in such situations.
Signed-off-by: default avatarMichael Hanselmann <hansmi@google.com>
Reviewed-by: default avatarMichele Tartara <mtartara@google.com>
parent 25ba209e
......@@ -256,6 +256,27 @@ def _WaitForMasterDaemon():
" %s seconds" % _DAEMON_READY_TIMEOUT)
def _WaitForSshDaemon(hostname, port, family):
"""Wait for SSH daemon to become responsive.
"""
hostip = netutils.GetHostname(name=hostname, family=family).ip
def _CheckSshDaemon():
if netutils.TcpPing(hostip, port, timeout=1.0, live_port_needed=True):
logging.debug("SSH daemon on %s:%s (IP address %s) has become"
" responsive", hostname, port, hostip)
else:
raise utils.RetryAgain()
try:
utils.Retry(_CheckSshDaemon, 1.0, _DAEMON_READY_TIMEOUT)
except utils.RetryTimeout:
raise errors.OpExecError("SSH daemon on %s:%s (IP address %s) didn't"
" become responsive within %s seconds" %
(hostname, port, hostip, _DAEMON_READY_TIMEOUT))
def RunNodeSetupCmd(cluster_name, node, basecmd, debug, verbose,
use_cluster_key, ask_key, strict_host_check, data):
"""Runs a command to configure something on a remote machine.
......@@ -310,6 +331,8 @@ def RunNodeSetupCmd(cluster_name, node, basecmd, debug, verbose,
raise errors.OpExecError("Command '%s' failed: %s" %
(result.cmd, result.fail_reason))
_WaitForSshDaemon(node, netutils.GetDaemonPort(constants.SSH), family)
def _InitFileStorage(file_storage_dir):
"""Initialize if needed the file storage.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment