Commit 8f215968 authored by Michael Hanselmann

Make cluster initialization more reliable

There was a race condition between starting the node daemon
and sending requests to write the ssconf files. With this
patch, the initialization waits up to ten seconds for the
node daemon to become responsive.
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
parent 1df79ce6
......@@ -110,7 +110,7 @@ def GenerateHmacKey(file_name):
utils.WriteFile(file_name, data=utils.GenerateSecret(), mode=0400)
def _InitGanetiServerSetup():
def _InitGanetiServerSetup(master_name):
"""Setup the necessary configuration for the initial node daemon.
This creates the nodepass file containing the shared password for
......@@ -133,6 +133,19 @@ def _InitGanetiServerSetup():
" had exitcode %s and error %s" %
(result.cmd, result.exit_code, result.output))
# Wait for node daemon to become responsive
end_time = time.time() + 10.0
while True:
result = rpc.RpcRunner.call_version([master_name])[master_name]
if not result.fail_msg:
break
if time.time() > end_time:
raise errors.OpExecError("Node daemon didn't answer queries within"
" 10 seconds")
time.sleep(1)
def InitCluster(cluster_name, mac_prefix,
master_netdev, file_storage_dir, candidate_pool_size,
......@@ -241,7 +254,7 @@ def InitCluster(cluster_name, mac_prefix,
hv_class.CheckParameterSyntax(hv_params)
# set up the inter-node password and certificate
_InitGanetiServerSetup()
_InitGanetiServerSetup(hostname.name)
# set up ssh config and /etc/hosts
sshline = utils.ReadFile(constants.SSH_HOST_RSA_PUB)
......
......@@ -730,13 +730,14 @@ class RpcRunner(object):
# TODO: should this method query down nodes?
return cls._StaticMultiNodeCall(node_list, "master_info", [])
def call_version(self, node_list):
@classmethod
def call_version(cls, node_list):
"""Query node version.
This is a multi-node call.
"""
return self._MultiNodeCall(node_list, "version", [])
return cls._StaticMultiNodeCall(node_list, "version", [])
def call_blockdev_create(self, node, bdev, size, owner, on_primary, info):
"""Request creation of a given block device.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment