Commit 2f1d9bd0 authored by Klaus Aehlig
Browse files

After master-failover verify reachability of master IP



...and warn if it is not. Note that the master activates
the master IP in an asynchronous task and will continue
even if that fails.
Signed-off-by: Klaus Aehlig <aehlig@google.com>
Reviewed-by: Helga Velroyen <helgav@google.com>
parent 66a5f242
......@@ -1051,12 +1051,12 @@ def MasterFailover(no_voting=False):
total_timeout = 30
# Here we have a phase where no master should be running
def _check_ip():
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
def _check_ip(expected):
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT) != expected:
raise utils.RetryAgain()
try:
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout)
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout, args=[False])
except utils.RetryTimeout:
warning = ("The master IP is still reachable after %s seconds,"
" continuing but activating the master IP on the current"
......@@ -1079,6 +1079,19 @@ def MasterFailover(no_voting=False):
" %s, please check: %s", new_master, msg)
rcode = 1
# Finally verify that the new master managed to set up the master IP
# and warn if it didn't.
try:
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout, args=[True])
except utils.RetryTimeout:
warning = ("The master IP did not come up within %s seconds; the"
" cluster should still be working and reachable via %s,"
" but not via the master IP address"
% (total_timeout, new_master))
logging.warning("%s", warning)
warnings.append(warning)
rcode = 1
logging.info("Master failed over from %s to %s", old_master, new_master)
return rcode, warnings
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment