Commit 8351486d authored by Klaus Aehlig
Browse files

Merge branch 'stable-2.10' into stable-2.11



* stable-2.10
  fix typos in design-file-based-storage.rst doc
  Switch to our osminor
  Provide an alternative for os.minor working around its bug
  Fix typo
  CanTieredAlloc test: make instances big enough
  After master-failover verify reachability of master IP
  Report failure to deactivate old master IP in exit code
  Expose warnings during master-failover
  Fix manpage for gnt-cluster copyfile
Signed-off-by: Klaus Aehlig <aehlig@google.com>
Reviewed-by: Helga Velroyen <helgav@google.com>
parents a56d29a6 70f22bae
......@@ -175,7 +175,7 @@ proposed, and presented hereafter.
``Configuration query daemon (RConfD)``
It is written in Haskell, and it corresponds to the old ConfD. It will run on
all the master candidates and it will serve information about the the static
all the master candidates and it will serve information about the static
configuration of the cluster (the one contained in ``config.data``). The
provided information will be highly available (as in: a response will be
available as long as a stable-enough connection between the client and at
......
......@@ -21,7 +21,7 @@ Goals:
* reuse existing image files
* allow Ganeti to initalize the cluster without checking for a volume
* allow Ganeti to initialize the cluster without checking for a volume
group (e.g. xenvg)
Non Goals:
......@@ -267,7 +267,7 @@ Export/Import instance
Provided "dump/restore" is used in the "export" and "import" guest-os
scripts, there are no modifications needed when file-based instances are
exported/imported. If any other backup-tool (which requires access to
the mounted file-system) is used then the image file can be temporaily
the mounted file-system) is used then the image file can be temporarily
mounted. This can be done in different ways:
Mount a raw image file via loopback driver::
......
......@@ -985,6 +985,7 @@ def MasterFailover(no_voting=False):
@param no_voting: force the operation without remote nodes agreement
(dangerous)
@returns: the pair of an exit code and warnings to display
"""
sstore = ssconf.SimpleStore()
......@@ -1025,6 +1026,7 @@ def MasterFailover(no_voting=False):
# end checks
rcode = 0
warnings = []
logging.info("Setting master to %s, old master: %s", new_master, old_master)
......@@ -1071,13 +1073,17 @@ def MasterFailover(no_voting=False):
msg = result.fail_msg
if msg:
logging.warning("Could not disable the master IP: %s", msg)
warning = "Could not disable the master IP: %s" % (msg,)
logging.warning("%s", warning)
warnings.append(warning)
result = runner.call_node_stop_master(old_master)
msg = result.fail_msg
if msg:
logging.error("Could not disable the master role on the old master"
" %s, please disable manually: %s", old_master, msg)
warning = ("Could not disable the master role on the old master"
" %s, please disable manually: %s" % (old_master, msg))
logging.error("%s", warning)
warnings.append(warning)
logging.info("Checking master IP non-reachability...")
......@@ -1085,16 +1091,19 @@ def MasterFailover(no_voting=False):
total_timeout = 30
# Here we have a phase where no master should be running
def _check_ip():
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
def _check_ip(expected):
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT) != expected:
raise utils.RetryAgain()
try:
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout)
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout, args=[False])
except utils.RetryTimeout:
logging.warning("The master IP is still reachable after %s seconds,"
" continuing but activating the master on the current"
" node will probably fail", total_timeout)
warning = ("The master IP is still reachable after %s seconds,"
" continuing but activating the master IP on the current"
" node will probably fail" % total_timeout)
logging.warning("%s", warning)
warnings.append(warning)
rcode = 1
if jstore.CheckDrainFlag():
logging.info("Undraining job queue")
......@@ -1110,8 +1119,21 @@ def MasterFailover(no_voting=False):
" %s, please check: %s", new_master, msg)
rcode = 1
# Finally verify that the new master managed to set up the master IP
# and warn if it didn't.
try:
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout, args=[True])
except utils.RetryTimeout:
warning = ("The master IP did not come up within %s seconds; the"
" cluster should still be working and reachable via %s,"
" but not via the master IP address"
% (total_timeout, new_master))
logging.warning("%s", warning)
warnings.append(warning)
rcode = 1
logging.info("Master failed over from %s to %s", old_master, new_master)
return rcode
return rcode, warnings
def GetMaster():
......
......@@ -829,7 +829,10 @@ def MasterFailover(opts, args):
if not AskUser(usertext):
return 1
return bootstrap.MasterFailover(no_voting=opts.no_voting)
rvlaue, msgs = bootstrap.MasterFailover(no_voting=opts.no_voting)
for msg in msgs:
ToStderr(msg)
return rvlaue
def MasterPing(opts, args):
......
......@@ -803,7 +803,7 @@ class PersistentBlockDevice(base.BlockDev):
return False
self.major = os.major(st.st_rdev)
self.minor = os.minor(st.st_rdev)
self.minor = utils.osminor(st.st_rdev)
self.attached = True
return True
......@@ -940,7 +940,7 @@ class RADOSBlockDevice(base.BlockDev):
return False
self.major = os.major(st.st_rdev)
self.minor = os.minor(st.st_rdev)
self.minor = utils.osminor(st.st_rdev)
self.attached = True
return True
......@@ -1307,7 +1307,7 @@ class ExtStorageDevice(base.BlockDev):
return False
self.major = os.major(st.st_rdev)
self.minor = os.minor(st.st_rdev)
self.minor = utils.osminor(st.st_rdev)
self.attached = True
return True
......
......@@ -182,3 +182,13 @@ def LookupSpaceInfoByStorageType(storage_space_info, storage_type):
logging.warning("Storage space information requested for"
" ambiguous storage type '%s'.", storage_type)
return result
def osminor(dev):
  """Extract the minor number of a device from its raw device number.

  Drop-in substitute for os.minor, which (see Ganeti issue 1058) still
  implements the old definition of the minor number.

  The result combines the lowest 8 bits of C{dev} with the bits of
  C{dev} from bit 20 upwards, moved down by 12 positions.

  @param dev: raw device number, e.g. the C{st_rdev} field of a stat result
  @return: the minor number encoded in C{dev}

  """
  # The minor number is stored in two separate pieces of the raw value.
  low_part = dev & 0xff
  high_part = (dev >> 12) & ~0xff
  return low_part | high_part
......@@ -84,7 +84,7 @@ Passing the ``--use-replication-network`` option will cause the
copy to be done over the replication network (only matters if the
primary/secondary IPs are different). Example::
# gnt-cluster -n node1.example.com -n node2.example.com copyfile /tmp/test
# gnt-cluster copyfile -n node1.example.com -n node2.example.com /tmp/test
This will copy the file /tmp/test from the current node to the two
named nodes.
......
......@@ -170,7 +170,7 @@ prop_CanTieredAlloc :: Property
prop_CanTieredAlloc =
forAll (choose (2, 5)) $ \count ->
forAll (liftM (Node.setPolicy Types.defIPolicy)
(genOnlineNode `suchThat` isNodeBig 4)) $ \node ->
(genOnlineNode `suchThat` isNodeBig 5)) $ \node ->
forAll (genInstanceMaybeBiggerThanNode node) $ \inst ->
let nl = makeSmallCluster node count
il = Container.empty
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment