Commit 5e68cc59 authored by Klaus Aehlig
Browse files

Merge branch 'stable-2.11' into stable-2.12



* stable-2.11
  (no changes)

* stable-2.10
  fix typos in design-file-based-storage.rst doc
  Switch to our osminor
  Provide an alternative for os.minor working around its bug
  Fix typo
  CanTieredAlloc test: make instances big enough
  After master-failover verify reachability of master IP
  Report failure to deactivate old master IP in exit code
  Expose warnings during master-failover
  Fix manpage for gnt-cluster copyfile

Conflicts:
	lib/bootstrap.py: adapt function to new IO specification
	lib/utils/storage.py: trivial
Signed-off-by: Klaus Aehlig <aehlig@google.com>
Reviewed-by: Petr Pudlak <pudlak@google.com>
parents 6b7ecdcf 8351486d
......@@ -175,7 +175,7 @@ proposed, and presented hereafter.
``Configuration query daemon (RConfD)``
It is written in Haskell, and it corresponds to the old ConfD. It will run on
all the master candidates and it will serve information about the the static
all the master candidates and it will serve information about the static
configuration of the cluster (the one contained in ``config.data``). The
provided information will be highly available (as in: a response will be
available as long as a stable-enough connection between the client and at
......
......@@ -21,7 +21,7 @@ Goals:
* reuse existing image files
* allow Ganeti to initalize the cluster without checking for a volume
* allow Ganeti to initialize the cluster without checking for a volume
group (e.g. xenvg)
Non Goals:
......@@ -267,7 +267,7 @@ Export/Import instance
Provided "dump/restore" is used in the "export" and "import" guest-os
scripts, there are no modifications needed when file-based instances are
exported/imported. If any other backup-tool (which requires access to
the mounted file-system) is used then the image file can be temporaily
the mounted file-system) is used then the image file can be temporarily
mounted. This can be done in different ways:
Mount a raw image file via loopback driver::
......
......@@ -1009,6 +1009,7 @@ def MasterFailover(no_voting=False):
@param no_voting: force the operation without remote nodes agreement
(dangerous)
@returns: the pair of an exit code and warnings to display
"""
sstore = ssconf.SimpleStore()
......@@ -1049,6 +1050,7 @@ def MasterFailover(no_voting=False):
# end checks
rcode = 0
warnings = []
logging.info("Setting master to %s, old master: %s", new_master, old_master)
......@@ -1102,24 +1104,30 @@ def MasterFailover(no_voting=False):
msg = result.fail_msg
if msg:
logging.warning("Could not disable the master IP: %s", msg)
warning = "Could not disable the master IP: %s" % (msg,)
logging.warning("%s", warning)
warnings.append(warning)
result = runner.call_node_stop_master(old_master)
msg = result.fail_msg
if msg:
logging.error("Could not disable the master role on the old master"
" %s, please disable manually: %s", old_master, msg)
warning = ("Could not disable the master role on the old master"
" %s, please disable manually: %s" % (old_master, msg))
logging.error("%s", warning)
warnings.append(warning)
except errors.ConfigurationError, err:
logging.error("Error while trying to set the new master: %s",
str(err))
return 1
return 1, warnings
finally:
# stop WConfd again:
result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop", constants.WCONFD])
if result.failed:
logging.error("Could not stop the configuration daemon,"
" command %s had exitcode %s and error %s",
result.cmd, result.exit_code, result.output)
warning = ("Could not stop the configuration daemon,"
" command %s had exitcode %s and error %s"
% (result.cmd, result.exit_code, result.output))
logging.error("%s", warning)
rcode = 1
logging.info("Checking master IP non-reachability...")
......@@ -1127,16 +1135,19 @@ def MasterFailover(no_voting=False):
total_timeout = 30
# Here we have a phase where no master should be running
def _check_ip():
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
def _check_ip(expected):
if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT) != expected:
raise utils.RetryAgain()
try:
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout)
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout, args=[False])
except utils.RetryTimeout:
logging.warning("The master IP is still reachable after %s seconds,"
" continuing but activating the master on the current"
" node will probably fail", total_timeout)
warning = ("The master IP is still reachable after %s seconds,"
" continuing but activating the master IP on the current"
" node will probably fail" % total_timeout)
logging.warning("%s", warning)
warnings.append(warning)
rcode = 1
if jstore.CheckDrainFlag():
logging.info("Undraining job queue")
......@@ -1152,8 +1163,21 @@ def MasterFailover(no_voting=False):
" %s, please check: %s", new_master, msg)
rcode = 1
# Finally verify that the new master managed to set up the master IP
# and warn if it didn't.
try:
utils.Retry(_check_ip, (1, 1.5, 5), total_timeout, args=[True])
except utils.RetryTimeout:
warning = ("The master IP did not come up within %s seconds; the"
" cluster should still be working and reachable via %s,"
" but not via the master IP address"
% (total_timeout, new_master))
logging.warning("%s", warning)
warnings.append(warning)
rcode = 1
logging.info("Master failed over from %s to %s", old_master, new_master)
return rcode
return rcode, warnings
def GetMaster():
......
......@@ -858,7 +858,10 @@ def MasterFailover(opts, args):
if not AskUser(usertext):
return 1
return bootstrap.MasterFailover(no_voting=opts.no_voting)
rvlaue, msgs = bootstrap.MasterFailover(no_voting=opts.no_voting)
for msg in msgs:
ToStderr(msg)
return rvlaue
def MasterPing(opts, args):
......
......@@ -811,7 +811,7 @@ class PersistentBlockDevice(base.BlockDev):
return False
self.major = os.major(st.st_rdev)
self.minor = os.minor(st.st_rdev)
self.minor = utils.osminor(st.st_rdev)
self.attached = True
return True
......@@ -948,7 +948,7 @@ class RADOSBlockDevice(base.BlockDev):
return False
self.major = os.major(st.st_rdev)
self.minor = os.minor(st.st_rdev)
self.minor = utils.osminor(st.st_rdev)
self.attached = True
return True
......@@ -1315,7 +1315,7 @@ class ExtStorageDevice(base.BlockDev):
return False
self.major = os.major(st.st_rdev)
self.minor = os.minor(st.st_rdev)
self.minor = utils.osminor(st.st_rdev)
self.attached = True
return True
......
......@@ -215,3 +215,13 @@ def GetDiskLabels(prefix, num_disks, start=0):
for i in range(start, num_disks):
yield prefix + _GetDiskSuffix(i)
def osminor(dev):
  """Return the device minor number from a raw device number.

  This is a replacement for os.minor working around the issue that
  Python's os.minor still has the old definition. See Ganeti issue
  1058 for more details.

  @param dev: raw device number (callers pass the C{st_rdev} field of a
      stat result)
  @returns: the minor number encoded in C{dev}

  """
  # The low byte of the minor number is stored in bits 0-7 of dev. The
  # remaining minor bits live in bits 20 and up, so shifting right by 12
  # and clearing the low byte moves them to bit 8 and upwards.
  return (dev & 0xff) | ((dev >> 12) & ~0xff)
......@@ -84,7 +84,7 @@ Passing the ``--use-replication-network`` option will cause the
copy to be done over the replication network (only matters if the
primary/secondary IPs are different). Example::
# gnt-cluster -n node1.example.com -n node2.example.com copyfile /tmp/test
# gnt-cluster copyfile -n node1.example.com -n node2.example.com /tmp/test
This will copy the file /tmp/test from the current node to the two
named nodes.
......
......@@ -170,7 +170,7 @@ prop_CanTieredAlloc :: Property
prop_CanTieredAlloc =
forAll (choose (2, 5)) $ \count ->
forAll (liftM (Node.setPolicy Types.defIPolicy)
(genOnlineNode `suchThat` isNodeBig 4)) $ \node ->
(genOnlineNode `suchThat` isNodeBig 5)) $ \node ->
forAll (genInstanceMaybeBiggerThanNode node) $ \inst ->
let nl = makeSmallCluster node count
il = Container.empty
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment