Commit cea3abbd authored by Andrea Spadaccini's avatar Andrea Spadaccini
Browse files

Merge branch 'stable-2.5' into devel-2.5



* stable-2.5:
  listrunner: Don't pass arguments if there are none
  ssh: Quote strings in error message
  utils.log: Write error messages to stderr
  Add signal handling doc to hbal man page
  Fix handling of cluster verify hooks
  Redistribute the RAPI certificate
  QA: Add tests for instance start/stop via RAPI
  RAPI: Fix wrong check on instance shutdown
  baserlib: Accept empty body in FillOpcode
Signed-off-by: Andrea Spadaccini <spadaccio@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
parents aeb24d97 0c009cc5
......@@ -2973,10 +2973,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
self._ErrorIf(test, self.ENODEHOOKS, node_name,
"Communication failure in hooks execution: %s", msg)
if res.offline or msg:
# No need to investigate payload if node is offline or gave an error.
# override manually lu_result here as _ErrorIf only
# overrides self.bad
lu_result = 1
# No need to investigate payload if node is offline or gave
# an error.
continue
for script, hkr, output in res.payload:
test = hkr == constants.HKR_FAIL
......@@ -2985,7 +2983,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
if test:
output = self._HOOKS_INDENT_RE.sub(" ", output)
feedback_fn("%s" % output)
lu_result = 0
lu_result = False
return lu_result
......@@ -3697,6 +3695,9 @@ def _ComputeAncillaryFiles(cluster, redist):
if not redist:
files_all.update(constants.ALL_CERT_FILES)
files_all.update(ssconf.SimpleStore().GetFileList())
else:
# we need to ship at least the RAPI certificate
files_all.add(constants.RAPI_CERT_FILE)
if cluster.modify_etc_hosts:
files_all.add(constants.ETC_HOSTS)
......
......@@ -193,10 +193,13 @@ def FillOpcode(opcls, body, static, rename=None):
@return: Opcode object
"""
CheckType(body, dict, "Body contents")
if body is None:
params = {}
else:
CheckType(body, dict, "Body contents")
# Make copy to be modified
params = body.copy()
# Make copy to be modified
params = body.copy()
if rename:
for old, new in rename.items():
......
......@@ -922,8 +922,6 @@ class R_2_instances_name_shutdown(baserlib.R_Generic):
@return: a job id
"""
baserlib.CheckType(self.request_body, dict, "Body contents")
no_remember = bool(self._checkIntVariable("no_remember"))
op = _ParseShutdownInstanceRequest(self.items[0], self.request_body,
bool(self.dryRun()), no_remember)
......
......@@ -229,8 +229,8 @@ class SshRunner:
result = utils.RunCmd(command)
if result.failed:
logging.error("Copy to node %s failed (%s) error %s,"
" command was %s",
logging.error("Copy to node %s failed (%s) error '%s',"
" command was '%s'",
node, result.fail_reason, result.output, result.cmd)
return not result.failed
......
......@@ -230,7 +230,7 @@ def SetupLogging(logfile, program, debug=0, stderr_logging=False,
if debug:
stderr_handler.setLevel(logging.NOTSET)
else:
stderr_handler.setLevel(logging.CRITICAL)
stderr_handler.setLevel(logging.ERROR)
root_logger.addHandler(stderr_handler)
if syslog in (constants.SYSLOG_YES, constants.SYSLOG_ONLY):
......
......@@ -362,6 +362,9 @@ The options that can be passed to the program are as follows:
jobset will be executed in parallel. The jobsets themselves are
executed serially.
The execution of the job series can be interrupted, see below for
signal handling.
-l *N*, --max-length=*N*
Restrict the solution to this length. This can be used for example
to automate the execution of the balancing.
......@@ -399,25 +402,45 @@ The options that can be passed to the program are as follows:
-V, --version
Just show the program version and exit.
SIGNAL HANDLING
---------------
When executing jobs via LUXI (using the ``-X`` option), normally hbal
will execute all jobs until either one errors out or all the jobs finish
successfully.
Since balancing can take a long time, it is possible to stop hbal early
in two ways:
- by sending a ``SIGINT`` (``^C``), hbal will register the termination
request, and will wait until the currently submitted jobs finish, at
which point it will exit (with exit code 1)
- by sending a ``SIGTERM``, hbal will immediately exit (with exit code
2); it is the responsibility of the user to follow up with Ganeti on
the result of the currently-executing jobs
Note that in any situation, it's perfectly safe to kill hbal, either via
the above signals or via any other signal (e.g. ``SIGQUIT``,
``SIGKILL``), since the jobs themselves are processed by Ganeti whereas
hbal (after submission) only watches their progression. In this case,
the user will again have to query Ganeti for job results.
EXIT STATUS
-----------
The exit status of the command will be zero, unless for some reason
the algorithm fatally failed (e.g. wrong node or instance data), or
(in case of job execution) any job has failed.
The exit status of the command will be zero, unless for some reason the
algorithm fatally failed (e.g. wrong node or instance data), or (in case
of job execution) either one of the jobs has failed or the balancing was
interrupted early.
BUGS
----
The program does not check its input data for consistency, and aborts
with cryptic errors messages in this case.
The program does not check all its input data for consistency, and
sometimes aborts with cryptic error messages when given invalid data.
The algorithm is not perfect.
The output format is not easily scriptable, and the program should
feed moves directly into Ganeti (either via RAPI or via a gnt-debug
input file).
EXAMPLE
-------
......
......@@ -225,6 +225,12 @@ def RunCommonInstanceTests(instance):
qa_rapi.TestRapiStoppedInstanceConsole, instance)
RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)
# Test shutdown/start via RAPI
RunTestIf(["instance-shutdown", "rapi"],
qa_rapi.TestRapiInstanceShutdown, instance)
RunTestIf(["instance-shutdown", "rapi"],
qa_rapi.TestRapiInstanceStartup, instance)
RunTestIf("instance-list", qa_instance.TestInstanceList)
RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)
......
......@@ -598,6 +598,16 @@ def TestRapiInstanceFailover(instance):
_WaitForRapiJob(_rapi_client.FailoverInstance(instance["name"]))
def TestRapiInstanceShutdown(instance):
  """Shut down the given instance through RAPI and wait for the job.

  @param instance: mapping describing the instance; only its C{"name"}
      entry is read here

  """
  shutdown_job = _rapi_client.ShutdownInstance(instance["name"])
  _WaitForRapiJob(shutdown_job)
def TestRapiInstanceStartup(instance):
  """Start the given instance through RAPI and wait for the job.

  @param instance: mapping describing the instance; only its C{"name"}
      entry is read here

  """
  startup_job = _rapi_client.StartupInstance(instance["name"])
  _WaitForRapiJob(startup_job)
def TestRapiInstanceRename(rename_source, rename_target):
"""Test renaming instance via RAPI"""
_WaitForRapiJob(_rapi_client.RenameInstance(rename_source, rename_target))
......
......@@ -376,8 +376,10 @@ def HostWorker(logdir, username, password, use_agent, hostname,
print " %s: uploading files" % hostname
upload_dir = UploadFiles(connection, executable,
filelist, logfile)
command = ("cd %s && ./%s %s" %
(upload_dir, os.path.basename(executable), exec_args))
command = ("cd %s && ./%s" %
(upload_dir, os.path.basename(executable)))
if exec_args:
command += " %s" % exec_args
print " %s: executing remote command" % hostname
cmd_result = RunRemoteCommand(connection, command, logfile)
if cmd_result is True:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment