Commit 343c9ed2 authored by Hrvoje Ribicic

Add a RunWithLocks QA utility function

This patch adds a QA utility function that acquires a set of locks, and
attempts to run a given function with the locks in place. Should the
given function block, this function does not detect this - later
patches will address the issue.

An example of its use is provided by having the move-instance test
modified to use it.
Signed-off-by: Hrvoje Ribicic <riba@google.com>
Reviewed-by: Petr Pudlak <pudlak@google.com>
parent 6b924ca3
......@@ -1046,6 +1046,7 @@ qa_scripts = \
qa/qa_instance.py \
qa/qa_instance_utils.py \
qa/qa_job.py \
qa/qa_job_utils.py \
qa/qa_monitoring.py \
qa/qa_node.py \
qa/qa_os.py \
......
#
#
# Copyright (C) 2014 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""QA utility functions for testing jobs
"""
import re
from ganeti import constants
from ganeti import locking
from ganeti import utils
import qa_config
import qa_error
from qa_utils import AssertCommand, GetCommandOutput, GetObjectInfo
# Lock levels that RunWithLocks accepts; any other key in its "locks"
# argument makes it raise qa_error.Error.
AVAILABLE_LOCKS = [locking.LEVEL_NODE, ]
def _GetOutputFromMaster(cmd):
  """ Runs a command on the master node and hands back what it printed.

  @type cmd: string or list of string
  @param cmd: Either a ready-made shell command line, or a list of arguments
              that is turned into one via utils.ShellQuoteArgs.
  @return: the value returned by GetCommandOutput for the command, with the
           command's stderr redirected into its stdout

  """
  shell_line = cmd if isinstance(cmd, basestring) else utils.ShellQuoteArgs(cmd)

  # stderr is merged into stdout, as the stderr stream is not captured
  # properly on the buildbot
  master = qa_config.GetMasterNode()
  return GetCommandOutput(master.primary, shell_line + " 2>&1")
def ExecuteJobProducingCommand(cmd):
  """ Runs a job-submitting command on the master and extracts the job id.

  @type cmd: list of string
  @param cmd: The command to execute, broken into constituent components. It
              is expected to contain the --submit flag.
  @rtype: int
  @return: the job id found in the command output
  @raise qa_error.Error: if the output does not contain exactly one
                         "JobID: <number>" occurrence

  """
  output = _GetOutputFromMaster(cmd)
  matches = re.findall("JobID: ([0-9]+)", output)

  # Exactly one match is the only unambiguous outcome
  if len(matches) == 1:
    return int(matches[0])

  raise qa_error.Error("Cannot parse command output to find job id: output "
                       "is %s" % output)
def _StartDelayFunction(locks, timeout):
  """ Starts the gnt-debug delay option with the given locks and timeout.

  The delay is submitted as a job, and the job's execution log is then
  inspected for the single entry of type constants.ELOG_DELAY_TEST, which
  carries the path of the socket used to terminate the delay.

  @type locks: dict of string to list of string
  @param locks: The locks to acquire, per lock category. Only the entries
                under locking.LEVEL_NODE are used here.
  @type timeout: number
  @param timeout: The value passed to gnt-debug delay as its duration.
  @return: the socket path taken from the delay job's execution log
  @raise qa_error.Error: if the execution log does not contain exactly one
                         delay termination entry

  """
  # The interruptible switch must be used
  cmd = ["gnt-debug", "delay", "-i", "--submit", "--no-master"]
  cmd.extend("-n%s" % node for node in locks.get(locking.LEVEL_NODE, []))
  cmd.append(str(timeout))

  job_id = ExecuteJobProducingCommand(cmd)
  job_info = GetObjectInfo(["gnt-job", "info", str(job_id)])
  execution_logs = job_info[0]["Opcodes"][0]["Execution log"]

  # A list comprehension rather than filter(): the result is measured with
  # len() and indexed below, which needs a real list on every Python version
  termination_entries = [
    entry for entry in execution_logs
    if entry["Content"][1] == constants.ELOG_DELAY_TEST
  ]

  if len(termination_entries) != 1:
    raise qa_error.Error("Failure when trying to retrieve delay termination "
                         "information")

  # The content is a triple whose last element is a one-element sequence
  # holding the socket path
  _, _, (socket_path, ) = termination_entries[0]["Content"]

  return socket_path
def _TerminateDelayFunction(termination_socket):
  """ Ends a running delay by writing to its UNIX domain socket.

  @type termination_socket: string
  @param termination_socket: Path of the socket to send data to.

  """
  # socat forwards a single line from stdin to the socket
  socat_cmd = "echo a | socat -u stdin UNIX-CLIENT:%s" % termination_socket
  AssertCommand(socat_cmd)
# TODO: Can this be done as a decorator? Implement as needed.
def RunWithLocks(fn, locks, timeout, *args, **kwargs):
  """ Runs the given function, acquiring a set of locks beforehand.

  @type fn: function
  @param fn: The function to invoke.
  @type locks: dict of string to list of string
  @param locks: The locks to acquire, per lock category.
  @type timeout: number
  @param timeout: The number of seconds the locks should be held before
                  expiring.
  @raise qa_error.Error: if a lock category not listed in AVAILABLE_LOCKS is
                         requested

  This function allows a set of locks to be acquired in preparation for a QA
  test, to try and see if the function can run in parallel with other
  operations.

  The current version simply creates the locks, which expire after a given
  timeout, and attempts to invoke the provided function.

  This will probably block the QA, and future versions will address this.

  A default timeout is not provided by design - the test creator must make a
  good conservative estimate.

  The delay job holding the locks is terminated and the watcher is resumed
  even if the invoked function raises; the exception is then propagated to
  the caller.

  """
  # any() instead of testing the truthiness of filter(): the latter is only
  # meaningful where filter() returns a list
  if any(l_type not in AVAILABLE_LOCKS for l_type in locks):
    raise qa_error.Error("Attempted to acquire locks that cannot yet be "
                         "acquired in the course of a QA test.")

  # The watcher may interfere by issuing its own jobs - therefore pause it
  AssertCommand(["gnt-cluster", "watcher", "pause", "12h"])
  try:
    termination_socket = _StartDelayFunction(locks, timeout)
    try:
      fn(*args, **kwargs)
    finally:
      # Release the locks right away instead of leaving them held until the
      # timeout expires
      _TerminateDelayFunction(termination_socket)
  finally:
    # Revive the watcher, whatever happened above
    AssertCommand(["gnt-cluster", "watcher", "continue"])
......@@ -52,6 +52,7 @@ import qa_utils
from qa_instance import IsDiskReplacingSupported
from qa_instance import IsFailoverSupported
from qa_instance import IsMigrationSupported
from qa_job_utils import RunWithLocks
from qa_utils import (AssertEqual, AssertIn, AssertMatch, StartLocalCommand)
from qa_utils import InstanceCheck, INST_DOWN, INST_UP, FIRST_ARG
......@@ -917,7 +918,10 @@ def _InvokeMoveInstance(current_dest_inst, current_src_inst, rapi_pw_filename,
"--dest-secondary-node=%s" % snode,
])
else:
cmd.append("--iallocator=%s" % constants.IALLOC_HAIL)
cmd.extend([
"--iallocator=%s" % constants.IALLOC_HAIL,
"--opportunistic-tries=1",
])
cmd.extend([
"--net=0:mac=%s" % constants.VALUE_GENERATE,
......@@ -959,10 +963,14 @@ def TestInterClusterInstanceMove(src_instance, dest_instance,
snode = tnode
pnode = inodes[0]
# pnode:snode are the *current* nodes, so we move it first to tnode:pnode
_InvokeMoveInstance(dest_instance.name, src_instance.name, rapi_pw_file.name,
master.primary, perform_checks,
target_nodes=(tnode.primary, pnode.primary))
# pnode:snode are the *current* nodes, and the first move is an
# iallocator-guided move outside of pnode. The node lock for the pnode
# assures that this happens, and while we cannot be sure where the instance
# will land, it is a real move.
locks = {locking.LEVEL_NODE: [pnode.primary]}
RunWithLocks(_InvokeMoveInstance, locks, 600.0,
dest_instance.name, src_instance.name, rapi_pw_file.name,
master.primary, perform_checks)
# And then back to pnode:snode
_InvokeMoveInstance(src_instance.name, dest_instance.name, rapi_pw_file.name,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment