Commit f154a7a3 authored by Michael Hanselmann

Add new “daemon-util” script to start/stop Ganeti daemons

Until now, Ganeti started and stopped its own daemons using custom functions.
To start a daemon, its binary was simply executed; to stop it again, the
appropriate signal was sent to it. Init scripts had to take care of the PID
file and other details themselves.

With this patch, a new script is added (“daemon-util”, installed in
$prefix/lib/ganeti/), centralizing the starting and stopping of daemons. The
provided example init script is adjusted to use this new script. Ganeti's code
no longer calls its own init script.
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>
parent 86d6bc2a
......@@ -31,6 +31,7 @@
/ganeti-[0-9]*.[0-9]*.[0-9]*
# daemons
/daemons/daemon-util
/daemons/ganeti-cleaner
# devel
......
......@@ -53,6 +53,7 @@ maintainer-clean-local:
CLEANFILES = \
autotools/replace_vars.sed \
daemons/daemon-util \
daemons/ganeti-cleaner \
devel/upload \
doc/examples/bash_completion \
......@@ -220,6 +221,9 @@ dist_tools_SCRIPTS = \
tools/cfgupgrade \
tools/lvmstrap
pkglib_SCRIPTS = \
daemons/daemon-util
EXTRA_DIST = \
NEWS \
pylintrc \
......@@ -227,6 +231,7 @@ EXTRA_DIST = \
autotools/check-python-code \
autotools/docbook-wrapper \
$(RUN_IN_TEMPDIR) \
daemons/daemon-util.in \
daemons/ganeti-cleaner.in \
devel/upload.in \
$(docdot) \
......@@ -342,7 +347,7 @@ devel/upload: devel/upload.in $(REPLACE_VARS_SED)
sed -f $(REPLACE_VARS_SED) < $< > $@
chmod u+x $@
daemons/ganeti-cleaner: daemons/ganeti-cleaner.in \
daemons/%: daemons/%.in \
$(REPLACE_VARS_SED)
sed -f $(REPLACE_VARS_SED) < $< > $@
chmod +x $@
......@@ -418,6 +423,7 @@ lib/_autoconf.py: Makefile stamp-directories
echo "LVM_STRIPECOUNT = $(LVM_STRIPECOUNT)"; \
echo "TOOLSDIR = '$(toolsdir)'"; \
echo "GNT_SCRIPTS = [$(foreach i,$(notdir $(gnt_scripts)),'$(i)',)]"; \
echo "PKGLIBDIR = '$(pkglibdir)'"; \
} > $@
$(REPLACE_VARS_SED): Makefile
......
......@@ -72,6 +72,10 @@ Details
(``rapi_users``)
- Added option to specify maximum timeout on instance shutdown
- Added ``--no-ssh-init`` option to ``gnt-cluster init``
- Added new helper script to start and stop Ganeti daemons
(``daemon-util``), with the intent to reduce the work necessary to
adjust Ganeti for non-Debian distributions and to start/stop daemons
from one place
- Added more unittests
- Fixed critical bug in ganeti-masterd startup
- Pass ``INSTANCE_REINSTALL`` variable to OS installation script when
......
#!/bin/bash
#
# Copyright (C) 2009 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
# Abort the script as soon as a command fails.
set -e

# Optional configuration file. It is sourced after the variables below are
# initialised, so it can override them. The path is quoted so that a
# substituted directory containing whitespace is still one word.
defaults_file="@SYSCONFDIR@/default/ganeti"

# Per-daemon argument lists, empty unless set by the defaults file
NODED_ARGS=
MASTERD_ARGS=
CONFD_ARGS=
RAPI_ARGS=

# Read defaults file if it exists and is not empty
if [[ -s "$defaults_file" ]]; then
  . "$defaults_file"
fi
# Prints the path of the PID file for the daemon named in $1
_daemon_pidfile() {
  local daemon_name="$1"

  echo "@LOCALSTATEDIR@/run/ganeti/${daemon_name}.pid"
}
# Checks whether daemon is running
#
# Arguments: $1 - daemon name (e.g. "ganeti-noded")
# The function's status is that of start-stop-daemon, which is asked to send
# signal 0 to the process recorded in the daemon's PID file. Exits the whole
# script with status 1 if no daemon name was given.
check() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    exit 1
  fi

  local name="$1"; shift

  # Quoted so that the PID file path always reaches start-stop-daemon as a
  # single word, even if the name or the install path contains whitespace.
  start-stop-daemon --stop --signal 0 --quiet \
    --pidfile "$(_daemon_pidfile "$name")"
}
# Starts a daemon
#
# Arguments: $1 - daemon name (e.g. "ganeti-masterd"); all further arguments
# are passed to the daemon, after the ones taken from $<DAEMON>_ARGS and
# $EXTRA_<DAEMON>_ARGS. Exits the whole script with status 1 if no daemon
# name was given.
start() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    exit 1
  fi

  local name="$1"; shift

  # Convert daemon name to uppercase after removing "ganeti-" prefix
  local ucname=$(tr a-z A-Z <<< "${name#ganeti-}")

  # Read $<daemon>_ARGS and $EXTRA_<daemon>_ARGS via indirect expansion.
  # The previous "eval local args=..." form word-split the EXTRA value into
  # additional arguments to "local", so a non-empty $EXTRA_<daemon>_ARGS
  # made "local" fail and the extra arguments never reached the daemon.
  # Avoiding eval also keeps the daemon name from being parsed as code.
  local args_var="${ucname}_ARGS"
  local extra_var="EXTRA_${ucname}_ARGS"
  local args="${!args_var} ${!extra_var}"

  # $args is intentionally unquoted: it holds a whitespace-separated list of
  # options that must be split into individual words.
  start-stop-daemon --start --quiet --oknodo \
    --pidfile "$(_daemon_pidfile "$name")" \
    --startas "@PREFIX@/sbin/$name" \
    -- $args "$@"
}
# Stops a daemon
#
# Arguments: $1 - daemon name (e.g. "ganeti-rapi")
# Delegates to start-stop-daemon with "--oknodo --retry 30" and the daemon's
# PID file. Exits the whole script with status 1 if no daemon name was given.
stop() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    exit 1
  fi

  local name="$1"; shift

  # Quoted so that the PID file path always reaches start-stop-daemon as a
  # single word, even if the name or the install path contains whitespace.
  start-stop-daemon --stop --quiet --oknodo --retry 30 \
    --pidfile "$(_daemon_pidfile "$name")"
}
# Starts a daemon if it's not yet running
#
# Arguments: $1 - daemon name (e.g. "ganeti-noded")
# Exits the whole script with status 1 if the daemon name is missing or
# empty, like the other commands do.
check_and_start() {
  # Explicit guard, consistent with check/start/stop. An empty name is
  # rejected too, which is what the unquoted "check $name" used to cause.
  if [[ "$#" -lt 1 || -z "$1" ]]; then
    echo 'Missing daemon name.' >&2
    exit 1
  fi

  local name="$1"

  if ! check "$name"; then
    start "$name"
  fi
}
# Brings up the daemons of the master role: first the master daemon, then
# the remote API daemon
start_master() {
  local master_daemon

  for master_daemon in ganeti-masterd ganeti-rapi; do
    start $master_daemon
  done
}
# Shuts down the daemons of the master role: first the remote API daemon,
# then the master daemon
stop_master() {
  local master_daemon

  for master_daemon in ganeti-rapi ganeti-masterd; do
    stop $master_daemon
  done
}
# Command-line entry point: the first argument names the action, all
# remaining arguments are handed to the function implementing it.
if (( $# < 1 )); then
  echo "Usage: $0 <action>" >&2
  exit 1
fi

orig_action=$1
shift

# Function names use underlines (_) where the command line uses dashes (-)
action=${orig_action//-/_}

# Refuse anything that is not a function defined in this script
declare -F "$action" >/dev/null 2>&1 || {
  echo "Unknown command: $orig_action" >&2
  exit 1
}

# Run the handler with the remaining arguments
$action "$@"
......@@ -78,28 +78,17 @@ def ShouldPause():
return bool(utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE))
def StartMaster():
"""Try to start the master daemon.
def EnsureDaemon(name):
"""Check for and start daemon if not alive.
"""
result = utils.RunCmd(['ganeti-masterd'])
result = utils.RunCmd([constants.DAEMON_UTIL, "check-and-start", name])
if result.failed:
logging.error("Can't start the master daemon: output '%s'", result.output)
return not result.failed
logging.error("Can't start daemon '%s', failure %s, output: %s",
name, result.fail_reason, result.output)
return False
def EnsureDaemon(daemon):
"""Check for and start daemon if not alive.
"""
pidfile = utils.DaemonPidFileName(daemon)
pid = utils.ReadPidFile(pidfile)
if pid == 0 or not utils.IsProcessAlive(pid): # no file or dead pid
logging.debug("Daemon '%s' not alive, trying to restart", daemon)
result = utils.RunCmd([daemon])
if not result:
logging.error("Can't start daemon '%s', failure %s, output: %s",
daemon, result.fail_reason, result.output)
return True
class WatcherState(object):
......@@ -503,7 +492,7 @@ def main():
except luxi.NoMasterError, err:
logging.warning("Master seems to be down (%s), trying to restart",
str(err))
if not StartMaster():
if not EnsureDaemon(constants.MASTERD):
logging.critical("Can't start the master, exiting")
sys.exit(constants.EXIT_FAILURE)
# else retry the connection
......
......@@ -14,21 +14,12 @@
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin
DESC="Ganeti cluster"
GANETIRUNDIR="@LOCALSTATEDIR@/run/ganeti"
GANETI_DEFAULTS_FILE="@SYSCONFDIR@/default/ganeti"
NODED="ganeti-noded"
NODED_ARGS=""
MASTERD="ganeti-masterd"
MASTERD_ARGS=""
CONFD="ganeti-confd"
CONFD_ARGS=""
RAPI="ganeti-rapi"
RAPI_ARGS=""
DAEMON_UTIL=@PKGLIBDIR@/daemon-util
SCRIPTNAME="@SYSCONFDIR@/init.d/ganeti"
......@@ -36,10 +27,6 @@ test -f "@PREFIX@/sbin/$NODED" || exit 0
. /lib/lsb/init-functions
if [ -s $GANETI_DEFAULTS_FILE ]; then
. $GANETI_DEFAULTS_FILE
fi
check_config() {
for fname in \
"@LOCALSTATEDIR@/lib/ganeti/server.pem"
......@@ -69,13 +56,9 @@ check_exitcode() {
start_action() {
# called as start_action daemon-name
local daemon="$1"; shift
local daemon="$1"
log_action_begin_msg "$daemon"
start-stop-daemon --start --quiet \
--pidfile "${GANETIRUNDIR}/${daemon}.pid" \
--startas "@PREFIX@/sbin/$daemon" \
--oknodo \
-- "$@"
$DAEMON_UTIL start "$@"
check_exitcode $?
}
......@@ -83,8 +66,7 @@ stop_action() {
# called as stop_action daemon-name
local daemon="$1"
log_action_begin_msg "$daemon"
start-stop-daemon --stop --quiet --oknodo \
--retry 30 --pidfile "${GANETIRUNDIR}/${daemon}.pid"
$DAEMON_UTIL stop "$@"
check_exitcode $?
}
......@@ -97,6 +79,19 @@ maybe_do() {
fi
}
# Starts the Ganeti daemons in the order noded, masterd, confd, rapi, after
# check_config has run.
#
# Arguments: $1 - passed through to maybe_do as its first argument
# (presumably the optional daemon name from the command line that restricts
# the action to one daemon; verify against maybe_do).
start_all() {
  check_config

  # This must call start_action; the loop previously called stop_action and
  # thus stopped the daemons it was supposed to start.
  for i in $NODED $MASTERD $CONFD $RAPI; do
    maybe_do "$1" start_action $i
  done
}
# Stops the Ganeti daemons in the order rapi, confd, masterd, noded.
# $1 is passed through to maybe_do as its first argument.
stop_all() {
  for i in $RAPI $CONFD $MASTERD $NODED
  do
    maybe_do "$1" stop_action $i
  done
}
if [ -n "$2" -a \
"$2" != "$NODED" -a \
"$2" != "$CONFD" -a \
......@@ -109,29 +104,15 @@ fi
case "$1" in
start)
log_daemon_msg "Starting $DESC" "$2"
check_config
maybe_do "$2" start_action $NODED $NODED_ARGS
maybe_do "$2" start_action $MASTERD $MASTERD_ARGS
maybe_do "$2" start_action $CONFD $CONFD_ARGS
maybe_do "$2" start_action $RAPI $RAPI_ARGS
start_all "$2"
;;
stop)
log_daemon_msg "Stopping $DESC" "$2"
maybe_do "$2" stop_action $RAPI
maybe_do "$2" stop_action $CONFD
maybe_do "$2" stop_action $MASTERD
maybe_do "$2" stop_action $NODED
stop_all "$2"
;;
restart|force-reload)
maybe_do "$2" stop_action $RAPI
maybe_do "$2" stop_action $CONFD
maybe_do "$2" stop_action $MASTERD
maybe_do "$2" stop_action $NODED
check_config
maybe_do "$2" start_action $NODED $NODED_ARGS
maybe_do "$2" start_action $MASTERD $MASTERD_ARGS
maybe_do "$2" start_action $CONFD $CONFD_ARGS
maybe_do "$2" start_action $RAPI $RAPI_ARGS
stop_all "$2"
start_all "$2"
;;
*)
log_success_msg "Usage: $SCRIPTNAME {start|stop|force-reload|restart}"
......
......@@ -255,21 +255,20 @@ def StartMaster(start_daemons, no_voting):
# and now start the master and rapi daemons
if start_daemons:
daemons_params = {
'ganeti-masterd': [],
'ganeti-rapi': [],
}
if no_voting:
daemons_params['ganeti-masterd'].append('--no-voting')
daemons_params['ganeti-masterd'].append('--yes-do-it')
for daemon in daemons_params:
cmd = [daemon]
cmd.extend(daemons_params[daemon])
result = utils.RunCmd(cmd)
if result.failed:
msg = "Can't start daemon %s: %s" % (daemon, result.output)
logging.error(msg)
err_msgs.append(msg)
masterd_args = "--no-voting --yes-do-it"
else:
masterd_args = ""
env = {
"EXTRA_MASTERD_ARGS": masterd_args,
}
result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
if result.failed:
msg = "Can't start Ganeti master: %s" % result.output
logging.error(msg)
err_msgs.append(msg)
if err_msgs:
_Fail("; ".join(err_msgs))
......@@ -301,9 +300,11 @@ def StopMaster(stop_daemons):
# but otherwise ignore the failure
if stop_daemons:
# stop/kill the rapi and the master daemon
for daemon in constants.RAPI, constants.MASTERD:
utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))
result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
if result.failed:
logging.error("Could not stop Ganeti master, command %s had exitcode %s"
" and error %s",
result.cmd, result.exit_code, result.output)
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
......@@ -385,10 +386,10 @@ def LeaveCluster(modify_ssh_setup):
except:
logging.exception("Error while removing cluster secrets")
confd_pid = utils.ReadPidFile(utils.DaemonPidFileName(constants.CONFD))
if confd_pid:
utils.KillProcess(confd_pid, timeout=2)
result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD])
if result.failed:
logging.error("Command %s failed with exitcode %s and error %s",
result.cmd, result.exit_code, result.output)
# Raise a custom exception (handled in ganeti-noded)
raise errors.QuitGanetiException(True, 'Shutdown scheduled')
......@@ -2435,15 +2436,18 @@ def DemoteFromMC():
master, myself = ssconf.GetMasterAndMyself()
if master == myself:
_Fail("ssconf status shows I'm the master node, will not demote")
pid_file = utils.DaemonPidFileName(constants.MASTERD)
if utils.IsProcessAlive(utils.ReadPidFile(pid_file)):
result = utils.RunCmd([constants.DAEMON_UTIL, "check", constants.MASTERD])
if not result.failed:
_Fail("The master daemon is running, will not demote")
try:
if os.path.isfile(constants.CLUSTER_CONF_FILE):
utils.CreateBackup(constants.CLUSTER_CONF_FILE)
except EnvironmentError, err:
if err.errno != errno.ENOENT:
_Fail("Error while backing up cluster file: %s", err, exc=True)
utils.RemoveFile(constants.CLUSTER_CONF_FILE)
......
......@@ -126,8 +126,7 @@ def _InitGanetiServerSetup(master_name):
if not os.path.exists(constants.HMAC_CLUSTER_KEY):
GenerateHmacKey(constants.HMAC_CLUSTER_KEY)
result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
result = utils.RunCmd([constants.DAEMON_UTIL, "start", constants.NODED])
if result.failed:
raise errors.OpExecError("Could not start the node daemon, command %s"
" had exitcode %s and error %s" %
......@@ -241,12 +240,6 @@ def InitCluster(cluster_name, mac_prefix,
(master_netdev,
result.output.strip()), errors.ECODE_INVAL)
if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
raise errors.OpPrereqError("Init.d script '%s' missing or not"
" executable." % constants.NODE_INITD_SCRIPT,
errors.ECODE_ENVIRON)
dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE)]
utils.EnsureDirs(dirs)
......@@ -416,13 +409,13 @@ def SetupNodeDaemon(cluster_name, node, ssh_key_check):
"cat > '%s' << '!EOF.' && \n"
"%s!EOF.\n"
"chmod 0400 %s %s %s && "
"%s restart" %
"%s start %s" %
(constants.SSL_CERT_FILE, noded_cert,
constants.RAPI_CERT_FILE, rapi_cert,
constants.HMAC_CLUSTER_KEY, hmac_key,
constants.SSL_CERT_FILE, constants.RAPI_CERT_FILE,
constants.HMAC_CLUSTER_KEY,
constants.NODE_INITD_SCRIPT))
constants.DAEMON_UTIL, constants.NODED))
result = sshrunner.Run(node, 'root', mycommand, batch=False,
ask_key=ssh_key_check,
......
......@@ -104,6 +104,7 @@ INSTANCE_UPFILE = RUN_GANETI_DIR + "/instance-status"
SSH_KNOWN_HOSTS_FILE = DATA_DIR + "/known_hosts"
RAPI_USERS_FILE = DATA_DIR + "/rapi_users"
QUEUE_DIR = DATA_DIR + "/queue"
DAEMON_UTIL = _autoconf.PKGLIBDIR + "/daemon-util"
ETC_HOSTS = "/etc/hosts"
DEFAULT_FILE_STORAGE_DIR = _autoconf.FILE_STORAGE_DIR
SYSCONFDIR = _autoconf.SYSCONFDIR
......@@ -112,8 +113,6 @@ CONF_DIR = SYSCONFDIR + "/ganeti"
MASTER_SOCKET = SOCKET_DIR + "/ganeti-master"
NODE_INITD_SCRIPT = SYSCONFDIR + "/init.d/ganeti"
NODED = "ganeti-noded"
CONFD = "ganeti-confd"
RAPI = "ganeti-rapi"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment