Commit a8083063 authored by Iustin Pop's avatar Iustin Pop

Initial commit.

parent 676f2b54
This diff is collapsed.
Installation of the software
============================
Before installing, please verify that you have the following programs:
- lvm 2
- ssh
- fping
- python twisted library (the core is enough)
- python openssl bindings
To install, simply do ./configure && make && make install
This will install the software under /usr/local. You then need to copy
ganeti.initd to /etc/init.d and integrate it into your boot sequence
(``chkconfig``, ``update-rc.d``, etc.).
Cluster initialisation
======================
Before initialising the cluster, on each node you need to create the following
directories:
- /etc/ganeti
- /var/log/ganeti
- /var/lib/ganeti
- /srv/ganeti and /srv/ganeti/os
After this, use ``gnt-cluster init``.
# standard automake rules
SUBDIRS = man lib scripts daemons docs testing tools
EXTRA_DIST = ganeti.initd
# custom rules
depgraph: depgraph.png
depgraph.png: depgraph.dot
dot -Tpng -o $@ $<
depgraph.ps: depgraph.dot
dot -Tps -o $@ $<
depgraph.dot: ganeti/*.py
pylint.python2.4 --indent-string ' ' --rcfile=/dev/null --reports y --int-import-graph $@ --persistent n ganeti >/dev/null
Ganeti 1.2
==========
For installation instructions, read the INSTALL file.
For a brief introduction, read the ganeti(7) manpage and the other pages
it suggests.
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
AC_INIT(ganeti, 1.2a, ganeti@googlegroups.com)
AM_INIT_AUTOMAKE(foreign)
# Checks for programs.
AC_PROG_INSTALL
# Checks for python
AM_PATH_PYTHON(2.4)
# Checks for libraries.
# Checks for header files.
# Checks for typedefs, structures, and compiler characteristics.
# Checks for library functions.
AC_CONFIG_FILES([Makefile man/Makefile docs/Makefile
testing/Makefile tools/Makefile
lib/Makefile scripts/Makefile daemons/Makefile])
AC_OUTPUT
dist_sbin_SCRIPTS = ganeti-noded ganeti-watcher
This diff is collapsed.
#!/usr/bin/python
#
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Tool to restart erroneously downed virtual machines.
This program and set of classes implement a watchdog to restart
virtual machines in a Ganeti cluster that have crashed or been killed
by a node reboot. Run from cron or similar.
"""
# File that receives the watcher's output unless --debug is given.
LOGFILE = '/var/log/ganeti/watcher.log'
# Number of restart attempts made for an instance before giving up.
MAXTRIES = 5
# Instance states for which a restart is attempted.
BAD_STATES = ['stopped']
# Instance states for which nothing is attempted; any recorded attempts
# for such an instance are dropped.
HELPLESS_STATES = ['(node down)']
# Severity labels used as the first word of each logged message.
NOTICE = 'NOTICE'
ERROR = 'ERROR'
import os
import sys
import time
import fcntl
import errno
from optparse import OptionParser
from ganeti import utils
from ganeti import constants
class Error(Exception):
  """Watcher-specific exception; raised by DoCmd when a command fails."""
def Indent(s, prefix='| '):
  """Put a prefix in front of every line of a text.

  Args:
    s: The text to indent
    prefix: The string placed before each line

  Returns:
    The prefixed lines joined by newlines and terminated by exactly one
    newline; an empty text yields the prefix followed by a newline.
  """
  line_break = '\n' + prefix
  body = line_break.join(s.splitlines())
  return "%s%s\n" % (prefix, body)
def DoCmd(cmd):
  """Run a command via utils.RunCmd and require that it succeeds.

  Args:
    cmd: the command to run, passed unchanged to utils.RunCmd.

  Returns:
    The result object returned by utils.RunCmd.

  Raises:
    Error: if the result is marked as failed. The message contains the
      command, the failure reason and the captured stdout and stderr.
  """
  result = utils.RunCmd(cmd)
  if not result.failed:
    return result
  details = (repr(cmd),
             Indent(result.fail_reason),
             Indent(result.stdout),
             Indent(result.stderr))
  raise Error("Command %s failed:\n%s\nstdout:\n%sstderr:\n%s" % details)
class RestarterState(object):
  """Interface to a state file recording restart attempts.

  The file named by constants.WATCHER_STATEFILE holds one record per line
  in the form "name:when:count". Creating an instance opens the file
  (creating it if needed), takes an exclusive non-blocking lock on it and
  loads the records. Save() writes the records back, releases the lock and
  closes the file.

  Methods:
    NumberOfAttempts(instance): returns the number of times in succession
      a restart has been attempted of the given instance.
    RecordAttempt(instance): records one more restart attempt, stamped
      with the current time.
    Remove(instance): removes the record of the instance, if it exists.
    Save(): saves all records to file, releases lock and closes file.
  """
  def __init__(self):
    """Open, lock, read and parse the state file.

    Raises:
      StandardError: if the file is already locked by someone else.
      IOError: if locking fails for any other reason.
    """
    # The two-step dance below is necessary to allow both opening existing
    # file read/write and creating if not existing. Vanilla open will truncate
    # an existing file -or- allow creating if not existing.
    fd = os.open(constants.WATCHER_STATEFILE, os.O_RDWR | os.O_CREAT)
    f = os.fdopen(fd, 'w+')
    try:
      fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
      # sys.exc_info() instead of "except IOError, x" keeps this valid on
      # Python 2.4 as well as on later syntax.
      err = sys.exc_info()[1]
      # Do not leak the descriptor when we fail to get the lock.
      f.close()
      if err.errno == errno.EAGAIN:
        raise StandardError('State file already locked')
      raise err

    self.statefile = f
    self.inst_map = {}
    self._LoadRecords(f)

  def _LoadRecords(self, lines):
    """Fill self.inst_map from an iterable of "name:when:count" lines.

    Blank or malformed lines are ignored: a single damaged record must not
    make every later run fail, since the file is only rewritten by a run
    that gets past loading it.
    """
    for line in lines:
      try:
        name, when, count = line.rstrip().split(':')
        record = (int(when), int(count))
      except ValueError:
        continue
      self.inst_map[name] = record

  def NumberOfAttempts(self, instance):
    """Returns number of previous restart attempts.

    Args:
      instance - the instance to look up.
    """
    assert self.statefile

    if instance.name in self.inst_map:
      return self.inst_map[instance.name][1]

    return 0

  def RecordAttempt(self, instance):
    """Record a restart attempt.

    Args:
      instance - the instance being restarted
    """
    assert self.statefile

    when = time.time()

    self.inst_map[instance.name] = (when, 1 + self.NumberOfAttempts(instance))

  def Remove(self, instance):
    """Update state to reflect that a machine is running, i.e. remove record.

    Args:
      instance - the instance to remove from books

    This method removes the record for the given instance; it does nothing
    if there is no such record.
    """
    assert self.statefile

    if instance.name in self.inst_map:
      del self.inst_map[instance.name]

  def Save(self):
    """Save records to file, then unlock and close file.

    After this call the object can no longer be used.
    """
    assert self.statefile

    self.statefile.seek(0)
    self.statefile.truncate()

    for name in self.inst_map:
      self.statefile.write("%s:%d:%d\n" % ((name,) + self.inst_map[name]))

    # Push the data out while we still hold the lock, so that the next
    # lock holder never reads a truncated or partially written file.
    self.statefile.flush()
    fcntl.flock(self.statefile.fileno(), fcntl.LOCK_UN)

    self.statefile.close()
    self.statefile = None
class Instance(object):
  """A virtual machine instance as seen by the watcher.

  Attributes:
    name: the name of the instance
    state: the state string reported for the instance

  Methods:
    Restart(): issue the command that starts the instance.
  """
  def __init__(self, name, state):
    self.name, self.state = name, state

  def Restart(self):
    """Run "gnt-instance startup" for this instance through DoCmd."""
    startup_cmd = ['gnt-instance', 'startup', '--lock-retries=15']
    startup_cmd.append(self.name)
    DoCmd(startup_cmd)
class InstanceList(object):
  """The virtual machine instances of the cluster that the watcher handles.

  The list is built once, at construction time, from the output of the
  command in the class attribute cmd. Iterating over the object yields
  Instance objects.
  """
  cmd = ['gnt-instance', 'list', '--lock-retries=15',
         '-o', 'name,admin_state,oper_state', '--no-headers', '--separator=:']

  def __init__(self):
    output = DoCmd(self.cmd).stdout

    self.instances = []
    for row in output.splitlines():
      columns = [col.strip() for col in row.split(':')]
      # Ignore rows that do not have exactly three fields, and instances
      # whose admin_state is "no" (no autostart, we don't care about them).
      if len(columns) != 3 or columns[1] == "no":
        continue
      self.instances.append(Instance(columns[0], columns[2]))

  def __iter__(self):
    return iter(self.instances)
class Message(object):
  """A notice or error message together with its creation time.

  Attributes:
    level: the severity label
    msg: the message text
    when: creation time as returned by time.time()
  """
  def __init__(self, level, msg):
    self.when = time.time()
    self.level = level
    self.msg = msg

  def __str__(self):
    header = ' '.join([self.level, time.ctime(self.when)])
    return header + '\n' + Indent(self.msg)
class Restarter(object):
  """Encapsulate the logic for restarting erroneously halted virtual machines.

  The calling program should periodically instantiate me and call Run().
  This will traverse the list of instances, and make up to MAXTRIES attempts
  to restart machines that are down.

  Attributes:
    instances: the InstanceList obtained at construction time
    messages: list of Message objects collected by Run()
  """
  def __init__(self):
    self.instances = InstanceList()
    self.messages = []

  def Run(self):
    """Make a pass over the list of instances, restarting downed ones.

    The attempt records are always saved and the state file unlocked, even
    if an unexpected exception interrupts the pass; otherwise the attempts
    made so far would be forgotten and retried without limit.
    """
    notepad = RestarterState()

    try:
      for instance in self.instances:
        if instance.state in BAD_STATES:
          n = notepad.NumberOfAttempts(instance)

          if n > MAXTRIES:
            # stay quiet.
            continue

          if n == MAXTRIES:
            # Record once more so that later passes take the quiet branch.
            notepad.RecordAttempt(instance)
            self.messages.append(Message(ERROR, "Could not restart %s for %d"
                                         " times, giving up..." %
                                         (instance.name, MAXTRIES)))
            continue

          last = " (Attempt #%d)" % (n + 1)
          self.messages.append(Message(NOTICE,
                                       "Restarting %s%s." %
                                       (instance.name, last)))
          try:
            instance.Restart()
          except Error:
            # sys.exc_info() instead of "except Error, x" keeps this valid
            # on Python 2.4 as well as on later syntax.
            self.messages.append(Message(ERROR, str(sys.exc_info()[1])))

          # The attempt counts whether or not the command succeeded.
          notepad.RecordAttempt(instance)

        elif instance.state in HELPLESS_STATES:
          if notepad.NumberOfAttempts(instance):
            notepad.Remove(instance)

        else:
          if notepad.NumberOfAttempts(instance):
            notepad.Remove(instance)
            msg = Message(NOTICE,
                          "Restart of %s succeeded." % instance.name)
            self.messages.append(msg)
    finally:
      notepad.Save()

  def WriteReport(self, logfile):
    """Log all messages to file.

    Args:
      logfile: file object open for writing (the log file)
    """
    for msg in self.messages:
      logfile.write(str(msg) + "\n")
def ParseOptions():
  """Parse the watcher's command line.

  The only option is -d/--debug, stored as options.debug (default False).

  Returns:
    (options, args) as from OptionParser.parse_args()
  """
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti cluster watcher",
                        usage="%prog [-d]",
                        version=version_text)
  parser.add_option("-d", "--debug", dest="debug", action="store_true",
                    default=False,
                    help="Don't redirect messages to the log file")
  return parser.parse_args()
def main():
"""Main function.
"""
options, args = ParseOptions()
if not options.debug:
sys.stderr = sys.stdout = open(LOGFILE, 'a')
try:
restarter = Restarter()
restarter.Run()
restarter.WriteReport(sys.stdout)
except Error, err:
print err
if __name__ == '__main__':
main()
docdir = $(datadir)/doc/$(PACKAGE)
dist_doc_DATA = hooks.html hooks.pdf
EXTRA_DIST = hooks.sgml
%.html: %.sgml
docbook2html --nochunks $<
%.pdf: %.sgml
docbook2pdf $<
This diff is collapsed.
#! /bin/sh
# ganeti node daemon starter script
# based on skeleton from Debian GNU/Linux
PATH=/sbin:/bin:/usr/sbin:/usr/bin
DAEMON=/usr/local/sbin/ganeti-noded
NAME=ganeti-noded
SCRIPTNAME=/etc/init.d/ganeti
DESC="Ganeti node daemon"
test -f $DAEMON || exit 0
set -e
. /lib/lsb/init-functions
# Make sure the files the node daemon needs are present before starting it.
# If one of them is missing: close the pending "begin" message with status 0,
# print a warning naming the missing file and leave the script with exit
# status 0 (the daemon is simply not started).
check_config() {
for fname in /var/lib/ganeti/ssconf_node_pass /var/lib/ganeti/server.pem; do
if ! [ -f "$fname" ]; then
log_end_msg 0
log_warning_msg "Config $fname not there, will not run."
exit 0
fi
done
}
# Dispatch on the requested action. The script runs under "set -e", so a
# failing command ends it unless it is part of an "||" list.
case "$1" in
start)
# Start the daemon only if its configuration files exist.
log_begin_msg "Starting $DESC..."
check_config
start-stop-daemon --start --quiet --exec $DAEMON || log_end_msg 1
log_end_msg 0
;;
stop)
# The daemon is looked up by process name, not by pid file.
log_begin_msg "Stopping $DESC..."
start-stop-daemon --stop --quiet --name $NAME || log_end_msg 1
log_end_msg 0
;;
restart|force-reload)
# Stop (--oknodo: not running is not an error), then start again as in
# the "start" action.
log_begin_msg "Reloading $DESC..."
start-stop-daemon --stop --quiet --oknodo --retry 30 --name $NAME
check_config
start-stop-daemon --start --quiet --exec $DAEMON || log_end_msg 1
log_end_msg 0
;;
*)
# Unknown or missing action: print usage and fail.
log_success_msg "Usage: $SCRIPTNAME {start|stop|force-reload|restart}"
exit 1
;;
esac
exit 0
pkgpython_PYTHON = __init__.py backend.py cli.py cmdlib.py config.py \
objects.py errors.py logger.py ssh.py utils.py rpc.py \
bdev.py hypervisor.py opcodes.py mcpu.py constants.py \
ssconf.py
#!/usr/bin/python
#
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
# empty file for package definition
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/python
#
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Module dealing with command line parsing"""
import sys
import textwrap
import os.path
import copy
from ganeti import utils
from ganeti import logger
from ganeti import errors
from ganeti import mcpu
from ganeti import constants
from optparse import (OptionParser, make_option, TitledHelpFormatter,
Option, OptionValueError, SUPPRESS_HELP)
# Public names of this module.
__all__ = ["DEBUG_OPT", "NOHDR_OPT", "SEP_OPT", "GenericMain", "SubmitOpCode",
"cli_option",
"ARGS_NONE", "ARGS_FIXED", "ARGS_ATLEAST", "ARGS_ANY", "ARGS_ONE",
"USEUNITS_OPT"]
# Ready-made option objects for use in command option lists.
# -d/--debug: boolean flag, off by default.
DEBUG_OPT = make_option("-d", "--debug", default=False,
action="store_true",
help="Turn debugging on")
# --no-headers: boolean flag stored as options.no_headers.
NOHDR_OPT = make_option("--no-headers", default=False,
action="store_true", dest="no_headers",
help="Don't display column headers")
# --separator: string stored as options.separator.
SEP_OPT = make_option("--separator", default=" ",
action="store", dest="separator",
help="Separator between output fields"
" (defaults to one space)")
# --human-readable: boolean flag stored as options.human_readable.
USEUNITS_OPT = make_option("--human-readable", default=False,
action="store_true", dest="human_readable",
help="Print sizes in human readable format")
# --lock-retries: integer option hidden from the help output; _ParseArgs
# adds it to the options of every command.
_LOCK_OPT = make_option("--lock-retries", default=None,
type="int", help=SUPPRESS_HELP)
def ARGS_FIXED(val):
  """Encode "exactly val arguments" for a command definition.

  The count is returned negated; _ParseArgs reads a negative nargs as an
  exact argument count.
  """
  exact_count = -val
  return exact_count
def ARGS_ATLEAST(val):
  """Encode "at least val arguments" for a command definition.

  The count is returned unchanged; _ParseArgs reads a non-negative nargs
  as a minimum argument count.
  """
  minimum_count = val
  return minimum_count
# The command accepts no arguments at all.
ARGS_NONE = None
# The command requires exactly one argument.
ARGS_ONE = ARGS_FIXED(1)
# The command accepts any number of arguments, including none.
ARGS_ANY = ARGS_ATLEAST(0)
def check_unit(option, opt, value):
try:
return utils.ParseUnit(value)
except errors.UnitParseError, err:
raise OptionValueError, ("option %s: %s" % (opt, err))
class CliOption(Option):
  """Option class that knows the additional value type "unit"."""
  TYPES = Option.TYPES + ("unit",)
  # Build a private copy of the checker table so that the table of the
  # base class is left untouched.
  TYPE_CHECKER = dict(Option.TYPE_CHECKER, unit=check_unit)


# optparse.py sets make_option, so we do it for our own option class, too
cli_option = CliOption
def _ParseArgs(argv, commands):
"""Parses the command line and return the function which must be
executed together with its arguments
Arguments:
argv: the command line
commands: dictionary with special contents, see the design doc for
cmdline handling
"""
if len(argv) == 0:
binary = "<command>"
else:
binary = argv[0].split("/")[-1]
if len(argv) > 1 and argv[1] == "--version":
print "%s (ganeti) %s" % (binary, constants.RELEASE_VERSION)
# Quit right away. That way we don't have to care about this special
# argument. optparse.py does it the same.
sys.exit(0)
if len(argv) < 2 or argv[1] not in commands.keys():
# let's do a nice thing
sortedcmds = commands.keys()
sortedcmds.sort()
print ("Usage: %(bin)s {command} [options...] [argument...]"
"\n%(bin)s <command> --help to see details, or"
" man %(bin)s\n" % {"bin": binary})
# compute the max line length for cmd + usage
mlen = max([len(" %s %s" % (cmd, commands[cmd][3])) for cmd in commands])
mlen = min(60, mlen) # should not get here...
# and format a nice command list
print "Commands:"
for cmd in sortedcmds:
cmdstr = " %s %s" % (cmd, commands[cmd][3])
help_text = commands[cmd][4]
help_lines = textwrap.wrap(help_text, 79-3-mlen)
print "%-*s - %s" % (mlen, cmdstr,
help_lines.pop(0))
for line in help_lines:
print "%-*s %s" % (mlen, "", line)
print
return None, None, None
cmd = argv.pop(1)
func, nargs, parser_opts, usage, description = commands[cmd]
parser_opts.append(_LOCK_OPT)
parser = OptionParser(option_list=parser_opts,
description=description,
formatter=TitledHelpFormatter(),
usage="%%prog %s %s" % (cmd, usage))
parser.disable_interspersed_args()
options, args = parser.parse_args()
if nargs is None:
if len(args) != 0:
print >> sys.stderr, ("Error: Command %s expects no arguments" % cmd)
return None, None, None
elif nargs < 0 and len(args) != -nargs:
print >> sys.stderr, ("Error: Command %s expects %d argument(s)" %
(cmd, -nargs))
return None, None, None
elif nargs >= 0 and len(args) < nargs:
print >> sys.stderr, ("Error: Command %s expects at least %d argument(s)" %
(cmd, nargs))
return None, None, None
return func, options, args
def _AskUser(text):
  """Ask the user a yes/no question.

  The question is written to, and the answer read from, /dev/tty.

  Args:
    text - the question to ask.

  Returns:
    True if the answer is "y" or "yes" (in any case), False for any other
    answer and also when /dev/tty cannot be opened (No is the default).
  """
  try:
    tty = open("/dev/tty", "r+")
  except IOError:
    return False
  try:
    tty.write(textwrap.fill(text))
    tty.write('\n')
    tty.write("y/[n]: ")
    # A stream opened for update needs a flush between writing and
    # reading; it also makes sure the prompt is visible before we wait.
    tty.flush()
    line = tty.readline(16).strip().lower()
    return line in ('y', 'yes')
  finally:
    tty.close()
def SubmitOpCode(op):
  """Execute an opcode and return its result.

  A new mcpu.Processor is created for every call and the opcode is run
  with logger.ToStdout as the feedback function. This is only a thin
  wrapper; it should be extended to better handle feedback and
  interaction functions.
  """
  processor = mcpu.Processor()
  result = processor.ExecOpCode(op, logger.ToStdout)
  return result
def GenericMain(commands):
"""Generic main function for all the gnt-* commands.
Argument: a dictionary with a special structure, see the design doc
for command line handling.
"""
# save the program name and the entire command line for later logging
if sys.argv:
binary = os.path.basename(sys.argv[0]) or sys.argv[0]
if len(sys.argv) >= 2:
binary += " " + sys.argv[1]
old_cmdline = " ".join(sys.argv[2:])
else:
old_cmdline = ""
else:
binary = "<unknown program>"