Newer
Older
#
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Master daemon program.

Some classes deviate from the standard style guide since the
inheritance from parent classes requires it.

"""
import SocketServer
import threading
import time
import collections
import Queue
import random
import signal
import sys
from cStringIO import StringIO
from optparse import OptionParser

import simplejson

from ganeti import constants
from ganeti import mcpu
from ganeti import opcodes
from ganeti import jqueue
from ganeti import luxi
from ganeti import utils
from ganeti import errors
from ganeti import ssconf
# Process exit codes used by CheckMaster(); module-level aliases of the
# values defined in ganeti.constants.
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
class IOServer(SocketServer.UnixStreamServer):
    """Unix-socket server owning the job queue and its processor threads.

    Besides accepting client connections, this class installs the
    SIGINT/SIGTERM handlers (which are processed only in this thread),
    starts the job processor threads and tears everything down again at
    shutdown.

    """
    # Number of job processor threads started by setup_processors().
    QUEUE_PROCESSOR_SIZE = 1

    def __init__(self, address, rqhandler):
        """Create the listening socket and the (still idle) job queue.

        Args:
          address: path of the Unix socket to listen on
          rqhandler: request handler class, instantiated once per connection

        """
        SocketServer.UnixStreamServer.__init__(self, address, rqhandler)
        self.do_quit = False
        self.queue = jqueue.QueueManager()
        self.processors = []
        # Both termination signals only raise the quit flag; the actual
        # shutdown happens in serve_forever()/server_cleanup().
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.handle_quit_signals)

    def setup_processors(self):
        """Create and start the job processor threads.

        This is kept out of the constructor because the threads (and
        hence the clone() syscalls) must only be created after the
        daemonize step.

        """
        workers = [threading.Thread(target=PoolWorker,
                                    args=(idx, self.queue.new_queue))
                   for idx in range(self.QUEUE_PROCESSOR_SIZE)]
        self.processors.extend(workers)
        for worker in self.processors:
            worker.start()

    def process_request_thread(self, request, client_address):
        """Serve one client connection; runs in its own thread.

        This mirrors the code in SocketServer.ThreadingMixIn.

        """
        try:
            self.finish_request(request, client_address)
            self.close_request(request)
        except:
            # Deliberately catch-all, as in ThreadingMixIn: whatever went
            # wrong is reported and the connection is still closed.
            self.handle_error(request, client_address)
            self.close_request(request)

    def process_request(self, request, client_address):
        """Hand the connection over to a newly started thread.

        This mirrors the code in SocketServer.ThreadingMixIn.

        """
        threading.Thread(target=self.process_request_thread,
                         args=(request, client_address)).start()

    def handle_quit_signals(self, signum, frame):
        """Signal handler: log the signal and ask serve_forever() to stop."""
        print("received %s in %s" % (signum, frame))
        self.do_quit = True

    def serve_forever(self):
        """Handle one request at a time until the quit flag is raised."""
        while not self.do_quit:
            self.handle_request()
            print("served request, quit=%s" % self.do_quit)

    def server_cleanup(self):
        """Shut down the master socket and the processor threads.

        The listening socket is closed and its file removed; then one
        None item per processor is queued (PoolWorker exits when it reads
        None) and every processor thread is joined.

        """
        self.server_close()
        utils.RemoveFile(constants.MASTER_SOCKET)
        for _ in range(self.QUEUE_PROCESSOR_SIZE):
            self.queue.new_queue.put(None)
        for idx, worker in enumerate(self.processors):
            print("waiting for processor thread %s..." % idx)
            worker.join()
        print("done threads")
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
class ClientRqHandler(SocketServer.BaseRequestHandler):
"""Client handler"""
EOM = '\3'
READ_SIZE = 4096
def setup(self):
self._buffer = ""
self._msgs = collections.deque()
self._ops = ClientOps(self.server)
def handle(self):
while True:
msg = self.read_message()
if msg is None:
print "client closed connection"
break
request = simplejson.loads(msg)
if not isinstance(request, dict):
print "wrong request received: %s" % msg
break
method = request.get('request', None)
data = request.get('data', None)
if method is None or data is None:
print "no method or data in request"
break
print "request:", method, data
result = self._ops.handle_request(method, data)
print "result:", result
self.send_message(simplejson.dumps({'success': True, 'result': result}))
def read_message(self):
while not self._msgs:
data = self.request.recv(self.READ_SIZE)
if not data:
return None
new_msgs = (self._buffer + data).split(self.EOM)
self._buffer = new_msgs.pop()
self._msgs.extend(new_msgs)
return self._msgs.popleft()
def send_message(self, msg):
#print "sending", msg
self.request.sendall(msg + self.EOM)
class ClientOps:
    """Class holding high-level client operations.

    One instance is created per client connection; it dispatches the
    operations named in client requests ("submit" and "query") to the
    server's job queue or to an opcode processor.

    """

    def __init__(self, server):
        """Remember the server; the opcode processor is created lazily.

        Args:
          server: the server object; its 'queue' attribute is used for
            job submission and job queries

        """
        self.server = server
        self._cpu = None

    def _getcpu(self):
        """Return the opcode processor, creating it on first use."""
        if self._cpu is None:
            # The processor's feedback callback is a no-op here.
            self._cpu = mcpu.Processor(lambda x: None)
        return self._cpu

    def handle_request(self, operation, args):
        """Dispatch a client request to the matching operation.

        Args:
          operation: operation name, either "submit" or "query"
          args: operation-specific payload, passed through unchanged

        Returns:
          whatever the selected operation returns

        Raises:
          ValueError: if the operation name is not known

        """
        print("%s %s" % (operation, args))
        if operation == "submit":
            return self.put(args)
        elif operation == "query":
            # This branch used to have no body, so queries never reached
            # query() (and the module did not even parse).
            return self.query(args)
        else:
            raise ValueError("Invalid operation")

    def put(self, args):
        """Unserialize a job and add it to the server queue.

        Returns:
          the value returned by the queue's put() for the new job

        """
        job = luxi.UnserializeJob(args)
        rid = self.server.queue.put(job)
        return rid

    def query(self, args):
        """Run a query described by a dict of 'object', 'fields', 'names'.

        Args:
          args: dict with the keys "object" (what to query, "instances"
            or "jobs"), "fields" and "names"

        Returns:
          the query result

        Raises:
          ValueError: if the requested object type is not supported
          KeyError: if one of the three keys is missing from args

        """
        path = args["object"]
        fields = args["fields"]
        names = args["names"]
        if path == "instances":
            opclass = opcodes.OpQueryInstances
        elif path == "jobs":
            # early exit because job query-ing is special (not via opcodes)
            return self.query_jobs(fields, names)
        else:
            raise ValueError("Invalid object %s" % path)

        op = opclass(output_fields=fields, names=names)
        cpu = self._getcpu()
        result = cpu.ExecOpCode(op)
        return result

    def query_jobs(self, fields, names):
        """Query jobs directly through the server's queue."""
        return self.server.queue.query_jobs(fields, names)
def JobRunner(proc, job):
"""Job executor.
This functions processes a single job in the context of given
processor instance.
"""
job.SetStatus(opcodes.Job.STATUS_RUNNING)
fail = False
for idx, op in enumerate(job.data.op_list):
job.data.op_status[idx] = opcodes.Job.STATUS_RUNNING
try:
job.data.op_result[idx] = proc.ExecOpCode(op)
job.data.op_status[idx] = opcodes.Job.STATUS_SUCCESS
except (errors.OpPrereqError, errors.OpExecError), err:
fail = True
job.data.op_result[idx] = str(err)
job.data.op_status[idx] = opcodes.Job.STATUS_FAIL
if fail:
job.SetStatus(opcodes.Job.STATUS_FAIL)
else:
job.SetStatus(opcodes.Job.STATUS_SUCCESS)
def PoolWorker(worker_id, incoming_queue):
"""A worker thread function.
This is the actual processor of a single thread of Job execution.
"""
while True:
print "worker %s sleeping" % worker_id
item = incoming_queue.get(True)
if item is None:
break
print "worker %s processing job %s" % (worker_id, item.data.job_id)
try:
proc = mcpu.Processor(feedback=lambda x: None)
try:
JobRunner(proc, item)
except errors.GenericError, err:
print "ganeti exception %s" % err
finally:
#utils.Unlock('cmd')
#utils.LockCleanup()
pass
print "worker %s finish job %s" % (worker_id, item.data.job_id)
print "worker %s exiting" % worker_id
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def CheckMaster(debug):
"""Checks the node setup.
If this is the master, the function will return. Otherwise it will
exit with an exit code based on the node status.
"""
try:
ss = ssconf.SimpleStore()
master_name = ss.GetMasterNode()
except errors.ConfigurationError, err:
print "Cluster configuration incomplete: '%s'" % str(err)
sys.exit(EXIT_NODESETUP_ERROR)
try:
myself = utils.HostInfo()
except errors.ResolverError, err:
sys.stderr.write("Cannot resolve my own name (%s)\n" % err.args[0])
sys.exit(EXIT_NODESETUP_ERROR)
if myself.name != master_name:
if debug:
sys.stderr.write("Not master, exiting.\n")
sys.exit(EXIT_NOTMASTER)
def ParseOptions():
    """Parse the command line of the master daemon.

    Two flags are understood: -f/--foreground clears the 'fork' option
    (which defaults to True) and -d/--debug sets the 'debug' option
    (which defaults to False).

    Returns:
      (options, args) as from OptionParser.parse_args()

    """
    version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
    parser = OptionParser(description="Ganeti master daemon",
                          usage="%prog [-f] [-d]",
                          version=version_text)

    parser.add_option("-f", "--foreground",
                      action="store_false", dest="fork", default=True,
                      help="Don't detach from the current terminal")
    parser.add_option("-d", "--debug",
                      action="store_true", dest="debug", default=False,
                      help="Enable some debug messages")

    return parser.parse_args()
def main():
    """Run the master daemon.

    Parses the command line, verifies that this node is the master,
    creates the server on the master socket, optionally detaches from
    the terminal, starts the job processor threads and then serves
    requests until told to quit. The server cleanup is run on the way
    out even if serving fails.

    """
    options, _ = ParseOptions()
    utils.debug = options.debug

    CheckMaster(options.debug)

    # The server (and its listening socket) is created before daemonizing;
    # its file descriptor is passed in noclose_fds, presumably so that
    # Daemonize leaves it open.
    master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)

    # become a daemon
    if options.fork:
        utils.Daemonize(logfile=constants.LOG_MASTERDAEMON,
                        noclose_fds=[master.fileno()])

    # Worker threads are only started after the daemonize step.
    master.setup_processors()
    try:
        master.serve_forever()
    finally:
        master.server_cleanup()


if __name__ == "__main__":
    main()