masterd.py 21.5 KB
Newer Older
1
#
Iustin Pop's avatar
Iustin Pop committed
2
3
#

4
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
Iustin Pop's avatar
Iustin Pop committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Master daemon program.

Some classes deviate from the standard style guide because inheritance
from their parent classes requires it.

"""

29
# pylint: disable=C0103
Iustin Pop's avatar
Iustin Pop committed
30
# C0103: Invalid name ganeti-masterd
Iustin Pop's avatar
Iustin Pop committed
31

32
33
34
import grp
import os
import pwd
35
import sys
36
import socket
Iustin Pop's avatar
Iustin Pop committed
37
import time
38
import tempfile
39
import logging
Iustin Pop's avatar
Iustin Pop committed
40

41
from optparse import OptionParser
Iustin Pop's avatar
Iustin Pop committed
42

43
from ganeti import config
Iustin Pop's avatar
Iustin Pop committed
44
from ganeti import constants
45
from ganeti import daemon
Iustin Pop's avatar
Iustin Pop committed
46
47
48
from ganeti import mcpu
from ganeti import opcodes
from ganeti import jqueue
49
from ganeti import locking
Iustin Pop's avatar
Iustin Pop committed
50
51
from ganeti import luxi
from ganeti import utils
52
53
from ganeti import errors
from ganeti import ssconf
54
from ganeti import workerpool
55
from ganeti import rpc
56
from ganeti import bootstrap
57
from ganeti import netutils
58
from ganeti import objects
59
from ganeti import query
60
61


62
63
CLIENT_REQUEST_WORKERS = 16

64
65
EXIT_NOTMASTER = constants.EXIT_NOTMASTER
EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR
Iustin Pop's avatar
Iustin Pop committed
66
67


68
class ClientRequestWorker(workerpool.BaseWorker):
  """Worker handling one client request per task.

  """
  # pylint: disable=W0221
  def RunTask(self, server, message, client):
    """Process the request.

    The message is parsed, dispatched through L{ClientOps} and the formatted
    response is handed back to the client connection.

    @param server: server object passed to L{ClientOps}; its C{awaker} is
        signalled after the reply has been handed to the client
    @param message: request message as received from the client
    @param client: client connection object; C{send_message} and
        C{close_log} are called on it

    """
    client_ops = ClientOps(server)

    try:
      (method, args, version) = luxi.ParseRequest(message)
    except luxi.ProtocolError:
      # A request that cannot be parsed gets no reply; the connection is
      # simply closed
      logging.error("Protocol Error: %s", sys.exc_info()[1])
      client.close_log()
      return

    success = False
    try:
      # Verify client's version if there was one in the request
      if version is not None and version != constants.LUXI_VERSION:
        raise errors.LuxiError("LUXI version mismatch, server %s, request %s" %
                               (constants.LUXI_VERSION, version))

      result = client_ops.handle_request(method, args)
      success = True
    except errors.GenericError:
      err = sys.exc_info()[1]
      logging.exception("Unexpected exception")
      success = False
      result = errors.EncodeException(err)
    except: # pylint: disable=W0702
      # Fetch only the exception value: keeping the whole exc_info() tuple
      # in a local variable would also keep the traceback (and through it
      # this frame) alive
      err = sys.exc_info()[1]
      logging.exception("Unexpected exception")
      result = "Caught exception: %s" % str(err)

    try:
      reply = luxi.FormatResponse(success, result)
      client.send_message(reply)
      # awake the main thread so that it can write out the data.
      server.awaker.signal()
    except: # pylint: disable=W0702
      logging.exception("Send error")
      client.close_log()


class MasterClientHandler(daemon.AsyncTerminatedMessageStream):
  """Handler for master peers.

  Each message received is queued, together with the server and this
  handler, on the server's request worker pool.

  """
  _MAX_UNHANDLED = 1

  def __init__(self, server, connected_socket, client_address, family):
    """Initializes the stream and remembers the owning server.

    """
    base = daemon.AsyncTerminatedMessageStream
    base.__init__(self, connected_socket, client_address,
                  constants.LUXI_EOM, family, self._MAX_UNHANDLED)
    self.server = server

  def handle_message(self, message, _):
    """Queues a received message on the server's worker pool.

    """
    task = (self.server, message, self)
    self.server.request_workers.AddTask(task)
126
127


128
129
class MasterServer(daemon.AsyncStreamServer):
  """Master Server.

  This is the main asynchronous master server. It handles connections to the
  master socket.

  """
  family = socket.AF_UNIX

  def __init__(self, mainloop, address, uid, gid):
    """MasterServer constructor

    The server is first set up on a temporary name in the same directory as
    C{address}; that path is renamed to C{address} only after its mode and
    ownership have been adjusted.

    @type mainloop: ganeti.daemon.Mainloop
    @param mainloop: Mainloop used to poll for I/O events
    @param address: the unix socket address to bind the MasterServer to
    @param uid: The uid of the owner of the socket
    @param gid: The gid of the owner of the socket

    """
    # Function-scope import, only needed for the permission bits below
    import stat

    temp_name = tempfile.mktemp(dir=os.path.dirname(address))
    daemon.AsyncStreamServer.__init__(self, self.family, temp_name)
    # Read/write/execute for owner and group, nothing for others (octal 770)
    os.chmod(temp_name, stat.S_IRWXU | stat.S_IRWXG)
    os.chown(temp_name, uid, gid)
    os.rename(temp_name, address)

    self.mainloop = mainloop
    self.awaker = daemon.AsyncAwaker()

    # We'll only start threads once we've forked.
    self.context = None
    self.request_workers = None

  def handle_connection(self, connected_socket, client_address):
    """Creates a L{MasterClientHandler} for a newly accepted connection.

    """
    # TODO: add connection count and limit the number of open connections to a
    # maximum number to avoid breaking for lack of file descriptors or memory.
    MasterClientHandler(self, connected_socket, client_address, self.family)

  def setup_queue(self):
    """Creates the L{GanetiContext} and the client request worker pool.

    """
    self.context = GanetiContext()
    self.request_workers = workerpool.WorkerPool("ClientReq",
                                                 CLIENT_REQUEST_WORKERS,
                                                 ClientRequestWorker)

  def server_cleanup(self):
    """Cleanup the server.

    This involves shutting down the processor threads and the master
    socket.

    """
    # Nested try/finally so that every step is attempted even if an earlier
    # one raises: a failure while closing the socket or terminating the
    # workers must not prevent the job queue shutdown
    try:
      self.close()
    finally:
      try:
        if self.request_workers:
          self.request_workers.TerminateWorkers()
      finally:
        if self.context:
          self.context.jobqueue.Shutdown()
Iustin Pop's avatar
Iustin Pop committed
185
186
187
188
189
190
191


class ClientOps:
  """Class holding high-level client operations."""
  def __init__(self, server):
    self.server = server

  @staticmethod
  def _RejectSyncQuery(use_locking):
    """Refuses queries which asked for locking.

    @param use_locking: the locking flag taken from the request arguments
    @raise errors.OpPrereqError: if C{use_locking} is true

    """
    if use_locking:
      raise errors.OpPrereqError("Sync queries are not allowed",
                                 errors.ECODE_INVAL)

  def handle_request(self, method, args): # pylint: disable=R0911
    """Dispatches a single client request.

    @param method: request identifier, compared against the C{luxi.REQ_*}
        constants
    @type args: tuple or list
    @param args: request arguments; the number of expected items depends on
        C{method}
    @return: the result of the job queue call, lock manager call, query
        helper or opcode execution selected by C{method}
    @raise ValueError: if C{args} is not a tuple or list, or if C{method} is
        not a known request
    @raise TypeError: if a watcher pause end time is neither None nor a
        number
    @raise errors.OpPrereqError: for queries asking for locking, filtered
        lock queries and unknown resource types
    @raise errors.GenericError: if a watcher pause end time lies in the past
    @raise NotImplementedError: for resources in C{constants.QR_VIA_LUXI}
        and for synchronous lock queries

    """
    queue = self.server.context.jobqueue

    # TODO: Parameter validation
    if not isinstance(args, (tuple, list)):
      logging.info("Received invalid arguments of type '%s'", type(args))
      raise ValueError("Invalid arguments type '%s'" % type(args))

    # TODO: Rewrite to not exit in each 'if/elif' branch

    if method == luxi.REQ_SUBMIT_JOB:
      logging.info("Received new job")
      ops = [opcodes.OpCode.LoadOpCode(state) for state in args]
      return queue.SubmitJob(ops)

    elif method == luxi.REQ_SUBMIT_MANY_JOBS:
      logging.info("Received multiple jobs")
      jobs = [[opcodes.OpCode.LoadOpCode(state) for state in ops]
              for ops in args]
      return queue.SubmitManyJobs(jobs)

    elif method == luxi.REQ_CANCEL_JOB:
      (job_id, ) = args
      logging.info("Received job cancel request for %s", job_id)
      return queue.CancelJob(job_id)

    elif method == luxi.REQ_ARCHIVE_JOB:
      (job_id, ) = args
      logging.info("Received job archive request for %s", job_id)
      return queue.ArchiveJob(job_id)

    elif method == luxi.REQ_AUTOARCHIVE_JOBS:
      (age, timeout) = args
      logging.info("Received job autoarchive request for age %s, timeout %s",
                   age, timeout)
      return queue.AutoArchiveJobs(age, timeout)

    elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE:
      (job_id, fields, prev_job_info, prev_log_serial, timeout) = args
      logging.info("Received job poll request for %s", job_id)
      return queue.WaitForJobChanges(job_id, fields, prev_job_info,
                                     prev_log_serial, timeout)

    elif method == luxi.REQ_QUERY:
      (what, fields, qfilter) = args
      req = objects.QueryRequest(what=what, fields=fields, qfilter=qfilter)

      if req.what in constants.QR_VIA_OP:
        return self._Query(opcodes.OpQuery(what=req.what, fields=req.fields,
                                           qfilter=req.qfilter))
      elif req.what == constants.QR_LOCK:
        if req.qfilter is not None:
          # Carries an error code like every other OpPrereqError raised here
          raise errors.OpPrereqError("Lock queries can't be filtered",
                                     errors.ECODE_INVAL)
        return self.server.context.glm.QueryLocks(req.fields)
      elif req.what in constants.QR_VIA_LUXI:
        raise NotImplementedError
      else:
        raise errors.OpPrereqError("Resource type '%s' unknown" % req.what,
                                   errors.ECODE_INVAL)

    elif method == luxi.REQ_QUERY_FIELDS:
      (what, fields) = args
      req = objects.QueryFieldsRequest(what=what, fields=fields)

      try:
        fielddefs = query.ALL_FIELDS[req.what]
      except KeyError:
        raise errors.OpPrereqError("Resource type '%s' unknown" % req.what,
                                   errors.ECODE_INVAL)

      return query.QueryFields(fielddefs, req.fields)

    elif method == luxi.REQ_QUERY_JOBS:
      (job_ids, fields) = args
      # Only used for the log message below
      if isinstance(job_ids, (tuple, list)) and job_ids:
        msg = utils.CommaJoin(job_ids)
      else:
        msg = str(job_ids)
      logging.info("Received job query request for %s", msg)
      return queue.QueryJobs(job_ids, fields)

    elif method == luxi.REQ_QUERY_INSTANCES:
      (names, fields, use_locking) = args
      logging.info("Received instance query request for %s", names)
      self._RejectSyncQuery(use_locking)
      op = opcodes.OpInstanceQuery(names=names, output_fields=fields,
                                   use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_NODES:
      (names, fields, use_locking) = args
      logging.info("Received node query request for %s", names)
      self._RejectSyncQuery(use_locking)
      op = opcodes.OpNodeQuery(names=names, output_fields=fields,
                               use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_GROUPS:
      (names, fields, use_locking) = args
      logging.info("Received group query request for %s", names)
      self._RejectSyncQuery(use_locking)
      op = opcodes.OpGroupQuery(names=names, output_fields=fields)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_EXPORTS:
      (nodes, use_locking) = args
      self._RejectSyncQuery(use_locking)
      logging.info("Received exports query request")
      op = opcodes.OpBackupQuery(nodes=nodes, use_locking=use_locking)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_CONFIG_VALUES:
      (fields, ) = args
      logging.info("Received config values query request for %s", fields)
      op = opcodes.OpClusterConfigQuery(output_fields=fields)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_CLUSTER_INFO:
      logging.info("Received cluster info query request")
      op = opcodes.OpClusterQuery()
      return self._Query(op)

    elif method == luxi.REQ_QUERY_TAGS:
      (kind, name) = args
      logging.info("Received tags query request")
      op = opcodes.OpTagsGet(kind=kind, name=name)
      return self._Query(op)

    elif method == luxi.REQ_QUERY_LOCKS:
      (fields, sync) = args
      logging.info("Received locks query request")
      if sync:
        raise NotImplementedError("Synchronous queries are not implemented")
      return self.server.context.glm.OldStyleQueryLocks(fields)

    elif method == luxi.REQ_QUEUE_SET_DRAIN_FLAG:
      (drain_flag, ) = args
      logging.info("Received queue drain flag change request to %s",
                   drain_flag)
      return queue.SetDrainFlag(drain_flag)

    elif method == luxi.REQ_SET_WATCHER_PAUSE:
      (until, ) = args

      if until is None:
        logging.info("Received request to no longer pause the watcher")
      else:
        if not isinstance(until, (int, float)):
          raise TypeError("Duration must be an integer or float")

        if until < time.time():
          raise errors.GenericError("Unable to set pause end time in the past")

        logging.info("Received request to pause the watcher until %s", until)

      return _SetWatcherPause(until)

    else:
      logging.info("Received invalid request '%s'", method)
      raise ValueError("Invalid operation '%s'" % method)

  def _Query(self, op):
    """Runs the specified opcode and returns the result.

    """
    # Queries don't have a job id
    proc = mcpu.Processor(self.server.context, None)

    # TODO: Executing an opcode using locks will acquire them in blocking mode.
    # Consider using a timeout for retries.
    return proc.ExecOpCode(op, None)
374

Iustin Pop's avatar
Iustin Pop committed
375

376
377
378
379
380
381
class GanetiContext(object):
  """Context common to all ganeti threads.

  This class creates and holds common objects shared by all threads.

  """
  # pylint: disable=W0212
  # we do want to ensure a singleton here
  _instance = None

  def __init__(self):
    """Constructs a new GanetiContext object.

    There should be only a GanetiContext object at any time, so this
    function raises an error if this is not the case.

    """
    cls = self.__class__
    assert cls._instance is None, "double GanetiContext instance"

    # Create global configuration object
    self.cfg = config.ConfigWriter()

    # Locking manager, fed with the node, node group and instance lists
    node_names = self.cfg.GetNodeList()
    group_names = self.cfg.GetNodeGroupList()
    instance_names = self.cfg.GetInstanceList()
    self.glm = locking.GanetiLockManager(node_names, group_names,
                                         instance_names)

    # Job queue
    self.jobqueue = jqueue.JobQueue(self)

    # RPC runner
    self.rpc = rpc.RpcRunner(self)

    # setting this also locks the class against attribute modifications
    cls._instance = self

  def __setattr__(self, name, value):
    """Setting GanetiContext attributes is forbidden after initialization.

    """
    assert self.__class__._instance is None, "Attempt to modify Ganeti Context"
    object.__setattr__(self, name, value)

  def AddNode(self, node, ec_id):
    """Adds a node to the configuration and lock manager.

    """
    # Configuration first
    self.cfg.AddNode(node, ec_id)

    # If preseeding fails it'll not be added
    self.jobqueue.AddNode(node)

    # Finally register the node with the Ganeti Lock Manager
    self.glm.add(locking.LEVEL_NODE, node.name)

  def ReaddNode(self, node):
    """Updates a node that's already in the configuration

    """
    # Synchronize the queue again
    self.jobqueue.AddNode(node)

  def RemoveNode(self, name):
    """Removes a node from the configuration and lock manager.

    """
    # Configuration first
    self.cfg.RemoveNode(name)

    # Notify job queue
    self.jobqueue.RemoveNode(name)

    # Finally drop the node from the Ganeti Lock Manager
    self.glm.remove(locking.LEVEL_NODE, name)

453

454
455
456
457
458
459
460
461
462
463
464
465
466
def _SetWatcherPause(until):
  """Creates or removes the watcher pause file.

  @type until: None or int
  @param until: Unix timestamp saying until when the watcher shouldn't run
  @return: the C{until} value that was passed in

  """
  pause_file = constants.WATCHER_PAUSEFILE

  if until is not None:
    # The timestamp is stored as an integer followed by a newline
    utils.WriteFile(pause_file, data="%d\n" % (until, ))
  else:
    utils.RemoveFile(pause_file)

  return until

469

470
@rpc.RunWithRPC
def CheckAgreement():
  """Check the agreement on who is the master.

  The function uses a very simple algorithm: we must get more positive
  than negative answers. Since in most of the cases we are the master,
  we'll use our own config file for getting the node list. In the
  future we could collect the current node list from our (possibly
  obsolete) known nodes.

  In order to account for cold-start of all nodes, the vote is gathered
  up to six times, ten seconds apart, until we get a real answer as the
  top-voted one. If the nodes are more out-of-sync, for now manual
  startup of the master should be attempted.

  Note that for a even number of nodes cluster, we need at least half
  of the nodes (beside ourselves) to vote for us. This creates a
  problem on two-node clusters, since in this case we require the
  other node to be up too to confirm our status.

  @rtype: boolean
  @return: True if this node may act as master (this includes the case
      of no votes at all, i.e. a one node cluster), False otherwise

  """
  myself = netutils.Hostname.GetSysName()
  #temp instantiation of a config writer, used only to get the node list
  cfg = config.ConfigWriter()
  node_list = cfg.GetNodeList()
  del cfg

  max_attempts = 6
  retry_delay = 10 # seconds between two attempts

  for attempt in range(max_attempts):
    votes = bootstrap.GatherMasterVotes(node_list)
    if not votes:
      # empty node list, this is a one node cluster
      return True
    if votes[0][0] is not None:
      # a real node is the top-voted one, stop retrying
      break
    # Wait only if another attempt follows; sleeping after the last one
    # would merely delay the failure report
    if attempt < max_attempts - 1:
      time.sleep(retry_delay)
  else:
    # Loop finished without "break": no attempt produced a real answer
    logging.critical("Cluster inconsistent, most of the nodes didn't answer"
                     " after multiple retries. Aborting startup")
    logging.critical("Use the --no-voting option if you understand what"
                     " effects it has on the cluster state")
    return False

  # here a real node is at the top of the list
  all_votes = sum(item[1] for item in votes)
  top_node, top_votes = votes[0]

  if top_node != myself:
    logging.critical("It seems we are not the master (top-voted node"
                     " is %s with %d out of %d votes)", top_node, top_votes,
                     all_votes)
    return False

  votes_against = all_votes - top_votes
  if top_votes < votes_against:
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", top_votes, votes_against)
    return False

  return True
529

Michael Hanselmann's avatar
Michael Hanselmann committed
530

531
532
533
534
@rpc.RunWithRPC
def ActivateMasterIP():
  """Asks the master node to activate the master IP address.

  The master node name is read from ssconf. A failure reported by the
  RPC call is only logged.

  """
  master_node = ssconf.SimpleStore().GetMasterNode()
  fail_msg = rpc.RpcRunner.call_node_activate_master_ip(master_node).fail_msg
  if fail_msg:
    logging.error("Can't activate master IP address: %s", fail_msg)


541
542
543
544
def CheckMasterd(options, args):
  """Initial checks whether to run or exit with a failure.

  On success C{options.uid} and C{options.gid} are filled in with the ids
  looked up for the master daemon user and the daemons group.

  @param options: parsed command line options; C{debug}, C{no_voting} and
      C{yes_do_it} are read
  @param args: remaining command line arguments, must be empty

  """
  def _Abort(message):
    """Writes a message to stderr and exits with the failure code.

    """
    sys.stderr.write("%s\n" % message)
    sys.exit(constants.EXIT_FAILURE)

  if args: # masterd doesn't take any arguments
    _Abort("Usage: %s [-f] [-d]" % sys.argv[0])

  ssconf.CheckMaster(options.debug)

  try:
    options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid
    options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid
  except KeyError:
    _Abort("User or group not existing on system: %s:%s" %
           (constants.MASTERD_USER, constants.DAEMONS_GROUP))

  # Check the configuration is sane before anything else
  try:
    config.ConfigWriter()
  except errors.ConfigVersionMismatch:
    err = sys.exc_info()[1]
    v1 = "%s.%s.%s" % constants.SplitVersion(err.args[0])
    v2 = "%s.%s.%s" % constants.SplitVersion(err.args[1])
    _Abort("Configuration version mismatch. The current Ganeti software"
           " expects version %s, but the on-disk configuration file has"
           " version %s. This is likely the result of upgrading the"
           " software without running the upgrade procedure. Please contact"
           " your cluster administrator or complete the upgrade using the"
           " cfgupgrade utility, after reading the upgrade notes." %
           (v1, v2))
  except errors.ConfigurationError:
    err = sys.exc_info()[1]
    _Abort("Configuration error while opening the configuration file: %s\n"
           "This might be caused by an incomplete software upgrade or"
           " by a corrupted configuration file. Until the problem is fixed"
           " the master daemon cannot start." % str(err))

  # If CheckMaster didn't fail we believe we are the master, but we have to
  # confirm with the other nodes.
  if not options.no_voting:
    # CheckAgreement uses RPC and threads, hence it needs to be run in
    # a separate process before we call utils.Daemonize in the current
    # process.
    if not utils.RunInSeparateProcess(CheckAgreement):
      sys.exit(constants.EXIT_FAILURE)
  elif not options.yes_do_it:
    # Voting was disabled without the override flag: ask interactively
    sys.stdout.write("The 'no voting' option has been selected.\n")
    sys.stdout.write("This is dangerous, please confirm by"
                     " typing uppercase 'yes': ")
    sys.stdout.flush()

    confirmation = sys.stdin.readline().strip()
    if confirmation != "YES":
      _Abort("Aborting.")

  # ActivateMasterIP also uses RPC/threads, so we run it again via a
  # separate process.

  # TODO: decide whether failure to activate the master IP is a fatal error
  utils.RunInSeparateProcess(ActivateMasterIP)

609

610
611
def PrepMasterd(options, _):
  """Prep master daemon function, executed with the PID file held.

  @param options: parsed command line options; C{uid} and C{gid} are passed
      on to L{MasterServer}
  @return: tuple of (mainloop, master server)

  """
  socket_path = constants.MASTER_SOCKET

  # This is safe to do as the pid file guarantees against
  # concurrent execution.
  utils.RemoveFile(socket_path)

  mainloop = daemon.Mainloop()
  master = MasterServer(mainloop, socket_path, options.uid, options.gid)
  return (mainloop, master)


624
def ExecMasterd(options, args, prep_data): # pylint: disable=W0613
  """Main master daemon function, executed with the PID file held.

  @param prep_data: the (mainloop, master server) tuple built by
      L{PrepMasterd}

  """
  (loop, server) = prep_data

  # Each stage is undone in reverse order, whatever happens in between
  try:
    rpc.Init()
    try:
      server.setup_queue()
      try:
        loop.Run()
      finally:
        server.server_cleanup()
    finally:
      rpc.Shutdown()
  finally:
    utils.RemoveFile(constants.MASTER_SOCKET)
641

Iustin Pop's avatar
Iustin Pop committed
642

643
def Main():
  """Main function"""
  version_text = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
  parser = OptionParser(description="Ganeti master daemon",
                        usage="%prog [-f] [-d]",
                        version=version_text)

  no_voting_help = ("Do not check that the nodes agree on this node"
                    " being the master and start the daemon unconditionally")
  parser.add_option("--no-voting", dest="no_voting", help=no_voting_help,
                    default=False, action="store_true")

  yes_do_it_help = "Override interactive check for --no-voting"
  parser.add_option("--yes-do-it", dest="yes_do_it", help=yes_do_it_help,
                    default=False, action="store_true")

  # Hand the check/prepare/execute callbacks over to the generic daemon code
  daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd,
                     ExecMasterd, multithreaded=True)