#
#

# Copyright (C) 2014 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the execution of a job as a separate process

The complete protocol of initializing a job is described in the Haskell
module Ganeti.Query.Exec
"""

import contextlib
import logging
import os
import signal
import sys
import time

from ganeti.server import masterd
from ganeti.rpc import transport
from ganeti import utils
from ganeti import pathutils
from ganeti.utils import livelock


def _GetMasterInfo():
  """Reads the job id and the livelock name sent by the master process

  Both values are requested over a transport opened on file descriptors
  0 and 1; the transport is closed again before returning.

  @return: tuple of (job id as an integer, livelock name object)

  """
  logging.debug("Opening transport over stdin/out")
  master_channel = transport.FdTransport((0, 1))
  with contextlib.closing(master_channel):
    # First exchange: the numeric id of the job to run
    logging.debug("Reading job id from the master process")
    raw_job_id = master_channel.Call("")
    job_id = int(raw_job_id)
    logging.debug("Got job id %d", job_id)

    # Second exchange: the name of the livelock file for this job
    logging.debug("Reading the livelock name from the master process")
    raw_livelock = master_channel.Call("")
    livelock_name = livelock.LiveLockName(raw_livelock)
    logging.debug("Got livelock %s", livelock_name)

  return job_id, livelock_name


def main():
  """Runs a single job inside this process and exits

  Sets up logging, obtains the job id and livelock name through
  L{_GetMasterInfo}, creates the Ganeti context, picks up the job and
  polls once per second until the job queue reports it as finalized.
  A SIGTERM received while waiting is turned into a request to cancel
  the job. Afterwards the queue is shut down and the livelock file is
  removed.

  The process exits with code 0 if everything up to the queue shutdown
  succeeded, and with code 1 if an exception was caught and logged.

  """
  logname = pathutils.GetLogFilename("jobs")
  # TODO: debug logging is currently hard-coded to be enabled
  utils.SetupLogging(logname, "master-daemon", debug=True) # TODO

  (job_id, livelock_name) = _GetMasterInfo()

  # Pessimistic default; only set to 0 after a clean queue shutdown
  exit_code = 1
  try:
    logging.debug("Preparing the context and the configuration")
    context = masterd.GanetiContext(livelock_name)

    logging.debug("Registering a SIGTERM handler")

    # A one-element list so that the nested handler can modify the flag
    # by mutation instead of rebinding a name of the enclosing scope
    cancel = [False]

    def _TermHandler(signum, _frame):
      logging.info("Killed by signal %d", signum)
      cancel[0] = True
    signal.signal(signal.SIGTERM, _TermHandler)

    logging.debug("Picking up job %d", job_id)
    context.jobqueue.PickupJob(job_id)

    # waiting for the job to finish
    time.sleep(1)
    while not context.jobqueue.HasJobBeenFinalized(job_id):
      if cancel[0]:
        logging.debug("Got cancel request, cancelling job %d", job_id)
        r = context.jobqueue.CancelJob(job_id)
        logging.debug("CancelJob result for job %d: %s", job_id, r)
        # Reset the flag so each signal triggers only one cancel request
        cancel[0] = False
      time.sleep(1)

    # wait until the queue finishes
    logging.debug("Waiting for the queue to finish")
    while context.jobqueue.PrepareShutdown():
      time.sleep(1)
    logging.debug("Shutting the queue down")
    context.jobqueue.Shutdown()
    exit_code = 0
  except Exception: # pylint: disable=W0703
    # Top-level boundary: log the failure and fall through to the cleanup
    logging.exception("Exception when trying to run job %d", job_id)
  finally:
    # Runs on success and on failure alike
    logging.debug("Job %d finalized", job_id)
    logging.debug("Removing livelock file %s", livelock_name.GetPath())
    os.remove(livelock_name.GetPath())

  sys.exit(exit_code)

# Run the job only when this file is executed as a script, not on import
if __name__ == '__main__':
  main()