From d21d09d6818296c3965d230397bf2a75c27eb60b Mon Sep 17 00:00:00 2001
From: Iustin Pop <iustin@google.com>
Date: Tue, 20 Jan 2009 17:19:58 +0000
Subject: [PATCH] Update the logging output of job processing

(this is related to the master daemon log)

Currently it's not possible to follow, in non-debug runs, the logical
execution thread of jobs: we log neither the thread name (so the
association between log messages and jobs is lost) nor the start and end
of job and opcode execution.

This patch adds a new parameter to utils.SetupLogging that enables
thread name logging, and promotes some log entries from debug to info
level. With this applied, it's easier to see which log messages relate
to which jobs/opcodes.
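
For illustration only (this snippet is not part of the patch; the
program name and log message are made up), the format string assembled
for a non-debug, multithreaded run can be reproduced standalone:

  import logging

  # Sketch of the format built by SetupLogging(..., multithreaded=True,
  # debug=False); with debug=True, " %(module)s:%(lineno)s" is inserted
  # before the level name.
  fmt = "%(asctime)s: " + "ganeti-masterd" + " pid=%(process)d"
  fmt += "/%(threadName)s"  # appended only when multithreaded=True
  fmt += " %(levelname)s %(message)s"

  logging.basicConfig(format=fmt, level=logging.INFO)
  logging.info("Op 1/2: Starting opcode OP_INSTANCE_STARTUP")
  # Prints something like:
  #   2009-01-20 17:19:58,123: ganeti-masterd pid=4242/MainThread INFO \
  #     Op 1/2: Starting opcode OP_INSTANCE_STARTUP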

The patch also demotes the "client closed connection" entry from info
to debug level, since it's not a very informative message.

Reviewed-by: ultrotter
---
 daemons/ganeti-masterd |  4 ++--
 lib/jqueue.py          | 17 ++++++++++-------
 lib/utils.py           | 17 ++++++++++-------
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/daemons/ganeti-masterd b/daemons/ganeti-masterd
index ac0af6df3..6dad3d29e 100755
--- a/daemons/ganeti-masterd
+++ b/daemons/ganeti-masterd
@@ -149,7 +149,7 @@ class ClientRqHandler(SocketServer.BaseRequestHandler):
     while True:
       msg = self.read_message()
       if msg is None:
-        logging.info("client closed connection")
+        logging.debug("client closed connection")
         break
 
       request = simplejson.loads(msg)
@@ -468,7 +468,7 @@ def main():
   utils.WritePidFile(constants.MASTERD_PID)
   try:
     utils.SetupLogging(constants.LOG_MASTERDAEMON, debug=options.debug,
-                       stderr_logging=not options.fork)
+                       stderr_logging=not options.fork, multithreaded=True)
 
     logging.info("Ganeti master daemon startup")
 
diff --git a/lib/jqueue.py b/lib/jqueue.py
index f1244eb1f..4b9f86dd0 100644
--- a/lib/jqueue.py
+++ b/lib/jqueue.py
@@ -344,7 +344,7 @@ class _JobQueueWorker(workerpool.BaseWorker):
     @param job: the job to be processed
 
     """
-    logging.debug("Worker %s processing job %s",
+    logging.info("Worker %s processing job %s",
                   self.worker_id, job.id)
     proc = mcpu.Processor(self.pool.queue.context)
     self.queue = queue = job.queue
@@ -352,8 +352,10 @@ class _JobQueueWorker(workerpool.BaseWorker):
       try:
         count = len(job.ops)
         for idx, op in enumerate(job.ops):
+          op_summary = op.input.Summary()
           try:
-            logging.debug("Op %s/%s: Starting %s", idx + 1, count, op)
+            logging.info("Op %s/%s: Starting opcode %s", idx + 1, count,
+                         op_summary)
 
             queue.acquire()
             try:
@@ -408,8 +410,8 @@ class _JobQueueWorker(workerpool.BaseWorker):
             finally:
               queue.release()
 
-            logging.debug("Op %s/%s: Successfully finished %s",
-                          idx + 1, count, op)
+            logging.info("Op %s/%s: Successfully finished opcode %s",
+                         idx + 1, count, op_summary)
           except CancelJob:
             # Will be handled further up
             raise
@@ -420,7 +422,8 @@ class _JobQueueWorker(workerpool.BaseWorker):
                 op.status = constants.OP_STATUS_ERROR
                 op.result = str(err)
                 op.end_timestamp = TimeStampNow()
-                logging.debug("Op %s/%s: Error in %s", idx + 1, count, op)
+                logging.info("Op %s/%s: Error in opcode %s", idx + 1, count,
+                             op_summary)
               finally:
                 queue.UpdateJobUnlocked(job)
             finally:
@@ -449,8 +452,8 @@ class _JobQueueWorker(workerpool.BaseWorker):
           status = job.CalcStatus()
       finally:
         queue.release()
-      logging.debug("Worker %s finished job %s, status = %s",
-                    self.worker_id, job_id, status)
+      logging.info("Worker %s finished job %s, status = %s",
+                   self.worker_id, job_id, status)
 
 
 class _JobQueueWorkerPool(workerpool.WorkerPool):
diff --git a/lib/utils.py b/lib/utils.py
index 40b5ff361..691b0ee6b 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -1653,7 +1653,8 @@ def GetNodeDaemonPort():
   return port
 
 
-def SetupLogging(logfile, debug=False, stderr_logging=False, program=""):
+def SetupLogging(logfile, debug=False, stderr_logging=False, program="",
+                 multithreaded=False):
   """Configures the logging module.
 
   @type logfile: str
@@ -1665,16 +1666,18 @@ def SetupLogging(logfile, debug=False, stderr_logging=False, program=""):
   @param stderr_logging: whether we should also log to the standard error
   @type program: str
   @param program: the name under which we should log messages
+  @type multithreaded: boolean
+  @param multithreaded: if True, will add the thread name to the log file
   @raise EnvironmentError: if we can't open the log file and
       stderr logging is disabled
 
   """
-  fmt = "%(asctime)s: " + program + " "
+  fmt = "%(asctime)s: " + program + " pid=%(process)d"
+  if multithreaded:
+    fmt += "/%(threadName)s"
   if debug:
-    fmt += ("pid=%(process)d/%(threadName)s %(levelname)s"
-           " %(module)s:%(lineno)s %(message)s")
-  else:
-    fmt += "pid=%(process)d %(levelname)s %(message)s"
+    fmt += " %(module)s:%(lineno)s"
+  fmt += " %(levelname)s %(message)s"
   formatter = logging.Formatter(fmt)
 
   root_logger = logging.getLogger("")
@@ -1706,7 +1709,7 @@ def SetupLogging(logfile, debug=False, stderr_logging=False, program=""):
     else:
       logfile_handler.setLevel(logging.INFO)
     root_logger.addHandler(logfile_handler)
-  except EnvironmentError, err:
+  except EnvironmentError:
     if stderr_logging:
       logging.exception("Failed to enable logging to file '%s'", logfile)
     else:
-- 
GitLab