From 5ef5ea453b322430c8ec11ddcfbf129ade840331 Mon Sep 17 00:00:00 2001
From: Guido Trotter <ultrotter@google.com>
Date: Fri, 7 May 2010 10:11:37 +0200
Subject: [PATCH] IsProcessAlive: retry stat() a few times

On multiprocessor dom0 stat() on /proc can sometimes return EINVAL.
Before giving up, we try a few times to get a consistent answer.

Signed-off-by: Guido Trotter <ultrotter@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
---
 lib/utils.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/lib/utils.py b/lib/utils.py
index 6d2cd0fee..1d0750e30 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -566,16 +566,28 @@ def IsProcessAlive(pid):
   @return: True if the process exists
 
   """
+  def _TryStat(name):
+    try:
+      os.stat(name)
+      return True
+    except EnvironmentError, err:
+      if err.errno in (errno.ENOENT, errno.ENOTDIR):
+        return False
+      elif err.errno == errno.EINVAL:
+        raise RetryAgain(err)
+      raise
+
+  assert isinstance(pid, int), "pid must be an integer"
   if pid <= 0:
     return False
 
+  proc_entry = "/proc/%d/status" % pid
+  # /proc in a multiprocessor environment can have strange behaviors.
+  # Retry the os.stat a few times until we get a good result.
   try:
-    os.stat("/proc/%d/status" % pid)
-    return True
-  except EnvironmentError, err:
-    if err.errno in (errno.ENOENT, errno.ENOTDIR):
-      return False
-    raise
+    return Retry(_TryStat, (0.01, 1.5, 0.1), 0.5, args=[proc_entry])
+  except RetryTimeout, err:
+    err.RaiseInner()
 
 
 def ReadPidFile(pidfile):
-- 
GitLab