From 554bcdbdfcbe68c799dd6de8a01ab0d2337f2975 Mon Sep 17 00:00:00 2001 From: Wu Yi Date: Mon, 10 Dec 2018 15:17:49 +0800 Subject: [PATCH] add more log for dist test for ci test=develop (#14813) * add more log for dist test for ci test=develop * increase deadline test=develop --- .../fluid/tests/unittests/test_dist_base.py | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index 0a43f536585..26fa20291b5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -378,6 +378,18 @@ class TestDistBase(unittest.TestCase): stderr=tr1_pipe, env=env1) + # Wait until trainer process terminate + while True: + stat0 = tr0_proc.poll() + time.sleep(0.1) + if stat0 is not None: + break + while True: + stat1 = tr1_proc.poll() + time.sleep(0.1) + if stat1 is not None: + break + tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() @@ -390,11 +402,21 @@ class TestDistBase(unittest.TestCase): ps0.terminate() ps1.terminate() + # print server log + with open("/tmp/ps0_err.log", "r") as fn: + sys.stderr.write("ps0 stderr: %s\n" % fn.read()) + with open("/tmp/ps1_err.log", "r") as fn: + sys.stderr.write("ps1 stderr: %s\n" % fn.read()) + # print log - sys.stderr.write('trainer 0 stdout: %s\n' % pickle.loads(tr0_out)) - sys.stderr.write('trainer 0 stderr: %s\n' % tr0_err) - sys.stderr.write('trainer 1 stdout: %s\n' % pickle.loads(tr1_out)) - sys.stderr.write('trainer 1 stderr: %s\n' % tr1_err) + if stat0 == 0: + sys.stderr.write('trainer 0 stdout: %s\n' % pickle.loads(tr0_out)) + with open("/tmp/tr0_err.log", "r") as fn: + sys.stderr.write('trainer 0 stderr: %s\n' % fn.read()) + if stat1 == 0: + sys.stderr.write('trainer 1 stdout: %s\n' % pickle.loads(tr1_out)) + with open("/tmp/tr1_err.log", "r") as fn: + sys.stderr.write('trainer 1 stderr: %s\n' % fn.read()) return pickle.loads(tr0_out), pickle.loads(tr1_out) @@ -474,6 +496,7 @@ class TestDistBase(unittest.TestCase): "PYTHONPATH": os.getenv("PYTHONPATH", ""), "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), "FLAGS_fraction_of_gpu_memory_to_use": "0.15", + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast "FLAGS_cudnn_deterministic": "1", "http_proxy": "", "NCCL_P2P_DISABLE": "1" -- GitLab