From 412ad816440a8c2e8b4a4f0c3a0e702749642cf4 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Sat, 28 Jul 2018 20:44:04 +0800
Subject: [PATCH] keep seed in dist transpiler

better test
---
 .../tests/unittests/test_dist_se_resnext.py   | 28 +++++++++++++++----
 .../fluid/transpiler/distribute_transpiler.py |  2 ++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
index 0871ad715f..3b67b3f5cc 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
@@ -19,6 +19,7 @@
 import math
 import unittest
 import os
+import sys
 import signal
 import subprocess
 
@@ -56,7 +57,7 @@ class TestDistSeResneXt2x2(unittest.TestCase):
             except os.error:
                 retry_times -= 1
 
-    def no_test_with_place(self):
+    def test_with_place(self):
         # *ATTENTION* THIS TEST NEEDS AT LEAST 2GPUS TO RUN
         required_envs = {
             "PATH": os.getenv("PATH"),
@@ -70,9 +71,15 @@
         local_cmd = "%s dist_se_resnext.py trainer %s 0 %s %d FLASE" % \
             (self._python_interp, "127.0.0.1:1234", "127.0.0.1:1234", 1)
         local_proc = subprocess.Popen(
-            local_cmd.split(" "), stdout=subprocess.PIPE, env=env_local)
+            local_cmd.split(" "),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=env_local)
         local_proc.wait()
-        local_ret = local_proc.stdout.read()
+        out, err = local_proc.communicate()
+        local_ret = out
+        sys.stderr.write('local_loss: %s\n' % local_ret)
+        sys.stderr.write('local_stderr: %s\n' % err)
 
         # Run dist train to compare with local results
         ps0, ps1 = self.start_pserver()
@@ -92,13 +99,22 @@
 
         FNULL = open(os.devnull, 'w')
         tr0_proc = subprocess.Popen(
-            tr0_cmd.split(" "), stdout=subprocess.PIPE, stderr=FNULL, env=env0)
+            tr0_cmd.split(" "),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=env0)
         tr1_proc = subprocess.Popen(
-            tr1_cmd.split(" "), stdout=subprocess.PIPE, stderr=FNULL, env=env1)
+            tr1_cmd.split(" "),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=env1)
 
         tr0_proc.wait()
         tr1_proc.wait()
-        loss_data0 = tr0_proc.stdout.read()
+        out, err = tr0_proc.communicate()
+        sys.stderr.write('dist_stderr: %s\n' % err)
+        loss_data0 = out
+        sys.stderr.write('dist_loss: %s\n' % loss_data0)
         lines = loss_data0.split("\n")
         dist_first_loss = eval(lines[0].replace(" ", ","))[0]
         dist_last_loss = eval(lines[1].replace(" ", ","))[0]
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 4a9ea6af74..2d9c089c0b 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -347,6 +347,7 @@ class DistributeTranspiler(object):
 
         # step1
         pserver_program = Program()
+        pserver_program.random_seed = self.origin_program.random_seed
         # step2: Create vars to receive vars at parameter servers.
         recv_inputs = []
         for v in self.param_grad_ep_mapping[endpoint]["params"]:
@@ -544,6 +545,7 @@ class DistributeTranspiler(object):
         """
         s_prog = Program()
         orig_s_prog = default_startup_program()
+        s_prog.random_seed = orig_s_prog.random_seed
         params = self.param_grad_ep_mapping[endpoint]["params"]
 
         def _get_splited_name_and_shape(varname):
-- 
GitLab