未验证 提交 7d76e34e 编写于 作者: G guru4elephant 提交者: GitHub

Add more progress prints to diagnose the timeout issue and increase the test timeout values (#18219)

* Add more progress prints to diagnose the timeout issue and increase the test timeout values
上级 cf15c3ff
...@@ -184,10 +184,10 @@ if(WITH_DISTRIBUTE) ...@@ -184,10 +184,10 @@ if(WITH_DISTRIBUTE)
py_test_modules(test_dgc_op MODULES test_dgc_op) py_test_modules(test_dgc_op MODULES test_dgc_op)
endif() endif()
if(NOT APPLE) if(NOT APPLE)
set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 300)
set_tests_properties(test_dist_mnist_nccl PROPERTIES TIMEOUT 250) set_tests_properties(test_dist_mnist_nccl PROPERTIES TIMEOUT 300)
set_tests_properties(test_dist_mnist_lars PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_mnist_lars PROPERTIES TIMEOUT 300)
set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200) set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 300)
py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext) py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext)
py_test_modules(test_dist_se_resnext_nccl MODULES test_dist_se_resnext_nccl) py_test_modules(test_dist_se_resnext_nccl MODULES test_dist_se_resnext_nccl)
bash_test_modules(test_launch MODULES test_launch.sh) bash_test_modules(test_launch MODULES test_launch.sh)
......
...@@ -144,7 +144,11 @@ class TestDistRunnerBase(object): ...@@ -144,7 +144,11 @@ class TestDistRunnerBase(object):
"get trainer program done. with nccl2 mode") "get trainer program done. with nccl2 mode")
trainer_prog = fluid.default_main_program() trainer_prog = fluid.default_main_program()
else: else:
my_print(
type(self).__name__,
"do nothing about main program, just use it")
trainer_prog = fluid.default_main_program() trainer_prog = fluid.default_main_program()
my_print(type(self).__name__, "use main program done.")
if args.use_cuda: if args.use_cuda:
device_id = int(os.getenv("FLAGS_selected_gpus", "0")) device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
...@@ -224,11 +228,14 @@ class TestDistRunnerBase(object): ...@@ -224,11 +228,14 @@ class TestDistRunnerBase(object):
my_print(type(self).__name__, "begin to train on trainer") my_print(type(self).__name__, "begin to train on trainer")
out_losses = [] out_losses = []
for _ in six.moves.xrange(RUN_STEP): for i in six.moves.xrange(RUN_STEP):
loss, = exe.run(binary, loss, = exe.run(binary,
fetch_list=[avg_cost.name], fetch_list=[avg_cost.name],
feed=feeder.feed(get_data())) feed=feeder.feed(get_data()))
out_losses.append(loss[0]) out_losses.append(loss[0])
my_print(type(self).__name__, "run step %d finished" % i)
my_print(type(self).__name__, "trainer run finished")
if six.PY2: if six.PY2:
print(pickle.dumps(out_losses)) print(pickle.dumps(out_losses))
else: else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册