未验证 提交 c0cf5cb7 编写于 作者: M Ming-Xu Huang 提交者: GitHub

Apply IOU to test_parallel_executor_seresnext_base_gpu (#43812)

1. test_parallel_executor_seresnext_base_gpu failed on 2 P100 GPUs with `470.82` driver.
```
======================================================================
FAIL: test_seresnext_with_learning_rate_decay (test_parallel_executor_seresnext_base_gpu.TestResnetGPU)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/opt/paddle/paddle/build/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py", line 32, in test_seresnext_with_learning_rate_decay
    self._compare_result_with_origin_model(
  File "/opt/paddle/paddle/build/python/paddle/fluid/tests/unittests/seresnext_test_base.py", line 56, in _compare_result_with_origin_model
    self.assertAlmostEquals(
AssertionError: 6.8825445 != 6.882531 within 1e-05 delta (1.335144e-05 difference)
----------------------------------------------------------------------
```
2. To be more accurate in evaluating loss convergence, we propose to apply IOU as the metric, instead of comparing only the first and last loss values.
3. As per offline discussion, we also evaluated convergence on P100 and A100 over 1000 iterations to make sure this UT has the same convergence property on both devices. The curves are shown below.
![A100-Single, P100-Single and Diff (1)](https://user-images.githubusercontent.com/13541238/175461920-25df6101-6dd8-4387-862c-d1c8e9299c57.png)
上级 ec5f8cfd
...@@ -68,8 +68,8 @@ class TestParallelExecutorBase(unittest.TestCase): ...@@ -68,8 +68,8 @@ class TestParallelExecutorBase(unittest.TestCase):
feed_data_reader, FeedDataReader feed_data_reader, FeedDataReader
), "feed_data_reader must be type of FeedDataReader" ), "feed_data_reader must be type of FeedDataReader"
paddle.seed(1) paddle.seed(0)
paddle.framework.random._manual_program_seed(1) paddle.framework.random._manual_program_seed(0)
main = fluid.Program() main = fluid.Program()
startup = fluid.Program() startup = fluid.Program()
...@@ -103,17 +103,24 @@ class TestParallelExecutorBase(unittest.TestCase): ...@@ -103,17 +103,24 @@ class TestParallelExecutorBase(unittest.TestCase):
) if use_device == DeviceType.XPU else int( ) if use_device == DeviceType.XPU else int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count())) os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
area_below_loss = 0
begin = time.time() begin = time.time()
first_loss, = run_executor(exe=exe, first_loss, = run_executor(exe=exe,
binary=binary, binary=binary,
feed=feed_dict, feed=feed_dict,
fetch_list=[loss.name]) fetch_list=[loss.name])
area_below_loss += 0.5 * first_loss.mean()
for _ in range(iter): for _ in range(iter):
run_executor(exe=exe, binary=binary, feed=feed_dict, fetch_list=[]) mid_loss = run_executor(exe=exe,
binary=binary,
feed=feed_dict,
fetch_list=[loss.name])
area_below_loss += mid_loss[0].mean()
last_loss, = run_executor(exe=exe, last_loss, = run_executor(exe=exe,
binary=binary, binary=binary,
feed=feed_dict, feed=feed_dict,
fetch_list=[loss.name]) fetch_list=[loss.name])
area_below_loss += 0.5 * last_loss.mean()
end = time.time() end = time.time()
if batch_size is not None: if batch_size is not None:
...@@ -126,9 +133,9 @@ class TestParallelExecutorBase(unittest.TestCase): ...@@ -126,9 +133,9 @@ class TestParallelExecutorBase(unittest.TestCase):
float(avg_first_loss_val)): float(avg_first_loss_val)):
sys.exit("got NaN loss, training failed.") sys.exit("got NaN loss, training failed.")
print(first_loss, last_loss) print(first_loss, last_loss, area_below_loss)
# self.assertGreater(first_loss[0], last_loss[0]) # self.assertGreater(first_loss[0], last_loss[0])
return first_loss, last_loss return first_loss, last_loss, area_below_loss
@classmethod @classmethod
def check_pass_conflict(cls, def check_pass_conflict(cls,
......
...@@ -30,7 +30,7 @@ class TestResnetBase(TestParallelExecutorBase): ...@@ -30,7 +30,7 @@ class TestResnetBase(TestParallelExecutorBase):
if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda():
return return
func_1_first_loss, func_1_last_loss = self.check_network_convergence( func_1_first_loss, func_1_last_loss, func_1_loss_area = self.check_network_convergence(
seresnext_net.model, seresnext_net.model,
feed_dict=seresnext_net.feed_dict(use_device), feed_dict=seresnext_net.feed_dict(use_device),
iter=seresnext_net.iter(use_device), iter=seresnext_net.iter(use_device),
...@@ -39,7 +39,7 @@ class TestResnetBase(TestParallelExecutorBase): ...@@ -39,7 +39,7 @@ class TestResnetBase(TestParallelExecutorBase):
use_reduce=False, use_reduce=False,
optimizer=seresnext_net.optimizer) optimizer=seresnext_net.optimizer)
func_2_first_loss, func_2_last_loss = check_func( func_2_first_loss, func_2_last_loss, func_2_loss_area = check_func(
seresnext_net.model, seresnext_net.model,
feed_dict=seresnext_net.feed_dict(use_device), feed_dict=seresnext_net.feed_dict(use_device),
iter=seresnext_net.iter(use_device), iter=seresnext_net.iter(use_device),
...@@ -52,6 +52,9 @@ class TestResnetBase(TestParallelExecutorBase): ...@@ -52,6 +52,9 @@ class TestResnetBase(TestParallelExecutorBase):
for loss in zip(func_1_last_loss, func_2_last_loss): for loss in zip(func_1_last_loss, func_2_last_loss):
self.assertAlmostEquals(loss[0], loss[1], delta=delta2) self.assertAlmostEquals(loss[0], loss[1], delta=delta2)
else: else:
np.testing.assert_allclose(func_1_loss_area,
func_2_loss_area,
rtol=delta2)
self.assertAlmostEquals(np.mean(func_1_first_loss), self.assertAlmostEquals(np.mean(func_1_first_loss),
func_2_first_loss[0], func_2_first_loss[0],
delta=1e-5) delta=1e-5)
......
...@@ -48,7 +48,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase): ...@@ -48,7 +48,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase):
img, label = init_feed_dict() img, label = init_feed_dict()
feed_dict_data = {"image": img, "label": label} feed_dict_data = {"image": img, "label": label}
not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict=feed_dict_data, feed_dict=feed_dict_data,
get_data_from_feeder=get_data_from_feeder, get_data_from_feeder=get_data_from_feeder,
...@@ -56,7 +56,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase): ...@@ -56,7 +56,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase):
fuse_all_reduce_ops=False, fuse_all_reduce_ops=False,
fuse_all_optimizer_ops=fuse_all_optimizer_ops, fuse_all_optimizer_ops=fuse_all_optimizer_ops,
optimizer=optimizer) optimizer=optimizer)
fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict=feed_dict_data, feed_dict=feed_dict_data,
get_data_from_feeder=get_data_from_feeder, get_data_from_feeder=get_data_from_feeder,
......
...@@ -42,7 +42,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -42,7 +42,7 @@ class TestMNIST(TestParallelExecutorBase):
# FIXME (liuwei12) # FIXME (liuwei12)
# the new memory optimize strategy will crash this unittest # the new memory optimize strategy will crash this unittest
# add enable_inplace=False here to force pass the unittest # add enable_inplace=False here to force pass the unittest
not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -53,7 +53,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -53,7 +53,7 @@ class TestMNIST(TestParallelExecutorBase):
use_ir_memory_optimize=False, use_ir_memory_optimize=False,
enable_inplace=False, enable_inplace=False,
optimizer=_optimizer) optimizer=_optimizer)
fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
......
...@@ -42,14 +42,14 @@ class TestFuseOptimizationOps(TestParallelExecutorBase): ...@@ -42,14 +42,14 @@ class TestFuseOptimizationOps(TestParallelExecutorBase):
if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda():
return return
not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict=feed_dict, feed_dict=feed_dict,
get_data_from_feeder=get_data_from_feeder, get_data_from_feeder=get_data_from_feeder,
use_device=use_device, use_device=use_device,
fuse_all_optimizer_ops=False, fuse_all_optimizer_ops=False,
optimizer=optimizer) optimizer=optimizer)
fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict=feed_dict, feed_dict=feed_dict,
get_data_from_feeder=get_data_from_feeder, get_data_from_feeder=get_data_from_feeder,
......
...@@ -91,7 +91,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -91,7 +91,7 @@ class TestMNIST(TestParallelExecutorBase):
if only_forward: if only_forward:
_optimizer = None _optimizer = None
fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -101,7 +101,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -101,7 +101,7 @@ class TestMNIST(TestParallelExecutorBase):
fuse_relu_depthwise_conv=True, fuse_relu_depthwise_conv=True,
use_ir_memory_optimize=True, use_ir_memory_optimize=True,
optimizer=_optimizer) optimizer=_optimizer)
not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
......
...@@ -66,7 +66,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -66,7 +66,7 @@ class TestMNIST(TestParallelExecutorBase):
return return
img, label = self._dummy_data() img, label = self._dummy_data()
first_loss0, last_loss0 = self.check_network_convergence( first_loss0, last_loss0, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -74,7 +74,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -74,7 +74,7 @@ class TestMNIST(TestParallelExecutorBase):
}, },
use_device=use_device, use_device=use_device,
use_ir_memory_optimize=False) use_ir_memory_optimize=False)
first_loss1, last_loss1 = self.check_network_convergence( first_loss1, last_loss1, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
......
...@@ -91,7 +91,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -91,7 +91,7 @@ class TestMNIST(TestParallelExecutorBase):
img, label = init_data() img, label = init_data()
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( all_reduce_first_loss, all_reduce_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -100,7 +100,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -100,7 +100,7 @@ class TestMNIST(TestParallelExecutorBase):
use_device=use_device, use_device=use_device,
use_reduce=False) use_reduce=False)
reduce_first_loss, reduce_last_loss = self.check_network_convergence( reduce_first_loss, reduce_last_loss, _ = self.check_network_convergence(
model, model,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -153,7 +153,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -153,7 +153,7 @@ class TestMNIST(TestParallelExecutorBase):
img, label = init_data() img, label = init_data()
single_first_loss, single_last_loss = self.check_network_convergence( single_first_loss, single_last_loss, _ = self.check_network_convergence(
method=simple_fc_net, method=simple_fc_net,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -161,7 +161,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -161,7 +161,7 @@ class TestMNIST(TestParallelExecutorBase):
}, },
use_device=use_device, use_device=use_device,
use_parallel_executor=False) use_parallel_executor=False)
parallel_first_loss, parallel_last_loss = self.check_network_convergence( parallel_first_loss, parallel_last_loss, _ = self.check_network_convergence(
method=simple_fc_net, method=simple_fc_net,
feed_dict={ feed_dict={
"image": img, "image": img,
......
...@@ -55,7 +55,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -55,7 +55,7 @@ class TestMNIST(TestParallelExecutorBase):
return return
img, label = init_data() img, label = init_data()
single_first_loss, single_last_loss = self.check_network_convergence( single_first_loss, single_last_loss, _ = self.check_network_convergence(
method=simple_fc_net, method=simple_fc_net,
feed_dict={ feed_dict={
"image": img, "image": img,
...@@ -63,7 +63,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -63,7 +63,7 @@ class TestMNIST(TestParallelExecutorBase):
}, },
use_device=use_device, use_device=use_device,
use_parallel_executor=False) use_parallel_executor=False)
parallel_first_loss, parallel_last_loss = self.check_network_convergence( parallel_first_loss, parallel_last_loss, _ = self.check_network_convergence(
method=simple_fc_net, method=simple_fc_net,
feed_dict={ feed_dict={
"image": img, "image": img,
......
...@@ -31,6 +31,7 @@ class TestResnetGPU(TestResnetBase): ...@@ -31,6 +31,7 @@ class TestResnetGPU(TestResnetBase):
use_parallel_executor=False) use_parallel_executor=False)
self._compare_result_with_origin_model(check_func, self._compare_result_with_origin_model(check_func,
use_device=DeviceType.CUDA, use_device=DeviceType.CUDA,
delta2=1e-5,
compare_separately=False) compare_separately=False)
......
...@@ -25,7 +25,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): ...@@ -25,7 +25,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase):
if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda():
return return
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( all_reduce_first_loss, all_reduce_last_loss, _ = self.check_network_convergence(
seresnext_net.model, seresnext_net.model,
feed_dict=seresnext_net.feed_dict(use_device), feed_dict=seresnext_net.feed_dict(use_device),
iter=seresnext_net.iter(use_device), iter=seresnext_net.iter(use_device),
...@@ -33,7 +33,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): ...@@ -33,7 +33,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase):
use_device=use_device, use_device=use_device,
use_reduce=False, use_reduce=False,
optimizer=seresnext_net.optimizer) optimizer=seresnext_net.optimizer)
reduce_first_loss, reduce_last_loss = self.check_network_convergence( reduce_first_loss, reduce_last_loss, _ = self.check_network_convergence(
seresnext_net.model, seresnext_net.model,
feed_dict=seresnext_net.feed_dict(use_device), feed_dict=seresnext_net.feed_dict(use_device),
iter=seresnext_net.iter(use_device), iter=seresnext_net.iter(use_device),
...@@ -50,7 +50,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): ...@@ -50,7 +50,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase):
if not use_device: if not use_device:
return return
all_reduce_first_loss_seq, all_reduce_last_loss_seq = self.check_network_convergence( all_reduce_first_loss_seq, all_reduce_last_loss_seq, _ = self.check_network_convergence(
seresnext_net.model, seresnext_net.model,
feed_dict=seresnext_net.feed_dict(use_device), feed_dict=seresnext_net.feed_dict(use_device),
iter=seresnext_net.iter(use_device), iter=seresnext_net.iter(use_device),
...@@ -60,7 +60,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): ...@@ -60,7 +60,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase):
optimizer=seresnext_net.optimizer, optimizer=seresnext_net.optimizer,
enable_sequential_execution=True) enable_sequential_execution=True)
reduce_first_loss_seq, reduce_last_loss_seq = self.check_network_convergence( reduce_first_loss_seq, reduce_last_loss_seq, _ = self.check_network_convergence(
seresnext_net.model, seresnext_net.model,
feed_dict=seresnext_net.feed_dict(use_device), feed_dict=seresnext_net.feed_dict(use_device),
iter=seresnext_net.iter(use_device), iter=seresnext_net.iter(use_device),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册