diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index 2633a5992563f298c13e855eac5018cdb960a026..ffd1ff4a16ac62d63704622e96f785e491ddcf52 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -32,6 +32,7 @@ DeviceType = core.DeviceType class TestParallelExecutorBase(unittest.TestCase): + @classmethod def check_network_convergence(cls, method, @@ -52,6 +53,7 @@ class TestParallelExecutorBase(unittest.TestCase): optimizer=fluid.optimizer.Adam, use_fast_executor=False, enable_sequential_execution=False): + def run_executor(exe, binary, feed, fetch_list): if feed_data_reader is None: res = exe.run(binary, feed=feed, fetch_list=fetch_list) @@ -66,8 +68,8 @@ class TestParallelExecutorBase(unittest.TestCase): feed_data_reader, FeedDataReader ), "feed_data_reader must be type of FeedDataReader" - paddle.seed(1) - paddle.framework.random._manual_program_seed(1) + paddle.seed(0) + paddle.framework.random._manual_program_seed(0) main = fluid.Program() startup = fluid.Program() @@ -101,18 +103,29 @@ class TestParallelExecutorBase(unittest.TestCase): ) if use_device == DeviceType.XPU else int( os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + area_below_loss = 0 begin = time.time() - first_loss, = run_executor( - exe=exe, binary=binary, feed=feed_dict, fetch_list=[loss.name]) + first_loss, = run_executor(exe=exe, + binary=binary, + feed=feed_dict, + fetch_list=[loss.name]) + area_below_loss += 0.5 * first_loss.mean() for _ in range(iter): - run_executor(exe=exe, binary=binary, feed=feed_dict, fetch_list=[]) - last_loss, = run_executor( - exe=exe, binary=binary, feed=feed_dict, fetch_list=[loss.name]) + mid_loss = run_executor(exe=exe, + binary=binary, + feed=feed_dict, + fetch_list=[loss.name]) + area_below_loss += mid_loss[0].mean() + last_loss, = run_executor(exe=exe, + binary=binary, + feed=feed_dict, + fetch_list=[loss.name]) + area_below_loss += 0.5 * last_loss.mean() end = time.time() if batch_size is not None: - print("%.4f Instance per second" % ( - (batch_size * iter + 2) / (end - begin))) + print("%.4f Instance per second" % ((batch_size * iter + 2) / + (end - begin))) avg_last_loss_val = np.array(last_loss).mean() avg_first_loss_val = np.array(first_loss).mean() @@ -120,9 +133,9 @@ class TestParallelExecutorBase(unittest.TestCase): float(avg_first_loss_val)): sys.exit("got NaN loss, training failed.") - print(first_loss, last_loss) + print(first_loss, last_loss, area_below_loss) # self.assertGreater(first_loss[0], last_loss[0]) - return first_loss, last_loss + return first_loss, last_loss, area_below_loss @classmethod def check_pass_conflict(cls, diff --git a/python/paddle/fluid/tests/unittests/seresnext_test_base.py b/python/paddle/fluid/tests/unittests/seresnext_test_base.py index cc40b89b585cbf8795a06ee4c5c557b162b0651f..2c85a3401e3236138a19be6409ff4947ebbcdf73 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_test_base.py +++ b/python/paddle/fluid/tests/unittests/seresnext_test_base.py @@ -21,6 +21,7 @@ import numpy as np class TestResnetBase(TestParallelExecutorBase): + def _compare_result_with_origin_model(self, check_func, use_device, @@ -29,7 +30,7 @@ class TestResnetBase(TestParallelExecutorBase): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): return - func_1_first_loss, func_1_last_loss = self.check_network_convergence( + func_1_first_loss, func_1_last_loss, func_1_loss_area = self.check_network_convergence( seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_device), iter=seresnext_net.iter(use_device), @@ -38,7 +39,7 @@ class TestResnetBase(TestParallelExecutorBase): use_reduce=False, optimizer=seresnext_net.optimizer) - func_2_first_loss, func_2_last_loss = check_func( + func_2_first_loss, func_2_last_loss, func_2_loss_area = check_func( seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_device), iter=seresnext_net.iter(use_device), @@ -51,7 +52,12 @@ class TestResnetBase(TestParallelExecutorBase): for loss in zip(func_1_last_loss, func_2_last_loss): self.assertAlmostEquals(loss[0], loss[1], delta=delta2) else: - self.assertAlmostEquals( - np.mean(func_1_first_loss), func_2_first_loss[0], delta=1e-5) - self.assertAlmostEquals( - np.mean(func_1_last_loss), func_2_last_loss[0], delta=delta2) + np.testing.assert_allclose(func_1_loss_area, + func_2_loss_area, + rtol=delta2) + self.assertAlmostEquals(np.mean(func_1_first_loss), + func_2_first_loss[0], + delta=1e-5) + self.assertAlmostEquals(np.mean(func_1_last_loss), + func_2_last_loss[0], + delta=delta2) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py index e3a256613374213bdb80b055171757944b4c0c1a..67729d6633d0e153d1ff67c9d7011456cff35ba7 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestFuseAllReduceOpsBase(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -47,7 +48,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase): img, label = init_feed_dict() feed_dict_data = {"image": img, "label": label} - not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( + not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence( model, feed_dict=feed_dict_data, get_data_from_feeder=get_data_from_feeder, @@ -55,7 +56,7 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase): fuse_all_reduce_ops=False, fuse_all_optimizer_ops=fuse_all_optimizer_ops, optimizer=optimizer) - fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( + fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence( model, feed_dict=feed_dict_data, get_data_from_feeder=get_data_from_feeder, @@ -77,13 +78,13 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase): class TestFuseAllReduceOps(TestFuseAllReduceOpsBase): + def _decorate_compare_fused_all_reduce(self, model, use_device): - self.compare_fuse_all_reduce_ops( - model, - use_device, - init_feed_dict=init_data, - optimizer=self.optimizer, - fuse_all_optimizer_ops=True) + self.compare_fuse_all_reduce_ops(model, + use_device, + init_feed_dict=init_data, + optimizer=self.optimizer, + fuse_all_optimizer_ops=True) def test_simple_fc_with_fuse_all_reduce(self): self._decorate_compare_fused_all_reduce(simple_fc_net, DeviceType.CUDA) @@ -101,16 +102,17 @@ class TestFuseAllReduceOps(TestFuseAllReduceOpsBase): class TestFuseAllReduceOpsAndOptiOps(TestFuseAllReduceOps): + def _decorate_compare_fused_all_reduce(self, model, use_device): - self.compare_fuse_all_reduce_ops( - model, - use_device, - init_feed_dict=init_data, - optimizer=self.optimizer, - fuse_all_optimizer_ops=True) + self.compare_fuse_all_reduce_ops(model, + use_device, + init_feed_dict=init_data, + optimizer=self.optimizer, + fuse_all_optimizer_ops=True) class TestFuseAllReduceOpsWithSparseGrad(TestFuseAllReduceOpsBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index 6c3fa9e61d2406bc8e84d2c691759a5241b6d67b..15b79bf0a7fd8515a4f5c35d86aaba7b4909c12b 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -21,6 +21,7 @@ import os class TestMNIST(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -41,19 +42,23 @@ class TestMNIST(TestParallelExecutorBase): # FIXME (liuwei12) # the new memory optimize strategy will crash this unittest # add enable_inplace=False here to force pass the unittest - not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( + not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_elewise_add_act_ops=False, use_ir_memory_optimize=False, enable_inplace=False, optimizer=_optimizer) - fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( + fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_elewise_add_act_ops=True, use_ir_memory_optimize=False, diff --git a/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py index 51c06bb79d72872aabe7561b504a2ce50eb3433e..981d9dfcf4a56c6e56c14ffcda0b612f2f9b1532 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py @@ -24,6 +24,7 @@ import os class TestFuseOptimizationOps(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -41,14 +42,14 @@ class TestFuseOptimizationOps(TestParallelExecutorBase): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): return - not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( + not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence( model, feed_dict=feed_dict, get_data_from_feeder=get_data_from_feeder, use_device=use_device, fuse_all_optimizer_ops=False, optimizer=optimizer) - fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( + fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence( model, feed_dict=feed_dict, get_data_from_feeder=get_data_from_feeder, @@ -63,36 +64,41 @@ class TestFuseOptimizationOps(TestParallelExecutorBase): def _decorate_compare_fused_optimizer_ops(self, model, use_device, optimizer): - self._compare_fused_optimizer_ops( - model, - use_device, - feed_dict=self._get_feed_dict(), - optimizer=optimizer) + self._compare_fused_optimizer_ops(model, + use_device, + feed_dict=self._get_feed_dict(), + optimizer=optimizer) class TestFuseAdamOps(TestFuseOptimizationOps): + def optimizer(self, learning_rate=1e-4): return fluid.optimizer.Adam(learning_rate=learning_rate) def test_batchnorm_fc_with_fuse_op(self): - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CUDA, optimizer=self.optimizer) - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CPU, optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CUDA, + optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CPU, + optimizer=self.optimizer) class TestFuseSGDOps(TestFuseAdamOps): + def optimizer(self, learning_rate=1e-3): return fluid.optimizer.SGD(learning_rate=learning_rate) class TestFuseMomentumOps(TestFuseAdamOps): + def optimizer(self, learning_rate=1e-3): - return fluid.optimizer.Momentum( - learning_rate=learning_rate, momentum=0.1) + return fluid.optimizer.Momentum(learning_rate=learning_rate, + momentum=0.1) class TestSpareFuseAdamOps(TestFuseOptimizationOps): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -120,24 +126,29 @@ class TestSpareFuseAdamOps(TestFuseOptimizationOps): def test_simple_bow_net_with_fuse_op(self): model = partial(bow_net, dict_dim=self.word_dict_len, is_sparse=True) - self._decorate_compare_fused_optimizer_ops( - model, DeviceType.CUDA, optimizer=self.optimizer) - self._decorate_compare_fused_optimizer_ops( - model, DeviceType.CPU, optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(model, + DeviceType.CUDA, + optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(model, + DeviceType.CPU, + optimizer=self.optimizer) class TestSpareFuseSGDOps(TestSpareFuseAdamOps): + def optimizer(self, learning_rate=1e-3): return fluid.optimizer.SGD(learning_rate=learning_rate) class TestSpareFuseMomentumOps(TestSpareFuseAdamOps): + def optimizer(self, learning_rate=1e-3): - return fluid.optimizer.Momentum( - learning_rate=learning_rate, momentum=0.1) + return fluid.optimizer.Momentum(learning_rate=learning_rate, + momentum=0.1) class TestPassConflictBase(TestFuseAdamOps): + def _compare_fused_optimizer_ops(self, model, use_device, @@ -147,36 +158,40 @@ class TestPassConflictBase(TestFuseAdamOps): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): return - self.check_pass_conflict( - model, - feed_dict=feed_dict, - get_data_from_feeder=get_data_from_feeder, - use_device=use_device, - fuse_all_optimizer_ops=True, - optimizer=optimizer, - enable_sequential_execution=True) + self.check_pass_conflict(model, + feed_dict=feed_dict, + get_data_from_feeder=get_data_from_feeder, + use_device=use_device, + fuse_all_optimizer_ops=True, + optimizer=optimizer, + enable_sequential_execution=True) class TestFuseAdamOpsPassConflict(TestPassConflictBase): + def optimizer(self, learning_rate=1e-4): return fluid.optimizer.Adam(learning_rate=learning_rate) def test_batchnorm_fc_with_fuse_op(self): - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CPU, optimizer=self.optimizer) - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CUDA, optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CPU, + optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CUDA, + optimizer=self.optimizer) class TestFuseSGDOpsPassConflict(TestFuseAdamOpsPassConflict): + def optimizer(self, learning_rate=1e-3): return fluid.optimizer.SGD(learning_rate=learning_rate) class TestFuseMomentumOpsPassConflict(TestFuseAdamOpsPassConflict): + def optimizer(self, learning_rate=1e-3): - return fluid.optimizer.Momentum( - learning_rate=learning_rate, momentum=0.1) + return fluid.optimizer.Momentum(learning_rate=learning_rate, + momentum=0.1) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py index d391b04aa4772efbf7fadb7a9556aafd445197db..cddc05f5914442cf66dadcc972c5b276b49d54ef 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py @@ -28,21 +28,25 @@ def norm(*args, **kargs): def sep_conv(input, channel, stride, filter, dilation=1, act=None): # with scope('depthwise'): - input = fluid.layers.conv2d( - input, - input.shape[1], - filter, - stride, - groups=input.shape[1], - padding=(filter // 2) * dilation, - dilation=dilation, - use_cudnn=False, - bias_attr=False) + input = fluid.layers.conv2d(input, + input.shape[1], + filter, + stride, + groups=input.shape[1], + padding=(filter // 2) * dilation, + dilation=dilation, + use_cudnn=False, + bias_attr=False) input = norm(input) if act: input = act(input) # with scope('pointwise'): - input = fluid.layers.conv2d( - input, channel, 1, 1, groups=1, padding=0, bias_attr=False) + input = fluid.layers.conv2d(input, + channel, + 1, + 1, + groups=1, + padding=0, + bias_attr=False) input = norm(input) if act: input = act(input) return input @@ -63,6 +67,7 @@ def simple_depthwise_net(use_feed): class TestMNIST(TestParallelExecutorBase): + def _init_data(self, random=True): np.random.seed(5) if random: @@ -86,18 +91,22 @@ class TestMNIST(TestParallelExecutorBase): if only_forward: _optimizer = None - fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( + fuse_op_first_loss, fuse_op_last_loss, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_relu_depthwise_conv=True, use_ir_memory_optimize=True, optimizer=_optimizer) - not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( + not_fuse_op_first_loss, not_fuse_op_last_loss, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_relu_depthwise_conv=False, optimizer=_optimizer) diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py index f4ec63a8b916e55675f8d5c716a95b57013d994f..4b775197aaea1168e5f9bc47f34c3bad9ce81e2e 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py @@ -54,6 +54,7 @@ def fc_with_inplace_net(use_feed): class TestMNIST(TestParallelExecutorBase): + def _dummy_data(self): np.random.seed(5) img = np.random.random(size=[32, 784]).astype(np.float32) @@ -65,16 +66,20 @@ class TestMNIST(TestParallelExecutorBase): return img, label = self._dummy_data() - first_loss0, last_loss0 = self.check_network_convergence( + first_loss0, last_loss0, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_ir_memory_optimize=False) - first_loss1, last_loss1 = self.check_network_convergence( + first_loss1, last_loss1, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_ir_memory_optimize=True) for loss in zip(first_loss0, first_loss1): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index 61d643f24c17a9945239119e621732b779729b81..2e2791351bfecd6ad064182ea9ca47aaabb140c9 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -34,8 +34,8 @@ def simple_fc_net(use_feed): hidden, size=200, act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) @@ -73,6 +73,7 @@ def init_data(): class TestMNIST(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -90,17 +91,21 @@ class TestMNIST(TestParallelExecutorBase): img, label = init_data() - all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( + all_reduce_first_loss, all_reduce_last_loss, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_reduce=False) - reduce_first_loss, reduce_last_loss = self.check_network_convergence( + reduce_first_loss, reduce_last_loss, _ = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_reduce=True) @@ -119,12 +124,13 @@ class TestMNIST(TestParallelExecutorBase): img, label = init_data() - self.check_network_convergence( - simple_fc_net, - feed_dict={"image": img, - "label": label}, - use_device=use_device, - use_reduce=use_reduce) + self.check_network_convergence(simple_fc_net, + feed_dict={ + "image": img, + "label": label + }, + use_device=use_device, + use_reduce=use_reduce) def test_simple_fc(self): # use_device @@ -147,25 +153,31 @@ class TestMNIST(TestParallelExecutorBase): img, label = init_data() - single_first_loss, single_last_loss = self.check_network_convergence( + single_first_loss, single_last_loss, _ = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=False) - parallel_first_loss, parallel_last_loss = self.check_network_convergence( + parallel_first_loss, parallel_last_loss, _ = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=True) self.assertAlmostEquals( np.mean(parallel_first_loss), single_first_loss, - delta=1e-6, ) - self.assertAlmostEquals( - np.mean(parallel_last_loss), single_last_loss, delta=1e-6) + delta=1e-6, + ) + self.assertAlmostEquals(np.mean(parallel_last_loss), + single_last_loss, + delta=1e-6) def test_simple_fc_parallel_accuracy(self): self.check_simple_fc_parallel_accuracy(DeviceType.CUDA) @@ -178,12 +190,13 @@ class TestMNIST(TestParallelExecutorBase): return img, label = init_data() - self.check_network_convergence( - fc_with_batchnorm, - feed_dict={"image": img, - "label": label}, - use_device=use_device, - use_fast_executor=use_fast_executor) + self.check_network_convergence(fc_with_batchnorm, + feed_dict={ + "image": img, + "label": label + }, + use_device=use_device, + use_fast_executor=use_fast_executor) def test_batchnorm_fc(self): for use_device in (DeviceType.CPU, DeviceType.CUDA): @@ -201,6 +214,7 @@ class TestMNIST(TestParallelExecutorBase): class TestMNISTNoReduce(unittest.TestCase): + def run_program(self, device_type): if device_type == DeviceType.CUDA: if not paddle.is_compiled_with_cuda(): @@ -225,18 +239,16 @@ class TestMNISTNoReduce(unittest.TestCase): build_strategy = paddle.static.BuildStrategy() build_strategy.reduce_strategy = no_reduce main_multi_place = paddle.static.CompiledProgram( - main).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - places=places) + main).with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + places=places) build_strategy = paddle.static.BuildStrategy() build_strategy.reduce_strategy = no_reduce - main_single_place = paddle.static.CompiledProgram(main.clone( - )).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - places=places[0]) + main_single_place = paddle.static.CompiledProgram( + main.clone()).with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + places=places[0]) image, label = init_data() feed = {'image': image, 'label': label} @@ -256,13 +268,13 @@ class TestMNISTNoReduce(unittest.TestCase): grads_single_place[i].append(g) for i in range(len(grads)): - grads_single_place[i] = np.concatenate( - grads_single_place[i], axis=0) / len(places) + grads_single_place[i] = np.concatenate(grads_single_place[i], + axis=0) / len(places) self.assertEqual(len(grads_multi_place), len(grads_single_place)) for g1, g2 in zip(grads_multi_place, grads_single_place): - self.assertTrue( - np.allclose(g1, g2), 'g1 = {}\ng2 = {}\n'.format(g1, g2)) + self.assertTrue(np.allclose(g1, g2), + 'g1 = {}\ng2 = {}\n'.format(g1, g2)) def split_feed(self, feed, n): image = feed['image'] diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py index e07b89f7aae765e54f06de2715ade910d4fe205f..1ada146985329df8feb9843ed40ebeafe0da844c 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py @@ -18,6 +18,7 @@ import unittest import numpy as np import os + os.environ['FLAGS_enable_parallel_graph'] = str(1) import paddle.fluid.core as core import os @@ -26,6 +27,7 @@ from simple_nets import simple_fc_net, init_data class TestMNIST(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -36,12 +38,13 @@ class TestMNIST(TestParallelExecutorBase): return img, label = init_data() - self.check_network_convergence( - simple_fc_net, - feed_dict={"image": img, - "label": label}, - use_device=use_device, - use_reduce=use_reduce) + self.check_network_convergence(simple_fc_net, + feed_dict={ + "image": img, + "label": label + }, + use_device=use_device, + use_reduce=use_reduce) def test_simple_fc(self): # use_device @@ -52,25 +55,31 @@ class TestMNIST(TestParallelExecutorBase): return img, label = init_data() - single_first_loss, single_last_loss = self.check_network_convergence( + single_first_loss, single_last_loss, _ = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=False) - parallel_first_loss, parallel_last_loss = self.check_network_convergence( + parallel_first_loss, parallel_last_loss, _ = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=True) self.assertAlmostEquals( np.mean(parallel_first_loss), single_first_loss, - delta=1e-6, ) - self.assertAlmostEquals( - np.mean(parallel_last_loss), single_last_loss, delta=1e-6) + delta=1e-6, + ) + self.assertAlmostEquals(np.mean(parallel_last_loss), + single_last_loss, + delta=1e-6) def test_simple_fc_parallel_accuracy(self): self.check_simple_fc_parallel_accuracy(DeviceType.CUDA) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py index 9d1364cc592fe20b9510da6c6f4b903b13cd6f23..0b268ef852f111b438c626126980c8e1649a99a8 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py @@ -20,17 +20,19 @@ from functools import partial class TestResnetGPU(TestResnetBase): + def test_seresnext_with_learning_rate_decay(self): # NOTE(zcd): This test is compare the result of use parallel_executor # and executor, and the result of drop_out op and batch_norm op in # this two executor have diff, so the two ops should be removed # from the model. - check_func = partial( - self.check_network_convergence, - optimizer=seresnext_net.optimizer, - use_parallel_executor=False) - self._compare_result_with_origin_model( - check_func, use_device=DeviceType.CUDA, compare_seperately=False) + check_func = partial(self.check_network_convergence, + optimizer=seresnext_net.optimizer, + use_parallel_executor=False) + self._compare_result_with_origin_model(check_func, + use_device=DeviceType.CUDA, + delta2=1e-5, + compare_seperately=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py index e67934d87f9577d7765e07806a10e68a47bf174f..f2bbbd9fe2a9b976d315d89a1b001b749701a646 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py @@ -20,11 +20,12 @@ import paddle.fluid.core as core class TestResnetWithReduceBase(TestParallelExecutorBase): + def _compare_reduce_and_allreduce(self, use_device, delta2=1e-5): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): return - all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( + all_reduce_first_loss, all_reduce_last_loss, _ = self.check_network_convergence( seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_device), iter=seresnext_net.iter(use_device), @@ -32,7 +33,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): use_device=use_device, use_reduce=False, optimizer=seresnext_net.optimizer) - reduce_first_loss, reduce_last_loss = self.check_network_convergence( + reduce_first_loss, reduce_last_loss, _ = self.check_network_convergence( seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_device), iter=seresnext_net.iter(use_device), @@ -49,7 +50,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): if not use_device: return - all_reduce_first_loss_seq, all_reduce_last_loss_seq = self.check_network_convergence( + all_reduce_first_loss_seq, all_reduce_last_loss_seq, _ = self.check_network_convergence( seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_device), iter=seresnext_net.iter(use_device), @@ -59,7 +60,7 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): optimizer=seresnext_net.optimizer, enable_sequential_execution=True) - reduce_first_loss_seq, reduce_last_loss_seq = self.check_network_convergence( + reduce_first_loss_seq, reduce_last_loss_seq, _ = self.check_network_convergence( seresnext_net.model, feed_dict=seresnext_net.feed_dict(use_device), iter=seresnext_net.iter(use_device), @@ -86,9 +87,10 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): class TestResnetWithReduceCPU(TestResnetWithReduceBase): + def test_seresnext_with_reduce(self): - self._compare_reduce_and_allreduce( - use_device=DeviceType.CPU, delta2=1e-3) + self._compare_reduce_and_allreduce(use_device=DeviceType.CPU, + delta2=1e-3) if __name__ == '__main__':