From 078a678219f85746390992564f6608f800f42773 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Wed, 11 Sep 2019 18:51:12 +0800
Subject: [PATCH] refine math_op_patch, test=develop (#19727)

---
 paddle/fluid/operators/scale_op.cc | 3 +
 paddle/fluid/operators/scale_op.cu | 5 +
 python/paddle/fluid/layers/math_op_patch.py | 127 +++++++++++++-----
 .../fluid/tests/unittests/test_optimizer.py | 54 +++-----
 4 files changed, 119 insertions(+), 70 deletions(-)

diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index 4e4a015e183..383e7940fa5 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -108,5 +108,8 @@ REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradMaker,
 REGISTER_OP_CPU_KERNEL(
     scale, ops::ScaleKernel,
     ops::ScaleKernel,
+    ops::ScaleKernel,
+    ops::ScaleKernel,
+    ops::ScaleKernel,
     ops::ScaleKernel,
     ops::ScaleKernel);
diff --git a/paddle/fluid/operators/scale_op.cu b/paddle/fluid/operators/scale_op.cu
index 349f39360b8..e1f20a73b20 100644
--- a/paddle/fluid/operators/scale_op.cu
+++ b/paddle/fluid/operators/scale_op.cu
@@ -20,6 +20,11 @@ REGISTER_OP_CUDA_KERNEL(
     scale, paddle::operators::ScaleKernel,
     paddle::operators::ScaleKernel,
+    paddle::operators::ScaleKernel,
+    paddle::operators::ScaleKernel,
+    paddle::operators::ScaleKernel,
     paddle::operators::ScaleKernel,
     paddle::operators::ScaleKernel,
diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py
index 90689c0f377..87b5cbeb557 100644
--- a/python/paddle/fluid/layers/math_op_patch.py
+++ b/python/paddle/fluid/layers/math_op_patch.py
@@ -14,10 +14,19 @@
 
 from __future__ import print_function
 
+from .. import core
 from ..framework import Variable, unique_name
 from .layer_function_generator import OpProtoHolder
 from ..initializer import force_init_on_cpu
 
+_supported_int_dtype_ = [
+    core.VarDesc.VarType.UINT8,
+    core.VarDesc.VarType.INT8,
+    core.VarDesc.VarType.INT16,
+    core.VarDesc.VarType.INT32,
+    core.VarDesc.VarType.INT64,
+]
+
 
 def monkey_patch_variable():
     def unique_tmp_name():
@@ -30,10 +39,16 @@ def monkey_patch_variable():
             raise ValueError("Cannot get data type from %s", var.name)
         return dtype
 
+    def current_block(var):
+        return var.block.program.current_block()
+
+    def create_new_tmp_var(block, dtype):
+        tmp_name = unique_tmp_name()
+        return block.create_var(name=tmp_name, dtype=dtype)
+
     def create_tensor(block, value, dtype, shape):
         value = float(value)
-        tmp_name = unique_tmp_name()
-        var = block.create_var(name=tmp_name, shape=shape, dtype=dtype)
+        var = create_new_tmp_var(block, dtype)
         block.append_op(
             type="fill_constant",
             outputs={'Out': [var]},
@@ -53,15 +68,15 @@ def monkey_patch_variable():
     def create_tensor_with_batchsize(ref_var, value, dtype):
         assert isinstance(ref_var, Variable)
         value = float(value)
-        tmp_name = unique_tmp_name()
-        var = ref_var.block.create_var(name=tmp_name, dtype=dtype)
+        block = current_block(ref_var)
+        var = create_new_tmp_var(block, dtype)
         batch_dim = -1
         for i, d in enumerate(ref_var.shape):
             if d < 0:
                 batch_dim = i
                 break
         assert batch_dim != -1
-        ref_var.block.append_op(
+        block.append_op(
             type='fill_constant_batch_size_like',
             outputs={'Out': [var]},
             inputs={'Input': [ref_var]},
@@ -87,9 +102,9 @@ def monkey_patch_variable():
         Returns:
             Variable with new dtype
         """
-        tmp_name = unique_tmp_name()
-        out = self.block.create_var(name=tmp_name, dtype=dtype)
-        self.block.append_op(
+        block = current_block(self)
+        out = create_new_tmp_var(block, dtype)
+        block.append_op(
             type="cast",
             inputs={"X": [self]},
             outputs={"Out": [out]},
@@ -97,8 +112,46 @@ def monkey_patch_variable():
                    "out_dtype": out.dtype})
         return out
 
-    def _elemwise_method_creator_(method_name, op_type, reverse=False):
+    def _scalar_elementwise_op_(var, scale, bias):
+        block = current_block(var)
+        out = create_new_tmp_var(block, var.dtype)
+        block.append_op(
+            type="scale",
+            inputs={"X": [var]},
+            outputs={"Out": [out]},
+            attrs={"scale": scale,
+                   "bias": bias})
+        return out
+
+    def _scalar_elementwise_add_(var, value):
+        return _scalar_elementwise_op_(var, 1.0, value)
+
+    def _scalar_elementwise_sub_(var, value):
+        return _scalar_elementwise_op_(var, 1.0, -value)
+
+    def _scalar_elementwise_rsub_(var, value):
+        return _scalar_elementwise_op_(var, -1.0, value)
+
+    def _scalar_elementwise_mul_(var, value):
+        return _scalar_elementwise_op_(var, value, 0.0)
+
+    def _scalar_elementwise_div_(var, value):
+        return _scalar_elementwise_op_(var, 1.0 / value, 0.0)
+
+    def _elemwise_method_creator_(method_name,
+                                  op_type,
+                                  reverse=False,
+                                  scalar_method=None):
         def __impl__(self, other_var):
+            if scalar_method is not None:
+                if isinstance(other_var, float):
+                    if self.dtype in _supported_int_dtype_:
+                        assert other_var == int(other_var), \
+                            "float value {} cannot convert to integer".format(other_var)
+                    return scalar_method(self, other_var)
+                elif isinstance(other_var, int):
+                    return scalar_method(self, float(other_var))
+
             lhs_dtype = safe_get_dtype(self)
 
             if not isinstance(other_var, Variable):
@@ -110,7 +163,7 @@ def monkey_patch_variable():
                             break
                     if not has_batch_size:
                         other_var = create_tensor(
-                            self.block,
+                            current_block(self),
                             other_var,
                             dtype=lhs_dtype,
                             shape=self.shape)
@@ -118,9 +171,9 @@ def monkey_patch_variable():
                         other_var = create_tensor_with_batchsize(
                             self, other_var, lhs_dtype)
                 else:
-                    # add fill_op to self.block
+                    # add fill_op to current_block
                     other_var = create_scalar(
-                        self.block, value=other_var, dtype=lhs_dtype)
+                        current_block(self), value=other_var, dtype=lhs_dtype)
 
             rhs_dtype = safe_get_dtype(other_var)
             if lhs_dtype != rhs_dtype:
@@ -130,8 +183,7 @@ def monkey_patch_variable():
                 self = other_var
                 other_var = tmp
 
-            tmp_name = unique_tmp_name()
-            out = self.block.create_var(name=tmp_name, dtype=lhs_dtype)
+            out = create_new_tmp_var(current_block(self), dtype=lhs_dtype)
 
             axis = -1
             if other_var.shape[0] == -1:
@@ -141,7 +193,7 @@
                 "be smaller than the rank of its second argument: %s vs %s" %
                 (len(self.shape), len(other_var.shape)))
 
-            self.block.append_op(
+            current_block(self).append_op(
                 type=op_type,
                 inputs={'X': [self],
                         'Y': [other_var]},
@@ -164,31 +216,32 @@ def monkey_patch_variable():
         return __impl__
 
     # inject methods
-    for method_name, op_type, reverse in (
-        ("__add__", "elementwise_add", False),
+    for method_name, op_type, reverse, scalar_method in (
+        ("__add__", "elementwise_add", False, _scalar_elementwise_add_),
         # a+b == b+a. Do not need to reverse explicitly
-        ("__radd__", "elementwise_add", False),
-        ("__sub__", "elementwise_sub", False),
-        ("__rsub__", "elementwise_sub", True),
-        ("__mul__", "elementwise_mul", False),
+        ("__radd__", "elementwise_add", False, _scalar_elementwise_add_),
+        ("__sub__", "elementwise_sub", False, _scalar_elementwise_sub_),
+        ("__rsub__", "elementwise_sub", True, _scalar_elementwise_rsub_),
+        ("__mul__", "elementwise_mul", False, _scalar_elementwise_mul_),
         # a*b == b*a. Do not need to reverse explicitly
-        ("__rmul__", "elementwise_mul", False),
-        ("__div__", "elementwise_div", False),
-        ("__truediv__", "elementwise_div", False),
-        ("__rdiv__", "elementwise_div", True),
-        ("__rtruediv__", "elementwise_div", True),
-        ("__pow__", "elementwise_pow", False),
-        ("__rpow__", "elementwise_pow", True),
-        ("__floordiv__", "elementwise_floordiv", False),
-        ("__mod__", "elementwise_mod", False),
+        ("__rmul__", "elementwise_mul", False, _scalar_elementwise_mul_),
+        ("__div__", "elementwise_div", False, _scalar_elementwise_div_),
+        ("__truediv__", "elementwise_div", False, _scalar_elementwise_div_),
+        ("__rdiv__", "elementwise_div", True, None),
+        ("__rtruediv__", "elementwise_div", True, None),
+        ("__pow__", "elementwise_pow", False, None),
+        ("__rpow__", "elementwise_pow", True, None),
+        ("__floordiv__", "elementwise_floordiv", False, None),
+        ("__mod__", "elementwise_mod", False, None),
         # for logical compare
-        ("__eq__", "equal", False),
-        ("__ne__", "not_equal", False),
-        ("__lt__", "less_than", False),
-        ("__le__", "less_equal", False),
-        ("__gt__", "greater_than", False),
-        ("__ge__", "greater_equal", False)):
+        ("__eq__", "equal", False, None),
+        ("__ne__", "not_equal", False, None),
+        ("__lt__", "less_than", False, None),
+        ("__le__", "less_equal", False, None),
+        ("__gt__", "greater_than", False, None),
+        ("__ge__", "greater_equal", False, None)):
         setattr(Variable, method_name,
-                _elemwise_method_creator_(method_name, op_type, reverse))
+                _elemwise_method_creator_(method_name, op_type, reverse,
+                                          scalar_method))
 
     Variable.astype = astype
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py
index a23ca69b60f..fefee65c979 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -52,9 +52,8 @@ class TestOptimizer(unittest.TestCase):
             return opts
 
         opts = check_sgd_optimizer({'learning_rate': 1.1})
-        self.assertEqual(len(opts), 3)
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "sgd"])
+        self.assertEqual(len(opts), 2)
+        self.assertEqual([op.type for op in opts], ["scale", "sgd"])
 
         opts = check_sgd_optimizer({'learning_rate': 1.0})
         self.assertEqual(len(opts), 1)
@@ -94,9 +93,8 @@ class TestOptimizerBackwardApplygrad(unittest.TestCase):
                 return opts
 
         opts = check_sgd_optimizer({'learning_rate': 1.1})
-        self.assertEqual(len(opts), 3)
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "sgd"])
+        self.assertEqual(len(opts), 2)
+        self.assertEqual([op.type for op in opts], ["scale", "sgd"])
 
         opts = check_sgd_optimizer({'learning_rate': 1.0})
         self.assertEqual(len(opts), 1)
@@ -143,10 +141,9 @@ class TestMomentumOptimizer(unittest.TestCase):
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         with framework.program_guard(program, init_program):
             opts = momentum_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 3)
+        self.assertEqual(len(opts), 2)
         sgd_op = opts[-1]
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "momentum"])
+        self.assertEqual([op.type for op in opts], ["scale", "momentum"])
         self.assertFalse(sgd_op.attr('use_nesterov'))
 
         # Check accumulators
@@ -197,10 +194,9 @@ class TestMomentumOptimizer(unittest.TestCase):
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         with framework.program_guard(program, init_program):
             opts = momentum_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 3)
+        self.assertEqual(len(opts), 2)
         sgd_op = opts[-1]
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "momentum"])
+        self.assertEqual([op.type for op in opts], ["scale", "momentum"])
         self.assertTrue(sgd_op.attr('use_nesterov'))
 
         # Check accumulators
@@ -260,9 +256,8 @@ class TestAdagradOptimizer(unittest.TestCase):
         self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
         with framework.program_guard(program, init_program):
             opts = adagrad_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 3)
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "adagrad"])
+        self.assertEqual(len(opts), 2)
+        self.assertEqual([op.type for op in opts], ["scale", "adagrad"])
 
         # Check accumulators
         accumulators = adagrad_optimizer.get_accumulators()
@@ -324,10 +319,9 @@ class TestAdamOptimizer(unittest.TestCase):
         self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
         with framework.program_guard(program, init_program):
             opts = adam_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 5)
-        self.assertEqual(
-            [op.type for op in opts],
-            ["fill_constant", "elementwise_mul", "adam", "scale", "scale"])
+        self.assertEqual(len(opts), 4)
+        self.assertEqual([op.type for op in opts],
+                         ["scale", "adam", "scale", "scale"])
 
         # Check accumulators
         accumulators = adam_optimizer.get_accumulators()
@@ -391,10 +385,8 @@ class TestAdamaxOptimizer(unittest.TestCase):
         self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
             opts = adamax_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 4)
-        self.assertEqual(
-            [op.type for op in opts],
-            ["fill_constant", "elementwise_mul", "adamax", "scale"])
+        self.assertEqual(len(opts), 3)
+        self.assertEqual([op.type for op in opts], ["scale", "adamax", "scale"])
 
         # Check accumulators
         accumulators = adamax_optimizer.get_accumulators()
@@ -455,10 +447,8 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
         self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
         with framework.program_guard(program, init_program):
             opts = decayed_adagrad_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 3)
-        self.assertEqual(
-            [op.type for op in opts],
-            ["fill_constant", "elementwise_mul", "decayed_adagrad"])
+        self.assertEqual(len(opts), 2)
+        self.assertEqual([op.type for op in opts], ["scale", "decayed_adagrad"])
 
         # Check accumulators
         accumulators = decayed_adagrad_optimizer.get_accumulators()
@@ -521,9 +511,8 @@ class TestFtrlOptimizer(unittest.TestCase):
         self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0)
         with framework.program_guard(program, init_program):
             opts = ftrl_optimizer.apply_gradients(params_grads)
-        self.assertEqual(len(opts), 3)
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "ftrl"])
+        self.assertEqual(len(opts), 2)
+        self.assertEqual([op.type for op in opts], ["scale", "ftrl"])
 
         # Check accumulators
         accumulators = ftrl_optimizer.get_accumulators()
@@ -578,9 +567,8 @@ class TestLookaheadOptimizer(unittest.TestCase):
         lookahead = optimizer.LookaheadOptimizer(sgd, alpha=0.5, k=5)
         with framework.program_guard(program, init_program):
             opts, _ = lookahead.minimize(mean_out)
-        self.assertEqual(len(opts), 3)
-        self.assertEqual([op.type for op in opts],
-                         ["fill_constant", "elementwise_mul", "sgd"])
+        self.assertEqual(len(opts), 2)
+        self.assertEqual([op.type for op in opts], ["scale", "sgd"])
 
 
 if __name__ == '__main__':
--
GitLab
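
Note (not part of the patch): a minimal sketch of what the scalar fast path above buys, assuming the Paddle 1.x fluid API of this era; the program/variable names and the printed op list are illustrative, not taken from the patch.

import paddle.fluid as fluid

main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    # Each Python-scalar operand below should go through _scalar_elementwise_op_
    # and append a single "scale" op (scale=2.0, then scale=1.0 with bias=1.0),
    # instead of fill_constant followed by elementwise_mul / elementwise_add.
    y = (x * 2.0) + 1.0

# Expect something like ['scale', 'scale'] rather than
# ['fill_constant', 'elementwise_mul', 'fill_constant', 'elementwise_add'].
print([op.type for op in main_prog.global_block().ops])

The extra scale kernel registrations in scale_op.cc/scale_op.cu are what allow this fast path to also cover the integer dtypes listed in _supported_int_dtype_, which is why the updated unit tests now expect a single "scale" op in the optimizer programs.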