From 116305ea4b77b272106534f2cbc2dc34a4e29f5c Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Tue, 1 Dec 2020 19:37:39 +0800
Subject: [PATCH] Improve performance of elementwise_add grad op (#29187)

* pass stop_gradient for cast op
* improve performance of elementwise_add grad
* use tensor copy async
* dygraph branch
* fix dygraph branch
* add ut
---
 .../operators/elementwise/elementwise_add_op.h  | 15 ++++++++++++++-
 python/paddle/fluid/layers/math_op_patch.py     |  1 +
 python/paddle/fluid/layers/tensor.py            |  8 +++++++-
 .../fluid/tests/unittests/test_math_op_patch.py | 13 +++++++++++++
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index c4efc4ab72..acda31e0f2 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -144,7 +144,20 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel {
     // skip out
     auto *out = dout;
 
-    if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
+    // Special case when dy is not needed and dx doesn't reduce
+    if (dx != nullptr && dy == nullptr && dx->dims() == dout->dims()) {
+      VLOG(4) << "Special case when dy is not needed and dx doesn't "
+                 "reduce";
+      framework::TensorCopy(
+          *dout, ctx.GetPlace(),
+          ctx.template device_context(), dx);
+    } else if (dx == nullptr && dy != nullptr && dy->dims() == dout->dims()) {
+      VLOG(4) << "Special case when dx is not needed and dy doesn't "
+                 "reduce";
+      framework::TensorCopy(
+          *dout, ctx.GetPlace(),
+          ctx.template device_context(), dy);
+    } else if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
       elementwise_add_grad(ctx, x, y, out, dout, dx, dy);
     } else {
       default_elementwise_add_grad(ctx, x, y, out, dout, dx,
diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py
index 8f5fdf52d9..96947bf72c 100644
--- a/python/paddle/fluid/layers/math_op_patch.py
+++ b/python/paddle/fluid/layers/math_op_patch.py
@@ -179,6 +179,7 @@ def monkey_patch_variable():
                 outputs={"Out": [out]},
                 attrs={"in_dtype": self.dtype,
                        "out_dtype": out.dtype})
+            out.stop_gradient = self.stop_gradient
             return out
 
         def _scalar_op_(var, scale, bias):
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 262a750d5b..7d08803fb0 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -224,6 +224,11 @@ def cast(x, dtype):
             x = paddle.to_tensor([2, 3, 4], 'float64')
             y = paddle.cast(x, 'uint8')
     """
+    if in_dygraph_mode():
+        if not isinstance(dtype, core.VarDesc.VarType):
+            dtype = convert_np_dtype_to_dtype_(dtype)
+        out = core.ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
+
     check_variable_and_dtype(
         x, 'x',
         ['bool', 'float16', 'float32', 'float64', 'int32', 'int64', 'uint8'],
@@ -234,7 +239,8 @@ def cast(x, dtype):
     ], 'cast')
 
     helper = LayerHelper('cast', **locals())
-    out = helper.create_variable_for_type_inference(dtype=dtype)
+    out = helper.create_variable_for_type_inference(
+        dtype=dtype, stop_gradient=x.stop_gradient)
     helper.append_op(
         type='cast',
         inputs={'X': [x]},
diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch.py b/python/paddle/fluid/tests/unittests/test_math_op_patch.py
index 76e371b216..fc5e613dec 100644
--- a/python/paddle/fluid/tests/unittests/test_math_op_patch.py
+++ b/python/paddle/fluid/tests/unittests/test_math_op_patch.py
@@ -257,6 +257,19 @@ class TestMathOpPatches(unittest.TestCase):
                                   fetch_list=[b])
         self.assertTrue(numpy.allclose(-a_np, b_np))
 
+    @prog_scope()
+    def test_astype(self):
+        a = fluid.layers.data(name="a", shape=[10, 1])
+        b = a.astype('float32')
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        a_np = numpy.random.uniform(-1, 1, size=[10, 1]).astype('float64')
+
+        b_np = exe.run(fluid.default_main_program(),
+                       feed={"a": a_np},
+                       fetch_list=[b])
+        self.assertTrue(numpy.allclose(a_np.astype('float32'), b_np))
+
 
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab