diff --git a/paddle/fluid/operators/gelu_op_npu.cc b/paddle/fluid/operators/gelu_op_npu.cc index 6e60926cc7951aa777138bb8785083eb48ee50dd..4db82e96cfae7c3a0332f5601b3477780c3d16d1 100644 --- a/paddle/fluid/operators/gelu_op_npu.cc +++ b/paddle/fluid/operators/gelu_op_npu.cc @@ -61,13 +61,14 @@ class GeluGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor out(x->type()); - out.mutable_data(x->dims(), place); - const auto& runner_out = NpuOpRunner("Gelu", {*x}, {out}, {}); - runner_out.Run(stream); - + // NOTE(pangyoki): In the original implementation of GeluGrad op, the input + // is {*dout, *x, out}, where out = Gelu(x). However, we find that variable + // `out` was not actually used. In order to improve performance, the + // useless GELU operation was deleted. + // We directly use `*dout` as a placeholder to replace `out`; it will not + // be used in calculations. const auto& runner_dx = - NpuOpRunner("GeluGrad", {*dout, *x, out}, {*dx}, {}); + NpuOpRunner("GeluGrad", {*dout, *x, *dout}, {*dx}, {}); runner_dx.Run(stream); } }; diff --git a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py index efa1918206b035005dd12939b7001933266c107c..d811aaf228ddf5e264c56b23f69fc318a9352c73 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py @@ -58,12 +58,9 @@ class TestGelu(OpTest): def test_check_output(self): self.check_output_with_place(self.place, check_dygraph=False, atol=1e-3) - # TODO(ascendrc): Add grad test - # def test_check_grad(self): - # if self.dtype == np.float16: - # return - # self.check_grad(['X'], 'Out') - # + def test_check_grad(self): + self.check_grad_with_place( + self.place, ['X'], 'Out', check_dygraph=False) @unittest.skipIf(not paddle.is_compiled_with_npu(), @@ -115,10 +112,10 @@ class TestGeluNet(unittest.TestCase): name="label", shape=[32, 1], 
dtype='int64') c = paddle.multiply(a, b) - d = fluid.layers.gelu(c) - fc_1 = fluid.layers.fc(input=d, size=128) - prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') + fc_1 = fluid.layers.fc(input=c, size=128) + fc_1_gelu = fluid.layers.gelu(fc_1) + prediction = fluid.layers.fc(input=fc_1_gelu, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.reduce_mean(cost)