Unverified · Commit 4d167240 · authored by pangyoki · committed by GitHub

[NPU] delete useless GELU in gelu grad npu op (#33872)

* delete useless GELU in gelu npu op

* add description

* fix format

* add check_grad in gelu unittest
Parent commit: e8052710
......@@ -61,13 +61,14 @@ class GeluGradNPUKernel : public framework::OpKernel<T> {
ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
Tensor out(x->type());
out.mutable_data<T>(x->dims(), place);
const auto& runner_out = NpuOpRunner("Gelu", {*x}, {out}, {});
runner_out.Run(stream);
// NOTE(pangyoki): In the original implementation of GeluGrad op, the input
// is {*dout, *x, out}, where out = Gelu(x). However, we find that variable
// `out` was not actually used. In order to improve performance, the
// useless GELU operation was deleted.
// We directly use `*dout` as a placeholder to replace `out`, it will not
// be used in calculations.
const auto& runner_dx =
NpuOpRunner("GeluGrad", {*dout, *x, out}, {*dx}, {});
NpuOpRunner("GeluGrad", {*dout, *x, *dout}, {*dx}, {});
runner_dx.Run(stream);
}
};
......
......@@ -58,12 +58,9 @@ class TestGelu(OpTest):
def test_check_output(self):
self.check_output_with_place(self.place, check_dygraph=False, atol=1e-3)
# TODO(ascendrc): Add grad test
# def test_check_grad(self):
# if self.dtype == np.float16:
# return
# self.check_grad(['X'], 'Out')
#
def test_check_grad(self):
self.check_grad_with_place(
self.place, ['X'], 'Out', check_dygraph=False)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
......@@ -115,10 +112,10 @@ class TestGeluNet(unittest.TestCase):
name="label", shape=[32, 1], dtype='int64')
c = paddle.multiply(a, b)
d = fluid.layers.gelu(c)
fc_1 = fluid.layers.fc(input=d, size=128)
prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
fc_1 = fluid.layers.fc(input=c, size=128)
fc_1_gelu = fluid.layers.gelu(fc_1)
prediction = fluid.layers.fc(input=fc_1_gelu, size=2, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
loss = fluid.layers.reduce_mean(cost)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this message first!
To leave a comment, please register.