Unverified commit 6bd7f860, authored by: F freeliuzc, committed by: GitHub

optimize op structure (#55988)

Parent commit: 4728d58d
......@@ -83,7 +83,7 @@
optional : bias, x_max
- op : fused_bias_act
args : (Tensor x, Tensor bias, Tensor dequant_scales, Tensor shift, Tensor smooth, str act_method = "gelu", str compute_dtype = "default", int rows = -1, int cols = -1, float quant_scale = -1, int quant_round_type = 1, float quant_max_bound = 127.0, float quant_min_bound = -127.0)
args : (Tensor x, Tensor bias, Tensor dequant_scales, Tensor shift, Tensor smooth, str act_method = "gelu", str compute_dtype = "default", float quant_scale = -1, int quant_round_type = 1, float quant_max_bound = 127.0, float quant_min_bound = -127.0)
output : Tensor(out)
infer_meta :
func: FusedBiasActInferMeta
......
......@@ -1342,8 +1342,6 @@ void FusedBiasActInferMeta(const MetaTensor& x,
const MetaTensor& smooth,
const std::string& act_method,
const std::string& compute_dtype,
int rows,
int cols,
float quant_scale,
int quant_round_type,
float quant_max_bound,
......@@ -1358,10 +1356,14 @@ void FusedBiasActInferMeta(const MetaTensor& x,
auto dim = x_dims[1];
PADDLE_ENFORCE_GT(
rows, 0, phi::errors::InvalidArgument("The size of Attr(rows) must > 0"));
x_dims[0],
0,
phi::errors::InvalidArgument("The size of Attr(rows) must > 0"));
PADDLE_ENFORCE_GT(
cols, 0, phi::errors::InvalidArgument("The size of Attr(cols) must > 0"));
x_dims[1],
0,
phi::errors::InvalidArgument("The size of Attr(cols) must > 0"));
if (act_method == "geglu" || act_method == "swiglu") {
PADDLE_ENFORCE_EQ(
......
......@@ -286,8 +286,6 @@ void FusedBiasActInferMeta(const MetaTensor& x,
const MetaTensor& smooth,
const std::string& act_method,
const std::string& compute_dtype,
int rows,
int cols,
float quant_scale,
int quant_round_type,
float quant_max_bound,
......
......@@ -438,14 +438,14 @@ void FusedBiasActKernel(const Context &dev_ctx,
const paddle::optional<DenseTensor> &smooth,
const std::string &act_method,
const std::string &compute_dtype,
int rows,
int cols,
float quant_scale,
int quant_round_type,
float quant_max_bound,
float quant_min_bound,
DenseTensor *out) {
#ifndef PADDLE_WITH_HIP
int rows = x.dims()[0];
int cols = x.dims()[1];
if (x.dtype() == phi::DataType::INT32) {
if (compute_dtype == "bf16") {
DispatchWithDtype<phi::dtype::bfloat16, Context>(
......
......@@ -73,8 +73,6 @@ def fused_act_bias_wrapper(
smooth=None,
act_method='gelu',
compute_dtype='default',
rows=0,
cols=0,
quant_scale=-1,
quant_round_type=0,
quant_max_bound=0,
......@@ -88,8 +86,6 @@ def fused_act_bias_wrapper(
smooth,
act_method,
compute_dtype,
rows,
cols,
quant_scale,
quant_round_type,
quant_max_bound,
......@@ -140,8 +136,6 @@ class TestFusedBiasActOp(unittest.TestCase):
return fused_act_bias_wrapper(
x=x,
bias=bias,
rows=self.rows,
cols=self.cols,
act_method=self.act_method,
compute_dtype=self.compute_dtype,
)
......@@ -197,8 +191,6 @@ class TestFastGeluFP16(TestFusedBiasActOp):
out = fused_act_bias_wrapper(
x=x,
bias=bias,
rows=self.rows,
cols=self.cols,
act_method=self.act_method,
)
self.use_fast_math(False)
......@@ -284,8 +276,6 @@ class TestQuantFP32(TestFusedBiasActOp):
smooth=smooth,
act_method=self.act_method,
compute_dtype=self.compute_dtype,
rows=self.rows,
cols=self.cols,
quant_scale=self.quant_scale,
quant_round_type=self.quant_round_type,
quant_max_bound=self.quant_max_bound,
......@@ -332,8 +322,6 @@ class TestDequantFP32(TestQuantFP32):
dequant_scales=dequant_scales,
act_method=self.act_method,
compute_dtype=self.compute_dtype,
rows=self.rows,
cols=self.cols,
)
return out
......@@ -441,8 +429,6 @@ class TestFusedBiasActOpBF16(unittest.TestCase):
bias=bias,
act_method=self.act_method,
compute_dtype=self.compute_dtype,
rows=self.rows,
cols=self.cols,
)
return out
......@@ -565,8 +551,6 @@ class TestQuantBF16(TestFusedBiasActOpBF16):
smooth=smooth,
act_method=self.act_method,
compute_dtype=self.compute_dtype,
rows=self.rows,
cols=self.cols,
quant_scale=self.quant_scale,
quant_round_type=self.quant_round_type,
quant_max_bound=self.quant_max_bound,
......@@ -678,8 +662,6 @@ class TestAssert(unittest.TestCase):
out = fused_act_bias_wrapper(
x=paddle.to_tensor(x),
bias=paddle.to_tensor(bias),
rows=self.rows,
cols=self.cols,
)
except ValueError as e:
pass
......@@ -696,8 +678,6 @@ class TestAssert(unittest.TestCase):
out = fused_act_bias_wrapper(
x=paddle.to_tensor(x),
bias=paddle.to_tensor(bias),
rows=self.rows,
cols=self.cols,
compute_dtype='fp16',
)
except ValueError as e:
......@@ -715,8 +695,6 @@ class TestAssert(unittest.TestCase):
out = fused_act_bias_wrapper(
x=paddle.to_tensor(x),
bias=paddle.to_tensor(bias),
rows=self.rows,
cols=self.cols,
compute_dtype='fp16',
act_method=act_method,
)
......@@ -765,8 +743,6 @@ class TestWithoutBias(unittest.TestCase):
return fused_act_bias_wrapper(
x=x,
bias=None,
rows=self.rows,
cols=self.cols,
act_method=self.act_method,
)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register