refine gather OP performance for dynamic mode (#28587)

a972c33f · wangchaochaohu · GitHub · ece1e4cd · a972c33f · a972c33f
隐藏空白更改
内联并排

Showing with 13 additions and 1 deletion

paddle/fluid/operators/gather_op.cc +9 -0

python/paddle/tensor/manipulation.py +4 -1

未找到文件。
--- a/paddle/fluid/operators/gather_op.cc
+++ b/paddle/fluid/operators/gather_op.cc
@@ -93,6 +93,15 @@ class GatherGradOp : public framework::OperatorWithKernel {
                                       ctx, framework::GradVarName("Out")),
                                   ctx.device_context());
  }
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const framework::Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const override {
+    if (var_name == "Axis") {
+      return expected_kernel_type;
+    }
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(), tensor.layout());
+  }
 };
 class GatherOpMaker : public framework::OpProtoAndCheckerMaker {

--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -785,9 +785,12 @@ def gather(x, index, axis=None, name=None):
    if axis is None:
        axis = 0
    axis_tensor = axis
+    if not isinstance(axis, Variable) and axis == 0:
+        return paddle.fluid.layers.gather(input=x, index=index, overwrite=True)
    if not isinstance(axis, Variable):
        with device_guard("cpu"):
-            axis_tensor = fill_constant(shape=[1], dtype='int64', value=axis)
+            axis_tensor = fill_constant(
+                shape=[1], dtype='int64', value=axis, force_cpu=True)
    if in_dygraph_mode():
        return core.ops.gather(x, index, axis_tensor)