Unverified commit 8cc7146d, authored by Qi Li and committed by GitHub

[NPU] add int64 kernel for slice, test=develop (#36328)

* [NPU] add int64 kernel for scale and slice, test=develop

* remove int64 for scale, test=develop
Parent e275e423
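For context, a minimal sketch of the user-level behavior this change enables (hypothetical usage, not part of the commit; assumes a Paddle build with Ascend NPU support and, for the int64 path, PADDLE_WITH_ASCEND_INT64 defined at build time):

import paddle

paddle.set_device('npu:0')  # run ops on the first NPU device
# int64 input: before this commit the NPU slice kernel only covered
# float, int32 and float16
x = paddle.arange(24, dtype='int64').reshape([2, 3, 4])
y = paddle.slice(x, axes=[0, 1], starts=[0, 1], ends=[1, 3])
print(y.shape)  # [1, 2, 4]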
paddle/fluid/operators/scale_op_npu.cc
@@ -12,11 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
namespace paddle {
namespace operators {
paddle/fluid/operators/slice_op_npu.cc
@@ -12,18 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include <memory>
-#include <string>
-#include "paddle/fluid/framework/ddim.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/slice_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using NPUDeviceContext = platform::NPUDeviceContext;
void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
const std::vector<int> starts, const std::vector<int> ends,
@@ -54,7 +50,7 @@ void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
}
}
-template <typename DeviceContext, typename T>
+template <typename T>
class SliceNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -128,17 +124,14 @@ class SliceNPUKernel : public framework::OpKernel<T> {
UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);
+    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
    const auto& runner = NpuOpRunner("SliceD", {*input}, {*out},
                                     {{"offsets", offsets}, {"size", size}});
-
-    auto stream =
-        ctx.template device_context<paddle::platform::NPUDeviceContext>()
-            .stream();
    runner.Run(stream);
}
};
-template <typename DeviceContext, typename T>
+template <typename T>
class SliceGradNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -221,15 +214,13 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
-REGISTER_OP_NPU_KERNEL(
-    slice, ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext,
-                        paddle::platform::float16>);
-
-REGISTER_OP_NPU_KERNEL(
-    slice_grad,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext,
-                            paddle::platform::float16>);
+REGISTER_OP_NPU_KERNEL(slice, ops::SliceNPUKernel<float>,
+                       ops::SliceNPUKernel<int>,
+#ifdef PADDLE_WITH_ASCEND_INT64
+                       ops::SliceNPUKernel<int64_t>,
+#endif
+                       ops::SliceNPUKernel<paddle::platform::float16>);
+
+REGISTER_OP_NPU_KERNEL(slice_grad, ops::SliceGradNPUKernel<float>,
+                       ops::SliceGradNPUKernel<int>,
+                       ops::SliceGradNPUKernel<paddle::platform::float16>);
python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -527,5 +527,69 @@ class TestSliceOpDecsDimStartsListTensorFP16(
self.dtype = np.float16
class TestSliceOpInt64(OpTest):
    def set_npu(self):
        self.__class__.use_npu = True
        self.place = paddle.NPUPlace(0)

    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        self.config()
        self.inputs = {'Input': self.input}
        self.outputs = {'Out': self.out}
        self.attrs = {
            'axes': self.axes,
            'starts': self.starts,
            'ends': self.ends,
            'infer_flags': self.infer_flags
        }

    def config(self):
        self.input = np.random.randint(
            100, size=(3, 4, 5, 6)).astype(self.dtype)
        self.starts = [1, 0, 2]
        self.ends = [3, 3, 4]
        self.axes = [0, 1, 2]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[1:3, 0:3, 2:4, :]

    def init_dtype(self):
        self.dtype = np.int64

    def test_check_output(self):
        self.check_output_with_place(self.place)


class TestSliceOpTensorInt64(TestSliceOpInt64):
    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        self.config()
        self.inputs = {
            'Input': self.input,
            'StartsTensor': self.starts,
            'EndsTensor': self.ends
        }
        self.outputs = {'Out': self.out}
        self.attrs = {
            'axes': self.axes,
            'starts': [-1, -1, -1],
            'ends': [-1, -1, -1],
            'infer_flags': self.infer_flags
        }

    def config(self):
        self.input = np.random.randint(
            100, size=(3, 4, 5, 6)).astype(self.dtype)
        self.starts = np.array([1, 0, 2]).astype('int32')
        self.ends = np.array([3, 3, 4]).astype('int32')
        self.axes = [0, 1, 2]
        self.infer_flags = [-1, -1, -1]
        self.out = self.input[1:3, 0:3, 2:4, :]


if __name__ == '__main__':
    unittest.main()
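The TestSliceOpTensorInt64 case above passes starts/ends through the StartsTensor/EndsTensor inputs rather than as attributes, exercising the runtime-tensor path. A hedged sketch of the equivalent user-level call (same assumptions as the earlier snippet):

import paddle

paddle.set_device('npu:0')
x = paddle.randint(0, 100, shape=[3, 4, 5, 6], dtype='int64')
# tensor-valued starts/ends are resolved at runtime, not graph-build time
starts = paddle.to_tensor([1, 0, 2], dtype='int32')
ends = paddle.to_tensor([3, 3, 4], dtype='int32')
y = paddle.slice(x, axes=[0, 1, 2], starts=starts, ends=ends)
print(y.shape)  # [2, 3, 2, 6]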