diff --git a/paddle/fluid/operators/scale_op_npu.cc b/paddle/fluid/operators/scale_op_npu.cc
index 23817190208693a239c06232f538ca9c94327be8..744a9b137f622e263e4b369a1e195d65ccf8cacb 100644
--- a/paddle/fluid/operators/scale_op_npu.cc
+++ b/paddle/fluid/operators/scale_op_npu.cc
@@ -12,11 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/operators/npu_op_runner.h"
 #include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/slice_op_npu.cc b/paddle/fluid/operators/slice_op_npu.cc
index f8bf46da4a638309c1a8907678b4ce3e01f5f857..52351a98bce37d47e7c3417d64c1bcb926ebeb03 100644
--- a/paddle/fluid/operators/slice_op_npu.cc
+++ b/paddle/fluid/operators/slice_op_npu.cc
@@ -12,18 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the Licnse. */
 
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/framework/ddim.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/npu_op_runner.h"
 #include "paddle/fluid/operators/slice_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
+using NPUDeviceContext = platform::NPUDeviceContext;
 
 void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
                 const std::vector<int> starts, const std::vector<int> ends,
@@ -54,7 +50,7 @@
   }
 }
 
-template <typename DeviceContext, typename T>
+template <typename T>
 class SliceNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -128,17 +124,14 @@ class SliceNPUKernel : public framework::OpKernel<T> {
 
     UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);
 
+    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
     const auto& runner = NpuOpRunner("SliceD", {*input}, {*out},
                                      {{"offsets", offsets}, {"size", size}});
-
-    auto stream =
-        ctx.template device_context<paddle::platform::NPUDeviceContext>()
-            .stream();
     runner.Run(stream);
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T>
 class SliceGradNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -221,15 +214,13 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_NPU_KERNEL(
-    slice, ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext,
-                        paddle::platform::float16>);
-
-REGISTER_OP_NPU_KERNEL(
-    slice_grad,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext,
-                            paddle::platform::float16>);
+REGISTER_OP_NPU_KERNEL(slice, ops::SliceNPUKernel<float>,
+                       ops::SliceNPUKernel<int>,
+#ifdef PADDLE_WITH_ASCEND_INT64
+                       ops::SliceNPUKernel<int64_t>,
+#endif
+                       ops::SliceNPUKernel<paddle::platform::float16>);
+
+REGISTER_OP_NPU_KERNEL(slice_grad, ops::SliceGradNPUKernel<float>,
+                       ops::SliceGradNPUKernel<int>,
+                       ops::SliceGradNPUKernel<paddle::platform::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
index 055c3015f82f5aa046967a50ca7a7721f5d74433..611691109e187b98d67379a3952fea0e0afd88e9 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -527,5 +527,69 @@ class TestSliceOpDecsDimStartsListTensorFP16(
         self.dtype = np.float16
 
+
+class TestSliceOpInt64(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {'Input': self.input}
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': self.starts,
+            'ends': self.ends,
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.randint(
+            100, size=(3, 4, 5, 6)).astype(self.dtype)
+        self.starts = [1, 0, 2]
+        self.ends = [3, 3, 4]
+        self.axes = [0, 1, 2]
+        self.infer_flags = [1, 1, 1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+    def init_dtype(self):
+        self.dtype = np.int64
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class TestSliceOpTensorInt64(TestSliceOpInt64):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensor': self.starts,
+            'EndsTensor': self.ends
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1, -1, -1],
+            'ends': [-1, -1, -1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.randint(
+            100, size=(3, 4, 5, 6)).astype(self.dtype)
+        self.starts = np.array([1, 0, 2]).astype('int32')
+        self.ends = np.array([3, 3, 4]).astype('int32')
+        self.axes = [0, 1, 2]
+        self.infer_flags = [-1, -1, -1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
 
 if __name__ == '__main__':
     unittest.main()
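
For reviewers, a minimal sketch (not part of the patch) of what the new int64 path enables from the Python dygraph API. It assumes a PaddlePaddle build compiled with PADDLE_WITH_ASCEND_INT64 and an NPU device available as NPUPlace(0); without that flag the int64 kernel above is not registered and this script would fail with an unsupported-kernel error.

# Illustrative only: exercise the int64 slice kernel on an NPU,
# mirroring the config() used by TestSliceOpInt64 above.
import numpy as np
import paddle

paddle.disable_static(paddle.NPUPlace(0))  # run eagerly on the first NPU

x = paddle.to_tensor(
    np.random.randint(100, size=(3, 4, 5, 6)).astype(np.int64))
# Slice along axes [0, 1, 2] with starts [1, 0, 2] and ends [3, 3, 4],
# i.e. the same region as input[1:3, 0:3, 2:4, :].
y = paddle.slice(x, axes=[0, 1, 2], starts=[1, 0, 2], ends=[3, 3, 4])
print(y.shape)  # [2, 3, 2, 6]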