diff --git a/paddle/fluid/operators/split_op_npu.cc b/paddle/fluid/operators/split_op_npu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3cca4f89c10d0e90ad8f838408614700a3f66cf9
--- /dev/null
+++ b/paddle/fluid/operators/split_op_npu.cc
@@ -0,0 +1,83 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/operators/npu_op_runner.h"
+#include "paddle/fluid/operators/split_op.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;  // short alias used by SplitNPUKernel below
+
+template <typename T>
+class SplitNPUKernel : public framework::OpKernel<T> {
+ public:
+  // Splits input "X" into the "Out" tensors along attr "axis" via NpuOpRunner:
+  // "SplitD" (num_split = attr "num") if "sections" is empty, else "SplitVD".
+  // Throws Unimplemented if input "AxisTensor" or "SectionsTensorList" is set.
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in = ctx.Input<framework::Tensor>("X");
+    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
+    int num = ctx.Attr<int>("num");
+    std::vector<int> sections = ctx.Attr<std::vector<int>>("sections");
+    int axis = ctx.Attr<int>("axis");
+    // TODO(liupeng51): support axis/sections supplied as input tensors.
+    if (ctx.HasInput("AxisTensor")) {
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "The AxisTensor is not supported on NPU now."));
+    }
+    if (ctx.HasInput("SectionsTensorList")) {
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "The SectionsTensorList is not supported on NPU now."));
+    }
+    // Call mutable_data<T> on every output, then collect them for the runner.
+    const auto place = ctx.GetPlace();
+    std::vector<Tensor> outputs;
+    outputs.reserve(outs.size());
+    for (auto* out : outs) {
+      out->mutable_data<T>(place);
+      outputs.push_back(*out);
+    }
+    auto stream =
+        ctx.template device_context<platform::NPUDeviceContext>().stream();
+    NpuOpRunner runner;
+    if (sections.empty()) {
+      framework::NPUAttributeMap attr_input = {{"num_split", num},
+                                               {"split_dim", axis}};
+      runner.SetType("SplitD").AddInput(*in).AddOutputs(outputs).AddAttrs(
+          attr_input);
+    } else {
+      framework::NPUAttributeMap attr_input = {
+          {"size_splits", sections},
+          {"split_dim", axis},
+          {"num_split", static_cast<int32_t>(sections.size())}};
+      runner.SetType("SplitVD").AddInput(*in).AddOutputs(outputs).AddAttrs(
+          attr_input);
+    }
+    runner.Run(stream);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+// Register SplitNPUKernel for op "split" with float, int and float16 elements.
+REGISTER_OP_NPU_KERNEL(split, ops::SplitNPUKernel<float>,
+                       ops::SplitNPUKernel<int>,
+                       ops::SplitNPUKernel<plat::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd48ec958e4a4c16ca477712d4370bd5cd12a734
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py
@@ -0,0 +1,158 @@
+#  Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+
+paddle.enable_static()
+SEED = 2021
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestCase1(OpTest):
+    def setUp(self):
+        self.set_npu()
+        self.set_example()
+        self.op_type = "split"
+        self.place = paddle.NPUPlace(0)
+        ipt = self.x.astype(self.dtype)
+        axis = self.axis if isinstance(self.axis, int) else int(self.axis[0])
+        tmp_outs = np.split(
+            ipt, axis=axis, indices_or_sections=self.num_or_sections)
+        tmp_outs = [o.astype(self.dtype) for o in tmp_outs]
+        self.outputs = {'Out': []}
+        self.outs = []
+        for i, o in enumerate(tmp_outs):
+            self.outputs["Out"].append((str(i), o))
+            self.outs.append(str(i))
+
+        self.attrs = {"axis": self.axis, "num": self.num_or_sections}
+        self.inputs = {}
+        self.inputs.update({'X': ipt.astype(self.dtype)})
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.op_type = "split"
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        self.check_grad_with_place(self.place, ["X"], self.outs)
+
+    def set_example(self):
+        self.dtype = "float32"
+        self.x = np.random.random((2, 4, 6))
+        self.axis = 1
+        self.num_or_sections = 2
+
+
+class TestCase2(TestCase1):
+    def set_example(self):
+        self.dtype = "float32"
+        self.x = np.random.random((20, 4, 50))
+        self.axis = 0
+        self.num_or_sections = 4
+
+
+class TestCase4(TestCase1):
+    def set_example(self):
+        self.dtype = "float16"
+        self.x = np.random.random((4, 50, 20))
+        self.axis = 2
+        self.num_or_sections = 4
+
+
+# Test Sections
+class TestCase5(TestCase1):
+    def set_example(self):
+        super().set_example()
+        self.x = np.random.random((2, 10, 4))
+        self.axis = 1
+        self.num_or_sections = [2, 4, 8]
+
+    def setUp(self):
+        super().setUp()
+        self.attrs.update({"sections": [2, 2, 4, 2], "num": 0})
+
+
+class API_TestSplit(unittest.TestCase):
+    def test_out(self):
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            data = fluid.layers.data('data', shape=[-1, 10], dtype='float32')
+            x0, x1 = paddle.split(data, num_or_sections=(3, 7), axis=1)
+            place = fluid.NPUPlace(0)
+            exe = fluid.Executor(place)
+            input1 = np.random.random([1, 10]).astype('float32')
+            r0, r1 = exe.run(feed={"data": input1}, fetch_list=[x0, x1])
+            ex_x0, ex_x1 = np.split(input1, (3, ), axis=1)
+            self.assertTrue(np.allclose(ex_x0, r0))
+            self.assertTrue(np.allclose(ex_x1, r1))
+
+
+class API_TestSplit2(unittest.TestCase):
+    def test_out(self):
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            data = fluid.layers.data('data', shape=[-1, 10], dtype='float32')
+            x0, x1 = paddle.split(data, num_or_sections=2, axis=1)
+            place = fluid.NPUPlace(0)
+            exe = fluid.Executor(place)
+            input1 = np.random.random([1, 10]).astype('float32')
+            r0, r1 = exe.run(feed={"data": input1}, fetch_list=[x0, x1])
+            ex_x0, ex_x1 = np.split(input1, 2, axis=1)
+            self.assertTrue(np.allclose(ex_x0, r0))
+            self.assertTrue(np.allclose(ex_x1, r1))
+
+
+class API_TestDygraphSplit(unittest.TestCase):
+    def test_out1(self):
+        with fluid.dygraph.guard(paddle.NPUPlace(0)):
+            input_1 = np.random.random([4, 6, 6]).astype("int32")
+            # input is a variable which shape is [4, 6, 6]
+            input = fluid.dygraph.to_variable(input_1)
+            x0, x1, x2 = paddle.split(input, num_or_sections=3, axis=1)
+            x0_out = x0.numpy()
+            x1_out = x1.numpy()
+            x2_out = x2.numpy()
+            ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
+        self.assertTrue(np.allclose(ex_x0, x0_out))
+        self.assertTrue(np.allclose(ex_x1, x1_out))
+        self.assertTrue(np.allclose(ex_x2, x2_out))
+
+    def test_out2(self):
+        with fluid.dygraph.guard(paddle.NPUPlace(0)):
+            input_1 = np.random.random([4, 6, 6]).astype("int32")
+            # input is a variable which shape is [4, 6, 6]
+            input = fluid.dygraph.to_variable(input_1)
+            x0, x1, x2 = paddle.split(input, num_or_sections=[1, 2, 3], axis=1)
+            x0_out = x0.numpy()
+            x1_out = x1.numpy()
+            x2_out = x2.numpy()
+            ex_x0, ex_x1, ex_x2 = np.split(input_1, (1, 3), axis=1)
+        self.assertTrue(np.allclose(ex_x0, x0_out))
+        self.assertTrue(np.allclose(ex_x1, x1_out))
+        self.assertTrue(np.allclose(ex_x2, x2_out))
+
+
+if __name__ == '__main__':  # run the test suite when executed as a script
+    unittest.main()