Unverified · Commit f39c3a5a authored by Sing_chan, committed by GitHub

[NPU] Support npu kernel for assign_value op (#34568)

* [NPU] Support npu kernel for assign_value op

* move test_assign_value_op_npu.py into unittests/npu folder

* correct copyright year; add TestAssignApi class using NPUPlace in test files
Parent 3ce14a35
...@@ -138,6 +138,35 @@ void TensorFromArray(const T* src, const size_t& array_size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
#ifdef PADDLE_WITH_ASCEND_CL
  else if (platform::is_npu_place(dst_place)) {  // NOLINT
    // 1. vector -> npu pinned tensor
    platform::NPUPinnedPlace npu_pinned_place;
    Tensor npu_pinned_tensor;
    npu_pinned_tensor.Resize(dst->dims());
    auto npu_pinned_ptr =
        npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type());
    memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
    // 2. async copy npu pinned tensor -> npu tensor
    memory::Copy(
        BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr,
        npu_pinned_place, npu_pinned_ptr, size,
        reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
    // 3. record event
    auto npu_pinned_allocator =
        static_cast<paddle::memory::allocation::NPUPinnedAllocator*>(
            paddle::memory::allocation::AllocatorFacade::Instance()
                .GetAllocator(npu_pinned_place)
                .get());
    paddle::memory::allocation::Allocation* allocation =
        npu_pinned_tensor.Holder().get();
    npu_pinned_allocator->RecordEvent(
        allocation,
        reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
  }
#endif
}

template <typename T>
...
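The three numbered steps in the new branch above form a staged-copy pattern: copy the host array into a pinned staging tensor, launch the device copy asynchronously on the NPU stream, then record an event on the staging allocation so the allocator does not reclaim it before the stream copy finishes. Below is a minimal, host-only Python sketch of the same pattern, purely illustrative: a worker thread and threading.Event stand in for the NPU stream and its recorded event, and none of the names are Paddle APIs.

import threading

import numpy as np


def tensor_from_array(src, dst, pending):
    # 1. host source -> staging buffer (plays the role of the pinned tensor)
    staging = np.empty_like(src)
    np.copyto(staging, src)

    # 2. "async" copy staging -> destination (stands in for the stream copy)
    done = threading.Event()

    def stream_copy():
        np.copyto(dst, staging)
        done.set()  # 3. "record event": staging may now be reclaimed

    threading.Thread(target=stream_copy).start()

    # Keep the staging buffer alive until the event fires, mirroring what
    # NPUPinnedAllocator::RecordEvent achieves in the diff above.
    pending.append((staging, done))


src = np.arange(10, dtype=np.float32)
dst = np.empty_like(src)
pending = []
tensor_from_array(src, dst, pending)
pending[0][1].wait()
assert np.array_equal(dst, src)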
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/assign_value_op.h"
namespace ops = paddle::operators;
REGISTER_OP_NPU_KERNEL(assign_value, ops::AssignValueKernel<bool>,
ops::AssignValueKernel<int>,
ops::AssignValueKernel<int64_t>,
ops::AssignValueKernel<float>);
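With this registration in place, assign_value can execute on an Ascend device for the four listed element types. The minimal static-graph sketch below mirrors the TestAssignApi cases further down: layers.assign with a numpy input lowers to the assign_value op, so fetching the output exercises the new kernel on an NPU build (and falls back to CPUPlace elsewhere). float32 is shown; bool, int32, and int64 follow the same shape.

import numpy
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers

paddle.enable_static()

place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu() \
    else fluid.CPUPlace()

value = numpy.random.random(size=(2, 5)).astype(numpy.float32)

main_program = fluid.Program()
with fluid.program_guard(main_program):
    x = layers.create_tensor(dtype="float32")
    # A numpy input makes layers.assign emit an assign_value op.
    layers.assign(input=value, output=x)

exe = fluid.Executor(place)
[fetched] = exe.run(main_program, feed={}, fetch_list=[x])
assert numpy.array_equal(fetched, value)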
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy
import sys
sys.path.append("..")
import op_test
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.fluid.layers as layers

paddle.enable_static()
numpy.random.seed(2021)


class TestAssignValueNPUOp(op_test.OpTest):
    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "assign_value"
        self.inputs = {}
        self.attrs = {}
        self.init_data()
        self.attrs["shape"] = self.value.shape
        self.attrs["dtype"] = framework.convert_np_dtype_to_dtype_(
            self.value.dtype)
        self.outputs = {"Out": self.value}

    def set_npu(self):
        self.__class__.use_npu = True

    def init_data(self):
        self.value = numpy.random.random(size=(2, 5)).astype(numpy.float32)
        self.attrs["fp32_values"] = [float(v) for v in self.value.flat]

    def test_forward(self):
        self.check_output_with_place(self.place)


class TestAssignValueNPUOp2(TestAssignValueNPUOp):
    def init_data(self):
        self.value = numpy.random.random(size=(2, 5)).astype(numpy.int32)
        self.attrs["int32_values"] = [int(v) for v in self.value.flat]


class TestAssignValueNPUOp3(TestAssignValueNPUOp):
    def init_data(self):
        self.value = numpy.random.random(size=(2, 5)).astype(numpy.int64)
        self.attrs["int64_values"] = [int(v) for v in self.value.flat]


class TestAssignValueNPUOp4(TestAssignValueNPUOp):
    def init_data(self):
        self.value = numpy.random.choice(
            a=[False, True], size=(2, 5)).astype(numpy.bool)
        self.attrs["bool_values"] = [bool(v) for v in self.value.flat]


class TestAssignApi(unittest.TestCase):
    def setUp(self):
        self.init_dtype()
        self.value = (
            -100 + 200 * numpy.random.random(size=(2, 5))).astype(self.dtype)
        self.place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu(
        ) else fluid.CPUPlace()

    def init_dtype(self):
        self.dtype = "float32"

    def test_assign(self):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            x = layers.create_tensor(dtype=self.dtype)
            layers.assign(input=self.value, output=x)

        exe = fluid.Executor(self.place)
        [fetched_x] = exe.run(main_program, feed={}, fetch_list=[x])
        self.assertTrue(
            numpy.array_equal(fetched_x, self.value),
            "fetch_x=%s val=%s" % (fetched_x, self.value))
        self.assertEqual(fetched_x.dtype, self.value.dtype)


class TestAssignApi2(TestAssignApi):
    def init_dtype(self):
        self.dtype = "int32"


class TestAssignApi3(TestAssignApi):
    def init_dtype(self):
        self.dtype = "int64"


class TestAssignApi4(TestAssignApi):
    def setUp(self):
        self.init_dtype()
        self.value = numpy.random.choice(
            a=[False, True], size=(2, 5)).astype(numpy.bool)
        self.place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu(
        ) else fluid.CPUPlace()

    def init_dtype(self):
        self.dtype = "bool"


if __name__ == '__main__':
    unittest.main()