[MLU] adapt coalesce_tensor op for mlu (#42873)

cbb24136 · fwenguang · GitHub · 71b046cd · cbb24136 · cbb24136
4 changed files
--- a/paddle/fluid/operators/coalesce_tensor_op.cc
+++ b/paddle/fluid/operators/coalesce_tensor_op.cc
@@ -24,6 +24,9 @@
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 #endif
 #include "paddle/fluid/framework/convert_utils.h"
+#ifdef PADDLE_WITH_MLU
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+#endif

 namespace paddle {
 namespace operators {
@@ -69,6 +72,13 @@ struct FillConstantVisitor {
      phi::funcs::SetConstant<DeviceContext, T> set_constant;
      set_constant(dev_ctx_, tensor_, static_cast<T>(value_));
    }
+#elif defined(PADDLE_WITH_MLU)
+    if (platform::is_mlu_place(context_.GetPlace())) {
+      FillMLUTensorWithHostValue<T>(context_, static_cast<T>(value_), tensor_);
+    } else {
+      phi::funcs::SetConstant<DeviceContext, T> set_constant;
+      set_constant(dev_ctx_, tensor_, static_cast<T>(value_));
+    }
 #else
    phi::funcs::SetConstant<DeviceContext, T> set_constant;
    set_constant(dev_ctx_, tensor_, static_cast<T>(value_));
@@ -509,6 +519,15 @@ REGISTER_OP_NPU_KERNEL(
    ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, double>);
 #endif

+#if defined(PADDLE_WITH_MLU)  // compiled only for MLU builds
+REGISTER_OP_MLU_KERNEL(
+    coalesce_tensor,  // element types registered: float16, int, float
+    ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext,
+                                plat::float16>,
+    ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>);
+#endif  // PADDLE_WITH_MLU
+
 REGISTER_OP_VERSION(coalesce_tensor)
    .AddCheckpoint(
        R"ROC(

--- a/paddle/fluid/platform/device_memory_aligment.cc
+++ b/paddle/fluid/platform/device_memory_aligment.cc
@@ -31,9 +31,11 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) {
      alignment = alignment;
 #elif defined(PADDLE_WITH_ASCEND_CL)
      alignment = NPUMinChunkSize();
+#elif defined(PADDLE_WITH_MLU)
+      alignment = MLUMinChunkSize();
 #else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
-          "Fluid is not compiled with CUDA/XPU/NPU."));
+          "Fluid is not compiled with CUDA/XPU/NPU/MLU."));
 #endif
    }
  }

--- a/paddle/fluid/platform/device_memory_aligment.h
+++ b/paddle/fluid/platform/device_memory_aligment.h
@@ -21,6 +21,9 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/npu/npu_info.h"
 #endif
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#ifdef PADDLE_WITH_MLU
+#include "paddle/fluid/platform/device/mlu/mlu_info.h"
+#endif

 namespace paddle {
 namespace platform {

--- a/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append('..')
+from op_test import OpTest
+from paddle.fluid import core
+import paddle
+
+alignment = 256  # chunk size (in elements) init_output uses when padding each input
+paddle.enable_static()  # switch Paddle to static-graph mode at import time
+
+
+class TestAllocContinuousSpace(OpTest):
+    def setUp(self):
+        self.op_type = "coalesce_tensor"
+        self.dtype, self.fluid_dtype = self.init_dtype()
+        attrs = self.init_attr()
+        self.copy_data = attrs["copy_data"]
+        self.constant = attrs["constant"]
+        self.set_constant = attrs["set_constant"]
+        self.Inputs = self.init_input()
+        self.Outputs, self.FusedOutput = self.init_output(
+            self.Inputs, self.set_constant, self.constant)
+        self.inputs = {'Input': self.Inputs}
+        self.attrs = attrs
+        self.outputs = {'Output': self.Outputs, 'FusedOutput': self.FusedOutput}
+
+    def init_dtype(self):
+        return np.float32, int(core.VarDesc.VarType.FP32)
+
+    def init_input(self):
+        inputs = []
+        inputs.append(("x1", np.random.random([20, 3]).astype(self.dtype)))
+        inputs.append(("x2", np.random.random([20]).astype(self.dtype)))
+        inputs.append(("x3", np.random.random([1]).astype(self.dtype)))
+        inputs.append(("x4", np.random.random([200, 30]).astype(self.dtype)))
+        inputs.append(("x5", np.random.random([30]).astype(self.dtype)))
+        inputs.append(("x6", np.random.random([1]).astype(self.dtype)))
+        return inputs
+
+    def init_attr(self):
+        return {
+            "copy_data": True,
+            "set_constant": False,
+            "constant": 0.0,
+            "dtype": self.fluid_dtype
+        }
+
+    def init_output(self, input_list, set_constant, constant):
+        inputs = []
+        outputs = input_list
+
+        for input in input_list:
+            length = len(input[1].flatten())
+            aligned_len = (length + alignment) / alignment * alignment
+            out = np.zeros(int(aligned_len))
+            out[0:length] = input[1].flatten()
+            inputs.append(out)
+
+        coalesce_tensor_var = np.concatenate([input for input in inputs])
+        if set_constant:
+            coalesce_tensor_var = np.ones((len(coalesce_tensor_var))) * constant
+            outputs = [(out[0],
+                        np.ones(out[1].shape).astype(self.dtype) * constant)
+                       for out in outputs]
+        return outputs, coalesce_tensor_var
+
+    def test_check_output(self):
+        self.check_output_with_place(
+            place=paddle.device.MLUPlace(0),
+            no_check_set=["FusedOutput"],
+            atol=1e-5)
+
+
+class TestAllocContinuousSpace2(TestAllocContinuousSpace):
+    def init_attr(self):
+        return {
+            "copy_data": False,
+            "set_constant": True,
+            "constant": 5,
+            "dtype": self.fluid_dtype,
+            "user_defined_size_of_dtype": 2
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(
+            place=paddle.device.MLUPlace(0),
+            no_check_set=["FusedOutput"],
+            atol=1e-5)
+
+
+if __name__ == '__main__':
+    unittest.main()