diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc
index 900fd4d8d292e3c4a8884957dceeaa020ee0003e..aa5a38e4dbf08dffeab1655c7ba8534b504579ed 100644
--- a/paddle/fluid/operators/coalesce_tensor_op.cc
+++ b/paddle/fluid/operators/coalesce_tensor_op.cc
@@ -24,6 +24,9 @@
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 #endif
 #include "paddle/fluid/framework/convert_utils.h"
+#ifdef PADDLE_WITH_MLU
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+#endif
 
 namespace paddle {
 namespace operators {
@@ -69,6 +72,13 @@ struct FillConstantVisitor {
       phi::funcs::SetConstant<DeviceContext, T> set_constant;
       set_constant(dev_ctx_, tensor_, static_cast<T>(value_));
     }
+#elif defined(PADDLE_WITH_MLU)
+    if (platform::is_mlu_place(context_.GetPlace())) {
+      FillMLUTensorWithHostValue<T>(context_, static_cast<T>(value_), tensor_);
+    } else {
+      phi::funcs::SetConstant<DeviceContext, T> set_constant;
+      set_constant(dev_ctx_, tensor_, static_cast<T>(value_));
+    }
 #else
     phi::funcs::SetConstant<DeviceContext, T> set_constant;
     set_constant(dev_ctx_, tensor_, static_cast<T>(value_));
@@ -509,6 +519,15 @@ REGISTER_OP_NPU_KERNEL(
     ops::CoalesceTensorOpKernel<paddle::platform::NPUDeviceContext, double>);
 #endif
 
+#if defined(PADDLE_WITH_MLU)
+REGISTER_OP_MLU_KERNEL(
+    coalesce_tensor,
+    ops::CoalesceTensorOpKernel<paddle::platform::MLUDeviceContext,
+                                plat::float16>,
+    ops::CoalesceTensorOpKernel<paddle::platform::MLUDeviceContext, int>,
+    ops::CoalesceTensorOpKernel<paddle::platform::MLUDeviceContext, float>);
+#endif
+
 REGISTER_OP_VERSION(coalesce_tensor)
     .AddCheckpoint(
         R"ROC(
diff --git a/paddle/fluid/platform/device_memory_aligment.cc b/paddle/fluid/platform/device_memory_aligment.cc
index 8261c866d073d69206fbdac09ec49567daef3e50..e8a6051c19f2d51dff92f3fc1d263d08efc58aa5 100644
--- a/paddle/fluid/platform/device_memory_aligment.cc
+++ b/paddle/fluid/platform/device_memory_aligment.cc
@@ -31,9 +31,11 @@ size_t Alignment(size_t size, const platform::Place &place, int align_size) {
       alignment = alignment;
 #elif defined(PADDLE_WITH_ASCEND_CL)
       alignment = NPUMinChunkSize();
+#elif defined(PADDLE_WITH_MLU)
+      alignment = MLUMinChunkSize();
 #else
       PADDLE_THROW(platform::errors::PreconditionNotMet(
-          "Fluid is not compiled with CUDA/XPU/NPU."));
+          "Fluid is not compiled with CUDA/XPU/NPU/MLU."));
 #endif
     }
   }
diff --git a/paddle/fluid/platform/device_memory_aligment.h b/paddle/fluid/platform/device_memory_aligment.h
index a3f88592b7649fcf4acaf01b498901abb8f4a1d7..ee37b93807eaad6b51efbab70a1474358c28a9cc 100644
--- a/paddle/fluid/platform/device_memory_aligment.h
+++ b/paddle/fluid/platform/device_memory_aligment.h
@@ -21,6 +21,9 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/npu/npu_info.h"
 #endif
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#ifdef PADDLE_WITH_MLU
+#include "paddle/fluid/platform/device/mlu/mlu_info.h"
+#endif
 
 namespace paddle {
 namespace platform {
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py
new file mode 100644
index 0000000000000000000000000000000000000000..854ac0b6826cd7c84e9f15dcf0b0dfb9a0385484
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append('..')
+from op_test import OpTest
+from paddle.fluid import core
+import paddle
+
+alignment = 256
+paddle.enable_static()
+
+
+class TestAllocContinuousSpace(OpTest):
+    def setUp(self):
+        self.op_type = "coalesce_tensor"
+        self.dtype, self.fluid_dtype = self.init_dtype()
+        attrs = self.init_attr()
+        self.copy_data = attrs["copy_data"]
+        self.constant = attrs["constant"]
+        self.set_constant = attrs["set_constant"]
+        self.Inputs = self.init_input()
+        self.Outputs, self.FusedOutput = self.init_output(
+            self.Inputs, self.set_constant, self.constant)
+        self.inputs = {'Input': self.Inputs}
+        self.attrs = attrs
+        self.outputs = {'Output': self.Outputs, 'FusedOutput': self.FusedOutput}
+
+    def init_dtype(self):
+        return np.float32, int(core.VarDesc.VarType.FP32)
+
+    def init_input(self):
+        inputs = []
+        inputs.append(("x1", np.random.random([20, 3]).astype(self.dtype)))
+        inputs.append(("x2", np.random.random([20]).astype(self.dtype)))
+        inputs.append(("x3", np.random.random([1]).astype(self.dtype)))
+        inputs.append(("x4", np.random.random([200, 30]).astype(self.dtype)))
+        inputs.append(("x5", np.random.random([30]).astype(self.dtype)))
+        inputs.append(("x6", np.random.random([1]).astype(self.dtype)))
+        return inputs
+
+    def init_attr(self):
+        return {
+            "copy_data": True,
+            "set_constant": False,
+            "constant": 0.0,
+            "dtype": self.fluid_dtype
+        }
+
+    def init_output(self, input_list, set_constant, constant):
+        inputs = []
+        outputs = input_list
+
+        for input in input_list:
+            length = len(input[1].flatten())
+            aligned_len = (length + alignment) // alignment * alignment
+            out = np.zeros(int(aligned_len))
+            out[0:length] = input[1].flatten()
+            inputs.append(out)
+
+        coalesce_tensor_var = np.concatenate([input for input in inputs])
+        if set_constant:
+            coalesce_tensor_var = np.ones((len(coalesce_tensor_var))) * constant
+            outputs = [(out[0],
+                        np.ones(out[1].shape).astype(self.dtype) * constant)
+                       for out in outputs]
+        return outputs, coalesce_tensor_var
+
+    def test_check_output(self):
+        self.check_output_with_place(
+            place=paddle.device.MLUPlace(0),
+            no_check_set=["FusedOutput"],
+            atol=1e-5)
+
+
+class TestAllocContinuousSpace2(TestAllocContinuousSpace):
+    def init_attr(self):
+        return {
+            "copy_data": False,
+            "set_constant": True,
+            "constant": 5,
+            "dtype": self.fluid_dtype,
+            "user_defined_size_of_dtype": 2
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(
+            place=paddle.device.MLUPlace(0),
+            no_check_set=["FusedOutput"],
+            atol=1e-5)
+
+
+if __name__ == '__main__':
+    unittest.main()
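
Reviewer note (not part of the patch): a minimal standalone sketch of the element-padding arithmetic that init_output above models, so the expected Output/FusedOutput layout is easier to follow. The pad_to_alignment helper name is illustrative only, and the 256-element granularity is the same constant the test assumes.

import numpy as np

alignment = 256  # element granularity assumed by the unit test above


def pad_to_alignment(arr, alignment=alignment):
    """Flatten arr and zero-pad it up to a multiple of `alignment` elements."""
    flat = arr.flatten()
    aligned_len = (len(flat) + alignment) // alignment * alignment
    out = np.zeros(aligned_len, dtype=flat.dtype)
    out[:len(flat)] = flat
    return out


# A [20, 3] float32 tensor holds 60 elements; its padded chunk holds 256.
chunk = pad_to_alignment(np.random.random([20, 3]).astype(np.float32))
print(chunk.shape)  # (256,)

# The fused buffer is simply the concatenation of the padded chunks.
fused = np.concatenate([chunk, pad_to_alignment(np.random.random([20]))])
print(fused.shape)  # (512,)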