From c5fcc96d5b32509a68fb8de660a5548a54c2151f Mon Sep 17 00:00:00 2001
From: wangchaochaohu
Date: Wed, 14 Oct 2020 16:51:59 +0800
Subject: [PATCH] xpu support for fill_constant Op (#27675)

---
 paddle/fluid/operators/fill_constant_op.h     |  12 +-
 .../fluid/operators/fill_constant_op_xpu.cc   |  23 ++
 paddle/fluid/operators/math/math_function.cc  |  10 +
 paddle/fluid/operators/math/math_function.h   |  28 ++
 .../fluid/operators/math/math_function_impl.h |  15 +-
 paddle/fluid/operators/utils.h                |   8 +-
 .../xpu/test_fill_constant_op_xpu.py          | 241 ++++++++++++++++++
 7 files changed, 330 insertions(+), 7 deletions(-)
 create mode 100644 paddle/fluid/operators/fill_constant_op_xpu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py

diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h
index 6fea8fe98bf..41fcf375087 100644
--- a/paddle/fluid/operators/fill_constant_op.h
+++ b/paddle/fluid/operators/fill_constant_op.h
@@ -66,7 +66,9 @@ class FillConstantKernel : public framework::OpKernel<T> {
                               value_tensor->numel()));
       const T *tensor_data = value_tensor->data<T>();
       framework::Tensor cpu_tensor;
-      if (platform::is_gpu_place(value_tensor->place())) {
+      auto tmp_place = value_tensor->place();
+      if (platform::is_gpu_place(tmp_place) ||
+          platform::is_xpu_place(tmp_place)) {
         TensorCopySync(*value_tensor, platform::CPUPlace(), &cpu_tensor);
         tensor_data = cpu_tensor.data<T>();
       }
@@ -102,6 +104,14 @@ class FillConstantKernel : public framework::OpKernel<T> {
       functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
               tensor, static_cast<T>(value));
     }
+#endif
+#ifdef PADDLE_WITH_XPU
+    if (!cpu_place) {
+      tensor->mutable_data(ctx.GetPlace(), data_type);
+      math::SetConstant<platform::XPUDeviceContext, T> functor;
+      functor(reinterpret_cast<const platform::XPUDeviceContext &>(dev_ctx),
+              tensor, static_cast<T>(value));
+    }
 #endif
   }
 };
diff --git a/paddle/fluid/operators/fill_constant_op_xpu.cc b/paddle/fluid/operators/fill_constant_op_xpu.cc
new file mode 100644
index 00000000000..2bf836272a4
--- /dev/null
+++ b/paddle/fluid/operators/fill_constant_op_xpu.cc
@@ -0,0 +1,23 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/fill_constant_op.h"
+
+namespace ops = paddle::operators;
+#ifdef PADDLE_WITH_XPU
+REGISTER_OP_XPU_KERNEL(fill_constant, ops::FillConstantKernel<float>,
+                       ops::FillConstantKernel<double>,
+                       ops::FillConstantKernel<int>,
+                       ops::FillConstantKernel<int64_t>,
+                       ops::FillConstantKernel<bool>);
+#endif
diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc
index b8af5a21ca5..8c7437e4b5e 100644
--- a/paddle/fluid/operators/math/math_function.cc
+++ b/paddle/fluid/operators/math/math_function.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include <cblas.h>
 #endif
 
+#include <memory>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
@@ -44,6 +45,15 @@ template struct SetConstant<platform::CPUDeviceContext, int>;
 template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
 
+#ifdef PADDLE_WITH_XPU
+template struct SetConstant<platform::XPUDeviceContext, platform::float16>;
+template struct SetConstant<platform::XPUDeviceContext, float>;
+template struct SetConstant<platform::XPUDeviceContext, double>;
+template struct SetConstant<platform::XPUDeviceContext, int>;
+template struct SetConstant<platform::XPUDeviceContext, int64_t>;
+template struct SetConstant<platform::XPUDeviceContext, bool>;
+#endif
+
 #define DEFINE_CPU_TRANS(RANK)                                              \
   template struct Transpose<platform::CPUDeviceContext, platform::float16,  \
diff --git a/paddle/fluid/operators/math/math_function.h b/paddle/fluid/operators/math/math_function.h
index 6af0278d825..1ad1c29ddd8 100644
--- a/paddle/fluid/operators/math/math_function.h
+++ b/paddle/fluid/operators/math/math_function.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <cmath>
+#include <memory>
 #include <vector>
 
 #include "paddle/fluid/framework/eigen.h"
@@ -84,6 +85,33 @@ struct RowwiseMean {
                   framework::Tensor* vec);
 };
 
+#ifdef PADDLE_WITH_XPU
+template <typename U>
+struct TensorSetConstantXPU {
+  TensorSetConstantXPU(framework::Tensor* tensor, U value)
+      : tensor_(tensor), value_(value) {}
+  template <typename T>
+  void apply() const {
+    int dev_id = -1;
+    xpu_current_device(&dev_id);
+    if (dev_id >= 64) {
+      // if dev_id >= 64, the device is a simulator device, -64 to get real
+      // dev_id
+      dev_id -= 64;
+    }
+    auto xpu = platform::XPUPlace(dev_id);
+    auto* begin = tensor_->mutable_data<T>(xpu);
+    int numel = tensor_->numel();
+    std::unique_ptr<T[]> data_cpu(new T[numel]);
+    std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
+    memory::Copy(xpu, begin, platform::CPUPlace(),
+                 static_cast<void*>(data_cpu.get()), numel * sizeof(T));
+  }
+  framework::Tensor* tensor_;
+  U value_;
+};
+#endif
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/math/math_function_impl.h b/paddle/fluid/operators/math/math_function_impl.h
index 869a3054598..d2480763dcf 100644
--- a/paddle/fluid/operators/math/math_function_impl.h
+++ b/paddle/fluid/operators/math/math_function_impl.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#include <memory>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -27,8 +28,18 @@ template <typename DeviceContext, typename T>
 void SetConstant<DeviceContext, T>::operator()(const DeviceContext& context,
                                                framework::Tensor* tensor,
                                                T num) {
-  auto t = framework::EigenVector<T>::Flatten(*tensor);
-  t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+  bool xpu_place = false;
+#ifdef PADDLE_WITH_XPU
+  if (context.GetPlace() == platform::XPUPlace()) {
+    xpu_place = true;
+    framework::VisitDataType(tensor->type(),
+                             TensorSetConstantXPU<T>(tensor, num));
+  }
+#endif
+  if (!xpu_place) {
+    auto t = framework::EigenVector<T>::Flatten(*tensor);
+    t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+  }
 }
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h
index 05d077b173a..985c3512761 100644
--- a/paddle/fluid/operators/utils.h
+++ b/paddle/fluid/operators/utils.h
@@ -26,7 +26,7 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   if (x->type() == framework::proto::VarType::INT32) {
     auto* data = x->data<int>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int>();
     }
@@ -34,7 +34,7 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   } else if (x->type() == framework::proto::VarType::INT64) {
     auto* data = x->data<int64_t>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
      TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int64_t>();
     }
@@ -62,7 +62,7 @@ inline std::vector<T> GetDataFromTensorList(
                           tensor->dims()));
 
     if (tensor->type() == framework::proto::VarType::INT32) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
        framework::Tensor temp;
        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
        vec_new_data.push_back(static_cast<T>(*temp.data<int>()));
@@ -70,7 +70,7 @@ inline std::vector<T> GetDataFromTensorList(
        vec_new_data.push_back(static_cast<T>(*tensor->data<int>()));
      }
    } else if (tensor->type() == framework::proto::VarType::INT64) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
        framework::Tensor temp;
        TensorCopySync(*tensor, platform::CPUPlace(), &temp);
        // NOTE: Converting int64 to int32 may cause data overflow.
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
new file mode 100644
index 00000000000..b31c80ee9e7
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
@@ -0,0 +1,241 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import sys
+sys.path.append("..")
+import unittest
+from op_test import OpTest
+
+import paddle
+import numpy as np
+
+
+# Situation 1: Attr(shape) is a list(without tensor)
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 5, 'value': 3.8}
+        self.outputs = {'Out': np.full((123, 92), 3.8)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp2(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with default value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 5}
+        self.outputs = {'Out': np.full((123, 92), 0.0)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp3(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int64 value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 3, 'value': 10000000000}
+        self.outputs = {'Out': np.full((123, 92), 10000000000)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp4(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 2, 'value': 3}
+        self.outputs = {'Out': np.full((123, 92), 3)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+# Situation 2: Attr(shape) is a list(with tensor)
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1_ShapeTensorList(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+        shape_tensor_list = []
+        for index, ele in enumerate(self.shape):
+            shape_tensor_list.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {"ShapeTensorList": shape_tensor_list}
+        self.attrs = {
+            'shape': self.infer_shape,
+            'dtype': 5,
+            'value': self.value
+        }
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [-1, 92]
+        self.value = 3.8
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp2_ShapeTensorList(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with default value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+        shape_tensor_list = []
+        for index, ele in enumerate(self.shape):
+            shape_tensor_list.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {"ShapeTensorList": shape_tensor_list}
+        self.attrs = {'shape': self.infer_shape, 'dtype': 5}
+        self.outputs = {'Out': np.full(self.shape, 0.0)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [-1, -1]
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList):
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [123, -1]
+        self.value = 10000000000
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList):
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [123, -1]
+        self.value = 3
+
+
+# Situation 3: shape is a tensor
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1_ShapeTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+
+        self.inputs = {"ShapeTensor": np.array(self.shape).astype("int32")}
+        self.attrs = {'value': self.value, 'dtype': 5}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3.8
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+# Situation 4: value is a tensor
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1_ValueTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+
+        self.inputs = {
+            "ShapeTensor": np.array(self.shape).astype("int32"),
+            'ValueTensor': np.array([self.value]).astype("float32")
+        }
+        self.attrs = {'value': self.value + 1.0, 'dtype': 5}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3.8
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+# Situation 5: value is a tensor
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp2_ValueTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+
+        self.inputs = {
+            "ShapeTensor": np.array(self.shape).astype("int32"),
+            'ValueTensor': np.array([self.value]).astype("int32")
+        }
+        self.attrs = {'value': self.value, 'dtype': 2}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3
+        self.dtype = np.int32
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()
-- 
GitLab
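
Usage note: beyond the OpTest cases above, the sketch below shows one way the newly registered XPU fill_constant kernel can be reached from user code. It is illustrative only, not part of the patch, and assumes a Paddle build with XPU support (paddle.is_compiled_with_xpu() returns True), one visible device addressable as paddle.XPUPlace(0), and the 2.0-era static-graph API (paddle.static, paddle.full, which lowers to the fill_constant op exercised by the tests).

# Illustrative sketch (assumed environment: XPU-enabled Paddle build with one
# visible XPU device); falls back to CPU when XPU support is not compiled in.
import numpy as np
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # paddle.full creates a fill_constant op; its float32 XPU kernel is the
    # one registered in fill_constant_op_xpu.cc above.
    out = paddle.full(shape=[123, 92], fill_value=3.8, dtype='float32')

place = paddle.XPUPlace(0) if paddle.is_compiled_with_xpu() else paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_prog)
result = exe.run(main_prog, fetch_list=[out])[0]
np.testing.assert_allclose(result, np.full((123, 92), 3.8, dtype=np.float32),
                           rtol=1e-6)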