diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h
index 6fea8fe98bf0e19bbbb023c91f4f9900f5ec1859..41fcf3750878e61616caff84c4f44d18d1d36815 100644
--- a/paddle/fluid/operators/fill_constant_op.h
+++ b/paddle/fluid/operators/fill_constant_op.h
@@ -66,7 +66,9 @@ class FillConstantKernel : public framework::OpKernel<T> {
                             value_tensor->numel()));
       const T *tensor_data = value_tensor->data<T>();
       framework::Tensor cpu_tensor;
-      if (platform::is_gpu_place(value_tensor->place())) {
+      auto tmp_place = value_tensor->place();
+      if (platform::is_gpu_place(tmp_place) ||
+          platform::is_xpu_place(tmp_place)) {
         TensorCopySync(*value_tensor, platform::CPUPlace(), &cpu_tensor);
         tensor_data = cpu_tensor.data<T>();
       }
@@ -102,6 +104,14 @@ class FillConstantKernel : public framework::OpKernel<T> {
       functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
               tensor, static_cast<T>(value));
     }
+#endif
+#ifdef PADDLE_WITH_XPU
+    if (!cpu_place) {
+      tensor->mutable_data(ctx.GetPlace(), data_type);
+      math::SetConstant<platform::XPUDeviceContext, T> functor;
+      functor(reinterpret_cast<const platform::XPUDeviceContext &>(dev_ctx),
+              tensor, static_cast<T>(value));
+    }
 #endif
   }
 };
diff --git a/paddle/fluid/operators/fill_constant_op_xpu.cc b/paddle/fluid/operators/fill_constant_op_xpu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2bf836272a400d6b57a5fe3bde23af45b53d4503
--- /dev/null
+++ b/paddle/fluid/operators/fill_constant_op_xpu.cc
@@ -0,0 +1,23 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/fill_constant_op.h"
+
+namespace ops = paddle::operators;
+#ifdef PADDLE_WITH_XPU
+REGISTER_OP_XPU_KERNEL(fill_constant, ops::FillConstantKernel<float>,
+                       ops::FillConstantKernel<double>,
+                       ops::FillConstantKernel<int64_t>,
+                       ops::FillConstantKernel<int>,
+                       ops::FillConstantKernel<bool>);
+#endif
diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc
index b8af5a21ca58185a10b34bca2310dd93436d340c..8c7437e4b5e720af28440119f7781d7940638525 100644
--- a/paddle/fluid/operators/math/math_function.cc
+++ b/paddle/fluid/operators/math/math_function.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include <cblas.h>
 #endif
 
+#include <memory>
 #include <vector>
 
 #include "paddle/fluid/framework/data_type.h"
@@ -44,6 +45,15 @@ template struct SetConstant<platform::CPUDeviceContext, int>;
 template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
 
+#ifdef PADDLE_WITH_XPU
+template struct SetConstant<platform::XPUDeviceContext, platform::float16>;
+template struct SetConstant<platform::XPUDeviceContext, float>;
+template struct SetConstant<platform::XPUDeviceContext, double>;
+template struct SetConstant<platform::XPUDeviceContext, int>;
+template struct SetConstant<platform::XPUDeviceContext, int64_t>;
+template struct SetConstant<platform::XPUDeviceContext, bool>;
+#endif
+
 #define DEFINE_CPU_TRANS(RANK)                                             \
   template struct Transpose<platform::CPUDeviceContext, platform::float16, \
                             RANK>;                                         \
diff --git a/paddle/fluid/operators/math/math_function.h b/paddle/fluid/operators/math/math_function.h
index 6af0278d82503a27a14de6aef96468d69d6c17ad..1ad1c29ddd879f96853e63a91fbe40398e5a7c7a 100644
--- a/paddle/fluid/operators/math/math_function.h
+++ b/paddle/fluid/operators/math/math_function.h
@@ -14,6 +14,7 @@ limitations under the License.
  */
 #pragma once
 #include <cmath>
+#include <memory>
 #include <vector>
 
 #include "paddle/fluid/framework/eigen.h"
@@ -84,6 +85,33 @@ struct RowwiseMean {
                   framework::Tensor* vec);
 };
 
+#ifdef PADDLE_WITH_XPU
+template <typename U>
+struct TensorSetConstantXPU {
+  TensorSetConstantXPU(framework::Tensor* tensor, U value)
+      : tensor_(tensor), value_(value) {}
+  template <typename T>
+  void apply() const {
+    int dev_id = -1;
+    xpu_current_device(&dev_id);
+    if (dev_id >= 64) {
+      // if dev_id >= 64, the device is a simulator device, -64 to get real
+      // dev_id
+      dev_id -= 64;
+    }
+    auto xpu = platform::XPUPlace(dev_id);
+    auto* begin = tensor_->mutable_data<T>(xpu);
+    int numel = tensor_->numel();
+    std::unique_ptr<T[]> data_cpu(new T[numel]);
+    std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
+    memory::Copy(xpu, begin, platform::CPUPlace(),
+                 static_cast<void*>(data_cpu.get()), numel * sizeof(T));
+  }
+  framework::Tensor* tensor_;
+  U value_;
+};
+#endif
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/math/math_function_impl.h b/paddle/fluid/operators/math/math_function_impl.h
index 869a3054598da9cd2223ca0e705c0f910ba043ec..d2480763dcf1297685d068c54e3171253135689e 100644
--- a/paddle/fluid/operators/math/math_function_impl.h
+++ b/paddle/fluid/operators/math/math_function_impl.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
  */
 #pragma once
+#include <memory>
 #include <vector>
 
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -27,8 +28,18 @@ template <typename DeviceContext, typename T>
 void SetConstant<DeviceContext, T>::operator()(const DeviceContext& context,
                                                framework::Tensor* tensor,
                                                T num) {
-  auto t = framework::EigenVector<T>::Flatten(*tensor);
-  t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+  bool xpu_place = false;
+#ifdef PADDLE_WITH_XPU
+  if (platform::is_xpu_place(context.GetPlace())) {
+    xpu_place = true;
+    framework::VisitDataType(tensor->type(),
+                             TensorSetConstantXPU<T>(tensor, num));
+  }
+#endif
+  if (!xpu_place) {
+    auto t = framework::EigenVector<T>::Flatten(*tensor);
+    t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
+  }
 }
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h
index 05d077b173a13e457fd38187b832f9586926a2ee..985c35127617bf1c4c708c3ab741ff8ca058af8a 100644
--- a/paddle/fluid/operators/utils.h
+++ b/paddle/fluid/operators/utils.h
@@ -26,7 +26,7 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   if (x->type() == framework::proto::VarType::INT32) {
     auto* data = x->data<int>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int>();
     }
@@ -34,7 +34,7 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   } else if (x->type() == framework::proto::VarType::INT64) {
     auto* data = x->data<int64_t>();
     framework::Tensor cpu_attr_tensor;
-    if (platform::is_gpu_place(x->place())) {
+    if (!platform::is_cpu_place(x->place())) {
       TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
       data = cpu_attr_tensor.data<int64_t>();
     }
@@ -62,7 +62,7 @@ inline std::vector<T> GetDataFromTensorList(
                                         tensor->dims()));
 
     if (tensor->type() == framework::proto::VarType::INT32) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
         framework::Tensor temp;
         TensorCopySync(*tensor, platform::CPUPlace(), &temp);
         vec_new_data.push_back(static_cast<int32_t>(*temp.data<int>()));
@@ -70,7 +70,7 @@ inline std::vector<T> GetDataFromTensorList(
         vec_new_data.push_back(static_cast<int32_t>(*tensor->data<int>()));
       }
     } else if (tensor->type() == framework::proto::VarType::INT64) {
-      if (platform::is_gpu_place(tensor->place())) {
+      if (!platform::is_cpu_place(tensor->place())) {
         framework::Tensor temp;
         TensorCopySync(*tensor, platform::CPUPlace(), &temp);
         // NOTE: Converting int64 to int32 may cause data overflow.
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..b31c80ee9e7e8ef6c7b522e72e17498474121758
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
@@ -0,0 +1,241 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import sys
+sys.path.append("..")
+import unittest
+from op_test import OpTest
+
+import paddle
+import numpy as np
+
+
+# Situation 1: Attr(shape) is a list(without tensor)
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 5, 'value': 3.8}
+        self.outputs = {'Out': np.full((123, 92), 3.8)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp2(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with default value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 5}
+        self.outputs = {'Out': np.full((123, 92), 0.0)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp3(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int64 value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 3, 'value': 10000000000}
+        self.outputs = {'Out': np.full((123, 92), 10000000000)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp4(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified int value'''
+        self.op_type = "fill_constant"
+
+        self.inputs = {}
+        self.attrs = {'shape': [123, 92], 'dtype': 2, 'value': 3}
+        self.outputs = {'Out': np.full((123, 92), 3)}
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+# Situation 2: Attr(shape) is a list(with tensor)
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1_ShapeTensorList(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+        shape_tensor_list = []
+        for index, ele in enumerate(self.shape):
+            shape_tensor_list.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {"ShapeTensorList": shape_tensor_list}
+        self.attrs = {
+            'shape': self.infer_shape,
+            'dtype': 5,
+            'value': self.value
+        }
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [-1, 92]
+        self.value = 3.8
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp2_ShapeTensorList(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with default value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+        shape_tensor_list = []
+        for index, ele in enumerate(self.shape):
+            shape_tensor_list.append(("x" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {"ShapeTensorList": shape_tensor_list}
+        self.attrs = {'shape': self.infer_shape, 'dtype': 5}
+        self.outputs = {'Out': np.full(self.shape, 0.0)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [-1, -1]
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList):
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [123, -1]
+        self.value = 10000000000
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList):
+    def init_data(self):
+        self.shape = [123, 92]
+        self.infer_shape = [123, -1]
+        self.value = 3
+
+
+# Situation 3: shape is a tensor
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1_ShapeTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+
+        self.inputs = {"ShapeTensor": np.array(self.shape).astype("int32")}
+        self.attrs = {'value': self.value, 'dtype': 5}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3.8
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+# Situation 4: value is a tensor
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp1_ValueTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+
+        self.inputs = {
+            "ShapeTensor": np.array(self.shape).astype("int32"),
+            'ValueTensor': np.array([self.value]).astype("float32")
+        }
+        self.attrs = {'value': self.value + 1.0, 'dtype': 5}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3.8
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+# Situation 5: value is a tensor
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestFillConstantOp2_ValueTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with specified value'''
+        self.op_type = "fill_constant"
+        self.init_data()
+
+        self.inputs = {
+            "ShapeTensor": np.array(self.shape).astype("int32"),
+            'ValueTensor': np.array([self.value]).astype("int32")
+        }
+        self.attrs = {'value': self.value, 'dtype': 2}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3
+        self.dtype = np.int32
+
+    def test_check_output(self):
+        place = paddle.XPUPlace(0)
+        self.check_output_with_place(place)
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()