From d4359b0f39af18fa86ac660a47125cbe63517408 Mon Sep 17 00:00:00 2001 From: Jack Zhou <136876878@qq.com> Date: Mon, 12 Oct 2020 11:04:15 +0800 Subject: [PATCH] add the kunlun kernel for the paddle 2.0 Add xpu kernel for KUNLUN core: * accuracy op * sign op * scale op * sum op Add default atol in xpu unittest. --- .../operators/metrics/accuracy_op_xpu.cc | 120 ++++++++++++++++++ paddle/fluid/operators/scale_op_xpu.cc | 63 +++++++++ paddle/fluid/operators/sign_op_xpu.cc | 44 +++++++ paddle/fluid/operators/sum_op_xpu.cc | 65 ++++++++++ .../unittests/xpu/test_accuracy_op_xpu.py | 63 +++++++++ .../tests/unittests/xpu/test_scale_op_xpu.py | 54 ++++++++ .../tests/unittests/xpu/test_sign_op_xpu.py | 54 ++++++++ .../tests/unittests/xpu/test_sum_op_xpu.py | 61 +++++++++ 8 files changed, 524 insertions(+) create mode 100644 paddle/fluid/operators/metrics/accuracy_op_xpu.cc create mode 100644 paddle/fluid/operators/scale_op_xpu.cc create mode 100644 paddle/fluid/operators/sign_op_xpu.cc create mode 100644 paddle/fluid/operators/sum_op_xpu.cc create mode 100755 python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc new file mode 100644 index 00000000000..c0aa00e7934 --- /dev/null +++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -0,0 +1,120 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include <memory>
+#include <vector>
+
+#include "paddle/fluid/operators/metrics/accuracy_op.h"
+#include "paddle/fluid/platform/xpu_header.h"
+
+namespace paddle {
+namespace operators {
+
+namespace {
+
+// Deleter that returns a buffer obtained from xpu_malloc to xpu_free.
+struct XPUFreeDeleter {
+  void operator()(int* ptr) const { xpu_free(ptr); }
+};
+
+// Owning handle for an int buffer allocated with xpu_malloc. Using a handle
+// instead of a raw pointer guarantees the buffer is released even when a
+// later PADDLE_ENFORCE_* check throws.
+using XPUIntBuffer = std::unique_ptr<int, XPUFreeDeleter>;
+
+// Allocates `bytes` bytes with xpu_malloc and returns an owning handle.
+// Raises ResourceExhausted when xpu_malloc does not return XPU_SUCCESS.
+XPUIntBuffer AllocXPUIntBuffer(size_t bytes) {
+  int* raw = nullptr;
+  PADDLE_ENFORCE_EQ(
+      xpu_malloc(reinterpret_cast<void**>(&raw), bytes), XPU_SUCCESS,
+      platform::errors::ResourceExhausted(
+          "\n\nOut of memory error on XPU, Cannot allocate %s memory"
+          " on XPU. \n\nPlease check whether there is any other process "
+          "using XPU.\n",
+          string::HumanReadableSize(bytes)));
+  return XPUIntBuffer(raw);
+}
+
+}  // namespace
+
+// XPU kernel for the `accuracy` op.
+//
+// The "Indices" and "Label" inputs are read as int64 on the device, while
+// the xpu::accuracy call below is handed int buffers. Both inputs are
+// therefore copied to the host, narrowed to int, and copied back into
+// temporary device buffers before the XPU kernel runs.
+//
+// NOTE(review): the template parameter lists and angle-bracket arguments in
+// this file were missing from the copy under review and have been
+// reconstructed -- confirm them against the upstream source.
+template <typename DeviceContext, typename T>
+class AccuracyXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* inference = ctx.Input<framework::Tensor>("Out");
+    auto* indices = ctx.Input<framework::Tensor>("Indices");
+    auto* label = ctx.Input<framework::Tensor>("Label");
+    auto* accuracy = ctx.Output<framework::Tensor>("Accuracy");
+    auto* correct = ctx.Output<framework::Tensor>("Correct");
+    auto* total = ctx.Output<framework::Tensor>("Total");
+    int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
+    int* total_data = total->mutable_data<int>(ctx.GetPlace());
+    float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
+    const int64_t* indices_data = indices->data<int64_t>();
+    const int64_t* label_data = label->data<int64_t>();
+    const size_t num_samples = inference->dims()[0];
+    const size_t class_dim = inference->dims()[1];
+    if (num_samples == 0) {
+      // Nothing to score; the outputs are allocated above but not written.
+      return;
+    }
+    const size_t indices_count = num_samples * class_dim;
+    const size_t indices_int32_size = indices_count * sizeof(int);
+    const size_t indices_int64_size = indices_count * sizeof(int64_t);
+    const size_t label_int32_size = num_samples * sizeof(int);
+    const size_t label_int64_size = num_samples * sizeof(int64_t);
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    // Copy the place by value: it is needed for every host<->device copy.
+    const platform::XPUPlace xpu_place =
+        BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace());
+
+    // Temporary int32 device buffers; released automatically on every exit
+    // path, including exceptions raised by the checks below.
+    XPUIntBuffer indices_int32_device = AllocXPUIntBuffer(indices_int32_size);
+    XPUIntBuffer label_int32_device = AllocXPUIntBuffer(label_int32_size);
+
+    // Host staging buffers. std::vector throws on allocation failure, where
+    // the unchecked std::malloc results used previously could be null.
+    std::vector<int64_t> indices_int64_host(indices_count);
+    std::vector<int64_t> label_int64_host(num_samples);
+    std::vector<int> indices_int32_host(indices_count);
+    std::vector<int> label_int32_host(num_samples);
+
+    // Make sure pending device work has finished before reading the inputs.
+    dev_ctx.Wait();
+    memory::Copy(platform::CPUPlace(), indices_int64_host.data(), xpu_place,
+                 indices_data, indices_int64_size);
+    memory::Copy(platform::CPUPlace(), label_int64_host.data(), xpu_place,
+                 label_data, label_int64_size);
+
+    // Narrow int64 -> int element by element.
+    for (size_t i = 0; i < indices_count; ++i) {
+      indices_int32_host[i] = static_cast<int>(indices_int64_host[i]);
+    }
+    for (size_t i = 0; i < num_samples; ++i) {
+      label_int32_host[i] = static_cast<int>(label_int64_host[i]);
+    }
+
+    memory::Copy(xpu_place, indices_int32_device.get(), platform::CPUPlace(),
+                 indices_int32_host.data(), indices_int32_size);
+    memory::Copy(xpu_place, label_int32_device.get(), platform::CPUPlace(),
+                 label_int32_host.data(), label_int32_size);
+    int r = xpu::accuracy(dev_ctx.x_context(), indices_int32_device.get(),
+                          label_int32_device.get(), num_samples, class_dim,
+                          correct_data, total_data, accuracy_data);
+    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
+                      platform::errors::Fatal("XPU kernel error!"));
+    // Wait for the kernel before the temporary device buffers go out of
+    // scope and are freed.
+    dev_ctx.Wait();
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(
+    accuracy,
+    ops::AccuracyXPUKernel<paddle::platform::XPUDeviceContext, float>);
+
+#endif
diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc
new file mode 100644
index 00000000000..4002be81001
--- /dev/null
+++ b/paddle/fluid/operators/scale_op_xpu.cc
@@ -0,0 +1,63 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/scale_op.h"
+#include <string>
+#include "paddle/fluid/platform/xpu_header.h"
+
+namespace paddle {
+namespace operators {
+
+// XPU kernel for the `scale` op.
+//
+// Reads input "X" (a LoDTensor or the value tensor of a SelectedRows),
+// allocates "Out" on the same place as the input and forwards the `scale`,
+// `bias` and `bias_after_scale` attributes to xpu::scale.
+//
+// NOTE(review): the template parameter lists, angle-bracket arguments and
+// the system #include in this file were missing from the copy under review
+// and have been reconstructed -- confirm them against the upstream source.
+template <typename DeviceContext, typename T>
+class ScaleXPUKernel : public framework::OpKernel<T> {
+ public:
+  // `override` (instead of a bare `virtual`) lets the compiler verify the
+  // signature against OpKernel, matching the other XPU kernels in this patch.
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in_var = ctx.InputVar("X");
+    auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var);
+    auto scale = static_cast<T>(ctx.Attr<float>("scale"));
+    auto bias = static_cast<T>(ctx.Attr<float>("bias"));
+    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
+    auto* out_var = ctx.OutputVar("Out");
+    // For a SelectedRows input that is not computed in place, mirror the
+    // row index and height onto the output before touching its value tensor.
+    if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
+      auto& in_slr = in_var->Get<framework::SelectedRows>();
+      auto* out_slr = out_var->GetMutable<framework::SelectedRows>();
+      out_slr->set_rows(in_slr.rows());
+      out_slr->set_height(in_slr.height());
+    }
+    auto* out =
+        framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var);
+    out->mutable_data<T>(in->place());
+    PADDLE_ENFORCE_EQ(
+        in->dims(), out->dims(),
+        platform::errors::InvalidArgument("In and out should have the same dim,"
+                                          " expected %s, but got %s.",
+                                          in->dims().to_str().c_str(),
+                                          out->dims().to_str().c_str()));
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    int r = xpu::scale(dev_ctx.x_context(), in->numel(), scale, bias,
+                       bias_after_scale, in->data<float>(), out->data<float>());
+    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
+                      platform::errors::Fatal("XPU kernel error!"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(
+    scale, ops::ScaleXPUKernel<paddle::platform::XPUDeviceContext, float>);
+
+#endif
diff --git a/paddle/fluid/operators/sign_op_xpu.cc b/paddle/fluid/operators/sign_op_xpu.cc
new file mode 100644
index 00000000000..44fd555544e
--- /dev/null
+++ b/paddle/fluid/operators/sign_op_xpu.cc
@@ -0,0 +1,44 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/sign_op.h"
+#include "paddle/fluid/platform/xpu_header.h"
+namespace paddle {
+namespace operators {
+
+// XPU kernel for the `sign` op.
+//
+// Allocates "Out" on the same place as input "X" and runs
+// xpu::activation_forward with Activation_t::SIGN over all elements of "X".
+//
+// NOTE(review): the template parameter lists and angle-bracket arguments in
+// this file were missing from the copy under review and have been
+// reconstructed -- confirm them against the upstream source.
+template <typename DeviceContext, typename T>
+class SignXPUKernel : public framework::OpKernel<T> {
+ public:
+  // `override` (instead of a bare `virtual`) lets the compiler verify the
+  // signature against OpKernel, matching the other XPU kernels in this patch.
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* out = context.Output<framework::Tensor>("Out");
+    auto* in = context.Input<framework::Tensor>("X");
+    out->mutable_data<T>(in->place());
+    auto xpu_context =
+        context.template device_context<DeviceContext>().x_context();
+    int r = xpu::activation_forward(xpu_context, xpu::Activation_t::SIGN,
+                                    in->numel(), in->data<T>(), out->data<T>());
+    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
+                      platform::errors::Fatal("XPU kernel error!"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(
+    sign, ops::SignXPUKernel<paddle::platform::XPUDeviceContext, float>);
+
+#endif
diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc
new file mode 100644
index 00000000000..14928061d23
--- /dev/null
+++ b/paddle/fluid/operators/sum_op_xpu.cc
@@ -0,0 +1,65 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/sum_op.h"
+#include <vector>
+#include "paddle/fluid/platform/xpu_header.h"
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+
+// XPU kernel for the `sum` op.
+//
+// Collects the data pointers of all non-empty LoDTensor inputs in "X" and
+// hands them to xpu::sum_batch, which writes the result into "Out". Inputs
+// or outputs that are not LoDTensor are rejected with InvalidArgument.
+//
+// NOTE(review): the template parameter lists, angle-bracket arguments and
+// the system #include in this file were missing from the copy under review
+// and have been reconstructed -- confirm them against the upstream source.
+template <typename DeviceContext, typename T>
+class SumXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &context) const override {
+    auto in_vars = context.MultiInputVar("X");
+    auto out_var = context.OutputVar("Out");
+    auto *out = context.Output<framework::LoDTensor>("Out");
+    // Guard the in_vars[0] access so an empty input list cannot index out
+    // of bounds.
+    const bool in_place = !in_vars.empty() && out_var == in_vars[0];
+    const int N = static_cast<int>(in_vars.size());
+    PADDLE_ENFORCE_EQ(
+        out_var->IsType<framework::LoDTensor>(), true,
+        platform::errors::InvalidArgument("XPU only supports LoDTensor"));
+    // When the output aliases the first input its buffer already exists.
+    if (!in_place) {
+      out->mutable_data<T>(context.GetPlace());
+    }
+    auto &dev_ctx = context.template device_context<DeviceContext>();
+    // Sized for the worst case; only the first `valid_count` slots are used.
+    std::vector<const float *> ptrs(N, nullptr);
+    int valid_count = 0;
+    for (int i = 0; i < N; ++i) {
+      PADDLE_ENFORCE_EQ(
+          in_vars[i]->IsType<framework::LoDTensor>(), true,
+          platform::errors::InvalidArgument("XPU only supports LoDTensor"));
+      auto &in_t = in_vars[i]->Get<framework::LoDTensor>();
+      // Empty inputs contribute nothing to the sum and are skipped.
+      if (in_t.numel() == 0) {
+        continue;
+      }
+      ptrs[valid_count] = reinterpret_cast<const float *>(in_t.data<T>());
+      valid_count++;
+    }
+    int r = xpu::sum_batch(dev_ctx.x_context(), ptrs.data(), out->data<T>(),
+                           valid_count, out->numel());
+    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
+                      platform::errors::Fatal("XPU kernel error!"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_XPU_KERNEL(
+    sum, ops::SumXPUKernel<paddle::platform::XPUDeviceContext, float>);
+#endif
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py
new file mode 100755
index 00000000000..7aaa7885681
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+import paddle
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUAccuracyOp(OpTest):
+    # Checks the XPU `accuracy` kernel against a reference computed in
+    # plain Python over random indices and labels.
+
+    def setUp(self):
+        self.op_type = "accuracy"
+        self.init_dtype()
+        sample_count = 8192
+        infer = np.random.random((sample_count, 1)).astype(self.dtype)
+        indices = np.random.randint(0, 2, (sample_count, 1)).astype('int64')
+        label = np.random.randint(0, 2, (sample_count, 1)).astype('int64')
+        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
+
+        # A sample counts as correct as soon as one of its candidate
+        # indices equals its label.
+        num_correct = sum(1 for candidates, expected in zip(indices, label)
+                          if any(ele == expected for ele in candidates))
+
+        accuracy = np.array([num_correct / float(sample_count)])
+        correct = np.array([num_correct])
+        total = np.array([sample_count])
+        self.outputs = {
+            'Accuracy': accuracy.astype(self.dtype),
+            'Correct': correct.astype("int32"),
+            'Total': total.astype("int32")
+        }
+        self.attrs = {'use_xpu': True}
+
+    def init_dtype(self):
+        # Element type of the inference input and of the expected accuracy.
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_output_with_place(paddle.XPUPlace(0))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py
new file
mode 100644
index 00000000000..1f74fa5e2d6
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+import paddle
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUScaleOp(OpTest):
+    # Checks the XPU `scale` kernel: the expected output is the random
+    # input multiplied by the `scale` attribute (no bias attribute is set).
+
+    def setUp(self):
+        self.op_type = "scale"
+        self.dtype = np.float32
+        factor = -2.3
+        x = np.random.random((10, 10)).astype(self.dtype)
+        self.inputs = {'X': x}
+        self.attrs = {'scale': factor, 'use_xpu': True}
+        # Cast the factor to the tensor dtype before multiplying.
+        self.outputs = {'Out': x * self.dtype(factor)}
+
+    def test_check_output(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_output_with_place(paddle.XPUPlace(0))
+
+    def test_check_grad(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_grad_with_place(paddle.XPUPlace(0), ['X'], 'Out')
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py
new file mode 100644
index 00000000000..ab07221a070
--- /dev/null
+++
b/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+import paddle
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUSignOp(OpTest):
+    # Checks the XPU `sign` kernel against np.sign on a random input
+    # drawn uniformly from [-10, 10).
+
+    def setUp(self):
+        self.op_type = "sign"
+        self.dtype = np.float32
+        x = np.random.uniform(-10, 10, (10, 10)).astype(self.dtype)
+        self.inputs = {'X': x}
+        self.outputs = {'Out': np.sign(x)}
+        self.attrs = {'use_xpu': True}
+
+    def test_check_output(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_output_with_place(paddle.XPUPlace(0))
+
+    def test_check_grad(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_grad_with_place(paddle.XPUPlace(0), ['X'], 'Out')
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py
new file mode 100644
index 00000000000..3bafbf649e6
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2020 PaddlePaddle Authors.
All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+import paddle
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUSumOp(OpTest):
+    # Checks the XPU `sum` kernel: three random (3, 40) inputs are added
+    # element-wise and compared with the kernel output.
+
+    def setUp(self):
+        self.op_type = "sum"
+        self.use_mkldnn = False
+        self.init_kernel_type()
+        operands = [
+            np.random.random((3, 40)).astype(self.dtype) for _ in range(3)
+        ]
+        # Inputs are passed as (name, array) pairs: x0, x1, x2.
+        named_operands = [("x%d" % pos, arr)
+                          for pos, arr in enumerate(operands)]
+        self.inputs = {"X": named_operands}
+        # Same left-to-right addition order as x0 + x1 + x2.
+        expected = operands[0] + operands[1] + operands[2]
+        self.outputs = {'Out': expected}
+        self.attrs = {'use_mkldnn': self.use_mkldnn, 'use_xpu': True}
+
+    def init_kernel_type(self):
+        # Element type used for every input tensor.
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_output_with_place(paddle.XPUPlace(0))
+
+    def test_check_grad(self):
+        if not paddle.is_compiled_with_xpu():
+            return
+        self.check_grad_with_place(paddle.XPUPlace(0), ['x0'], 'Out')
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab