From 5098891fdf573a9a2db5fedacbefa059c9def8ce Mon Sep 17 00:00:00 2001 From: zhupengyang Date: Sat, 10 Oct 2020 15:34:54 +0800 Subject: [PATCH] add softmax xpu kernel (#27700) --- paddle/fluid/operators/softmax_op_xpu.cc | 99 +++++++++++++++++++ .../unittests/xpu/test_softmax_op_xpu.py | 93 +++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 paddle/fluid/operators/softmax_op_xpu.cc create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py diff --git a/paddle/fluid/operators/softmax_op_xpu.cc b/paddle/fluid/operators/softmax_op_xpu.cc new file mode 100644 index 00000000000..29740000aeb --- /dev/null +++ b/paddle/fluid/operators/softmax_op_xpu.cc @@ -0,0 +1,99 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/fluid/operators/softmax_op.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using DDim = framework::DDim;
+
+// Forward softmax kernel for XPU devices.
+//
+// Reads input "X" and attribute "axis", writes output "Out". Only softmax
+// over the last dimension of X is accepted; any other axis is rejected with
+// an InvalidArgument error. The work itself is delegated to
+// xpu::softmax2d_forward, whose non-success return code is reported as an
+// External error.
+template <typename DeviceContext, typename T>
+class SoftmaxXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+    const int rank = x->dims().size();
+    const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
+    PADDLE_ENFORCE_EQ(axis == -1 || axis == rank - 1, true,
+                      platform::errors::InvalidArgument(
+                          "xpu softmax kernel only support last dimension of x "
+                          "(axis==-1 or axis==x_dims-1), but received axis: "
+                          "%d, x's shape: %s.",
+                          axis, x->dims()));
+
+    // allocate memory on device.
+    out->mutable_data<T>(context.GetPlace());
+
+    // The XPU routine works on a 2-D (n x d) problem, so the N-D shape is
+    // reduced to two extents around `axis`.
+    // NOTE(review): SizeToAxis / SizeFromAxis come from softmax_op.h;
+    // presumably the products of the dims before / from `axis` - confirm.
+    const int n = SizeToAxis(axis, x->dims());
+    const int d = SizeFromAxis(axis, x->dims());
+
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    // NOTE(review): the last argument is a flag derived from `d <= 2048`;
+    // its meaning is defined by the XPU API - confirm against its header.
+    int r = xpu::softmax2d_forward(dev_ctx.x_context(), x->data<T>(),
+                                   out->data<T>(), n, d, d <= 2048);
+    PADDLE_ENFORCE_EQ(
+        r, XPU_SUCCESS,
+        platform::errors::External("XPU API(softmax2d_forward) return wrong "
+                                   "value[%d], please check whether "
+                                   "Baidu Kunlun Card is properly installed.",
+                                   r));
+  }
+};
+
+// Backward softmax kernel for XPU devices.
+//
+// Reads the forward result "Out" and the gradient of "Out", writes the
+// gradient of "X". As in the forward kernel, only the last dimension is
+// accepted as the softmax axis; any other axis is rejected with an
+// InvalidArgument error before the device is touched. The work itself is
+// delegated to xpu::softmax2d_backward, whose non-success return code is
+// reported as an External error.
+template <typename DeviceContext, typename T>
+class SoftmaxGradXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* out = context.Input<Tensor>("Out");
+    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
+    const int rank = dx->dims().size();
+    const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
+    // Same restriction as the forward kernel: the data is handed to a 2-D
+    // routine, so a softmax axis other than the last one must fail loudly
+    // instead of being computed over the wrong extent.
+    PADDLE_ENFORCE_EQ(
+        axis == -1 || axis == rank - 1, true,
+        platform::errors::InvalidArgument(
+            "xpu softmax_grad kernel only support last dimension of x "
+            "(axis==-1 or axis==x_dims-1), but received axis: "
+            "%d, dx's shape: %s.",
+            axis, dx->dims()));
+
+    // allocate memory on device.
+    dx->mutable_data<T>(context.GetPlace());
+
+    // Same 2-D (n x d) reduction of the shape as in the forward kernel.
+    const int n = SizeToAxis(axis, dx->dims());
+    const int d = SizeFromAxis(axis, dx->dims());
+
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    int r =
+        xpu::softmax2d_backward(dev_ctx.x_context(), out->data<T>(),
+                                dout->data<T>(), dx->data<T>(), n, d);
+    PADDLE_ENFORCE_EQ(
+        r, XPU_SUCCESS,
+        platform::errors::External("XPU API(softmax2d_backward) return wrong "
+                                   "value[%d], please check whether "
+                                   "Baidu Kunlun Card is properly installed.",
+                                   r));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+// Both kernels are registered for float only.
+REGISTER_OP_XPU_KERNEL(
+    softmax, ops::SoftmaxXPUKernel<paddle::platform::XPUDeviceContext, float>);
+REGISTER_OP_XPU_KERNEL(
+    softmax_grad,
+    ops::SoftmaxGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
+
+#endif  // PADDLE_WITH_XPU
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py
new file mode 100644
index 00000000000..92842fbc2e6
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import numpy as np
+import sys
+import unittest
+sys.path.append("..")
+from op_test import OpTest
+
+paddle.enable_static()
+np.random.seed(10)
+
+
+def stable_softmax(x):
+    """Compute the softmax of vector x in a numerically stable way.
+
+    The maximum of ``x`` is subtracted before exponentiation, and the
+    shifted values are clipped from below at -64.
+    """
+    # clip to shiftx, otherwise, when calc loss with
+    # log(exp(shiftx)), may get log(0)=INF
+    shiftx = (x - np.max(x)).clip(-64.)
+    exps = np.exp(shiftx)
+    return exps / np.sum(exps)
+
+
+def ref_softmax(x, axis=None, dtype=None):
+    """NumPy reference softmax along one axis of an N-D array.
+
+    Args:
+        x: input array; it is copied, never modified.
+        axis: axis to normalise over; ``None`` means the last axis (-1).
+        dtype: if given, the copy is cast to this dtype first.
+
+    Returns:
+        ``stable_softmax`` applied to every 1-D slice of the copy taken
+        along ``axis``.
+    """
+    x_t = x.copy()
+    if dtype is not None:
+        x_t = x_t.astype(dtype)
+    if axis is None:
+        axis = -1
+    return np.apply_along_axis(stable_softmax, axis, x_t)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUSoftmaxOp(OpTest):
+    """Checks the ``softmax`` op on ``XPUPlace(0)`` against ``ref_softmax``.
+
+    Defaults: float32 input of shape [2, 3, 4, 5], axis -1. Subclasses
+    override ``set_attrs`` to vary ``self.shape`` / ``self.axis``.
+    """
+
+    def setUp(self):
+        self.op_type = "softmax"
+        self.dtype = np.float32
+        self.shape = [2, 3, 4, 5]
+        self.axis = -1
+        # Let subclasses adjust shape / axis before any data is generated.
+        self.set_attrs()
+
+        x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
+        # Expected output comes from the shared NumPy reference helper.
+        out = ref_softmax(x, axis=self.axis)
+
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
+        self.attrs = {'axis': self.axis, 'use_xpu': True}
+
+    def set_attrs(self):
+        """Hook for subclasses; the base configuration changes nothing."""
+        pass
+
+    def test_check_output(self):
+        """Forward result must match the reference within atol=1e-4."""
+        self.check_output_with_place(paddle.XPUPlace(0), atol=1e-4)
+
+    def test_check_grad(self):
+        """Gradient check of 'Out' with respect to input 'X'."""
+        self.check_grad_with_place(paddle.XPUPlace(0), ['X'], 'Out')
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUSoftmaxAxis3(TestXPUSoftmaxOp):
+    """Same 4-D input, with the last axis given explicitly as 3."""
+
+    def set_attrs(self):
+        self.axis = 3
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUSoftmax2D(TestXPUSoftmaxOp):
+    """2-D input of shape [10, 12]."""
+
+    def set_attrs(self):
+        self.shape = [10, 12]
+
+
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUSoftmax3D(TestXPUSoftmaxOp):
+    """3-D input of shape [4, 5, 6]."""
+
+    def set_attrs(self):
+        self.shape = [4, 5, 6]
+
+
+if __name__ == "__main__":
+    unittest.main()
--
GitLab