From e61f48c1370db6e31491c2008590cbe0fd7ee6bd Mon Sep 17 00:00:00 2001 From: houj04 <35131887+houj04@users.noreply.github.com> Date: Fri, 29 Jul 2022 10:34:20 +0800 Subject: [PATCH] [XPU] add sampling_id op, add top_k op, update xdnn api. test=kunlun (#44704) --- cmake/external/xpu.cmake | 4 +- paddle/fluid/framework/tensor_util.h | 10 + .../detection/generate_proposals_v2_op_xpu.cc | 22 +- paddle/fluid/operators/one_hot_op_xpu.cc | 17 +- paddle/fluid/operators/sampling_id_op_xpu.cc | 20 ++ .../fluid/platform/device/xpu/xpu2_op_list.h | 6 + .../unittests/xpu/test_one_hot_op_xpu.py | 267 +++++++----------- .../unittests/xpu/test_sampling_id_op_xpu.py | 51 ++++ 8 files changed, 212 insertions(+), 185 deletions(-) create mode 100644 paddle/fluid/operators/sampling_id_op_xpu.cc create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 28a9bdfc581..718b8fefe66 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so") if(NOT DEFINED XPU_BASE_URL) set(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") - set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220727") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220728") else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() @@ -19,7 +19,7 @@ endif() if(NOT DEFINED XPU_XDNN_BASE_URL) set(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev") - set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220727") + set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220728") else() set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}") endif() diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index 994d9771a16..79532172571 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -302,6 +302,11 @@ void TensorFromVector(const std::vector& src, size, reinterpret_cast(ctx).stream()); } +#endif +#ifdef PADDLE_WITH_XPU + else if (platform::is_xpu_place(dst_place)) { // NOLINT + memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); + } #endif else { // NOLINT PADDLE_THROW(platform::errors::Unimplemented( @@ -381,6 +386,11 @@ inline void TensorFromVector(const std::vector& src, reinterpret_cast(ctx).stream(); memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream); } +#endif +#ifdef PADDLE_WITH_XPU + else if (platform::is_xpu_place(dst_place)) { // NOLINT + memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); + } #endif else { // NOLINT PADDLE_THROW(platform::errors::Unimplemented( diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op_xpu.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op_xpu.cc index 14fa4e71425..8b513d69c26 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op_xpu.cc +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op_xpu.cc @@ -219,20 +219,14 @@ static std::pair ProposalForOneImage( // 4. nms int nms_keep_num = 0; - r = xpu::nms(dev_ctx.x_context(), - proposals_filter.data(), - nullptr, - keep_index.data(), - 1, - 1, - keep_num, - -1, - nms_thresh, - -1, - 0, - &nms_keep_num, - pixel_offset); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "nms"); + r = xpu::sorted_nms(dev_ctx.x_context(), + proposals_filter.data(), + keep_index.data(), + nms_keep_num, + keep_num, + nms_thresh, + pixel_offset); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "sorted_nms"); if (post_nms_top_n > 0 && post_nms_top_n < nms_keep_num) { keep_index.Resize({post_nms_top_n}); } else { diff --git a/paddle/fluid/operators/one_hot_op_xpu.cc b/paddle/fluid/operators/one_hot_op_xpu.cc index f3444d6fa8f..7c213956bfd 100644 --- a/paddle/fluid/operators/one_hot_op_xpu.cc +++ b/paddle/fluid/operators/one_hot_op_xpu.cc @@ -17,6 +17,7 @@ #include #include "paddle/fluid/operators/one_hot_op.h" +#include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { namespace operators { @@ -28,9 +29,13 @@ template class OneHotXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); + const auto* in = context.Input("X"); auto* out = context.Output("Out"); + + // get depth from attr int depth = context.Attr("depth"); + + // get depth from input tensor if (context.HasInput("depth_tensor")) { auto* depth_tensor = context.Input("depth_tensor"); auto* depth_data = depth_tensor->data(); @@ -50,18 +55,14 @@ class OneHotXPUKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); int len = in->numel(); + // int one_hot(Context* ctx, const T* x, float* y, int len, int depth, float + // on_value = 1.0f, float off_value = 0.0f); int ret = xpu::one_hot(dev_ctx.x_context(), in->data(), out->mutable_data(context.GetPlace()), len, depth); - - PADDLE_ENFORCE_EQ(ret, - XPU_SUCCESS, - platform::errors::External( - "XPU one_hot kernel return wrong value[%d %s]", - ret, - XPUAPIErrorMsg[ret])); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "one_hot"); } }; diff --git a/paddle/fluid/operators/sampling_id_op_xpu.cc b/paddle/fluid/operators/sampling_id_op_xpu.cc new file mode 100644 index 00000000000..027db5508de --- /dev/null +++ b/paddle/fluid/operators/sampling_id_op_xpu.cc @@ -0,0 +1,20 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/sampling_id_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_XPU_KERNEL(sampling_id, + paddle::operators::SamplingIdKernel, + paddle::operators::SamplingIdKernel); diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index 5182d432e1d..28ff2bfba5f 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -322,6 +322,9 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()), pOpKernelType(vartype::INT32, XPUPlace()), pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot", + XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), + pOpKernelType(vartype::INT64, XPUPlace())})}, {"one_hot_v2", XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()), pOpKernelType(vartype::INT64, XPUPlace())})}, @@ -393,6 +396,9 @@ XPUOpMap& get_kl2_ops() { {"scatter", XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()), pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sampling_id", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP64, XPUPlace())})}, {"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py index 33b59a8de65..258168b1da8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,172 +13,117 @@ # limitations under the License. from __future__ import print_function - import unittest import numpy as np -import paddle -import paddle.fluid.core as core import sys sys.path.append("..") +import paddle +import paddle.fluid.core as core +from op_test import OpTest from op_test_xpu import XPUOpTest -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -import time +from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper paddle.enable_static() -""" -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - 'core is not compiled with XPU') -class TestOneHotOp(XPUOpTest): - def setUp(self): - self.use_xpu = True - self.op_type = 'one_hot' - depth = 10 - depth_np = np.array(10).astype('int32') - x_lod = [[4, 1, 3, 3]] - x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] - x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) - - out = np.zeros(shape=(np.product(x.shape[:-1]), - depth)).astype('float32') - - for i in range(np.product(x.shape)): - out[i, x[i]] = 1.0 - - self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} - self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} - self.outputs = {'Out': (out, x_lod)} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place, check_dygraph=False) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - 'core is not compiled with XPU') -class TestOneHotOp_attr(XPUOpTest): - def setUp(self): - self.op_type = 'one_hot' - depth = 10 - x_lod = [[4, 1, 3, 3]] - x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] - x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) - - out = np.zeros(shape=(np.product(x.shape[:-1]), - depth)).astype('float32') - - for i in range(np.product(x.shape)): - out[i, x[i]] = 1.0 - - self.inputs = {'X': (x, x_lod)} - self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} - self.outputs = {'Out': (out, x_lod)} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place, check_dygraph=False) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - 'core is not compiled with XPU') -class TestOneHotOp_default_dtype(XPUOpTest): - def setUp(self): - self.op_type = 'one_hot' - depth = 10 - depth_np = np.array(10).astype('int32') - x_lod = [[4, 1, 3, 3]] - x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] - x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) - - out = np.zeros(shape=(np.product(x.shape[:-1]), - depth)).astype('float32') - - for i in range(np.product(x.shape)): - out[i, x[i]] = 1.0 - - self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} - self.attrs = {} - self.outputs = {'Out': (out, x_lod)} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place, check_dygraph=False) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - 'core is not compiled with XPU') -class TestOneHotOp_default_dtype_attr(XPUOpTest): - def setUp(self): - self.op_type = 'one_hot' - depth = 10 - x_lod = [[4, 1, 3, 3]] - x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] - x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) - - out = np.zeros(shape=(np.product(x.shape[:-1]), - depth)).astype('float32') - - for i in range(np.product(x.shape)): - out[i, x[i]] = 1.0 - - self.inputs = {'X': (x, x_lod)} - self.attrs = {'depth': depth} - self.outputs = {'Out': (out, x_lod)} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place, check_dygraph=False) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - 'core is not compiled with XPU') -class TestOneHotOp_out_of_range(XPUOpTest): - def setUp(self): - self.op_type = 'one_hot' - depth = 10 - x_lod = [[4, 1, 3, 3]] - x = [np.random.choice([-1, depth]) for i in range(sum(x_lod[0]))] - x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) - - out = np.zeros(shape=(np.product(x.shape[:-1]), - depth)).astype('float32') - - self.inputs = {'X': (x, x_lod)} - self.attrs = {'depth': depth, 'allow_out_of_range': True} - self.outputs = {'Out': (out, x_lod)} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place, check_dygraph=False) - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - 'core is not compiled with XPU') -class TestOneHotOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # the input must be Variable - in_w = np.random.random((4, 1)).astype('int32') - self.assertRaises(TypeError, fluid.layers.one_hot, in_w) - # the input must be int32 or int 64 - in_w2 = fluid.layers.data( - name='in_w2', - shape=[4, 1], - append_batch_size=False, - dtype='float32') - self.assertRaises(TypeError, fluid.layers.one_hot, in_w2) - # the depth must be int, long or Variable - in_r = fluid.layers.data( - name='in_r', - shape=[4, 1], - append_batch_size=False, - dtype='int32') - depth_w = np.array([4]) - self.assertRaises(TypeError, fluid.layers.one_hot, in_r, 4.1) - self.assertRaises(TypeError, fluid.layers.one_hot, in_r, depth_w) -""" - -if __name__ == '__main__': - paddle.enable_static() + + +class XPUTestOneHotOP(XPUOpTestWrapper): + + def __init__(self): + self.op_name = 'one_hot' + self.use_dynamic_create_class = False + + class TestXPUOneHotOP(XPUOpTest): + + def setUp(self): + self.place = paddle.XPUPlace(0) + self.init_dtype() + self.op_type = 'one_hot' + + self.set_data() + self.set_input() + + def set_data(self): + self.depth = 10 + self.depth_np = np.array(10).astype('int32') + self.x_lod = [[4, 1, 3, 3]] + self.x = [ + np.random.randint(0, self.depth - 1) + for i in range(sum(self.x_lod[0])) + ] + self.x = np.array(self.x).astype(self.dtype).reshape( + [sum(self.x_lod[0]), 1]) + + self.out = np.zeros(shape=(np.product(self.x.shape[:-1]), + self.depth)).astype('float32') + for i in range(np.product(self.x.shape)): + self.out[i, self.x[i]] = 1.0 + + self.outputs = {'Out': (self.out, self.x_lod)} + + def set_input(self): + self.inputs = { + 'X': (self.x, self.x_lod), + 'depth_tensor': self.depth_np + } + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + def init_dtype(self): + self.dtype = self.in_type + + class TestXPUOneHotOP_attr(TestXPUOneHotOP): + + def set_input(self): + self.inputs = {'X': (self.x, self.x_lod)} + self.attrs = { + 'dtype': int(core.VarDesc.VarType.FP32), + 'depth': self.depth + } + + class TestXPUOneHotOP_default_dtype(TestXPUOneHotOP): + + def set_input(self): + self.inputs = { + 'X': (self.x, self.x_lod), + 'depth_tensor': self.depth_np + } + self.attrs = {} + + class TestXPUOneHotOP_default_dtype_attr(TestXPUOneHotOP): + + def set_input(self): + self.inputs = {'X': (self.x, self.x_lod)} + self.attrs = {'depth': self.depth} + + class TestXPUOneHotOP_out_of_range(TestXPUOneHotOP): + + def set_data(self): + self.depth = 10 + self.x_lod = [[4, 1, 3, 3]] + self.x = [ + np.random.choice([-1, self.depth]) + for i in range(sum(self.x_lod[0])) + ] + self.x = np.array(self.x).astype(self.dtype).reshape( + [sum(self.x_lod[0]), 1]) + + self.out = np.zeros(shape=(np.product(self.x.shape[:-1]), + self.depth)).astype('float32') + + self.outputs = {'Out': (self.out, self.x_lod)} + + def set_input(self): + self.inputs = {'X': (self.x, self.x_lod)} + self.attrs = {'depth': self.depth, 'allow_out_of_range': True} + + +support_types = get_xpu_op_support_types('one_hot') +print("support_types: %s" % str(support_types)) +for stype in support_types: + create_test_class(globals(), XPUTestOneHotOP, stype) + +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py new file mode 100644 index 00000000000..3c56a5271e4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py @@ -0,0 +1,51 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import sys + +sys.path.append("..") + +from op_test import OpTest +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid.op import Operator +import paddle + + +class TestSamplingIdShape(unittest.TestCase): + + def test_shape(self): + paddle.enable_static() + x = fluid.layers.data(name='x', shape=[3], dtype='float32') + output = fluid.layers.sampling_id(x) + + place = fluid.XPUPlace(0) + exe = fluid.Executor(place=place) + exe.run(fluid.default_startup_program()) + + feed = { + 'x': np.array([[0.2, 0.3, 0.5], [0.2, 0.3, 0.4]], dtype='float32') + } + output_np = exe.run(feed=feed, fetch_list=[output])[0] + + self.assertEqual(output.shape[0], -1) + self.assertEqual(len(output.shape), 1) + self.assertEqual(output_np.shape[0], 2) + self.assertEqual(len(output_np.shape), 1) + + +if __name__ == "__main__": + unittest.main() -- GitLab