diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index 9e7ecdfdac9c293df0eb7fb58c8755d92ef9a065..32a6ec18714a4ac6426066753a6272b53ba37abf 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220820")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220831")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
@@ -19,7 +19,7 @@ endif()
 if(NOT DEFINED XPU_XDNN_BASE_URL)
   set(XPU_XDNN_BASE_URL_WITHOUT_DATE
       "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
-  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220820")
+  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220831")
 else()
   set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
 endif()
diff --git a/paddle/fluid/operators/collective/c_embedding_op_xpu.cc b/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..787d0e92aec522fa0de7f00e9884948404ed3da4
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
@@ -0,0 +1,84 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_embedding_op.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
+
+namespace paddle {
+namespace operators {
+
+using LoDTensor = framework::LoDTensor;
+
+template <typename T>
+class CEmbeddingOpXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* table_t = ctx.Input<LoDTensor>("W");
+    auto* ids_t = ctx.Input<LoDTensor>("Ids");
+    auto* output_t = ctx.Output<LoDTensor>("Out");
+    const int64_t start_index = ctx.Attr<int64_t>("start_index");
+    const T* table_data = table_t->data<T>();
+    T* output_data = output_t->mutable_data<T>(ctx.GetPlace());
+
+    const int64_t height = table_t->dims()[0];
+    const int64_t width = table_t->dims()[1];
+
+    // int embedding(Context* ctx, const T* x, const TID* indices, T* y, int xm,
+    //               int n, int ym, int padding_idx, TID start_index = 0);
+
+    // xm: table height: number of entries of the table.
+    // n: embedding dim: number of float values within a single entry.
+    // ym: number of elements of the input ids.
+
+    auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
+
+    const auto& index_type = framework::TransToProtoVarType(ids_t->dtype());
+    if (index_type == framework::proto::VarType::INT32) {
+      int r = xpu::embedding(dev_ctx.x_context(),
+                             table_data,
+                             ids_t->data<int32_t>(),
+                             output_data,
+                             height,
+                             width,
+                             ids_t->numel(),
+                             -1,
+                             static_cast<int32_t>(start_index));
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "embedding");
+    } else if (index_type == framework::proto::VarType::INT64) {
+      int r = xpu::embedding(dev_ctx.x_context(),
+                             table_data,
+                             ids_t->data<int64_t>(),
+                             output_data,
+                             height,
+                             width,
+                             ids_t->numel(),
+                             -1,
+                             static_cast<int64_t>(start_index));
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "embedding");
+    } else {
+      PADDLE_THROW(platform::errors::Unavailable(
+          "XPU c_embedding ids only support int32 or int64."));
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_XPU_KERNEL(
+    c_embedding,
+    ops::CEmbeddingOpXPUKernel<float>);
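
Note (reviewer sketch, not part of the patch): the xdnn call above maps height -> xm, width -> n, and ids_t->numel() -> ym, with padding_idx fixed at -1 and the partition offset passed as start_index. The semantics the shared c_embedding tests expect can be sketched in NumPy as follows; the zero-fill for ids outside the local partition is assumed to match the existing CPU/GPU c_embedding reference, and the helper name c_embedding_ref is illustrative only.

import numpy as np

def c_embedding_ref(table, ids, start_index):
    # table: (height, width) local slice of the full embedding table.
    # ids: global vocabulary ids, any shape.
    # Ids are shifted by start_index; ids outside
    # [start_index, start_index + height) are assumed to yield all-zero rows.
    height, width = table.shape
    out = np.zeros(ids.shape + (width,), dtype=table.dtype)
    local = ids - start_index                   # shift into the local slice
    in_range = (local >= 0) & (local < height)  # ids owned by this partition
    out[in_range] = table[local[in_range]]
    return out

# Example: a 4-row slice owning global ids 10..13.
table = np.arange(8, dtype=np.float32).reshape(4, 2)
ids = np.array([[9, 10], [13, 14]], dtype=np.int64)
print(c_embedding_ref(table, ids, start_index=10))
# Rows for ids 10 and 13 are looked up; ids 9 and 14 produce zero rows.
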
diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index d1693c9f49b561e45a91cf477d4a948645a944ba..0aa0e2049180fb92ef2ef9853a80629efd900513 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -84,6 +84,7 @@ XPUOpMap& get_kl2_ops() {
        XPUKernelSet({pOpKernelType(vartype::FP16, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace()),
                      pOpKernelType(vartype::INT32, XPUPlace())})},
+      {"c_embedding", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"c_identity",
        XPUKernelSet({pOpKernelType(vartype::FP16, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace()),
diff --git a/python/paddle/fluid/tests/unittests/c_embedding_op_base.py b/python/paddle/fluid/tests/unittests/c_embedding_op_base.py
index 8b5f18407906a7ed7733959773f5cfedfc674aa9..acd64e71865ef51db1f3da9d999af24f8a9e1cd7 100644
--- a/python/paddle/fluid/tests/unittests/c_embedding_op_base.py
+++ b/python/paddle/fluid/tests/unittests/c_embedding_op_base.py
@@ -42,6 +42,8 @@ class TestCEmbeddingCPU(OpTest):
         self.initcase()
         if core.is_compiled_with_npu():
             self.__class__.use_npu = True
+        elif core.is_compiled_with_xpu():
+            self.__class__.use_xpu = True
         elif core.is_compiled_with_cuda():
             self.__class__.exist_fp64_check_grad = True
 
@@ -59,6 +61,8 @@ class TestCEmbeddingCPU(OpTest):
         self.attrs = {'start_index': self.start_index}
         if core.is_compiled_with_npu():
             self.__class__.use_npu = True
+        elif core.is_compiled_with_xpu():
+            self.__class__.use_xpu = True
 
     def test_check_cpu(self):
         self.check_output_with_place(core.CPUPlace())
@@ -82,12 +86,16 @@ class TestCEmbeddingOpBase(TestCEmbeddingCPU):
             self.check_output_with_place(core.CUDAPlace(0))
         elif core.is_compiled_with_npu():
             self.check_output_with_place(core.NPUPlace(0))
+        elif core.is_compiled_with_xpu():
+            self.check_output_with_place(core.XPUPlace(0))
 
     def test_check_grad(self):
         if core.is_compiled_with_cuda():
             self.check_grad_with_place(core.CUDAPlace(0), ['W'], 'Out')
         elif core.is_compiled_with_npu():
             self.check_grad_with_place(core.NPUPlace(0), ['W'], 'Out')
+        elif core.is_compiled_with_xpu():
+            self.check_grad_with_place(core.XPUPlace(0), ['W'], 'Out')
 
     def init_dtype(self):
         if core.is_compiled_with_cuda():
@@ -96,6 +104,9 @@ class TestCEmbeddingOpBase(TestCEmbeddingCPU):
         elif core.is_compiled_with_npu():
             self.dtype = "float32"
             self.ids_dtype = "int32"
+        elif core.is_compiled_with_xpu():
+            self.dtype = "float32"
+            self.ids_dtype = "int64"
 
 
 class TestCEmbeddingOpFP32(TestCEmbeddingOpBase):
@@ -123,6 +134,8 @@ class TestCEmbeddingOpFP32(TestCEmbeddingOpBase):
         self.initcase()
         if core.is_compiled_with_npu():
             self.__class__.use_npu = True
+        elif core.is_compiled_with_xpu():
+            self.__class__.use_xpu = True
         elif core.is_compiled_with_cuda():
             self.__class__.exist_fp64_check_grad = True
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdf965c24be7beba10dbc82156b150c2f49ee5a8
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+
+sys.path.append("..")
+import paddle
+from paddle.fluid.tests.unittests.c_embedding_op_base import TestCEmbeddingCPU, TestCEmbeddingOpBase, TestCEmbeddingOpFP32
+
+paddle.enable_static()
+
+TestCEmbeddingCPU()
+
+TestCEmbeddingOpBase()
+
+TestCEmbeddingOpFP32()
+
+if __name__ == "__main__":
+    unittest.main()
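
For reference, the gradient that test_check_grad now verifies numerically on XPUPlace(0) is a scatter-add of the output-gradient rows into the local table slice. A minimal NumPy sketch follows; it is illustrative only, and the helper name c_embedding_grad_ref is not from this patch.

import numpy as np

def c_embedding_grad_ref(table_shape, ids, d_out, start_index):
    # d_out: gradient w.r.t. the output, shape ids.shape + (width,).
    # Rows of the local table hit by in-range ids accumulate the matching
    # d_out rows; out-of-range ids contribute nothing.
    d_table = np.zeros(table_shape, dtype=d_out.dtype)
    local = ids - start_index
    in_range = (local >= 0) & (local < table_shape[0])
    np.add.at(d_table, local[in_range], d_out[in_range])
    return d_table

ids = np.array([[9, 10], [13, 14]], dtype=np.int64)
d_out = np.ones(ids.shape + (2,), dtype=np.float32)
print(c_embedding_grad_ref((4, 2), ids, d_out, start_index=10))
# Rows 0 and 3 of d_table become [1. 1.]; rows 1 and 2 stay zero.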