From 65e9bd90fe481cd91de43f1d64a5987459cc4427 Mon Sep 17 00:00:00 2001
From: ykkk2333 <77383312+ykkk2333@users.noreply.github.com>
Date: Fri, 2 Sep 2022 16:27:13 +0800
Subject: [PATCH] migrate sigmoid with cross entropy, and tile xpu kernels to
 phi, test=kunlun (#45621)

---
 ...igmoid_cross_entropy_with_logits_op_xpu.cc | 179 ------------------
 paddle/fluid/operators/tile_op_xpu.cc         | 142 --------------
 ...d_cross_entropy_with_logits_grad_kernel.cc |  91 +++++++++
 ...igmoid_cross_entropy_with_logits_kernel.cc |  87 +++++++++
 paddle/phi/kernels/xpu/tile_kernel.cc         | 130 +++++++++++++
 5 files changed, 308 insertions(+), 321 deletions(-)
 delete mode 100644 paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_xpu.cc
 delete mode 100644 paddle/fluid/operators/tile_op_xpu.cc
 create mode 100644 paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_kernel.cc
 create mode 100644 paddle/phi/kernels/xpu/tile_kernel.cc

diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_xpu.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_xpu.cc
deleted file mode 100644
index 90c299ead1d..00000000000
--- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_xpu.cc
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef PADDLE_WITH_XPU
-
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#include "paddle/fluid/platform/device/xpu/xpu_header.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class SigmoidCrossEntropyWithLogitsXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE_EQ(
-        platform::is_xpu_place(context.GetPlace()),
-        true,
-        platform::errors::Unavailable("This kernel only runs on XPU."));
-
-    // input and output data
-    auto* input = context.Input<Tensor>("X");
-    auto* label = context.Input<Tensor>("Label");
-    auto* output = context.Output<Tensor>("Out");
-    output->mutable_data<T>(context.GetPlace());
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-
-    // attrs
-    int ignore_index = context.Attr<int>("ignore_index");
-    bool normalize = context.Attr<bool>("normalize");
-
-    // allocate temp memory
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-    int* hit = RAII_GUARD.alloc_l3_or_gm<int>(input->numel());
-    PADDLE_ENFORCE_NOT_NULL(
-        hit, platform::errors::External("XPU alloc_l3_or_gm returns nullptr"));
-
-    int r = xpu::sigmoid_cross_entropy_with_logits<XPUType>(
-        dev_ctx.x_context(),
-        reinterpret_cast<const XPUType*>(input->data<T>()),
-        reinterpret_cast<const XPUType*>(label->data<T>()),
-        reinterpret_cast<XPUType*>(output->data<T>()),
-        1,
-        input->numel(),
-        hit,
-        ignore_index);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid_cross_entropy_with_logits");
-    if (normalize) {
-      int* non_zero = RAII_GUARD.alloc_l3_or_gm<int>(1);
-      PADDLE_ENFORCE_NOT_NULL(
-          non_zero,
-          platform::errors::External("XPU alloc_l3_or_gm returns nullptr"));
-      int r = xpu::nonzero_count(dev_ctx.x_context(),
-                                 reinterpret_cast<const int*>(hit),
-                                 non_zero,
-                                 input->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "nonzero_count");
-      int non_zero_cpu = 0;
-      memory::Copy(platform::CPUPlace(),
-                   static_cast<void*>(&non_zero_cpu),
-                   context.GetPlace(),
-                   static_cast<void*>(non_zero),
-                   sizeof(int));
-      r = xpu::scale(dev_ctx.x_context(),
-                     reinterpret_cast<const XPUType*>(output->data<T>()),
-                     reinterpret_cast<XPUType*>(output->data<T>()),
-                     input->numel(),
-                     false,
-                     1.0f / static_cast<float>(non_zero_cpu),
-                     0.0f);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
-    }
-  }
-};
-
-template <typename DeviceContext, typename T>
-class SigmoidCrossEntropyWithLogitsGradXPUKernel
-    : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE_EQ(
-        platform::is_xpu_place(context.GetPlace()),
-        true,
-        platform::errors::Unavailable("This kernel only runs on XPU."));
-
-    // input and output data
-    auto* input = context.Input<Tensor>("X");
-    auto* label = context.Input<Tensor>("Label");
-    auto* dy = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
-    dx->mutable_data<T>(context.GetPlace());
-    auto& dev_ctx = context.template device_context<DeviceContext>();
-
-    // attrs
-    int ignore_index = context.Attr<int>("ignore_index");
-    bool normalize = context.Attr<bool>("normalize");
-
-    // allocate temp memory
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-    int* hit = RAII_GUARD.alloc_l3_or_gm<int>(input->numel());
-    PADDLE_ENFORCE_NOT_NULL(
-        hit, platform::errors::External("XPU alloc_l3_or_gm returns nullptr"));
-
-    int r = xpu::sigmoid_cross_entropy_with_logits_grad<XPUType>(
-        dev_ctx.x_context(),
-        reinterpret_cast<const XPUType*>(input->data<T>()),
-        reinterpret_cast<const XPUType*>(label->data<T>()),
-        reinterpret_cast<const XPUType*>(dy->data<T>()),
-        reinterpret_cast<XPUType*>(dx->data<T>()),
-        1,
-        input->numel(),
-        hit,
-        ignore_index);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid_cross_entropy_with_logits");
-    if (normalize) {
-      int* non_zero = RAII_GUARD.alloc_l3_or_gm<int>(1);
-      PADDLE_ENFORCE_NOT_NULL(
-          non_zero,
-          platform::errors::External("XPU alloc_l3_or_gm returns nullptr"));
-      int r = xpu::nonzero_count(dev_ctx.x_context(),
-                                 reinterpret_cast<const int*>(hit),
-                                 non_zero,
-                                 input->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "nonzero_count");
-      int non_zero_cpu = 0;
-      memory::Copy(platform::CPUPlace(),
-                   static_cast<void*>(&non_zero_cpu),
-                   context.GetPlace(),
-                   static_cast<void*>(non_zero),
-                   sizeof(int));
-      r = xpu::scale(dev_ctx.x_context(),
-                     reinterpret_cast<const XPUType*>(dx->data<T>()),
-                     reinterpret_cast<XPUType*>(dx->data<T>()),
-                     input->numel(),
-                     false,
-                     1.0f / static_cast<float>(non_zero_cpu),
-                     0.0f);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(sigmoid_cross_entropy_with_logits,
-                       ops::SigmoidCrossEntropyWithLogitsXPUKernel<
-                           paddle::platform::XPUDeviceContext,
-                           float>);
-
-REGISTER_OP_XPU_KERNEL(sigmoid_cross_entropy_with_logits_grad,
-                       ops::SigmoidCrossEntropyWithLogitsGradXPUKernel<
-                           paddle::platform::XPUDeviceContext,
-                           float>);
-
-#endif
diff --git a/paddle/fluid/operators/tile_op_xpu.cc b/paddle/fluid/operators/tile_op_xpu.cc
deleted file mode 100644
index 9e437f59dec..00000000000
--- a/paddle/fluid/operators/tile_op_xpu.cc
+++ /dev/null
@@ -1,142 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/tile_op_functor.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename T>
-class TileXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto rank = context.Input<Tensor>("X")->dims().size();
-    PADDLE_ENFORCE_GE(
-        rank,
-        1,
-        platform::errors::InvalidArgument(
-            "The rank of the input 'x' for tile op must be a positive "
-            "integer, but the value received is %d.",
-            rank));
-    PADDLE_ENFORCE_LE(
-        rank,
-        MAX_RANK_SUPPORTED,
-        platform::errors::InvalidArgument(
-            "The rank of the input 'x' for tile op "
-            "must be less than or equal to %d, but the value received is %d.",
-            MAX_RANK_SUPPORTED,
-            rank));
-    auto repeat_times = get_repeat_times(context);
-    int repeat_times_size = repeat_times.size();
-    PADDLE_ENFORCE_GE(
-        repeat_times_size,
-        1,
-        platform::errors::InvalidArgument(
-            "The number of elements of the input 'repeat_times' for tile "
-            "op must be positive, but the value received is %d.",
-            repeat_times_size));
-    PADDLE_ENFORCE_LE(
-        repeat_times_size,
-        MAX_RANK_SUPPORTED,
-        platform::errors::InvalidArgument(
-            "The number of elements of the input 'repeat_times' for tile op "
-            "must be less than or equal to %d, but the value received is %d.",
-            MAX_RANK_SUPPORTED,
-            repeat_times_size));
-
-    auto* in0 = context.Input<Tensor>("X");
-    auto in_dims = in0->dims();
-    for (size_t i = 0; i < repeat_times.size(); ++i) {
-      PADDLE_ENFORCE_GT(
-          repeat_times[i],
-          0,
-          platform::errors::InvalidArgument(
-              "All elements of the input 'repeat_times' for tile op must "
-              "be positive integers, but the value received is %d.",
-              repeat_times[i]));
-    }
-    auto vec_in_dims = phi::vectorize<int>(in_dims);
-    if (repeat_times.size() < vec_in_dims.size()) {
-      int diff = vec_in_dims.size() - repeat_times.size();
-      repeat_times.insert(repeat_times.begin(), diff, 1);
-    } else {
-      int diff = repeat_times.size() - vec_in_dims.size();
-      vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
-    }
-    PADDLE_ENFORCE_EQ(
-        repeat_times.size(),
-        vec_in_dims.size(),
-        platform::errors::InvalidArgument(
-            "The rank (%d) of the input 'x' and the rank (%d) of the input "
-            "'repeat_times' for tile op must match after promotion.",
-            vec_in_dims.size(),
-            repeat_times.size()));
-
-    auto* out0 = context.Output<Tensor>("Out");
-    framework::DDim new_in_dims = phi::make_ddim(vec_in_dims);
-    framework::DDim out_dims(new_in_dims);
-
-    for (size_t i = 0; i < repeat_times.size(); ++i) {
-      out_dims[i] *= repeat_times[i];
-    }
-    auto vec_out_dims = phi::vectorize<int>(out_dims);
-    out0->Resize(out_dims);
-    out0->mutable_data<T>(context.GetPlace());
-
-    auto& dev_ctx =
-        context.template device_context<paddle::platform::XPUDeviceContext>();
-    std::vector<int> temp(repeat_times.size(), 1);
-    if (repeat_times == temp) {
-      framework::TensorCopy(*in0, context.GetPlace(), dev_ctx, out0);
-      return;
-    }
-
-    int ret = XPU_SUCCESS;
-    if (std::is_same<T, bool>::value) {
-      ret = xpu::broadcast<int8_t>(
-          dev_ctx.x_context(),
-          reinterpret_cast<const int8_t*>(in0->data<T>()),
-          reinterpret_cast<int8_t*>(out0->data<T>()),
-          vec_in_dims,
-          vec_out_dims);
-
-    } else {
-      ret = xpu::broadcast<T>(dev_ctx.x_context(),
-                              in0->data<T>(),
-                              out0->data<T>(),
-                              vec_in_dims,
-                              vec_out_dims);
-    }
-    PADDLE_ENFORCE_EQ(
-        ret,
-        XPU_SUCCESS,
-        platform::errors::External("XPU tile kernel return wrong value[%d %s]",
-                                   ret,
-                                   XPUAPIErrorMsg[ret]));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(tile,
-                       ops::TileXPUKernel<bool>,
-                       ops::TileXPUKernel<float>,
-                       ops::TileXPUKernel<int>,
-                       ops::TileXPUKernel<int64_t>);
-
-#endif
diff --git a/paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_grad_kernel.cc b/paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_grad_kernel.cc
new file mode 100644
index 00000000000..5485f143445
--- /dev/null
+++ b/paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_grad_kernel.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+
+#include "paddle/phi/kernels/sigmoid_cross_entropy_with_logits_grad_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+#include "paddle/fluid/memory/memcpy.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void SigmoidCrossEntropyWithLogitsGradKernel(const Context& dev_ctx,
+                                             const DenseTensor& x,
+                                             const DenseTensor& label,
+                                             const DenseTensor& out_grad,
+                                             bool normalize,
+                                             int ignore_index,
+                                             DenseTensor* in_grad) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  PADDLE_ENFORCE_EQ(x.place().GetType() == phi::AllocationType::XPU,
+                    true,
+                    errors::Unavailable("This kernel only runs on XPU."));
+
+  dev_ctx.template Alloc<T>(in_grad);
+
+  // allocate temp memory
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+  int* hit = RAII_GUARD.alloc_l3_or_gm<int>(x.numel());
+  PADDLE_ENFORCE_NOT_NULL(
+      hit, errors::External("XPU alloc_l3_or_gm returns nullptr"));
+
+  int r = xpu::sigmoid_cross_entropy_with_logits_grad<XPUType>(
+      dev_ctx.x_context(),
+      reinterpret_cast<const XPUType*>(x.data<T>()),
+      reinterpret_cast<const XPUType*>(label.data<T>()),
+      reinterpret_cast<const XPUType*>(out_grad.data<T>()),
+      reinterpret_cast<XPUType*>(in_grad->data<T>()),
+      1,
+      x.numel(),
+      hit,
+      ignore_index);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid_cross_entropy_with_logits");
+  if (normalize) {
+    int* non_zero = RAII_GUARD.alloc_l3_or_gm<int>(1);
+    PADDLE_ENFORCE_NOT_NULL(
+        non_zero, errors::External("XPU alloc_l3_or_gm returns nullptr"));
+    int r = xpu::nonzero_count(dev_ctx.x_context(),
+                               reinterpret_cast<const int*>(hit),
+                               non_zero,
+                               x.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "nonzero_count");
+    int non_zero_cpu = 0;
+    paddle::memory::Copy(CPUPlace(),
+                         static_cast<void*>(&non_zero_cpu),
+                         dev_ctx.GetPlace(),
+                         static_cast<void*>(non_zero),
+                         sizeof(int));
+    r = xpu::scale(dev_ctx.x_context(),
+                   reinterpret_cast<const XPUType*>(in_grad->data<T>()),
+                   reinterpret_cast<XPUType*>(in_grad->data<T>()),
+                   x.numel(),
+                   false,
+                   1.0f / static_cast<float>(non_zero_cpu),
+                   0.0f);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
+  }
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(sigmoid_cross_entropy_with_logits_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SigmoidCrossEntropyWithLogitsGradKernel,
+                   float) {}
diff --git a/paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_kernel.cc b/paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_kernel.cc
new file mode 100644
index 00000000000..2a4e94b02c3
--- /dev/null
+++ b/paddle/phi/kernels/xpu/sigmoid_cross_entropy_with_logits_kernel.cc
@@ -0,0 +1,87 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+
+#include "paddle/phi/kernels/sigmoid_cross_entropy_with_logits_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+#include "paddle/fluid/memory/memcpy.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void SigmoidCrossEntropyWithLogitsKernel(const Context& dev_ctx,
+                                         const DenseTensor& x,
+                                         const DenseTensor& label,
+                                         bool normalize,
+                                         int ignore_index,
+                                         DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  PADDLE_ENFORCE_EQ(x.place().GetType() == phi::AllocationType::XPU,
+                    true,
+                    errors::Unavailable("This kernel only runs on XPU."));
+
+  dev_ctx.template Alloc<T>(out);
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+  int* hit = RAII_GUARD.alloc_l3_or_gm<int>(x.numel());
+  PADDLE_ENFORCE_NOT_NULL(
+      hit, errors::External("XPU alloc_l3_or_gm returns nullptr"));
+
+  int r = xpu::sigmoid_cross_entropy_with_logits<XPUType>(
+      dev_ctx.x_context(),
+      reinterpret_cast<const XPUType*>(x.data<T>()),
+      reinterpret_cast<const XPUType*>(label.data<T>()),
+      reinterpret_cast<XPUType*>(out->data<T>()),
+      1,
+      x.numel(),
+      hit,
+      ignore_index);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid_cross_entropy_with_logits");
+  if (normalize) {
+    int* non_zero = RAII_GUARD.alloc_l3_or_gm<int>(1);
+    PADDLE_ENFORCE_NOT_NULL(
+        non_zero, errors::External("XPU alloc_l3_or_gm returns nullptr"));
+    int r = xpu::nonzero_count(dev_ctx.x_context(),
+                               reinterpret_cast<const int*>(hit),
+                               non_zero,
+                               x.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "nonzero_count");
+    int non_zero_cpu = 0;
+    paddle::memory::Copy(CPUPlace(),
+                         static_cast<void*>(&non_zero_cpu),
+                         dev_ctx.GetPlace(),
+                         static_cast<void*>(non_zero),
+                         sizeof(int));
+
+    r = xpu::scale(dev_ctx.x_context(),
+                   reinterpret_cast<const XPUType*>(out->data<T>()),
+                   reinterpret_cast<XPUType*>(out->data<T>()),
+                   x.numel(),
+                   false,
+                   1.0f / static_cast<float>(non_zero_cpu),
+                   0.0f);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
+  }
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(sigmoid_cross_entropy_with_logits,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SigmoidCrossEntropyWithLogitsKernel,
+                   float) {}
diff --git a/paddle/phi/kernels/xpu/tile_kernel.cc b/paddle/phi/kernels/xpu/tile_kernel.cc
new file mode 100644
index 00000000000..022e355f4c9
--- /dev/null
+++ b/paddle/phi/kernels/xpu/tile_kernel.cc
@@ -0,0 +1,130 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <type_traits>
+#include <vector>
+
+#include "paddle/phi/kernels/tile_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void TileKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const IntArray& repeat_times_arr,
+                DenseTensor* out) {
+  auto rank = x.dims().size();
+  PADDLE_ENFORCE_GE(
+      rank,
+      1,
+      errors::InvalidArgument(
+          "The rank of the input 'x' for tile op must be a positive "
+          "integer, but the value received is %d.",
+          rank));
+  PADDLE_ENFORCE_LE(
+      rank,
+      MAX_RANK_SUPPORTED,
+      errors::InvalidArgument(
+          "The rank of the input 'x' for tile op "
+          "must be less than or equal to %d, but the value received is %d.",
+          MAX_RANK_SUPPORTED,
+          rank));
+  std::vector<int64_t> repeat_times = repeat_times_arr.GetData();
+  int repeat_times_size = repeat_times.size();
+  PADDLE_ENFORCE_GE(
+      repeat_times_size,
+      1,
+      errors::InvalidArgument(
+          "The number of elements of the input 'repeat_times' for tile "
+          "op must be positive, but the value received is %d.",
+          repeat_times_size));
+  PADDLE_ENFORCE_LE(
+      repeat_times_size,
+      MAX_RANK_SUPPORTED,
+      errors::InvalidArgument(
+          "The number of elements of the input 'repeat_times' for tile op "
+          "must be less than or equal to %d, but the value received is %d.",
+          MAX_RANK_SUPPORTED,
+          repeat_times_size));
+
+  auto in_dims = x.dims();
+  for (size_t i = 0; i < repeat_times.size(); ++i) {
+    PADDLE_ENFORCE_GT(
+        repeat_times[i],
+        0,
+        errors::InvalidArgument(
+            "All elements of the input 'repeat_times' for tile op must "
+            "be positive integers, but the value received is %d.",
+            repeat_times[i]));
+  }
+  auto vec_in_dims = phi::vectorize<int>(in_dims);
+  if (repeat_times.size() < vec_in_dims.size()) {
+    int diff = vec_in_dims.size() - repeat_times.size();
+    repeat_times.insert(repeat_times.begin(), diff, 1);
+  } else {
+    int diff = repeat_times.size() - vec_in_dims.size();
+    vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
+  }
+  PADDLE_ENFORCE_EQ(
+      repeat_times.size(),
+      vec_in_dims.size(),
+      errors::InvalidArgument(
+          "The rank (%d) of the input 'x' and the rank (%d) of the input "
+          "'repeat_times' for tile op must match after promotion.",
+          vec_in_dims.size(),
+          repeat_times.size()));
+
+  DDim new_in_dims = phi::make_ddim(vec_in_dims);
+  DDim out_dims(new_in_dims);
+
+  for (size_t i = 0; i < repeat_times.size(); ++i) {
+    out_dims[i] *= repeat_times[i];
+  }
+  auto vec_out_dims = phi::vectorize<int>(out_dims);
+  out->Resize(out_dims);
+  dev_ctx.template Alloc<T>(out);
+
+  std::vector<int64_t> temp(repeat_times.size(), 1);
+  if (repeat_times == temp) {
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
+    return;
+  }
+
+  int ret = XPU_SUCCESS;
+  if (std::is_same<T, bool>::value) {
+    ret = xpu::broadcast<int8_t>(dev_ctx.x_context(),
+                                 reinterpret_cast<const int8_t*>(x.data<T>()),
+                                 reinterpret_cast<int8_t*>(out->data<T>()),
+                                 vec_in_dims,
+                                 vec_out_dims);
+
+  } else {
+    ret = xpu::broadcast<T>(dev_ctx.x_context(),
+                            x.data<T>(),
+                            out->data<T>(),
+                            vec_in_dims,
+                            vec_out_dims);
+  }
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "broadcast");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    tile, XPU, ALL_LAYOUT, phi::TileKernel, bool, float, int, int64_t) {}
-- 
GitLab
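The shape arithmetic that both the removed fluid TileXPUKernel and the new phi::TileKernel perform before calling xpu::broadcast can be exercised on its own: the shorter of the input dims and repeat_times is padded with leading 1s until both have the same rank, and the output dims are the element-wise product. The following is a minimal standalone C++ sketch of that step, not part of the patch; the helper name promote_and_tile_dims and the example shapes are illustrative only.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the rank-promotion step in TileKernel: pad the shorter of the
// input dims and repeat_times with leading 1s so both have the same rank,
// then multiply element-wise to get the output dims.
std::vector<int64_t> promote_and_tile_dims(std::vector<int64_t> in_dims,
                                           std::vector<int64_t> repeat_times) {
  if (repeat_times.size() < in_dims.size()) {
    repeat_times.insert(
        repeat_times.begin(), in_dims.size() - repeat_times.size(), 1);
  } else {
    in_dims.insert(in_dims.begin(), repeat_times.size() - in_dims.size(), 1);
  }
  std::vector<int64_t> out_dims(in_dims);
  for (size_t i = 0; i < out_dims.size(); ++i) {
    out_dims[i] *= repeat_times[i];
  }
  return out_dims;
}

int main() {
  // x of shape [2, 3] tiled with repeat_times [2, 1, 4]:
  // x is promoted to [1, 2, 3], so the output shape is [2, 2, 12].
  for (int64_t d : promote_and_tile_dims({2, 3}, {2, 1, 4})) {
    std::cout << d << ' ';
  }
  std::cout << '\n';  // prints: 2 2 12
  return 0;
}
```

The promoted input and output dims are exactly what the kernel hands to xpu::broadcast as vec_in_dims and vec_out_dims; when every repeat factor is 1, the kernel skips the broadcast entirely and falls back to a plain tensor copy.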