diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc
deleted file mode 100644
index a445868153452ec15a7df2fe419f7b2301132dca..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/sum_op_xpu.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef PADDLE_WITH_XPU
-
-#include <vector>
-
-#include "paddle/fluid/framework/lod_tensor_array.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/device/device_wrapper.h"
-#include "paddle/fluid/platform/device/xpu/xpu_header.h"
-
-namespace paddle {
-namespace operators {
-using framework::Tensor;
-using SelectedRows = phi::SelectedRows;
-using LoDTensor = framework::LoDTensor;
-template <typename DeviceContext, typename T>
-class SumXPUKernel : public framework::OpKernel<T> {
-  using XPUType = typename XPUTypeTrait<T>::Type;
-
- public:
-  void Compute(const framework::ExecutionContext &context) const override {
-    auto in_vars = context.MultiInputVar("X");
-    auto out_var = context.OutputVar("Out");
-
-    if (out_var->IsType<framework::LoDTensor>()) {
-      auto *out = context.Output<LoDTensor>("Out");
-      bool in_place = out_var == in_vars[0];
-      int N = in_vars.size();
-
-      if (!in_place) {
-        out->mutable_data<T>(context.GetPlace());
-      }
-      auto &dev_ctx = context.template device_context<DeviceContext>();
-      std::vector<const XPUType *> ptrs;
-      for (int i = 0; i < N; ++i) {
-        PADDLE_ENFORCE_EQ(
-            in_vars[i]->IsType<framework::LoDTensor>(),
-            true,
-            platform::errors::InvalidArgument("XPU only support LoDTensor"));
-        auto &in_t = in_vars[i]->Get<framework::LoDTensor>();
-        if (in_t.numel() == 0) {
-          continue;
-        }
-        ptrs.push_back(reinterpret_cast<const XPUType *>(in_t.data<T>()));
-      }
-      int r = xpu::sum(dev_ctx.x_context(),
-                       ptrs,
-                       reinterpret_cast<XPUType *>(out->data<T>()),
-                       out->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "sum");
-    } else if (out_var->IsType<framework::LoDTensorArray>()) {
-      bool in_place = out_var == in_vars[0];
-      auto &out_array = *out_var->GetMutable<framework::LoDTensorArray>();
-
-      for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
-        PADDLE_ENFORCE_EQ(in_vars[i]->IsType<framework::LoDTensorArray>(),
-                          true,
-                          platform::errors::InvalidArgument(
-                              "Only support all inputs are TensorArray, "
-                              "but inputs[%d] is not TensorArray.",
-                              i));
-        auto &in_array = in_vars[i]->Get<framework::LoDTensorArray>();
-
-        for (size_t i = 0; i < in_array.size(); ++i) {
-          if (in_array[i].IsInitialized() && (in_array[i].numel() != 0)) {
-            if (i >= out_array.size()) {
-              out_array.resize(i + 1);
-            }
-            if (!out_array[i].IsInitialized() || (out_array[i].numel() == 0)) {
-              framework::TensorCopy(in_array[i],
-                                    in_array[i].place(),
-                                    context.device_context(),
-                                    &out_array[i]);
-              out_array[i].set_lod(in_array[i].lod());
-            } else {
-              PADDLE_ENFORCE_EQ(
-                  out_array[i].lod(),
-                  in_array[i].lod(),
-                  platform::errors::InvalidArgument(
-                      "The lod message between inputs[%d] and"
-                      " outputs[%d] must be same, but now is not same.",
-                      i,
-                      i));
-
-              std::vector<const XPUType *> ptrs;
-              ptrs.push_back(
-                  reinterpret_cast<const XPUType *>(in_array[i].data<T>()));
-              ptrs.push_back(
-                  reinterpret_cast<const XPUType *>(out_array[i].data<T>()));
-
-              auto &dev_ctx = context.template device_context<DeviceContext>();
-              // int sum(Context* ctx, const std::vector<const T*>& x_list, T*
-              // y, int len);
-              int r =
-                  xpu::sum(dev_ctx.x_context(),
-                           ptrs,
-                           reinterpret_cast<XPUType *>(out_array[i].data<T>()),
-                           out_array[i].numel());
-              PADDLE_ENFORCE_XDNN_SUCCESS(r, "sum");
-            }
-          }
-        }
-      }
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Expected type of Output(out) must be Tensor or "
-          "LoDTensorArray. But got "
-          "unsupport type: %s.",
-          framework::ToTypeName(out_var->Type())));
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_XPU_KERNEL(
-    sum,
-    ops::SumXPUKernel<paddle::platform::XPUDeviceContext, float>,
-    ops::SumXPUKernel<paddle::platform::XPUDeviceContext,
-                      paddle::platform::float16>);
-#endif
diff --git a/paddle/phi/kernels/xpu/add_n_kernel.cc b/paddle/phi/kernels/xpu/add_n_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3c5cae9d35660e17c8fe5e639a296f890023959b
--- /dev/null
+++ b/paddle/phi/kernels/xpu/add_n_kernel.cc
@@ -0,0 +1,130 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/add_n_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void AddNKernel(const Context& dev_ctx,
+                const std::vector<const TensorBase*>& x,
+                DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  size_t in_num = x.size();
+  bool in_place = false;
+  if (x.size() > 0 && x[0]->initialized() && DenseTensor::classof(x[0])) {
+    if ((static_cast<const DenseTensor*>(x[0]))->Holder() == out->Holder()) {
+      in_place = true;
+    }
+  }
+
+  if (!in_place) {
+    dev_ctx.template Alloc<T>(out);
+  }
+  std::vector<const XPUType*> ptrs;
+  for (size_t i = 0; i < in_num; ++i) {
+    PADDLE_ENFORCE_EQ(DenseTensor::classof(x[i]),
+                      true,
+                      errors::InvalidArgument("XPU only support DenseTensor"));
+
+    auto& in_t = *(static_cast<const DenseTensor*>(x[i]));
+    if (in_t.numel() == 0) {
+      continue;
+    }
+    ptrs.push_back(reinterpret_cast<const XPUType*>(in_t.data<T>()));
+  }
+  int r = xpu::sum(dev_ctx.x_context(),
+                   ptrs,
+                   reinterpret_cast<XPUType*>(out->data<T>()),
+                   out->numel());
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "sum");
+}
+
+template <typename T, typename Context>
+void AddNArrayKernel(const Context& dev_ctx,
+                     const std::vector<const TensorArray*>& x,
+                     TensorArray* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  for (auto& ele : *out) {
+    dev_ctx.template Alloc<T>(&ele);
+  }
+  bool in_place = true;
+  if (x.size() > 0 && x[0]->size() == out->size()) {
+    for (size_t i = 0; i < out->size(); i++) {
+      if (x[0]->at(i).IsInitialized() &&
+          out->at(i).data() != x[0]->at(i).data()) {
+        in_place = false;
+        break;
+      }
+    }
+  } else {
+    in_place = false;
+  }
+
+  for (size_t i = in_place ? 1 : 0; i < x.size(); ++i) {
+    auto* in_array = x.at(i);
+
+    for (size_t j = 0; j < in_array->size(); ++j) {
+      if (in_array->at(j).IsInitialized() && (in_array->at(j).numel() != 0)) {
+        if (j >= out->size()) {
+          out->resize(j + 1);
+        }
+        if (!out->at(j).IsInitialized() || (out->at(j).numel() == 0)) {
+          Copy(dev_ctx,
+               in_array->at(j),
+               in_array->at(j).place(),
+               false,
+               &out->at(j));
+          out->at(j).set_lod(in_array->at(j).lod());
+        } else {
+          PADDLE_ENFORCE_EQ(
+              out->at(j).lod(),
+              in_array->at(j).lod(),
+              phi::errors::InvalidArgument(
+                  "The lod message between inputs[%d] and"
+                  " outputs[%d] must be same, but now is not same.",
+                  j,
+                  j));
+
+          std::vector<const XPUType*> ptrs;
+          ptrs.push_back(
+              reinterpret_cast<const XPUType*>(in_array->at(j).data<T>()));
+          ptrs.push_back(
+              reinterpret_cast<const XPUType*>(out->at(j).data<T>()));
+
+          // int sum(Context* ctx, const std::vector<const T*>& x_list, T*
+          // y, int len);
+          int r = xpu::sum(dev_ctx.x_context(),
+                           ptrs,
+                           reinterpret_cast<XPUType*>(out->at(j).data<T>()),
+                           out->at(j).numel());
+          PADDLE_ENFORCE_XDNN_SUCCESS(r, "sum");
+        }
+      }
+    }
+  }
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    add_n, XPU, ALL_LAYOUT, phi::AddNKernel, float, phi::dtype::float16) {}
+PD_REGISTER_KERNEL(add_n_array,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::AddNArrayKernel,
+                   float,
+                   phi::dtype::float16) {}
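
Not part of the diff itself: the change only moves the XPU sum kernel from the fluid operator registry to the phi `add_n` / `add_n_array` registrations, so the user-facing behavior should be unchanged. A minimal usage sketch that exercises the new registration, assuming a PaddlePaddle wheel built with XPU support and at least one visible XPU device, could look like this:

```python
import paddle

# Assumes paddle was compiled with XPU support and an XPU device is available.
paddle.set_device("xpu")

x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype="float32")
y = paddle.to_tensor([[5.0, 6.0], [7.0, 8.0]], dtype="float32")

# add_n on a list of dense tensors dispatches to the XPU add_n kernel
# registered above (float and float16 are the registered dtypes).
out = paddle.add_n([x, y])
print(out.numpy())  # [[ 6.  8.], [10. 12.]]
```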