// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/phi/kernels/elementwise_add_grad_kernel.h" #include #include #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/backends/xpu/xpu_context.h" #include "paddle/phi/backends/xpu/xpu_header.h" #include "paddle/phi/backends/xpu/xpu_info.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" namespace phi { template void AddGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, const DenseTensor& dout, int axis, DenseTensor* dx, DenseTensor* dy) { using XPUType = typename XPUTypeTrait::Type; funcs::ElementwiseGradPreProcess(dout, dx); auto* dz = &dout; const DDim& dz_dims = dz->dims(); const T* dz_data = dz->data(); if (dx != nullptr) { T* dx_data = dev_ctx.template Alloc(dx); if (dx->dims() == dz_dims) { if (dx_data != dz_data) { int ret = xpu::copy(dev_ctx.x_context(), reinterpret_cast(dz_data), reinterpret_cast(dx->data()), dx->numel()); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); } } else { // For inplace strategy, dx will be stored in addr of dz, which makes // the result of dy wrong. if (dx->IsSharedBufferWith(*dz)) { dx->clear(); dx->Resize(x.dims()); dev_ctx.template Alloc(dx); } std::vector reduce_dims = funcs::GetReduceDim(dx->dims(), dz_dims, axis); std::vector dz_vector = phi::vectorize(dz_dims); int ret = xpu::reduce_sum(dev_ctx.x_context(), reinterpret_cast(dz_data), reinterpret_cast(dx->data()), dz_vector, reduce_dims); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reduce_sum"); } } if (dy != nullptr) { T* dy_data = dev_ctx.template Alloc(dy); if (dy->dims() == dz_dims) { if (dy_data != dz_data) { int ret = xpu::copy(dev_ctx.x_context(), reinterpret_cast(dz_data), reinterpret_cast(dy->data()), dy->numel()); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); } } else { std::vector reduce_dims = funcs::GetReduceDim(dy->dims(), dz_dims, axis); std::vector dz_vector = phi::vectorize(dz_dims); int ret = xpu::reduce_sum(dev_ctx.x_context(), reinterpret_cast(dz_data), reinterpret_cast(dy_data), dz_vector, reduce_dims); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reduce_sum"); } } } } // namespace phi PD_REGISTER_KERNEL( add_grad, XPU, ALL_LAYOUT, phi::AddGradKernel, phi::dtype::float16, float) { }