// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/grid_sample_grad_kernel.h"

#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/grid_sample_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"

namespace phi {

// Clips the unnormalized grid coordinates in grid_slice according to
// padding_mode, and writes into grid_scale the derivative of the resulting
// mapping w.r.t. the normalized coordinate: the unnormalization factor,
// sign-flipped across reflections and zeroed where clamping applies.
template <typename T>
static inline void ClipWithMask(const CPUContext& ctx,
                                const int max_val,  // height - 1 or width - 1
                                bool align_corners,
                                std::string padding_mode,
                                DenseTensor* grid_slice,
                                DenseTensor* grid_scale) {
  auto& place = *ctx.eigen_device();
  grid_scale->Resize(grid_slice->dims());
  ctx.Alloc<T>(grid_scale);

  auto grid_slice_t = EigenTensor<T, 3>::From(*grid_slice);
  // Derivative of the unnormalization step: max_val / 2 when align_corners,
  // (max_val + 1) / 2 otherwise.
  auto factor = static_cast<T>(max_val * 0.5);
  if (!align_corners) {
    factor = static_cast<T>((max_val + 1) * 0.5);
  }
  auto grid_scale_t = EigenTensor<T, 3>::From(*grid_scale).setConstant(factor);

  if (padding_mode == "border") {
    // Clamp into [0, max_val]; the gradient mask is 1 only where the
    // coordinate was already in bounds.
    auto res = grid_slice_t.cwiseMax(static_cast<T>(0))
                   .cwiseMin(static_cast<T>(max_val));
    auto in_bound = (res == grid_slice_t);
    grid_scale_t.device(place) = grid_scale_t * in_bound.template cast<T>();
    grid_slice_t.device(place) = res;
  } else if (padding_mode == "reflection") {
    if (align_corners) {
      auto double_range = static_cast<T>(max_val * 2);
      auto is_neg = (grid_slice_t < static_cast<T>(0));
      auto grid_abs = grid_slice_t.abs();
      auto extra = grid_abs - (grid_abs / double_range).floor() * double_range;
      auto one_more_flip = (extra > (double_range - extra));
      // The derivative sign is +1 for an even number of reflections and -1
      // for an odd number.
      grid_scale_t.device(place) =
          grid_scale_t * ((is_neg == one_more_flip).template cast<T>() -
                          (is_neg != one_more_flip).template cast<T>());
      grid_slice_t.device(place) = extra.cwiseMin(double_range - extra);
      if (max_val == 0) {
        grid_slice_t.device(place) = grid_slice_t.constant(static_cast<T>(0));
      }
    } else {
      auto double_range = static_cast<T>((max_val + 1) * 2);
      auto grid_abs = (grid_slice_t + static_cast<T>(0.5)).abs();
      auto is_neg =
          ((grid_slice_t + static_cast<T>(0.5)) < static_cast<T>(0));
      auto extra = grid_abs - (grid_abs / double_range).floor() * double_range;
      auto one_more_flip = (extra > (double_range - extra));
      auto reflected =
          extra.cwiseMin(double_range - extra) - static_cast<T>(0.5);
      auto clipped = reflected.cwiseMax(static_cast<T>(0))
                         .cwiseMin(static_cast<T>(max_val));
      auto in_bound = (clipped == reflected).template cast<T>();
      grid_scale_t.device(place) =
          grid_scale_t * ((is_neg == one_more_flip).template cast<T>() -
                          (is_neg != one_more_flip).template cast<T>()) *
          in_bound;
      grid_slice_t.device(place) = clipped;
    }
  }
}
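// A scalar sketch of the reflection rule above (illustrative only; the
// kernel evaluates it element-wise through Eigen expressions). With
// align_corners and double_range = 2 * max_val:
//   extra     = |x| - floor(|x| / double_range) * double_range
//   reflected = min(extra, double_range - extra)  // folded into [0, max_val]
// Each fold mirrors the coordinate, so d(reflected)/dx is +1 when the number
// of mirrorings is even (x < 0 counts as one, extra > max_val as another)
// and -1 when it is odd, which is exactly the sign recorded in grid_scale.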
template <typename T>
static void CalcGridLocationsWithGrad(const CPUContext& ctx,
                                      const DenseTensor& grid,
                                      const int in_h,
                                      const int in_w,
                                      bool align_corners,
                                      std::string padding_mode,
                                      DenseTensor* grid_x,
                                      DenseTensor* grid_y,
                                      DenseTensor* grid_x_scale,
                                      DenseTensor* grid_y_scale) {
  const int n = grid.dims()[0];
  const int out_h = grid.dims()[1];
  const int out_w = grid.dims()[2];

  // Split grid with shape (n, h, w, 2) into x and y along the last dim.
  grid_x->Resize({n, out_h, out_w});
  grid_y->Resize({n, out_h, out_w});
  T* grid_x_data = ctx.Alloc<T>(grid_x);
  T* grid_y_data = ctx.Alloc<T>(grid_y);
  const T* grid_data = grid.data<T>();
  for (int i = 0; i < n * out_h * out_w; i++) {
    grid_x_data[i] = grid_data[2 * i];
    grid_y_data[i] = grid_data[(2 * i) + 1];
  }

  Unnormalize<T>(ctx, grid_x, in_w - 1, align_corners);
  Unnormalize<T>(ctx, grid_y, in_h - 1, align_corners);

  ClipWithMask<T>(
      ctx, in_w - 1, align_corners, padding_mode, grid_x, grid_x_scale);
  ClipWithMask<T>(
      ctx, in_h - 1, align_corners, padding_mode, grid_y, grid_y_scale);
}

// Scatters output_grad back into input_grad at the corner point (x, y),
// weighted by the bilinear weight d1 * d2.
template <typename T>
static void GatherOutputGradToInputGrad(const DenseTensor& output_grad,
                                        DenseTensor* input_grad,
                                        const DenseTensor& x,
                                        const DenseTensor& y,
                                        const DenseTensor& d1,
                                        const DenseTensor& d2) {
  const int n = output_grad.dims()[0];
  const int c = output_grad.dims()[1];
  const int out_h = output_grad.dims()[2];
  const int out_w = output_grad.dims()[3];
  const int in_h = input_grad->dims()[2];
  const int in_w = input_grad->dims()[3];
  auto x_t = EigenTensor<T, 3>::From(x);
  auto y_t = EigenTensor<T, 3>::From(y);
  auto d1_t = EigenTensor<T, 3>::From(d1);
  auto d2_t = EigenTensor<T, 3>::From(d2);
  auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);

  for (int i = 0; i < n; i++) {
    for (int k = 0; k < out_h; k++) {
      for (int l = 0; l < out_w; l++) {
        if (IsInBound(x_t(i, k, l),
                      y_t(i, k, l),
                      static_cast<T>(in_w - 1),
                      static_cast<T>(in_h - 1))) {
          for (int j = 0; j < c; j++) {
            input_grad_t(i,
                         j,
                         static_cast<int>(round(y_t(i, k, l))),
                         static_cast<int>(round(x_t(i, k, l)))) +=
                output_grad_t(i, j, k, l) * d1_t(i, k, l) * d2_t(i, k, l);
          }
        }
      }
    }
  }
}
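// Worked example of the scatter above: for one output pixel sampled at
// (gx, gy) = (1.3, 2.7), the corners are x_w = 1, x_e = 2, y_n = 2, y_s = 3
// with distances d_w = 0.3, d_e = 0.7, d_n = 0.7, d_s = 0.3. Each corner
// receives the output gradient weighted by the product of the *opposite*
// distances, e.g. corner (x_w, y_n) gets d_e * d_s = 0.21, and the four
// weights 0.21 + 0.09 + 0.49 + 0.21 sum to 1.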
// Computes input_grad and (optionally) grid_grad for bilinear sampling.
template <typename T>
static void GatherBilinearGrad(const CPUContext& ctx,
                               const DenseTensor& input,
                               const DenseTensor& output_grad,
                               DenseTensor* grid_x,
                               DenseTensor* grid_y,
                               DenseTensor* grid_x_scale,
                               DenseTensor* grid_y_scale,
                               DenseTensor* input_grad,
                               DenseTensor* grid_grad) {
  const int n = grid_x->dims()[0];
  const int out_h = grid_x->dims()[1];
  const int out_w = grid_x->dims()[2];
  const int c = input.dims()[1];

  DenseTensor x_w, x_e, y_n, y_s;
  DenseTensor d_w, d_e, d_n, d_s;
  DenseTensor v_wn, v_en, v_ws, v_es;

  AllNeigbors<T>(ctx,
                 input,
                 grid_x,
                 grid_y,
                 &x_w,
                 &x_e,
                 &y_n,
                 &y_s,
                 &d_w,
                 &d_e,
                 &d_n,
                 &d_s,
                 &v_wn,
                 &v_en,
                 &v_ws,
                 &v_es);

  // Scatter the output gradient to input_grad through each of the four
  // corner points, weighted by the opposite corner's distances.
  GatherOutputGradToInputGrad<T>(output_grad, input_grad, x_w, y_n, d_e, d_s);
  GatherOutputGradToInputGrad<T>(output_grad, input_grad, x_w, y_s, d_e, d_n);
  GatherOutputGradToInputGrad<T>(output_grad, input_grad, x_e, y_n, d_w, d_s);
  GatherOutputGradToInputGrad<T>(output_grad, input_grad, x_e, y_s, d_w, d_n);

  auto v_wn_t = EigenTensor<T, 4>::From(v_wn);
  auto v_en_t = EigenTensor<T, 4>::From(v_en);
  auto v_ws_t = EigenTensor<T, 4>::From(v_ws);
  auto v_es_t = EigenTensor<T, 4>::From(v_es);

  auto d_w_t = EigenTensor<T, 3>::From(d_w);
  auto d_e_t = EigenTensor<T, 3>::From(d_e);
  auto d_n_t = EigenTensor<T, 3>::From(d_n);
  auto d_s_t = EigenTensor<T, 3>::From(d_s);

  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);

  if (grid_grad != nullptr) {
    DenseTensor grid_grad_x, grid_grad_y;
    grid_grad_x.Resize({n, out_h, out_w});
    grid_grad_y.Resize({n, out_h, out_w});
    ctx.Alloc<T>(&grid_grad_x);
    ctx.Alloc<T>(&grid_grad_y);
    auto grid_grad_x_t =
        EigenTensor<T, 3>::From(grid_grad_x).setConstant(static_cast<T>(0.0));
    auto grid_grad_y_t =
        EigenTensor<T, 3>::From(grid_grad_y).setConstant(static_cast<T>(0.0));
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < c; j++) {
        for (int k = 0; k < out_h; k++) {
          for (int l = 0; l < out_w; l++) {
            grid_grad_x_t(i, k, l) +=
                ((v_en_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_s_t(i, k, l) +
                 (v_es_t(i, j, k, l) - v_ws_t(i, j, k, l)) * d_n_t(i, k, l)) *
                output_grad_t(i, j, k, l);
            grid_grad_y_t(i, k, l) +=
                ((v_ws_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_e_t(i, k, l) +
                 (v_es_t(i, j, k, l) - v_en_t(i, j, k, l)) * d_w_t(i, k, l)) *
                output_grad_t(i, j, k, l);
          }
        }
      }
    }

    // Chain rule through the unnormalize-and-clip mapping.
    auto grid_x_scale_t = EigenTensor<T, 3>::From(*grid_x_scale);
    auto grid_y_scale_t = EigenTensor<T, 3>::From(*grid_y_scale);
    grid_grad_x_t = grid_grad_x_t * grid_x_scale_t;
    grid_grad_y_t = grid_grad_y_t * grid_y_scale_t;

    // Interleave the x and y gradients back into the last dim of grid_grad.
    T* grid_grad_data = grid_grad->data<T>();
    T* grid_grad_x_data = grid_grad_x.data<T>();
    T* grid_grad_y_data = grid_grad_y.data<T>();
    for (int i = 0; i < n * out_h * out_w; i++) {
      grid_grad_data[2 * i] = grid_grad_x_data[i];
      grid_grad_data[2 * i + 1] = grid_grad_y_data[i];
    }
  }
}

// Nearest-neighbor variant: the whole output gradient goes to the rounded
// source location.
template <typename T>
static void GatherOutputGradToInputGrad(const DenseTensor& output_grad,
                                        DenseTensor* input_grad,
                                        const DenseTensor& x,
                                        const DenseTensor& y) {
  const int n = output_grad.dims()[0];
  const int c = output_grad.dims()[1];
  const int out_h = output_grad.dims()[2];
  const int out_w = output_grad.dims()[3];
  const int in_h = input_grad->dims()[2];
  const int in_w = input_grad->dims()[3];
  auto x_t = EigenTensor<T, 3>::From(x);
  auto y_t = EigenTensor<T, 3>::From(y);
  auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
  for (int i = 0; i < n; i++) {
    for (int k = 0; k < out_h; k++) {
      for (int l = 0; l < out_w; l++) {
        if (IsInBound(x_t(i, k, l),
                      y_t(i, k, l),
                      static_cast<T>(in_w - 1),
                      static_cast<T>(in_h - 1))) {
          for (int j = 0; j < c; j++) {
            input_grad_t(i,
                         j,
                         static_cast<int>(round(y_t(i, k, l))),
                         static_cast<int>(round(x_t(i, k, l)))) +=
                output_grad_t(i, j, k, l);
          }
        }
      }
    }
  }
}

template <typename T, typename Context>
void GridSampleGradKernel(const Context& dev_ctx,
                          const DenseTensor& x,
                          const DenseTensor& grid,
                          const DenseTensor& out_grid,
                          const std::string& mode,
                          const std::string& padding_mode,
                          bool align_corners,
                          DenseTensor* x_grad,
                          DenseTensor* grid_grad) {
  const int n = grid.dims()[0];
  const int out_h = grid.dims()[1];
  const int out_w = grid.dims()[2];
  const int c = x.dims()[1];
  const int in_h = x.dims()[2];
  const int in_w = x.dims()[3];

  x_grad->Resize({n, c, in_h, in_w});
  dev_ctx.template Alloc<T>(x_grad);
  phi::funcs::SetConstant<Context, T>()(dev_ctx, x_grad, static_cast<T>(0));

  if (grid_grad != nullptr) {
    grid_grad->Resize({n, out_h, out_w, 2});
    dev_ctx.template Alloc<T>(grid_grad);
    phi::funcs::SetConstant<Context, T>()(
        dev_ctx, grid_grad, static_cast<T>(0));
  }

  DenseTensor grid_x, grid_y;
  DenseTensor grid_x_scale, grid_y_scale;

  CalcGridLocationsWithGrad<T>(dev_ctx,
                               grid,
                               in_h,
                               in_w,
                               align_corners,
                               padding_mode,
                               &grid_x,
                               &grid_y,
                               &grid_x_scale,
                               &grid_y_scale);
  if (mode == "bilinear") {
    GatherBilinearGrad<T>(dev_ctx,
                          x,
                          out_grid,
                          &grid_x,
                          &grid_y,
                          &grid_x_scale,
                          &grid_y_scale,
                          x_grad,
                          grid_grad);
  } else {
    auto grid_x_t = EigenTensor<T, 3>::From(grid_x);
    auto grid_y_t = EigenTensor<T, 3>::From(grid_y);
    grid_x_t = grid_x_t.round();
    grid_y_t = grid_y_t.round();
    GatherOutputGradToInputGrad<T>(out_grid, x_grad, grid_x, grid_y);
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(grid_sample_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::GridSampleGradKernel,
                   float,
                   double) {}
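// Notes: a sketch of the grid-gradient math encoded in GatherBilinearGrad.
// With the bilinear forward
//   out = v_wn * d_e * d_s + v_en * d_w * d_s
//       + v_ws * d_e * d_n + v_es * d_w * d_n,
// where d_w = gx - x_w and d_e = x_e - gx (so d(d_w)/d(gx) = 1 and
// d(d_e)/d(gx) = -1), differentiating gives
//   d(out)/d(gx) = (v_en - v_wn) * d_s + (v_es - v_ws) * d_n
//   d(out)/d(gy) = (v_ws - v_wn) * d_e + (v_es - v_en) * d_w,
// which the kernel scales by grid_x_scale / grid_y_scale (the derivative of
// the unnormalize-and-clip mapping from ClipWithMask) before interleaving
// into grid_grad.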