// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_function.h" namespace paddle { namespace operators { #define HANDLE_DIM(NDIM, RDIM) \ if (ndim == NDIM && rdim == RDIM) { \ ReduceFunctor( \ context.template device_context(), *input, output, \ axis, keepdim); \ } struct LogsumexpFunctor { template void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) { auto x_dim = x->dimensions(); auto t_dim = x_dim; for (int i = 0; i < static_cast(dim.size()); i++) { t_dim[dim[i]] = 1; } auto r_dim = x_dim; for (int i = 0; i < static_cast(r_dim.size()); i++) { r_dim[i] = 1; } for (int i = 0; i < static_cast(dim.size()); i++) { r_dim[dim[i]] = x_dim[dim[i]]; } auto y_dim = y->dimensions(); auto x_max = x->maximum(dim); y->device(place) = (x_max + (*x - x_max.reshape(t_dim).broadcast(r_dim)).exp().sum(dim).log()) .reshape(y_dim); } }; struct LogsumexpGradFunctor { template void operator()(const DeviceContext& place, X* x, Y* y, DX* dx, DY* dy, const Dim& dim, int size) { dx->device(place) = dy->broadcast(dim) * (*x - y->broadcast(dim)).exp(); } }; template class LogsumexpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* input = context.Input("X"); auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); auto axis = context.Attr>("axis"); auto keepdim = context.Attr("keepdim"); auto reduce_all = context.Attr("reduce_all"); const auto& input_dim_size = input->dims().size(); // The dims has full dim, set the reduce_all is True reduce_all |= (static_cast(axis.size()) == input_dim_size); if (reduce_all) { // Flatten and reduce 1-D tensor auto x = EigenVector::Flatten(*input); auto out = EigenScalar::From(*output); auto& place = *context.template device_context().eigen_device(); auto reduce_dim = Eigen::array({{0}}); LogsumexpFunctor()(place, &x, &out, reduce_dim); } else { int ndim = input_dim_size; int rdim = axis.size(); // comments for accelerating compiling temporarily. // HANDLE_DIM(6, 5); // HANDLE_DIM(6, 4); // HANDLE_DIM(6, 3); // HANDLE_DIM(6, 2); // HANDLE_DIM(6, 1); // HANDLE_DIM(5, 4); // HANDLE_DIM(5, 3); // HANDLE_DIM(5, 2); // HANDLE_DIM(5, 1); HANDLE_DIM(4, 3); HANDLE_DIM(4, 2); HANDLE_DIM(4, 1); HANDLE_DIM(3, 2); HANDLE_DIM(3, 1); HANDLE_DIM(2, 1); } } }; template class LogsumexpGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* input = context.Input("X"); auto* output = context.Input("Out"); auto* output_grad = context.Input(framework::GradVarName("Out")); auto* input_grad = context.Output(framework::GradVarName("X")); input_grad->mutable_data(context.GetPlace()); auto axis = context.Attr>("axis"); auto reduce_all = context.Attr("reduce_all"); const auto input_dim_size = context.Input("X")->dims().size(); reduce_all |= (static_cast(axis.size()) == input_dim_size); if (reduce_all) { auto x = EigenVector::Flatten(*input); auto y = EigenVector::Flatten(*output); auto dy = EigenVector::Flatten(*output_grad); auto dx = EigenVector::Flatten(*input_grad); auto& place = *context.template device_context().eigen_device(); auto broadcast_dim = Eigen::array({{static_cast(input->numel())}}); LogsumexpGradFunctor()(place, &x, &y, &dx, &dy, broadcast_dim, broadcast_dim[0]); } else { int rank = input->dims().size(); switch (rank) { case 1: ReduceGradFunctor( context.template device_context(), *input, *output, *output_grad, input_grad, axis); break; case 2: ReduceGradFunctor( context.template device_context(), *input, *output, *output_grad, input_grad, axis); break; case 3: ReduceGradFunctor( context.template device_context(), *input, *output, *output_grad, input_grad, axis); break; case 4: ReduceGradFunctor( context.template device_context(), *input, *output, *output_grad, input_grad, axis); break; } } } }; } // namespace operators } // namespace paddle