diff --git a/paddle/fluid/operators/mode_op.cc b/paddle/fluid/operators/mode_op.cc index c7fb92cd5107cee12e0995948e320ef3ed616f4d..9c16ccb138f7da56568ce6224dc30deb5bbccb7f 100644 --- a/paddle/fluid/operators/mode_op.cc +++ b/paddle/fluid/operators/mode_op.cc @@ -12,10 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/mode_op.h" #include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/phi/core/infermeta_utils.h" +#include "paddle/phi/infermeta/unary.h" + namespace paddle { namespace operators { @@ -23,43 +27,6 @@ class ModeOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "mode"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "mode"); - OP_INOUT_CHECK(ctx->HasOutput("Indices"), "Output", "Indices", "mode"); - - auto input_dims = ctx->GetInputDim("X"); - const int& dim_size = input_dims.size(); - int axis = static_cast(ctx->Attrs().Get("axis")); - PADDLE_ENFORCE_EQ( - (axis < dim_size) && (axis >= (-1 * dim_size)), true, - paddle::platform::errors::InvalidArgument( - "the axis of ModeOp must be [-%d, %d), but you set axis is %d", - dim_size, dim_size, axis)); - PADDLE_ENFORCE_GE(input_dims.size(), 1, - paddle::platform::errors::InvalidArgument( - "input of ModeOp must have >= 1d shape")); - if (axis < 0) axis += dim_size; - bool keepdim = ctx->Attrs().Get("keepdim"); - std::vector dimvec; - for (int64_t i = 0; i < axis; i++) { - dimvec.emplace_back(input_dims[i]); - } - if (keepdim) { - dimvec.emplace_back(static_cast(1)); - } - for (int64_t i = axis + 1; i < dim_size; i++) { - dimvec.emplace_back(input_dims[i]); - } - framework::DDim dims = phi::make_ddim(dimvec); - PADDLE_ENFORCE_GE(input_dims.size(), 1, platform::errors::InvalidArgument( - "input shape should >= 1d")); - ctx->SetOutputDim("Out", dims); - ctx->SetOutputDim("Indices", dims); - ctx->ShareLoD("X", "Out"); - ctx->ShareLoD("X", "Indices"); - } - protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { @@ -138,18 +105,11 @@ class ModeGradOpMaker : public framework::SingleGradOpMaker { } // namespace paddle namespace ops = paddle::operators; + +DECLARE_INFER_SHAPE_FUNCTOR(mode, ModeInferShapeFunctor, + PD_INFER_META(phi::ModeInferMeta)); REGISTER_OPERATOR(mode, ops::ModeOp, ops::ModeOpMaker, ops::ModeGradOpMaker, - ops::ModeGradOpMaker); -REGISTER_OP_CPU_KERNEL(mode, - ops::ModeCPUKernel, - ops::ModeCPUKernel, - ops::ModeCPUKernel, - ops::ModeCPUKernel); - + ops::ModeGradOpMaker, + ModeInferShapeFunctor); REGISTER_OPERATOR(mode_grad, ops::ModeOpGrad); -REGISTER_OP_CPU_KERNEL( - mode_grad, ops::ModeGradCPUKernel, - ops::ModeGradCPUKernel, - ops::ModeGradCPUKernel, - ops::ModeGradCPUKernel); diff --git a/paddle/fluid/operators/mode_op.cu b/paddle/fluid/operators/mode_op.cu deleted file mode 100644 index 2bacda8afb0eb340c4c8d4068f3013e2adbc7f91..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/mode_op.cu +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mode_op.h" -#include "paddle/fluid/operators/top_k_function_cuda.h" - -namespace paddle { -namespace operators { - -int ComputeBlockSize(int col) { - if (col > 512) - return 1024; - else if (col > 256 && col <= 512) - return 512; - else if (col > 128 && col <= 256) - return 256; - else if (col > 64 && col <= 128) - return 128; - else - return 64; -} - -template -void getModebySort(const platform::CUDADeviceContext& ctx, - const framework::Tensor* input_tensor, - const int64_t num_cols, const int64_t num_rows, - T* out_tensor, int64_t* indices_tensor) { - framework::Tensor input_tmp; - framework::TensorCopy(*input_tensor, ctx.GetPlace(), &input_tmp); - T* input_tmp_data = input_tmp.mutable_data(ctx.GetPlace()); - input_tmp.Resize(phi::make_ddim({num_rows, num_cols})); - thrust::device_ptr out_tensor_ptr(out_tensor); - thrust::device_ptr indices_tensor_ptr(indices_tensor); - - for (int64_t i = 0; i < num_rows; ++i) { - T* begin = input_tmp_data + num_cols * i; - T* end = input_tmp_data + num_cols * (i + 1); - thrust::device_vector indices_data(num_cols); - thrust::sequence(thrust::device, indices_data.begin(), - indices_data.begin() + num_cols); - thrust::sort_by_key(thrust::device, begin, end, indices_data.begin()); - int unique = 1 + thrust::inner_product(thrust::device, begin, end - 1, - begin + 1, 0, thrust::plus(), - thrust::not_equal_to()); - thrust::device_vector keys_data(unique); - thrust::device_vector cnts_data(unique); - thrust::reduce_by_key(thrust::device, begin, end, - thrust::constant_iterator(1), keys_data.begin(), - cnts_data.begin()); - auto it = thrust::max_element(thrust::device, cnts_data.begin(), - cnts_data.begin() + unique); - T mode = keys_data[it - cnts_data.begin()]; - int64_t counts = cnts_data[it - cnts_data.begin()]; - auto pos = thrust::find(thrust::device, begin, end, mode); - int64_t index = indices_data[pos - begin + counts - 1]; - out_tensor_ptr[i] = static_cast(mode); - indices_tensor_ptr[i] = static_cast(index); - } -} - -template -class ModeOpCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( - "It must use CUDAPlace, you must check your device set.")); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); - int axis = static_cast(ctx.Attr("axis")); - bool keepdim = static_cast(ctx.Attr("keepdim")); - - // get the input dims - const auto& in_dims = input->dims(); - // calcluate the real axis - if (axis < 0) axis += in_dims.size(); - - auto out_dims = output->dims(); - - const T* input_data = input->data(); - T* output_data = output->mutable_data(ctx.GetPlace()); - int64_t* indices_data = indices->mutable_data(ctx.GetPlace()); - - if (axis == in_dims.size() - 1) { - const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); - const int64_t& input_width = in_dims[in_dims.size() - 1]; - const auto& dev_ctx = ctx.cuda_device_context(); - getModebySort(dev_ctx, input, input_width, input_height, output_data, - indices_data); - } else { - std::vector trans_axis; - for (int i = 0; i < axis; i++) { - trans_axis.emplace_back(i); - } - trans_axis.emplace_back(in_dims.size() - 1); - for (int i = axis + 1; i < in_dims.size() - 1; i++) { - trans_axis.emplace_back(i); - } - trans_axis.emplace_back(axis); - - if (!keepdim) { - std::vector tmp_out_shape; - for (int i = 0; i < axis; i++) { - tmp_out_shape.emplace_back(in_dims[i]); - } - tmp_out_shape.emplace_back(1); - for (int i = axis + 1; i < in_dims.size(); i++) { - tmp_out_shape.emplace_back(in_dims[i]); - } - framework::DDim tmp_out_dim = phi::make_ddim(tmp_out_shape); - output->Resize(tmp_out_dim); - indices->Resize(tmp_out_dim); - } - - framework::DDim trans_shape(in_dims); - framework::DDim trans_out_shape(in_dims); - for (int i = 0; i < trans_axis.size(); i++) { - trans_shape[i] = in_dims[trans_axis[i]]; - trans_out_shape[i] = in_dims[trans_axis[i]]; - } - trans_out_shape[in_dims.size() - 1] = 1; - - // second step, tranpose the input - framework::Tensor trans_input; - trans_input.mutable_data(trans_shape, ctx.GetPlace()); - int ndims = trans_axis.size(); - const auto& dev_ctx = ctx.cuda_device_context(); - TransCompute(ndims, dev_ctx, *input, - &trans_input, trans_axis); - framework::Tensor trans_ind; - int64_t* trans_ind_data = - trans_ind.mutable_data(trans_out_shape, ctx.GetPlace()); - framework::Tensor trans_out; - T* trans_out_data = - trans_out.mutable_data(trans_out_shape, ctx.GetPlace()); - - const int64_t input_height = - phi::product(phi::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); - const int64_t input_width = trans_shape[trans_shape.size() - 1]; - getModebySort(dev_ctx, &trans_input, input_width, input_height, - trans_out_data, trans_ind_data); - // last step, tranpose back the indices and output - TransCompute( - ndims, dev_ctx, trans_ind, indices, trans_axis); - TransCompute(ndims, dev_ctx, trans_out, - output, trans_axis); - if (!keepdim) { - output->Resize(out_dims); - indices->Resize(out_dims); - } - } - } -}; - -template -class ModeOpGradCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(context.GetPlace()), true, - platform::errors::InvalidArgument( - "It must use CUDAPlace, you must check your device set.")); - auto* x = context.Input("X"); - auto* out_grad = - context.Input(framework::GradVarName("Out")); - auto* indices = context.Input("Indices"); - auto* x_grad = - context.Output(framework::GradVarName("X")); - int axis = context.Attr("axis"); - - const auto& in_dims = x->dims(); - auto out_dims = indices->dims(); - - if (axis < 0) axis += in_dims.size(); - // allocate the cuda memory for the x_grad - T* x_grad_data = x_grad->mutable_data(context.GetPlace()); - const T* out_grad_data = out_grad->data(); - const int64_t* indices_data = indices->data(); - - int pre, n, post; - GetDims(in_dims, axis, &pre, &n, &post); - - // calcluate the block and grid num - auto& dev_ctx = context.cuda_device_context(); - int block_size = ComputeBlockSize(post); - int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); - const int max_blocks = std::max(((max_threads - 1) / block_size + 1), 1); - int grid_size = std::min(max_blocks, pre); - AssignGradWithAxis<<>>( - out_grad_data, indices_data, x_grad_data, pre, post, n, 1); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL( - mode, ops::ModeOpCUDAKernel, - ops::ModeOpCUDAKernel, - ops::ModeOpCUDAKernel, - ops::ModeOpCUDAKernel); -REGISTER_OP_CUDA_KERNEL( - mode_grad, - ops::ModeOpGradCUDAKernel, - ops::ModeOpGradCUDAKernel, - ops::ModeOpGradCUDAKernel, - ops::ModeOpGradCUDAKernel); diff --git a/paddle/fluid/operators/mode_op.h b/paddle/fluid/operators/mode_op.h deleted file mode 100644 index 76d356ed16eb3f81b10d541230f49b73fd836543..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/mode_op.h +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include -#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/transpose_op.h" - -namespace paddle { -namespace operators { - -template -static void getMode(Type input_height, Type input_width, int input_dim, - const framework::Tensor* input, T* t_out, Type* t_indices) { -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for -#endif - for (Type i = 0; i < input_height; ++i) { - std::vector> col_vec; - col_vec.reserve(input_width); - if (input_dim == 1) { - auto e_input = framework::EigenVector::Flatten(*input); - for (Type j = 0; j < input_width; ++j) { - col_vec.emplace_back(std::pair(e_input(j), j)); - } - } else { - auto e_input = framework::EigenMatrix::Reshape(*input, input_dim - 1); - for (Type j = 0; j < input_width; ++j) { - col_vec.emplace_back(std::pair(e_input(i, j), j)); - } - } - std::sort(col_vec.begin(), col_vec.end(), - [](const std::pair& l, const std::pair& r) { - return (!std::isnan(static_cast(l.first)) && - std::isnan(static_cast(r.first))) || - (l.first < r.first); - }); - T mode = 0; - int64_t indice = 0; - int64_t cur_freq = 0; - int64_t max_freq = 0; - for (int64_t i = 0; i < input_width; ++i) { - ++cur_freq; - if (i == input_width - 1 || (col_vec[i + 1].first != col_vec[i].first)) { - if (cur_freq > max_freq) { - max_freq = cur_freq; - mode = col_vec[i].first; - indice = col_vec[i].second; - } - cur_freq = 0; - } - } - t_out[i] = mode; - t_indices[i] = indice; - } -} - -template -static void ModeAssign(const Type& input_height, const Type& input_width, - const int& input_dim, const framework::Tensor* input, - const framework::Tensor* indices, T* output_data) { -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for -#endif - for (Type i = 0; i < input_height; ++i) { - if (input_dim == 1) { - auto e_input = framework::EigenVector::Flatten(*input); - auto e_indices = framework::EigenVector::Flatten(*indices); - output_data[i * input_width + e_indices(0)] = e_input(0); - } else { - auto e_input = framework::EigenMatrix::Reshape(*input, input_dim - 1); - auto e_indices = - framework::EigenMatrix::Reshape(*indices, input_dim - 1); - output_data[i * input_width + e_indices(i, 0)] = e_input(i, 0); - } - } -} - -template -class ModeCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output = context.Output("Out"); - auto* indices = context.Output("Indices"); - const auto& in_dims = input->dims(); - bool keepdim = static_cast(context.Attr("keepdim")); - - // axis < 0, cacluate the real axis - int axis = static_cast(context.Attr("axis")); - if (axis < 0) axis += in_dims.size(); - - T* output_data = output->mutable_data(context.GetPlace()); - int64_t* indices_data = indices->mutable_data(context.GetPlace()); - auto out_dims = output->dims(); - // if axis is not the last dim, transpose it to the last dim, do the - // calculation, - // then tranpose it back to orginal axis. - if (axis == in_dims.size() - 1) { - const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); - const int64_t& input_width = in_dims[in_dims.size() - 1]; - getMode(input_height, input_width, in_dims.size(), input, - output_data, indices_data); - } else { - std::vector trans_axis; - for (int i = 0; i < axis; i++) { - trans_axis.emplace_back(i); - } - trans_axis.push_back(in_dims.size() - 1); - for (int i = axis + 1; i < in_dims.size() - 1; i++) { - trans_axis.emplace_back(i); - } - trans_axis.emplace_back(axis); - - if (!keepdim) { - std::vector tmp_out_shape; - for (int i = 0; i < axis; i++) { - tmp_out_shape.emplace_back(in_dims[i]); - } - tmp_out_shape.emplace_back(1); - for (int i = axis + 1; i < in_dims.size(); i++) { - tmp_out_shape.emplace_back(in_dims[i]); - } - framework::DDim tmp_out_dim = phi::make_ddim(tmp_out_shape); - output->Resize(tmp_out_dim); - indices->Resize(tmp_out_dim); - } - - // get the trans input_dims, out_dims - framework::DDim trans_shape(in_dims); - framework::DDim trans_out_shape(in_dims); - - for (size_t i = 0; i < trans_axis.size(); i++) { - trans_shape[i] = in_dims[trans_axis[i]]; - trans_out_shape[i] = in_dims[trans_axis[i]]; - } - trans_out_shape[in_dims.size() - 1] = 1; - - framework::Tensor trans_input; - trans_input.mutable_data(trans_shape, context.GetPlace()); - int ndims = trans_axis.size(); - auto& dev_context = - context.template device_context(); - - // transpose the input value - TransCompute(ndims, dev_context, *input, - &trans_input, trans_axis); - - const int64_t input_height = - phi::product(phi::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); - const int64_t input_width = trans_shape[trans_shape.size() - 1]; - framework::Tensor tmp_out; - T* t_out = tmp_out.mutable_data(trans_out_shape, context.GetPlace()); - framework::Tensor tmp_indices; - auto* t_ind = tmp_indices.mutable_data(trans_out_shape, - context.GetPlace()); - - getMode(input_height, input_width, in_dims.size(), - &trans_input, t_out, t_ind); - // transpose back - TransCompute( - ndims, dev_context, tmp_indices, indices, trans_axis); - TransCompute(ndims, dev_context, tmp_out, - output, trans_axis); - if (!keepdim) { - output->Resize(out_dims); - indices->Resize(out_dims); - } - } - } -}; - -template -class ModeGradCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out_grad = - context.Input(framework::GradVarName("Out")); - auto* indices = context.Input("Indices"); - auto* x_grad = - context.Output(framework::GradVarName("X")); - int axis = static_cast(context.Attr("axis")); - bool keepdim = static_cast(context.Attr("keepdim")); - - auto in_dims = x->dims(); - auto out_dims = indices->dims(); - - // axis < 0, get the real axis - axis = (axis < 0) ? (in_dims.size() + axis) : axis; - - if (!keepdim) { - std::vector tmp_out_shape; - for (int i = 0; i < axis; i++) { - tmp_out_shape.emplace_back(out_dims[i]); - } - tmp_out_shape.emplace_back(1); - for (int i = axis + 1; i < in_dims.size(); i++) { - tmp_out_shape.emplace_back(out_dims[i - 1]); - } - out_dims = phi::make_ddim(tmp_out_shape); - } - T* x_grad_data = x_grad->mutable_data(context.GetPlace()); - if (axis == in_dims.size() - 1) { - // allocate the memory for the input_grad - // assign the out_grad to input_grad directly - const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); - const int64_t input_width = in_dims[in_dims.size() - 1]; - - // init the output grad with 0, because some input elements has no grad - memset(x_grad_data, 0, x_grad->numel() * sizeof(T)); - // Assign the output_grad to input_grad - if (keepdim) { - ModeAssign(input_height, input_width, in_dims.size(), out_grad, indices, - x_grad_data); - } else { - auto& dev_context = - context.template device_context(); - framework::Tensor out_grad_tmp; - framework::Tensor indices_tmp; - out_grad_tmp.mutable_data(out_grad->dims(), dev_context.GetPlace()); - indices_tmp.mutable_data(indices->dims(), - dev_context.GetPlace()); - framework::TensorCopy(*out_grad, dev_context.GetPlace(), dev_context, - &out_grad_tmp); - framework::TensorCopy(*indices, dev_context.GetPlace(), dev_context, - &indices_tmp); - out_grad_tmp.Resize(out_dims); - indices_tmp.Resize(out_dims); - ModeAssign(input_height, input_width, in_dims.size(), &out_grad_tmp, - &indices_tmp, x_grad_data); - } - } else { - // can not assign grad to input_grad, must do the transpose - std::vector trans_axis; - for (int i = 0; i < axis; i++) { - trans_axis.emplace_back(i); - } - trans_axis.emplace_back(out_dims.size() - 1); - for (int i = axis + 1; i < out_dims.size() - 1; i++) { - trans_axis.emplace_back(i); - } - trans_axis.emplace_back(axis); - framework::DDim trans_shape(out_dims); - framework::DDim trans_in_shape(in_dims); - for (size_t i = 0; i < trans_axis.size(); i++) { - trans_shape[i] = out_dims[trans_axis[i]]; - trans_in_shape[i] = in_dims[trans_axis[i]]; - } - // transpose the out_grad, indices - framework::Tensor trans_dO; - trans_dO.mutable_data(trans_shape, context.GetPlace()); - framework::Tensor trans_ind; - trans_ind.mutable_data(trans_shape, context.GetPlace()); - int ndims = trans_axis.size(); - auto& dev_context = - context.template device_context(); - - if (keepdim) { - // Do transpose - TransCompute( - ndims, dev_context, *out_grad, &trans_dO, trans_axis); - TransCompute( - ndims, dev_context, *indices, &trans_ind, trans_axis); - } else { - framework::Tensor out_grad_tmp; - framework::Tensor indices_tmp; - out_grad_tmp.mutable_data(out_grad->dims(), dev_context.GetPlace()); - indices_tmp.mutable_data(indices->dims(), - dev_context.GetPlace()); - framework::TensorCopy(*out_grad, dev_context.GetPlace(), dev_context, - &out_grad_tmp); - framework::TensorCopy(*indices, dev_context.GetPlace(), dev_context, - &indices_tmp); - out_grad_tmp.Resize(out_dims); - indices_tmp.Resize(out_dims); - // Do transpose - TransCompute( - ndims, dev_context, out_grad_tmp, &trans_dO, trans_axis); - TransCompute( - ndims, dev_context, indices_tmp, &trans_ind, trans_axis); - } - const int64_t input_height = phi::product( - phi::slice_ddim(trans_in_shape, 0, trans_in_shape.size() - 1)); - const int64_t input_width = trans_in_shape[trans_in_shape.size() - 1]; - - // Assign the out_grad to tranpose input_grad - framework::Tensor tmp_out; - T* t_out = tmp_out.mutable_data(trans_in_shape, context.GetPlace()); - memset(t_out, 0, x_grad->numel() * sizeof(T)); - - ModeAssign(input_height, input_width, in_dims.size(), - &trans_dO, &trans_ind, t_out); - - // Transpose back - TransCompute(ndims, dev_context, tmp_out, - x_grad, trans_axis); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 262ada3eaf3169bebc919940e7630a75b0733cd9..f81f4a1b7c7390baca82d55abecbc49d0f67c235 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -648,6 +648,49 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x, mask->set_dtype(paddle::experimental::CppTypeToDataType::Type()); } +void ModeInferMeta(const MetaTensor& x, + int axis, + bool keepdim, + MetaTensor* out, + MetaTensor* indices) { + auto input_dims = x.dims(); + const int& dim_size = input_dims.size(); + PADDLE_ENFORCE_EQ( + (axis < dim_size) && (axis >= (-1 * dim_size)), + true, + errors::InvalidArgument( + "the axis of ModeOp must be [-%d, %d), but you set axis is %d", + dim_size, + dim_size, + axis)); + PADDLE_ENFORCE_GE( + input_dims.size(), + 1, + errors::InvalidArgument("input of ModeOp must have >= 1d shape")); + if (axis < 0) axis += dim_size; + std::vector dimvec; + for (int64_t i = 0; i < axis; i++) { + dimvec.emplace_back(input_dims[i]); + } + if (keepdim) { + dimvec.emplace_back(static_cast(1)); + } + for (int64_t i = axis + 1; i < dim_size; i++) { + dimvec.emplace_back(input_dims[i]); + } + DDim dims = phi::make_ddim(dimvec); + PADDLE_ENFORCE_GE(input_dims.size(), + 1, + errors::InvalidArgument("input shape should >= 1d")); + out->set_dims(dims); + out->share_lod(x); + out->set_dtype(x.dtype()); + + indices->set_dims(dims); + indices->share_lod(x); + indices->set_dtype(x.dtype()); +} + void MultinomialInferMeta(const MetaTensor& x, int num_samples, bool replacement, diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index 3dfc9b797c089281cd9631642640a54be05ce679..eb894003e5354222bff14945cc0e2aeb565b4e3d 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -112,6 +112,12 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x, MetaTensor* mask, MetaConfig config = MetaConfig()); +void ModeInferMeta(const MetaTensor& x, + int axis, + bool keepdim, + MetaTensor* out, + MetaTensor* indices); + void MultinomialInferMeta(const MetaTensor& x, int num_samples, bool replacement, diff --git a/paddle/phi/kernels/cpu/mode_grad_kernel.cc b/paddle/phi/kernels/cpu/mode_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..ca813c1757eacce24ecea8687b7b80bd43c5e8f9 --- /dev/null +++ b/paddle/phi/kernels/cpu/mode_grad_kernel.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/mode_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/mode.h" + +namespace phi { + +template +void ModeGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& indices, + const DenseTensor& out_grad, + int axis, + bool keepdim, + DenseTensor* x_grad) { + auto in_dims = x.dims(); + auto out_dims = indices.dims(); + + // axis < 0, get the real axis + axis = (axis < 0) ? (in_dims.size() + axis) : axis; + + if (!keepdim) { + std::vector tmp_out_shape; + for (int i = 0; i < axis; i++) { + tmp_out_shape.emplace_back(out_dims[i]); + } + tmp_out_shape.emplace_back(1); + for (int i = axis + 1; i < in_dims.size(); i++) { + tmp_out_shape.emplace_back(out_dims[i - 1]); + } + out_dims = phi::make_ddim(tmp_out_shape); + } + T* x_grad_data = dev_ctx.template Alloc(x_grad); + + if (axis == in_dims.size() - 1) { + // allocate the memory for the input_grad + // assign the out_grad to input_grad directly + const int64_t input_height = + phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + const int64_t input_width = in_dims[in_dims.size() - 1]; + + // init the output grad with 0, because some input elements has no grad + memset(x_grad_data, 0, x_grad->numel() * sizeof(T)); + // Assign the output_grad to input_grad + if (keepdim) { + funcs::ModeAssign(input_height, + input_width, + in_dims.size(), + &out_grad, + &indices, + x_grad_data); + } else { + DenseTensor out_grad_tmp; + dev_ctx.template Alloc(&out_grad_tmp); + DenseTensor indices_tmp; + dev_ctx.template Alloc(&indices_tmp); + + phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, &out_grad_tmp); + phi::Copy(dev_ctx, indices, dev_ctx.GetPlace(), false, &indices_tmp); + + out_grad_tmp.Resize(out_dims); + indices_tmp.Resize(out_dims); + + funcs::ModeAssign(input_height, + input_width, + in_dims.size(), + &out_grad_tmp, + &indices_tmp, + x_grad_data); + } + } else { + // can not assign grad to input_grad, must do the transpose + std::vector trans_axis; + for (int i = 0; i < axis; i++) { + trans_axis.emplace_back(i); + } + trans_axis.emplace_back(out_dims.size() - 1); + for (int i = axis + 1; i < out_dims.size() - 1; i++) { + trans_axis.emplace_back(i); + } + trans_axis.emplace_back(axis); + DDim trans_shape(out_dims); + DDim trans_in_shape(in_dims); + for (size_t i = 0; i < trans_axis.size(); i++) { + trans_shape[i] = out_dims[trans_axis[i]]; + trans_in_shape[i] = in_dims[trans_axis[i]]; + } + // transpose the out_grad, indices + DenseTensor trans_dO; + trans_dO.Resize(trans_shape); + dev_ctx.template Alloc(&trans_dO); + + DenseTensor trans_ind; + trans_ind.Resize(trans_shape); + dev_ctx.template Alloc(&trans_ind); + + int ndims = trans_axis.size(); + + if (keepdim) { + // Do transpose + funcs::TransCompute( + ndims, dev_ctx, out_grad, &trans_dO, trans_axis); + funcs::TransCompute( + ndims, dev_ctx, indices, &trans_ind, trans_axis); + } else { + DenseTensor out_grad_tmp; + dev_ctx.template Alloc(&out_grad_tmp); + + DenseTensor indices_tmp; + dev_ctx.template Alloc(&indices_tmp); + + phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, &out_grad_tmp); + phi::Copy(dev_ctx, indices, dev_ctx.GetPlace(), false, &indices_tmp); + out_grad_tmp.Resize(out_dims); + indices_tmp.Resize(out_dims); + // Do transpose + funcs::TransCompute( + ndims, dev_ctx, out_grad_tmp, &trans_dO, trans_axis); + funcs::TransCompute( + ndims, dev_ctx, indices_tmp, &trans_ind, trans_axis); + } + const int64_t input_height = phi::product( + phi::slice_ddim(trans_in_shape, 0, trans_in_shape.size() - 1)); + const int64_t input_width = trans_in_shape[trans_in_shape.size() - 1]; + + // Assign the out_grad to tranpose input_grad + DenseTensor tmp_out; + tmp_out.Resize(trans_in_shape); + T* t_out = dev_ctx.template Alloc(&tmp_out); + memset(t_out, 0, x_grad->numel() * sizeof(T)); + + funcs::ModeAssign(input_height, + input_width, + in_dims.size(), + &trans_dO, + &trans_ind, + t_out); + + // Transpose back + funcs::TransCompute( + ndims, dev_ctx, tmp_out, x_grad, trans_axis); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(mode_grad, + CPU, + ALL_LAYOUT, + phi::ModeGradKernel, + float, + double, + int32_t, + int64_t) {} diff --git a/paddle/phi/kernels/cpu/mode_kernel.cc b/paddle/phi/kernels/cpu/mode_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..6535d1b89af420ee4266981f004983157179f34f --- /dev/null +++ b/paddle/phi/kernels/cpu/mode_kernel.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/mode_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/mode.h" + +namespace phi { + +template +void ModeKernel(const Context& dev_ctx, + const DenseTensor& x, + int axis, + bool keepdim, + DenseTensor* out, + DenseTensor* indices) { + const auto& in_dims = x.dims(); + auto out_dims = out->dims(); + // axis < 0, cacluate the real axis + if (axis < 0) axis += in_dims.size(); + + T* output_data = dev_ctx.template Alloc(out); + int64_t* indices_data = dev_ctx.template Alloc(indices); + // if axis is not the last dim, transpose it to the last dim, do the + // calculation, then tranpose it back to original axis. + if (axis == in_dims.size() - 1) { + const int64_t& input_height = + phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + const int64_t& input_width = in_dims[in_dims.size() - 1]; + funcs::GetMode(input_height, + input_width, + in_dims.size(), + &x, + output_data, + indices_data); + } else { + std::vector trans_axis; + for (int i = 0; i < axis; i++) { + trans_axis.emplace_back(i); + } + trans_axis.push_back(in_dims.size() - 1); + for (int i = axis + 1; i < in_dims.size() - 1; i++) { + trans_axis.emplace_back(i); + } + trans_axis.emplace_back(axis); + + if (!keepdim) { + std::vector tmp_out_shape; + for (int i = 0; i < axis; i++) { + tmp_out_shape.emplace_back(in_dims[i]); + } + tmp_out_shape.emplace_back(1); + for (int i = axis + 1; i < in_dims.size(); i++) { + tmp_out_shape.emplace_back(in_dims[i]); + } + DDim tmp_out_dim = phi::make_ddim(tmp_out_shape); + out->Resize(tmp_out_dim); + indices->Resize(tmp_out_dim); + } + + // get the trans input_dims, out_dims + DDim trans_shape(in_dims); + DDim trans_out_shape(in_dims); + + for (size_t i = 0; i < trans_axis.size(); i++) { + trans_shape[i] = in_dims[trans_axis[i]]; + trans_out_shape[i] = in_dims[trans_axis[i]]; + } + trans_out_shape[in_dims.size() - 1] = 1; + + DenseTensor trans_input; + trans_input.Resize(trans_shape); + dev_ctx.template Alloc(&trans_input); + int ndims = trans_axis.size(); + + // transpose the input value + funcs::TransCompute( + ndims, dev_ctx, x, &trans_input, trans_axis); + + const int64_t input_height = + phi::product(phi::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); + const int64_t input_width = trans_shape[trans_shape.size() - 1]; + DenseTensor tmp_out; + tmp_out.Resize(trans_out_shape); + T* t_out = dev_ctx.template Alloc(&tmp_out); + + DenseTensor tmp_indices; + tmp_indices.Resize(trans_out_shape); + int64_t* t_ind = dev_ctx.template Alloc(&tmp_indices); + + funcs::GetMode( + input_height, input_width, in_dims.size(), &trans_input, t_out, t_ind); + // transpose back + funcs::TransCompute( + ndims, dev_ctx, tmp_indices, indices, trans_axis); + funcs::TransCompute( + ndims, dev_ctx, tmp_out, out, trans_axis); + if (!keepdim) { + out->Resize(out_dims); + indices->Resize(out_dims); + } + } +} + +} // namespace phi + +PD_REGISTER_KERNEL( + mode, CPU, ALL_LAYOUT, phi::ModeKernel, float, double, int32_t, int64_t) {} diff --git a/paddle/phi/kernels/funcs/mode.h b/paddle/phi/kernels/funcs/mode.h new file mode 100644 index 0000000000000000000000000000000000000000..1b7641762e2639acf3db540280891b518f22eed2 --- /dev/null +++ b/paddle/phi/kernels/funcs/mode.h @@ -0,0 +1,197 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#include +#include +#include +#include +#ifdef PADDLE_WITH_MKLML +#include +#endif + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { +namespace funcs { + +static int ComputeBlockSize(int col) { + if (col > 512) + return 1024; + else if (col > 256 && col <= 512) + return 512; + else if (col > 128 && col <= 256) + return 256; + else if (col > 64 && col <= 128) + return 128; + else + return 64; +} + +static inline void GetDims( + const phi::DDim& dim, int axis, int* pre, int* n, int* post) { + *pre = 1; + *post = 1; + *n = dim[axis]; + for (int i = 0; i < axis; ++i) { + (*pre) *= dim[i]; + } + for (int i = axis + 1; i < dim.size(); ++i) { + (*post) *= dim[i]; + } +} + +template +static void GetMode(Type input_height, + Type input_width, + int input_dim, + const DenseTensor* input, + T* t_out, + Type* t_indices) { +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (Type i = 0; i < input_height; ++i) { + std::vector> col_vec; + col_vec.reserve(input_width); + if (input_dim == 1) { + auto e_input = EigenVector::Flatten(*input); + for (Type j = 0; j < input_width; ++j) { + col_vec.emplace_back(std::pair(e_input(j), j)); + } + } else { + auto e_input = EigenMatrix::Reshape(*input, input_dim - 1); + for (Type j = 0; j < input_width; ++j) { + col_vec.emplace_back(std::pair(e_input(i, j), j)); + } + } + std::sort(col_vec.begin(), + col_vec.end(), + [](const std::pair& l, const std::pair& r) { + return (!std::isnan(static_cast(l.first)) && + std::isnan(static_cast(r.first))) || + (l.first < r.first); + }); + T mode = 0; + int64_t indice = 0; + int64_t cur_freq = 0; + int64_t max_freq = 0; + for (int64_t i = 0; i < input_width; ++i) { + ++cur_freq; + if (i == input_width - 1 || (col_vec[i + 1].first != col_vec[i].first)) { + if (cur_freq > max_freq) { + max_freq = cur_freq; + mode = col_vec[i].first; + indice = col_vec[i].second; + } + cur_freq = 0; + } + } + t_out[i] = mode; + t_indices[i] = indice; + } +} + +template +static void ModeAssign(const Type& input_height, + const Type& input_width, + const int& input_dim, + const DenseTensor* input, + const DenseTensor* indices, + T* output_data) { +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (Type i = 0; i < input_height; ++i) { + if (input_dim == 1) { + auto e_input = EigenVector::Flatten(*input); + auto e_indices = EigenVector::Flatten(*indices); + output_data[i * input_width + e_indices(0)] = e_input(0); + } else { + auto e_input = EigenMatrix::Reshape(*input, input_dim - 1); + auto e_indices = EigenMatrix::Reshape(*indices, input_dim - 1); + output_data[i * input_width + e_indices(i, 0)] = e_input(i, 0); + } + } +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +template +static void GetModebySort(const phi::GPUContext& dev_ctx, + const DenseTensor* input_tensor, + const int64_t num_cols, + const int64_t num_rows, + T* out_tensor, + int64_t* indices_tensor) { + DenseTensor input_tmp; + input_tmp.Resize(phi::make_ddim({num_rows, num_cols})); + T* input_tmp_data = dev_ctx.Alloc(&input_tmp); + phi::Copy(dev_ctx, *input_tensor, dev_ctx.GetPlace(), false, &input_tmp); + + thrust::device_ptr out_tensor_ptr(out_tensor); + thrust::device_ptr indices_tensor_ptr(indices_tensor); + + for (int64_t i = 0; i < num_rows; ++i) { + T* begin = input_tmp_data + num_cols * i; + T* end = input_tmp_data + num_cols * (i + 1); + thrust::device_vector indices_data(num_cols); + thrust::sequence( + thrust::device, indices_data.begin(), indices_data.begin() + num_cols); + thrust::sort_by_key(thrust::device, begin, end, indices_data.begin()); + int unique = 1 + thrust::inner_product(thrust::device, + begin, + end - 1, + begin + 1, + 0, + thrust::plus(), + thrust::not_equal_to()); + thrust::device_vector keys_data(unique); + thrust::device_vector cnts_data(unique); + thrust::reduce_by_key(thrust::device, + begin, + end, + thrust::constant_iterator(1), + keys_data.begin(), + cnts_data.begin()); + auto it = thrust::max_element( + thrust::device, cnts_data.begin(), cnts_data.begin() + unique); + T mode = keys_data[it - cnts_data.begin()]; + int64_t counts = cnts_data[it - cnts_data.begin()]; + auto pos = thrust::find(thrust::device, begin, end, mode); + int64_t index = indices_data[pos - begin + counts - 1]; + out_tensor_ptr[i] = static_cast(mode); + indices_tensor_ptr[i] = static_cast(index); + } +} +#endif + +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/gpu/mode_grad_kernel.cu b/paddle/phi/kernels/gpu/mode_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..43502621c2d3a878a144de1878aa09b8d64b6a47 --- /dev/null +++ b/paddle/phi/kernels/gpu/mode_grad_kernel.cu @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/mode_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/mode.h" + +namespace phi { + +template +__global__ void AssignGradWithAxis(const T* grad_out, + const int64_t* indices, + T* grad_in, + int pre, + int post, + int raw_height, + int k) { + // raw_height is the length of topk axis + for (int i = blockIdx.x; i < pre; i += gridDim.x) { + int base_index = i * post * k; + int base_grad = i * post * raw_height; + for (int j = threadIdx.x; j < raw_height * post; j += blockDim.x) { + grad_in[base_grad + j] = static_cast(0); + } + __syncthreads(); + for (int j = threadIdx.x; j < k * post; j += blockDim.x) { + int64_t idx_ij = indices[base_index + j]; + int64_t in_ij = base_grad + (idx_ij * post) + (j % post); + grad_in[in_ij] = grad_out[base_index + j]; + } + } +} + +template +void ModeGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& indices, + const DenseTensor& out_grad, + int axis, + bool keepdim, + DenseTensor* x_grad) { + const auto& in_dims = x.dims(); + auto out_dims = indices.dims(); + + if (axis < 0) axis += in_dims.size(); + // allocate the cuda memory for the x_grad + T* x_grad_data = dev_ctx.template Alloc(x_grad); + const T* out_grad_data = out_grad.data(); + const int64_t* indices_data = indices.data(); + + int pre, n, post; + funcs::GetDims(in_dims, axis, &pre, &n, &post); + + // calcluate the block and grid num + int block_size = funcs::ComputeBlockSize(post); + int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + const int max_blocks = std::max(((max_threads - 1) / block_size + 1), 1); + int grid_size = std::min(max_blocks, pre); + AssignGradWithAxis<<>>( + out_grad_data, indices_data, x_grad_data, pre, post, n, 1); +} + +} // namespace phi + +PD_REGISTER_KERNEL(mode_grad, + GPU, + ALL_LAYOUT, + phi::ModeGradKernel, + float, + double, + int32_t, + int64_t) {} diff --git a/paddle/phi/kernels/gpu/mode_kernel.cu b/paddle/phi/kernels/gpu/mode_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..629b9722cd6bcfe12d0fb5a7e8be6439f5ea286f --- /dev/null +++ b/paddle/phi/kernels/gpu/mode_kernel.cu @@ -0,0 +1,119 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/mode_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/mode.h" + +namespace phi { + +template +void ModeKernel(const Context& dev_ctx, + const DenseTensor& x, + int axis, + bool keepdim, + DenseTensor* out, + DenseTensor* indices) { + // get the input dims + const auto& in_dims = x.dims(); + // calcluate the real axis + if (axis < 0) axis += in_dims.size(); + + auto out_dims = out->dims(); + + const T* input_data = x.data(); + T* output_data = dev_ctx.template Alloc(out); + int64_t* indices_data = dev_ctx.template Alloc(indices); + + if (axis == in_dims.size() - 1) { + const int64_t& input_height = + phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + const int64_t& input_width = in_dims[in_dims.size() - 1]; + funcs::GetModebySort( + dev_ctx, &x, input_width, input_height, output_data, indices_data); + } else { + std::vector trans_axis; + for (int i = 0; i < axis; i++) { + trans_axis.emplace_back(i); + } + trans_axis.emplace_back(in_dims.size() - 1); + for (int i = axis + 1; i < in_dims.size() - 1; i++) { + trans_axis.emplace_back(i); + } + trans_axis.emplace_back(axis); + + if (!keepdim) { + std::vector tmp_out_shape; + for (int i = 0; i < axis; i++) { + tmp_out_shape.emplace_back(in_dims[i]); + } + tmp_out_shape.emplace_back(1); + for (int i = axis + 1; i < in_dims.size(); i++) { + tmp_out_shape.emplace_back(in_dims[i]); + } + DDim tmp_out_dim = phi::make_ddim(tmp_out_shape); + out->Resize(tmp_out_dim); + indices->Resize(tmp_out_dim); + } + + DDim trans_shape(in_dims); + DDim trans_out_shape(in_dims); + for (int i = 0; i < trans_axis.size(); i++) { + trans_shape[i] = in_dims[trans_axis[i]]; + trans_out_shape[i] = in_dims[trans_axis[i]]; + } + trans_out_shape[in_dims.size() - 1] = 1; + + // second step, tranpose the input + DenseTensor trans_input; + trans_input.Resize(trans_shape); + dev_ctx.template Alloc(&trans_input); + + int ndims = trans_axis.size(); + funcs::TransCompute( + ndims, dev_ctx, x, &trans_input, trans_axis); + DenseTensor trans_ind; + trans_ind.Resize(trans_out_shape); + int64_t* trans_ind_data = dev_ctx.template Alloc(&trans_ind); + + DenseTensor trans_out; + trans_out.Resize(trans_out_shape); + T* trans_out_data = dev_ctx.template Alloc(&trans_out); + + const int64_t input_height = + phi::product(phi::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); + const int64_t input_width = trans_shape[trans_shape.size() - 1]; + funcs::GetModebySort(dev_ctx, + &trans_input, + input_width, + input_height, + trans_out_data, + trans_ind_data); + // last step, tranpose back the indices and output + funcs::TransCompute( + ndims, dev_ctx, trans_ind, indices, trans_axis); + funcs::TransCompute(ndims, dev_ctx, trans_out, out, trans_axis); + if (!keepdim) { + out->Resize(out_dims); + indices->Resize(out_dims); + } + } +} + +} // namespace phi + +PD_REGISTER_KERNEL( + mode, GPU, ALL_LAYOUT, phi::ModeKernel, float, double, int32_t, int64_t) {} diff --git a/paddle/phi/kernels/mode_grad_kernel.h b/paddle/phi/kernels/mode_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..ccde8c3648fa556401f1937c78039743daf43f4c --- /dev/null +++ b/paddle/phi/kernels/mode_grad_kernel.h @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void ModeGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& indices, + const DenseTensor& out_grad, + int axis, + bool keepdim, + DenseTensor* x_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/mode_kernel.h b/paddle/phi/kernels/mode_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..831c4369304e5c5d27cddf01bcba021745bf7083 --- /dev/null +++ b/paddle/phi/kernels/mode_kernel.h @@ -0,0 +1,29 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void ModeKernel(const Context& dev_ctx, + const DenseTensor& x, + int axis, + bool keepdim, + DenseTensor* out, + DenseTensor* indices); + +} // namespace phi diff --git a/paddle/phi/ops/compat/mode_sig.cc b/paddle/phi/ops/compat/mode_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..20994c08aa73c33328568e334d258c44eef68171 --- /dev/null +++ b/paddle/phi/ops/compat/mode_sig.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature ModeOpArgumentMapping(const ArgumentMappingContext& ctx) { + return KernelSignature( + "mode", {"X"}, {"axis", "keepdim"}, {"Out", "Indices"}); +} + +KernelSignature ModeGradOpArgumentMapping(const ArgumentMappingContext& ctx) { + return KernelSignature("mode_grad", + {"X", "Indices", GradVarName("Out")}, + {"axis", "keepdim"}, + {GradVarName("X")}); +} + +} // namespace phi + +PD_REGISTER_ARG_MAPPING_FN(mode, phi::ModeOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(mode_grad, phi::ModeGradOpArgumentMapping);