From 5e222dc27871a8b85247460082e30496dfa8e745 Mon Sep 17 00:00:00 2001
From: huangjiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Mon, 19 Dec 2022 13:46:52 +0800
Subject: [PATCH] [PHI Decoupling] move maxouting and matrix_bit_code from
 fluid to phi (#49131)

* move maxouting from fluid to phi

* move matrix_bit_code from fluid to phi

* replace mutable_data and fix include

* fix include

* move gather_scatter_kernel from fluid to phi

* Revert "move gather_scatter_kernel from fluid to phi"

This reverts commit 3d0b1eaf179656072e8c483dfca688cccccdda01.
---
 paddle/fluid/operators/math/CMakeLists.txt    |  2 --
 paddle/phi/kernels/cpu/hsigmoid_loss_grad.h   | 11 +++++-----
 .../phi/kernels/cpu/hsigmoid_loss_kernel.cc   | 15 ++++++-------
 paddle/phi/kernels/funcs/CMakeLists.txt       |  2 ++
 .../kernels/funcs}/matrix_bit_code.cc         | 18 +++++++++------
 .../kernels/funcs}/matrix_bit_code.h          | 22 ++++++-------------
 .../math => phi/kernels/funcs}/maxouting.cc   | 17 ++++++--------
 .../math => phi/kernels/funcs}/maxouting.cu   | 18 +++++++--------
 .../math => phi/kernels/funcs}/maxouting.h    | 16 +++++---------
 .../kernels/impl/maxout_grad_kernel_impl.h    |  4 ++--
 paddle/phi/kernels/impl/maxout_kernel_impl.h  |  4 ++--
 11 files changed, 58 insertions(+), 71 deletions(-)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/matrix_bit_code.cc (97%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/matrix_bit_code.h (94%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/maxouting.cc (93%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/maxouting.cu (95%)
 rename paddle/{fluid/operators/math => phi/kernels/funcs}/maxouting.h (81%)

diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
index 3d5c7bfb4e..9b1f3cb9b6 100644
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -27,7 +27,6 @@ math_library(sample_prob)
 math_library(sampler DEPS generator)

 # math_library(math_function DEPS blas dense_tensor tensor)
-math_library(maxouting)
 math_library(sequence_padding)
 math_library(sequence_pooling DEPS math_function jit_kernel_helper)

@@ -39,7 +38,6 @@ elseif(WITH_XPU)
 else()
   math_library(beam_search DEPS math_function)
 endif()

-math_library(matrix_bit_code)
 math_library(unpooling)
 math_library(prelu)

diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
index 12960e305a..8c8b40c8d9 100644
--- a/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h
@@ -14,17 +14,16 @@

 #pragma once

-#include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/selected_rows.h"
+#include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/matrix_bit_code.h"

 namespace phi {

-namespace math = paddle::operators::math;
-
 template <typename T, typename Context>
 void HSigmoidLossGradKernelImpl(const Context& ctx,
                                 const DenseTensor& x,
@@ -55,12 +54,12 @@ void HSigmoidLossGradKernelImpl(const Context& ctx,
     is_custom = true;
   }

-  std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
+  std::unique_ptr<phi::funcs::MatrixBitCodeFunctor<T>> bit_code;
   if (!is_custom) {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         num_classes, label.template data<int64_t>()));
   } else {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         *(path.get_ptr()), *(code.get_ptr()), label.template data<int64_t>()));
   }

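Note on the "replace mutable_data and fix include" bullet in the commit
message: in phi, kernels allocate outputs through the device context rather
than through the tensor itself. A minimal sketch of the two idioms, using the
names that appear in the maxouting hunks further down (`context` is the
device context, `output` the output DenseTensor; illustrative fragment, not a
drop-in):

    // fluid idiom removed by this patch: the tensor allocates itself.
    T* out_before = output->mutable_data<T>(context.GetPlace());
    // phi idiom used instead: the device context performs the typed alloc.
    T* out_after = context.template Alloc<T>(output);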
diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
index a6f10b4ff1..062aa1be24 100644
--- a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
@@ -14,19 +14,17 @@

 #include "paddle/phi/kernels/hsigmoid_loss_kernel.h"

-#include "paddle/fluid/operators/math/matrix_bit_code.h"
 #include "paddle/fluid/platform/transform.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/phi/kernels/funcs/math_function_impl.h"
+#include "paddle/phi/kernels/funcs/matrix_bit_code.h"
 #include "paddle/phi/kernels/impl/clip_kernel_impl.h"

 namespace phi {

-namespace math = paddle::operators::math;
-
 template <typename T, typename Context>
 void HSigmoidLossKernel(const Context& ctx,
                         const DenseTensor& x,
@@ -48,8 +46,9 @@ void HSigmoidLossKernel(const Context& ctx,
   if (path.get_ptr()) {
     is_custom = true;
   }
-  int64_t code_length = path.get_ptr() ? path.get_ptr()->dims()[1]
-                                       : math::FindLastSet(num_classes_st - 1);
+  int64_t code_length = path.get_ptr()
+                            ? path.get_ptr()->dims()[1]
+                            : phi::funcs::FindLastSet(num_classes_st - 1);
   int64_t batch_size = x.dims()[0];
   DenseTensor sum;
   pre_out->Resize(phi::make_ddim({batch_size, code_length}));
@@ -63,12 +62,12 @@ void HSigmoidLossKernel(const Context& ctx,
   auto& place = *ctx.eigen_device();
   funcs::RowwiseSum<Context, T> row_sum;

-  std::unique_ptr<math::MatrixBitCodeFunctor<T>> bit_code;
+  std::unique_ptr<phi::funcs::MatrixBitCodeFunctor<T>> bit_code;
   if (!is_custom) {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         num_classes_st, label.template data<int64_t>()));
   } else {
-    bit_code.reset(new math::MatrixBitCodeFunctor<T>(
+    bit_code.reset(new phi::funcs::MatrixBitCodeFunctor<T>(
         *(path.get_ptr()), *(code.get_ptr()), label.template data<int64_t>()));
   }

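Note: `FindLastSet`, declared in matrix_bit_code.h, returns the 1-based
position of the most significant set bit, so `FindLastSet(num_classes - 1)`
in the hunk above is the depth of the implicit complete binary tree that the
default code table walks. A standalone sketch of the semantics (a plain loop
stands in for the intrinsic-based implementation; illustrative only):

    #include <cstdint>
    #include <cstdio>

    // 1-based index of the highest set bit; FindLastSet(9) == 4.
    int FindLastSet(uint64_t x) {
      int pos = 0;
      while (x) {
        ++pos;
        x >>= 1;
      }
      return pos;
    }

    int main() {
      // With num_classes = 10, each default code is FindLastSet(10 - 1) = 4
      // bits long, so pre_out is resized to [batch_size, 4].
      std::printf("%d\n", FindLastSet(10 - 1));
    }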
diff --git a/paddle/phi/kernels/funcs/CMakeLists.txt b/paddle/phi/kernels/funcs/CMakeLists.txt
index efef150b56..0d2cfa150f 100644
--- a/paddle/phi/kernels/funcs/CMakeLists.txt
+++ b/paddle/phi/kernels/funcs/CMakeLists.txt
@@ -20,6 +20,8 @@ math_library(cross_entropy)
 math_library(im2col)
 math_library(vol2col)
 math_library(softmax DEPS math_function)
+math_library(maxouting)
+math_library(matrix_bit_code)

 cc_library(
   phi_data_layout_transform

diff --git a/paddle/fluid/operators/math/matrix_bit_code.cc b/paddle/phi/kernels/funcs/matrix_bit_code.cc
similarity index 97%
rename from paddle/fluid/operators/math/matrix_bit_code.cc
rename to paddle/phi/kernels/funcs/matrix_bit_code.cc
index aa2779c350..2fecb1c526 100644
--- a/paddle/fluid/operators/math/matrix_bit_code.cc
+++ b/paddle/phi/kernels/funcs/matrix_bit_code.cc
@@ -12,11 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/math/matrix_bit_code.h"
+#include "paddle/phi/kernels/funcs/matrix_bit_code.h"

-namespace paddle {
-namespace operators {
-namespace math {
+#include <map>
+#include <unordered_map>
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/kernels/funcs/blas/blas.h"
+
+namespace phi {
+namespace funcs {

 template <typename T>
 struct MatrixBitCodeFunctorAdd {
@@ -354,6 +359,5 @@ void MatrixBitCodeFunctor<T>::Sub(phi::DenseTensor *tmat) {
 template class MatrixBitCodeFunctor<float>;
 template class MatrixBitCodeFunctor<double>;

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/phi/kernels/funcs/matrix_bit_code.h
similarity index 94%
rename from paddle/fluid/operators/math/matrix_bit_code.h
rename to paddle/phi/kernels/funcs/matrix_bit_code.h
index eb232940b8..8d3335791e 100644
--- a/paddle/fluid/operators/math/matrix_bit_code.h
+++ b/paddle/phi/kernels/funcs/matrix_bit_code.h
@@ -13,18 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include <map>
-#include <unordered_map>
 #include <utility>
 #include <vector>

-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/selected_rows_utils.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device_context.h"
-
-#include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/selected_rows.h"
+#include "paddle/utils/variant.h"

 #if defined(_WIN32)
 #include <intrin.h>
@@ -34,9 +28,8 @@
 #else
 #include <x86intrin.h>
 #endif  // _WIN32

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {
 /**
  * SimpleCodeTable class should support 3 functions:
@@ -273,6 +266,5 @@ class MatrixBitCodeFunctor {
   const int64_t* ids_;
   CodeTable code_table_;
 };
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

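Note: for orientation, a condensed standalone sketch of the SimpleCode scheme
this header declares. Class c maps to node c + num_classes of an implicit
complete binary tree; `calc_index` gives the internal node visited at each
bit and `calc_bit` the branch taken there. Names follow the header, the
loop-based `FindLastSet` stands in for the intrinsic-based one, and the whole
snippet is illustrative rather than a drop-in replacement:

    #include <cstdint>
    #include <cstdio>

    int FindLastSet(uint64_t x) {
      int pos = 0;
      while (x) { ++pos; x >>= 1; }
      return pos;
    }

    struct SimpleCode {
      SimpleCode(uint64_t cls, uint64_t num_classes) : c_(cls + num_classes) {}
      uint64_t calc_index(int bit) const { return (c_ >> (bit + 1)) - 1; }
      bool calc_bit(int bit) const { return (c_ >> bit) & 1; }
      int get_length() const { return FindLastSet(c_) - 1; }
      uint64_t c_;
    };

    int main() {
      SimpleCode code(/*cls=*/3, /*num_classes=*/10);  // node id 13 = 0b1101
      for (int b = 0; b < code.get_length(); ++b)
        std::printf("bit %d -> inner node %llu, branch %d\n", b,
                    static_cast<unsigned long long>(code.calc_index(b)),
                    static_cast<int>(code.calc_bit(b)));
      // bit 0 -> inner node 5, branch 1
      // bit 1 -> inner node 2, branch 0
      // bit 2 -> inner node 0, branch 1
    }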
diff --git a/paddle/fluid/operators/math/maxouting.cc b/paddle/phi/kernels/funcs/maxouting.cc
similarity index 93%
rename from paddle/fluid/operators/math/maxouting.cc
rename to paddle/phi/kernels/funcs/maxouting.cc
index 91ae7d472d..67a924128c 100644
--- a/paddle/fluid/operators/math/maxouting.cc
+++ b/paddle/phi/kernels/funcs/maxouting.cc
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/math/maxouting.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"

 #include "paddle/phi/backends/cpu/cpu_context.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {

 // All tensors are in NCHW or NHWC format, and the groups must be greater than 1
 template <typename DeviceContext, typename T>
@@ -35,7 +34,7 @@ void MaxOutFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   // c_size means the output size of each sample
   int c_size = fea_size * output_channels;
   const T* input_data = input.data<T>();
-  T* output_data = output->mutable_data<T>(context.GetPlace());
+  T* output_data = context.template Alloc<T>(output);
   for (int i = 0; i < batch_size; ++i) {
     int new_bindex = c_size * i;
     for (int c = 0; c < output_channels; ++c) {
@@ -80,8 +79,7 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
   const T* input_data = input.data<T>();
   const T* output_data = output.data<T>();
   const T* output_grad_data = output_grad.data<T>();
-  T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
-
+  T* input_grad_data = context.template Alloc<T>(input_grad);
   for (int i = 0; i < batch_size; ++i) {
     int blen = fea_size * output_channels * i;
     for (int c = 0; c < output_channels; ++c) {
@@ -114,6 +112,5 @@ template class MaxOutGradFunctor<phi::CPUContext, double>;
 template class MaxOutFunctor<phi::CPUContext, float>;
 template class MaxOutFunctor<phi::CPUContext, double>;

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

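Note: for readers new to this functor, a standalone sketch of the reduction
MaxOutFunctor computes. The indexing mirrors the NCHW branch (axis == 1),
where output channel c takes the element-wise max of input channels
c*groups .. c*groups + groups - 1; the values and sizes are made up for
illustration:

    #include <algorithm>
    #include <cfloat>
    #include <cstdio>
    #include <vector>

    int main() {
      // One sample, NCHW: 4 input channels, groups == 2, 3 spatial
      // positions -> 2 output channels.
      const int in_channels = 4, groups = 2, fea_size = 3;
      const int out_channels = in_channels / groups;
      std::vector<float> in = {1, 5, 2,    // channel 0
                               7, 0, 3,    // channel 1
                               4, 4, 4,    // channel 2
                               2, 9, 1};   // channel 3
      std::vector<float> out(out_channels * fea_size, -FLT_MAX);
      for (int c = 0; c < out_channels; ++c)
        for (int g = 0; g < groups; ++g)          // input channel c*groups + g
          for (int f = 0; f < fea_size; ++f)
            out[c * fea_size + f] = std::max(
                out[c * fea_size + f], in[(c * groups + g) * fea_size + f]);
      for (float v : out) std::printf("%g ", v);  // 7 5 3 4 9 4
      std::printf("\n");
    }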
diff --git a/paddle/fluid/operators/math/maxouting.cu b/paddle/phi/kernels/funcs/maxouting.cu
similarity index 95%
rename from paddle/fluid/operators/math/maxouting.cu
rename to paddle/phi/kernels/funcs/maxouting.cu
index 9f1d228639..89450dbd5c 100644
--- a/paddle/fluid/operators/math/maxouting.cu
+++ b/paddle/phi/kernels/funcs/maxouting.cu
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/math/maxouting.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {

 template <typename T>
 __global__ void KernelMaxOut(const int nthreads,
@@ -57,6 +56,7 @@ __global__ void KernelMaxOut(const int nthreads,
     output_data[i] = ele;
   }
 }
+
 template <typename T>
 __global__ void KernelMaxoutGrad(const int nthreads,
                                  const T* input_data,
@@ -102,6 +102,7 @@ __global__ void KernelMaxoutGrad(const int nthreads,
     }
   }
 }
+
 /*
  * All tensors are in NCHW or NHWC format.
  */
@@ -118,7 +119,7 @@ void MaxOutFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   const int output_channels = output->dims()[axis];

   const T* input_data = input.data<T>();
-  T* output_data = output->mutable_data<T>(context.GetPlace());
+  T* output_data = context.template Alloc<T>(output);
   int nthreads = output->numel();
   int blocks = (nthreads + 1024 - 1) / 1024;
   dim3 threads(1024, 1);
@@ -155,7 +156,7 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
   const T* input_data = input.data<T>();
   const T* output_data = output.data<T>();
   const T* output_grad_data = output_grad.data<T>();
-  T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
+  T* input_grad_data = context.template Alloc<T>(input_grad);
   int nthreads = output.numel();
   int blocks = (nthreads + 1024 - 1) / 1024;
   dim3 threads(1024, 1);
@@ -179,6 +180,5 @@ template class MaxOutGradFunctor<phi::GPUContext, double>;
 template class MaxOutFunctor<phi::GPUContext, float>;
 template class MaxOutFunctor<phi::GPUContext, double>;

-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

diff --git a/paddle/fluid/operators/math/maxouting.h b/paddle/phi/kernels/funcs/maxouting.h
similarity index 81%
rename from paddle/fluid/operators/math/maxouting.h
rename to paddle/phi/kernels/funcs/maxouting.h
index f42bbdb0e3..c6242318a3 100644
--- a/paddle/fluid/operators/math/maxouting.h
+++ b/paddle/phi/kernels/funcs/maxouting.h
@@ -13,14 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/macros.h"
-#include "paddle/phi/core/hostdevice.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/macros.h"

-namespace paddle {
-namespace operators {
-namespace math {
+namespace phi {
+namespace funcs {

 template <typename DeviceContext, typename T>
 class MaxOutFunctor {
@@ -43,6 +40,5 @@ class MaxOutGradFunctor {
                   const int groups,
                   const int axis = 1);
 };
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi

diff --git a/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h b/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
index 4d551b3d82..f5e3085463 100644
--- a/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
@@ -14,8 +14,8 @@

 #pragma once

-#include "paddle/fluid/operators/math/maxouting.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"
 #include "paddle/phi/kernels/maxout_grad_kernel.h"

 namespace phi {
@@ -36,7 +36,7 @@ void MaxOutGradKernel(const Context& dev_ctx,
   if (x_grad) {
     dev_ctx.template Alloc<T>(x_grad);
     zero(dev_ctx, x_grad, static_cast<T>(0.0));
-    paddle::operators::math::MaxOutGradFunctor<Context, T> maxout_backward;
+    phi::funcs::MaxOutGradFunctor<Context, T> maxout_backward;
     maxout_backward(dev_ctx, x, x_grad, out, out_grad, groups, axis);
   }
 }

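Note: the backward functor routes each output gradient to an input element in
the group whose value equals the output. The sketch below assumes
first-match-wins tie handling, which is how the CPU loop breaks out of the
group scan; it reuses the toy data from the forward sketch above
(illustrative only):

    #include <cstdio>
    #include <vector>

    int main() {
      // One output channel of the toy forward pass: input channels {1,5,2}
      // and {7,0,3} max to output {7,5,3}.
      const int groups = 2, fea_size = 3;
      std::vector<float> in = {1, 5, 2, 7, 0, 3};
      std::vector<float> out = {7, 5, 3};
      std::vector<float> out_grad = {0.1f, 0.2f, 0.3f};
      std::vector<float> in_grad(in.size(), 0.f);
      for (int f = 0; f < fea_size; ++f) {
        for (int g = 0; g < groups; ++g) {
          if (in[g * fea_size + f] == out[f]) {  // first match wins
            in_grad[g * fea_size + f] += out_grad[f];
            break;
          }
        }
      }
      for (float v : in_grad) std::printf("%g ", v);  // 0 0.2 0 0.1 0 0.3
      std::printf("\n");
    }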
diff --git a/paddle/phi/kernels/impl/maxout_kernel_impl.h b/paddle/phi/kernels/impl/maxout_kernel_impl.h
index 529534d11c..2b1d044cda 100644
--- a/paddle/phi/kernels/impl/maxout_kernel_impl.h
+++ b/paddle/phi/kernels/impl/maxout_kernel_impl.h
@@ -14,7 +14,7 @@

 #pragma once

-#include "paddle/fluid/operators/math/maxouting.h"
+#include "paddle/phi/kernels/funcs/maxouting.h"
 #include "paddle/phi/kernels/maxout_kernel.h"

 namespace phi {
@@ -29,7 +29,7 @@ void MaxOutKernel(const Context& dev_ctx,
     axis += x.dims().size();
   }

-  paddle::operators::math::MaxOutFunctor<Context, T> maxout_forward;
+  phi::funcs::MaxOutFunctor<Context, T> maxout_forward;
   maxout_forward(dev_ctx, x, out, groups, axis);
 }

--
GitLab