From 5b5656d05d2e95649a4148a81fb198ef49f2421a Mon Sep 17 00:00:00 2001 From: Feiyu Chan Date: Wed, 16 Feb 2022 10:52:25 +0800 Subject: [PATCH] [Pten] move complex_functors.h (#39558) * move complex_functors.h and update all references to symbols within it --- paddle/fluid/operators/CMakeLists.txt | 4 + paddle/fluid/operators/angle_op.h | 87 +------- paddle/fluid/operators/cholesky_solve_op.h | 10 +- paddle/fluid/operators/complex_op.h | 2 +- paddle/fluid/operators/complex_view_op.h | 2 +- paddle/fluid/operators/cumprod_op.cu | 6 +- paddle/fluid/operators/cumprod_op.h | 6 +- paddle/fluid/operators/determinant_op.h | 4 +- paddle/fluid/operators/dot_op.h | 2 +- paddle/fluid/operators/eig_op.h | 42 ++-- paddle/fluid/operators/eigh_op.h | 2 +- paddle/fluid/operators/eigvals_op.h | 24 +- paddle/fluid/operators/imag_op.h | 13 +- paddle/fluid/operators/lstsq_op.h | 6 +- paddle/fluid/operators/lu_op.h | 5 +- .../operators/math/eigen_values_vectors.h | 12 +- paddle/fluid/operators/math/inclusive_scan.h | 4 +- paddle/fluid/operators/matmul_v2_op.h | 2 +- paddle/fluid/operators/matrix_rank_op.cu | 6 +- paddle/fluid/operators/qr_op.cu | 17 +- paddle/fluid/operators/qr_op.h | 23 +- paddle/fluid/operators/real_op.h | 13 +- paddle/fluid/operators/renorm_op.h | 2 +- paddle/fluid/operators/spectral_op.cu | 18 +- paddle/fluid/operators/svd_helper.h | 19 +- paddle/fluid/operators/svd_op.h | 14 +- paddle/fluid/operators/triangular_solve_op.h | 6 +- paddle/pten/kernels/cpu/abs_grad_kernel.cc | 2 +- paddle/pten/kernels/cpu/abs_kernel.cc | 10 +- .../kernels/funcs}/complex_functors.h | 206 +++++++++++++----- paddle/pten/kernels/gpu/abs_kernel.cu | 17 +- .../pten/kernels/impl/abs_grad_kernel_impl.h | 9 +- .../pten/kernels/impl/complex_kernel_impl.h | 4 +- .../pten/kernels/impl/dot_grad_kernel_impl.h | 26 +-- paddle/pten/kernels/impl/matmul_kernel_impl.h | 2 +- 35 files changed, 318 insertions(+), 309 deletions(-) rename paddle/{fluid/operators/math => pten/kernels/funcs}/complex_functors.h 
(57%) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index b87cdf6f6df..a279c76430f 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -1,5 +1,9 @@ include(operators) +# solve "math constants not defined" problems caused by the order of inclusion +# of <cmath> and the definition of macro _USE_MATH_DEFINES +add_definitions(-D_USE_MATH_DEFINES) + # clean cache and pybind_file content first when rebuild unset(GLOB_OP_LIB CACHE) unset(OP_LIBRARY CACHE) diff --git a/paddle/fluid/operators/angle_op.h b/paddle/fluid/operators/angle_op.h index 093a04f03df..1e0dc803d76 100644 --- a/paddle/fluid/operators/angle_op.h +++ b/paddle/fluid/operators/angle_op.h @@ -17,7 +17,7 @@ #define _USE_MATH_DEFINES #endif #include -#include "paddle/fluid/operators/math/complex_functors.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -26,81 +26,6 @@ namespace paddle { namespace operators { -namespace math { -template -struct AngleFunctor; - -// angel function for complex -template -struct AngleFunctor>> { - AngleFunctor(const T* input, Real* output, int64_t numel) - : input_(input), output_(output), numel_(numel) {} - - HOSTDEVICE void operator()(int64_t idx) const { - output_[idx] = arg(input_[idx]); - } - - const T* input_; - Real* output_; - int64_t numel_; -}; - -// angel function for real -template -struct AngleFunctor>> { - AngleFunctor(const T* input, T* output, int64_t numel) - : input_(input), output_(output), numel_(numel) {} - - HOSTDEVICE void operator()(int64_t idx) const { - output_[idx] = input_[idx] < static_cast(0) ? 
M_PI : 0; - } - - const T* input_; - T* output_; - int64_t numel_; -}; - -template -struct AngleGradFunctor; - -// angle grad for complex -template -struct AngleGradFunctor>> { - AngleGradFunctor(const math::Real* dout, const T* x, T* dx, int64_t numel) - : dout_(dout), x_(x), dx_(dx), numel_(numel) {} - - HOSTDEVICE void operator()(int64_t idx) const { - if (x_[idx] == T(0)) { - dx_[idx] = T(0); - } else { - const math::Real r_square = - x_[idx].real * x_[idx].real + x_[idx].imag * x_[idx].imag; - dx_[idx] = T(-dout_[idx] * x_[idx].imag / r_square, - dout_[idx] * x_[idx].real / r_square); - } - } - - const math::Real* dout_; - const T* x_; - T* dx_; - int64_t numel_; -}; - -// angle grad for real -template -struct AngleGradFunctor>> { - AngleGradFunctor(const math::Real* dout, const T* x, T* dx, int64_t numel) - : dout_(dout), x_(x), dx_(dx), numel_(numel) {} - - HOSTDEVICE void operator()(int64_t idx) const { dx_[idx] = 0; } - - const math::Real* dout_; - const T* x_; - T* dx_; - int64_t numel_; -}; -} // namespace math - using Tensor = framework::Tensor; template class AngleKernel : public framework::OpKernel { @@ -111,12 +36,12 @@ class AngleKernel : public framework::OpKernel { auto numel = x->numel(); auto* x_data = x->data(); - auto* out_data = out->mutable_data>( - context.GetPlace(), size_t(x->numel() * sizeof(math::Real))); + auto* out_data = out->mutable_data>( + context.GetPlace(), size_t(x->numel() * sizeof(pten::funcs::Real))); auto& dev_ctx = context.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::AngleFunctor functor(x_data, out_data, numel); + pten::funcs::AngleFunctor functor(x_data, out_data, numel); for_range(functor); } }; @@ -132,14 +57,14 @@ class AngleGradKernel : public framework::OpKernel { ctx.Output(framework::GradVarName("X")); auto numel = d_out->numel(); - auto* dout_data = d_out->data>(); + auto* dout_data = d_out->data>(); auto* x_data = x->data(); auto* dx_data = d_x->mutable_data( ctx.GetPlace(), 
static_cast(numel * sizeof(T))); auto& dev_ctx = ctx.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::AngleGradFunctor functor(dout_data, x_data, dx_data, numel); + pten::funcs::AngleGradFunctor functor(dout_data, x_data, dx_data, numel); for_range(functor); } }; diff --git a/paddle/fluid/operators/cholesky_solve_op.h b/paddle/fluid/operators/cholesky_solve_op.h index 5004aad7c59..2c92969225f 100644 --- a/paddle/fluid/operators/cholesky_solve_op.h +++ b/paddle/fluid/operators/cholesky_solve_op.h @@ -64,7 +64,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx, // calculate u's conjugate for complex framework::Tensor u_conj(u_bst.type()); platform::ForRange u_for_range(dev_ctx, u_bst.numel()); - math::ConjFunctor u_functor( + pten::funcs::ConjFunctor u_functor( u_bst.data(), u_bst.numel(), u_conj.mutable_data(u_bst.dims(), dev_ctx.GetPlace())); u_for_range(u_functor); @@ -73,7 +73,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx, // calculate b's conjugate for complex framework::Tensor b_conj(b_bst.type()); platform::ForRange b_for_range(dev_ctx, b_bst.numel()); - math::ConjFunctor b_functor( + pten::funcs::ConjFunctor b_functor( b_bst.data(), b_bst.numel(), b_conj.mutable_data(b_bst.dims(), dev_ctx.GetPlace())); b_for_range(b_functor); @@ -113,7 +113,7 @@ void cholesky_solve_fn(const paddle::framework::ExecutionContext &ctx, // calculate out's conjugate for complex platform::ForRange out_for_range(dev_ctx, out->numel()); - math::ConjFunctor out_functor( + pten::funcs::ConjFunctor out_functor( out->data(), out->numel(), out->mutable_data(out->dims(), dev_ctx.GetPlace())); out_for_range(out_functor); @@ -173,7 +173,7 @@ class CholeskySolveGradKernel : public framework::OpKernel { // calculate out's conjugate for complex framework::Tensor out_conj(out->type()); platform::ForRange out_for_range(dev_ctx, out->numel()); - math::ConjFunctor out_functor( + pten::funcs::ConjFunctor out_functor( 
out->data(), out->numel(), out_conj.mutable_data(out->dims(), dev_ctx.GetPlace())); out_for_range(out_functor); @@ -195,7 +195,7 @@ class CholeskySolveGradKernel : public framework::OpKernel { framework::Tensor commonterm_conj(commonterm.type()); platform::ForRange commonterm_for_range( dev_ctx, commonterm.numel()); - math::ConjFunctor commonterm_functor( + pten::funcs::ConjFunctor commonterm_functor( commonterm.data(), commonterm.numel(), commonterm_conj.mutable_data(commonterm.dims(), dev_ctx.GetPlace())); diff --git a/paddle/fluid/operators/complex_op.h b/paddle/fluid/operators/complex_op.h index 3dd5ea9f7e8..fb324277fb0 100644 --- a/paddle/fluid/operators/complex_op.h +++ b/paddle/fluid/operators/complex_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/complex.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/complex_view_op.h b/paddle/fluid/operators/complex_view_op.h index 9a8d89db402..98ba732e240 100644 --- a/paddle/fluid/operators/complex_view_op.h +++ b/paddle/fluid/operators/complex_view_op.h @@ -17,9 +17,9 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/cumprod_op.cu b/paddle/fluid/operators/cumprod_op.cu index 2b69db7d24a..3a63bd99ad5 100644 --- a/paddle/fluid/operators/cumprod_op.cu +++ b/paddle/fluid/operators/cumprod_op.cu @@ -14,9 +14,9 @@ #include #include 
"paddle/fluid/operators/cumprod_op.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/inclusive_scan.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -243,12 +243,12 @@ class CumprodGradOpCUDAKernel : public framework::OpKernel { platform::ForRange for_range_x(dev_ctx, numel); - math::ConjFunctor functor_x(x_data, numel, x_data_conj); + pten::funcs::ConjFunctor functor_x(x_data, numel, x_data_conj); for_range_x(functor_x); platform::ForRange for_range_y(dev_ctx, numel); - math::ConjFunctor functor_y(y_data, numel, y_data_conj); + pten::funcs::ConjFunctor functor_y(y_data, numel, y_data_conj); for_range_y(functor_y); x_data_deal = x_data_conj; y_data_deal = y_data_conj; diff --git a/paddle/fluid/operators/cumprod_op.h b/paddle/fluid/operators/cumprod_op.h index d8c3c1febdc..15c3d514331 100644 --- a/paddle/fluid/operators/cumprod_op.h +++ b/paddle/fluid/operators/cumprod_op.h @@ -18,8 +18,8 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -124,12 +124,12 @@ class CumprodGradOpCPUKernel : public framework::OpKernel { platform::ForRange for_range_x(dev_ctx, numel); - math::ConjFunctor functor_x(x_data, numel, x_data_conj); + pten::funcs::ConjFunctor functor_x(x_data, numel, x_data_conj); for_range_x(functor_x); platform::ForRange for_range_out(dev_ctx, numel); - math::ConjFunctor functor_out(out_data, numel, out_data_conj); + pten::funcs::ConjFunctor functor_out(out_data, numel, out_data_conj); for_range_out(functor_out); x_data_deal = x_data_conj; diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index 
90443e0928b..1da680fbd95 100644 --- a/paddle/fluid/operators/determinant_op.h +++ b/paddle/fluid/operators/determinant_op.h @@ -19,11 +19,11 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/matrix_inverse.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -395,7 +395,7 @@ class SlogDeterminantGradKernel : public framework::OpKernel { size_t(numel * sizeof(T))); platform::ForRange for_range(dev_ctx, numel); - math::ConjFunctor functor(inverse_A.data(), numel, conj_data); + pten::funcs::ConjFunctor functor(inverse_A.data(), numel, conj_data); for_range(functor); VLOG(3) << "inverse(A).conj() dims: " << conj_inverse_A.dims(); diff --git a/paddle/fluid/operators/dot_op.h b/paddle/fluid/operators/dot_op.h index c5d43ef0126..52fc26342a1 100644 --- a/paddle/fluid/operators/dot_op.h +++ b/paddle/fluid/operators/dot_op.h @@ -16,8 +16,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index b4b6e2ce2fc..f822802d305 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -17,12 +17,12 @@ #include #include #include -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/lapack_function.h" #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/svd_helper.h" #include 
"paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/kernels/funcs/math_function.h" #define EPSILON 1e-6 @@ -87,18 +87,19 @@ void LapackEig(Tensor* input, Tensor* values, Tensor* vectors, int info, int values_stride = values->dims()[values->dims().size() - 1]; Tensor rwork; - math::Real* rwork_data = nullptr; + pten::funcs::Real* rwork_data = nullptr; rwork.Resize(framework::make_ddim({lda * 2})); - rwork_data = rwork.mutable_data>(context.GetPlace()); + rwork_data = rwork.mutable_data>(context.GetPlace()); // call lapackEig once to compute the size of work; T computed_work_size; - math::lapackEig>( + math::lapackEig>( jobvl, jobvr, order, input_data, lda, values_data, lvector_data, ldvl, rvector_data, ldvr, &computed_work_size, lwork, rwork_data, &info); - lwork = std::max(1, static_cast(math::Real(computed_work_size))); + lwork = std::max( + 1, static_cast(pten::funcs::Real(computed_work_size))); Tensor work; work.Resize(framework::make_ddim({lwork})); T* work_data = work.mutable_data(context.GetPlace()); @@ -108,7 +109,7 @@ void LapackEig(Tensor* input, Tensor* values, Tensor* vectors, int info, T* current_values = &values_data[i * values_stride]; T* current_rvectors = &rvector_data[i * matrix_stride]; - math::lapackEig>( + math::lapackEig>( jobvl, jobvr, order, current_matrix, lda, current_values, lvector_data, ldvl, current_rvectors, ldvr, work_data, lwork, rwork_data, &info); PADDLE_ENFORCE_EQ( @@ -207,26 +208,27 @@ class EigKernel : public framework::OpKernel { origin_dim.push_back(last_item * 2); framework::DDim big_dim = framework::make_ddim(origin_dim); - real_values.mutable_data>(big_dim, context.GetPlace()); - real_vectors.mutable_data>(x->dims(), context.GetPlace()); + real_values.mutable_data>(big_dim, + context.GetPlace()); + real_vectors.mutable_data>(x->dims(), + context.GetPlace()); - ApplyEigKernel>(*x, &real_values, - &real_vectors, 
context); - auto dito = - math::DeviceIndependenceTensorOperations, - Tout>(context); + ApplyEigKernel>( + *x, &real_values, &real_vectors, context); + auto dito = math::DeviceIndependenceTensorOperations< + DeviceContext, pten::funcs::Real, Tout>(context); // 1. extract real part & imag part from real_values Tensor real_part = dito.Slice(real_values, {-1}, {0}, {order}); Tensor imag_part = dito.Slice(real_values, {-1}, {order}, {order * 2}); // 2. construct complex values - auto* real_part_data = real_part.data>(); - auto* imag_part_data = imag_part.data>(); + auto* real_part_data = real_part.data>(); + auto* imag_part_data = imag_part.data>(); int out_values_numel = out_values->numel(); platform::ForRange for_range( context.template device_context(), out_values_numel); - math::RealImagToComplexFunctor functor( + pten::funcs::RealImagToComplexFunctor functor( real_part_data, imag_part_data, out_values->mutable_data(context.GetPlace()), out_values_numel); for_range(functor); @@ -235,7 +237,7 @@ class EigKernel : public framework::OpKernel { Tensor real_vector_trans = dito.Transpose(real_vectors); Tensor out_vectors_trans; out_vectors_trans.mutable_data(x->dims(), context.GetPlace()); - ConstructComplexVectors, Tout>( + ConstructComplexVectors, Tout>( &out_vectors_trans, *out_values, real_vector_trans, context, batch_count, order); TransposeTwoAxis(out_vectors_trans, out_vectors, @@ -271,14 +273,14 @@ void ComputeBackwardForComplexInput( // turn diag_unsqueezed into complex auto numel = diag_unsqueezed.numel(); Tensor diag_unsqueezed_complex; - auto* data_diag_un = diag_unsqueezed.data>(); + auto* data_diag_un = diag_unsqueezed.data>(); auto* data_diag_un_com = diag_unsqueezed_complex.mutable_data( diag_unsqueezed.dims(), context.GetPlace(), static_cast(numel * sizeof(Tout))); auto& dev_ctx = context.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::RealToComplexFunctor functor(data_diag_un, data_diag_un_com, - numel); + 
pten::funcs::RealToComplexFunctor functor(data_diag_un, + data_diag_un_com, numel); for_range(functor); // real tensor multiply complex tensor in broadcast manner Tensor res1 = dito.RealMulComplex(V, diag_unsqueezed_complex); diff --git a/paddle/fluid/operators/eigh_op.h b/paddle/fluid/operators/eigh_op.h index ad9b0f59831..77afaf681da 100644 --- a/paddle/fluid/operators/eigh_op.h +++ b/paddle/fluid/operators/eigh_op.h @@ -40,7 +40,7 @@ template class EighGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - using ValueType = math::Real; + using ValueType = pten::funcs::Real; auto& x_grad = *ctx.Output(framework::GradVarName("X")); x_grad.mutable_data(ctx.GetPlace()); auto& output_w = *ctx.Input("Eigenvalues"); diff --git a/paddle/fluid/operators/eigvals_op.h b/paddle/fluid/operators/eigvals_op.h index d825833b024..a069ea164c9 100644 --- a/paddle/fluid/operators/eigvals_op.h +++ b/paddle/fluid/operators/eigvals_op.h @@ -20,9 +20,9 @@ #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/allocation/allocator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/lapack_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -48,7 +48,7 @@ struct PaddleComplex< template using PaddleCType = typename PaddleComplex::type; template -using Real = typename math::Real; +using Real = typename pten::funcs::Real; static void SpiltBatchSquareMatrix(const Tensor& input, std::vector* output) { @@ -118,7 +118,7 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, platform::ForRange for_range( ctx.template device_context(), n_dim); - math::RealImagToComplexFunctor> functor( + pten::funcs::RealImagToComplexFunctor> functor( w_data, w_data + n_dim, output->template data>(), n_dim); 
for_range(functor); } @@ -143,7 +143,7 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, required_work_mem, work_mem)); int64_t rwork_mem = rwork->memory_size(); - int64_t required_rwork_mem = (n_dim << 1) * sizeof(Real); + int64_t required_rwork_mem = (n_dim << 1) * sizeof(pten::funcs::Real); PADDLE_ENFORCE_GE( rwork_mem, required_rwork_mem, platform::errors::InvalidArgument( @@ -153,11 +153,11 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, required_rwork_mem, rwork_mem)); int info = 0; - math::lapackEig>( + math::lapackEig>( 'N', 'N', static_cast(n_dim), a.template data(), static_cast(n_dim), output->template data(), NULL, 1, NULL, 1, work->template data(), static_cast(work_mem / sizeof(T)), - rwork->template data>(), &info); + rwork->template data>(), &info); std::string name = "framework::platform::dynload::cgeev_"; if (framework::TransToProtoVarType(input.dtype()) == @@ -187,10 +187,10 @@ class EigvalsKernel : public framework::OpKernel { // query workspace size T qwork; int info; - math::lapackEig>('N', 'N', static_cast(n_dim), - input_matrices[0].template data(), - static_cast(n_dim), NULL, NULL, 1, NULL, 1, - &qwork, -1, static_cast*>(NULL), &info); + math::lapackEig>( + 'N', 'N', static_cast(n_dim), input_matrices[0].template data(), + static_cast(n_dim), NULL, NULL, 1, NULL, 1, &qwork, -1, + static_cast*>(NULL), &info); int64_t lwork = static_cast(qwork); Tensor work, rwork; @@ -207,8 +207,8 @@ class EigvalsKernel : public framework::OpKernel { } if (framework::IsComplexType( framework::TransToProtoVarType(input->dtype()))) { - rwork.mutable_data>(framework::make_ddim({n_dim << 1}), - ctx.GetPlace()); + rwork.mutable_data>( + framework::make_ddim({n_dim << 1}), ctx.GetPlace()); } for (int64_t i = 0; i < n_batch; ++i) { diff --git a/paddle/fluid/operators/imag_op.h b/paddle/fluid/operators/imag_op.h index 562a8dffa90..02682cfc954 100644 --- a/paddle/fluid/operators/imag_op.h +++ 
b/paddle/fluid/operators/imag_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -31,12 +31,13 @@ class ImagKernel : public framework::OpKernel { auto numel = x->numel(); auto* x_data = x->data(); - auto* out_data = out->mutable_data>( - ctx.GetPlace(), static_cast(numel * sizeof(math::Real))); + auto* out_data = out->mutable_data>( + ctx.GetPlace(), + static_cast(numel * sizeof(pten::funcs::Real))); auto& dev_ctx = ctx.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::ImagFunctor functor(x_data, out_data, numel); + pten::funcs::ImagFunctor functor(x_data, out_data, numel); for_range(functor); } }; @@ -51,13 +52,13 @@ class ImagGradKernel : public framework::OpKernel { ctx.Output(framework::GradVarName("X")); auto numel = d_out->numel(); - auto* dout_data = d_out->data>(); + auto* dout_data = d_out->data>(); auto* dx_data = d_x->mutable_data( ctx.GetPlace(), static_cast(numel * sizeof(T))); auto& dev_ctx = ctx.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::ImagToComplexFunctor functor(dout_data, dx_data, numel); + pten::funcs::ImagToComplexFunctor functor(dout_data, dx_data, numel); for_range(functor); } }; diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index 4819bd72518..f39d65d681f 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -18,7 +18,6 @@ #include #include #include "paddle/fluid/operators/eig_op.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/eigen_values_vectors.h" #include "paddle/fluid/operators/math/lapack_function.h" #include 
"paddle/fluid/operators/math/matrix_solve.h" @@ -26,6 +25,7 @@ #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/kernels/funcs/math_function.h" #define EPSILON 1e-6 @@ -46,7 +46,7 @@ template class LstsqCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - using ValueType = math::Real; + using ValueType = pten::funcs::Real; const Tensor& x = *context.Input("X"); auto y = context.Input("Y"); @@ -169,7 +169,7 @@ class LstsqCPUKernel : public framework::OpKernel { &rwkopt, &info); } - lwork = std::max(1, static_cast(math::Real(wkopt))); + lwork = std::max(1, static_cast(pten::funcs::Real(wkopt))); Tensor work; work.Resize(framework::make_ddim({lwork})); T* work_data = work.mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index 11174540cb0..0d05d766e67 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -211,8 +211,9 @@ void Tensor_Conj(const DeviceContext& dev_ctx, const framework::Tensor& tensor, framework::Tensor* out) { out->Resize(tensor.dims()); platform::ForRange out_for_range(dev_ctx, tensor.numel()); - math::ConjFunctor out_functor(tensor.data(), tensor.numel(), - out->mutable_data(dev_ctx.GetPlace())); + pten::funcs::ConjFunctor out_functor( + tensor.data(), tensor.numel(), + out->mutable_data(dev_ctx.GetPlace())); out_for_range(out_functor); } diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 9ce615c949f..b946d4d072b 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -63,7 +63,7 @@ struct MatrixEighFunctor { void operator()(const framework::ExecutionContext &ctx, const Tensor 
&input, Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower, bool has_vectors) { - using ValueType = math::Real; + using ValueType = pten::funcs::Real; auto *out_value = eigen_values->mutable_data(ctx.GetPlace()); auto dito = @@ -123,9 +123,9 @@ struct MatrixEighFunctor { for (auto i = 0; i < batch_size; i++) { auto *value_data = out_value + i * values_stride; auto *input_data = input_vector + i * vector_stride; - math::lapackEigh>(jobz, uplo, n, input_data, lda, value_data, - work_data, lwork, rwork_data, lrwork, - iwork_data, liwork, &info); + math::lapackEigh>( + jobz, uplo, n, input_data, lda, value_data, work_data, lwork, + rwork_data, lrwork, iwork_data, liwork, &info); CheckEighResult(i, info); } if (has_vectors) { @@ -151,7 +151,7 @@ struct MatrixEighFunctor { void operator()(const framework::ExecutionContext &ctx, const Tensor &input, Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower, bool has_vectors) { - using ValueType = math::Real; + using ValueType = pten::funcs::Real; auto *out_value = eigen_values->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); @@ -233,7 +233,7 @@ struct MatrixEighFunctor { } } - using ValueType = math::Real; + using ValueType = pten::funcs::Real; inline void EvdBuffer(cusolverDnHandle_t handle, cusolverEigMode_t jobz, cublasFillMode_t uplo, int n, const T *A, int lda, const ValueType *W, int *lwork) const; diff --git a/paddle/fluid/operators/math/inclusive_scan.h b/paddle/fluid/operators/math/inclusive_scan.h index 5fdc2889a88..1c750fcb832 100644 --- a/paddle/fluid/operators/math/inclusive_scan.h +++ b/paddle/fluid/operators/math/inclusive_scan.h @@ -26,9 +26,9 @@ namespace cub = hipcub; #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" 
namespace paddle { namespace operators { @@ -115,7 +115,7 @@ static __global__ void InclusiveScanInnerDimCUDAKernel(const T *x, T *y, size_t num_rows, size_t row_size, T init, BinaryOp op) { - using RealT = math::Real; + using RealT = pten::funcs::Real; constexpr auto kSharedBufferSize = framework::IsComplex::value ? 4 * kThreadNumX : 2 * kThreadNumX; __shared__ RealT sbuf[kThreadNumY][kSharedBufferSize]; diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index 0e1c6b82e41..6fac2d10383 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -22,8 +22,8 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/dot_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" diff --git a/paddle/fluid/operators/matrix_rank_op.cu b/paddle/fluid/operators/matrix_rank_op.cu index d974d7c1b78..2df794fb794 100644 --- a/paddle/fluid/operators/matrix_rank_op.cu +++ b/paddle/fluid/operators/matrix_rank_op.cu @@ -18,11 +18,11 @@ limitations under the License. 
*/ #include #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/matrix_rank_op.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/platform/dynload/cusolver.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { @@ -93,8 +93,8 @@ class MatrixRankGPUKernel : public framework::OpKernel { info_ptr); platform::ForRange for_range( dev_ctx, eigenvalue_tensor.numel()); - math::AbsFunctor functor(eigenvalue_data, eigenvalue_data, - eigenvalue_tensor.numel()); + pten::funcs::AbsFunctor functor(eigenvalue_data, eigenvalue_data, + eigenvalue_tensor.numel()); for_range(functor); } else { Tensor U, VH; diff --git a/paddle/fluid/operators/qr_op.cu b/paddle/fluid/operators/qr_op.cu index c8b6404830c..dfeec15d9b8 100644 --- a/paddle/fluid/operators/qr_op.cu +++ b/paddle/fluid/operators/qr_op.cu @@ -56,12 +56,13 @@ class QrGPUKernel : public framework::OpKernel { int tau_stride = min_mn; if (compute_q) { - q.mutable_data>( + q.mutable_data>( context.GetPlace(), - size_t(batch_size * m * k * sizeof(math::Real))); + size_t(batch_size * m * k * sizeof(pten::funcs::Real))); } - r.mutable_data>( - context.GetPlace(), size_t(batch_size * k * n * sizeof(math::Real))); + r.mutable_data>( + context.GetPlace(), + size_t(batch_size * k * n * sizeof(pten::funcs::Real))); auto dito = math::DeviceIndependenceTensorOperations { // Note: allocate temporary tensors because of lacking in-place operatios. 
// Prepare qr Tensor qr; - qr.mutable_data>( - context.GetPlace(), size_t(batch_size * m * n * sizeof(math::Real))); + qr.mutable_data>( + context.GetPlace(), + size_t(batch_size * m * n * sizeof(pten::funcs::Real))); // BatchedGeqrf performs computation in-place and 'qr' must be a copy of // input paddle::framework::TensorCopy(x, context.GetPlace(), &qr); @@ -124,7 +126,8 @@ class QrGPUKernel : public framework::OpKernel { for (int i = 0; i < batch_size; ++i) { memory::Copy(dev_ctx.GetPlace(), (new_qr_data + i * new_qr_stride), dev_ctx.GetPlace(), (qr_data + i * qr_stride), - qr_stride * sizeof(math::Real), dev_ctx.stream()); + qr_stride * sizeof(pten::funcs::Real), + dev_ctx.stream()); } BatchedOrgqr( dev_ctx, batch_size, m, m, min_mn, new_qr_data, m, tau_data, diff --git a/paddle/fluid/operators/qr_op.h b/paddle/fluid/operators/qr_op.h index c55619a4f76..b8308b29106 100644 --- a/paddle/fluid/operators/qr_op.h +++ b/paddle/fluid/operators/qr_op.h @@ -18,9 +18,9 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -74,17 +74,20 @@ class QrCPUKernel : public framework::OpKernel { int q_stride = m * k; int r_stride = k * n; - auto* x_data = x.data>(); + auto* x_data = x.data>(); T* q_data = nullptr; if (compute_q) { - q_data = q.mutable_data>( + q_data = q.mutable_data>( context.GetPlace(), - size_t(batch_size * m * k * sizeof(math::Real))); - memset(q_data, 0, size_t(batch_size * m * k * sizeof(math::Real))); + size_t(batch_size * m * k * sizeof(pten::funcs::Real))); + memset(q_data, 0, + size_t(batch_size * m * k * sizeof(pten::funcs::Real))); } - auto* r_data = r.mutable_data>( - context.GetPlace(), size_t(batch_size * k * n * sizeof(math::Real))); - memset(r_data, 
0, size_t(batch_size * k * n * sizeof(math::Real))); + auto* r_data = r.mutable_data>( + context.GetPlace(), + size_t(batch_size * k * n * sizeof(pten::funcs::Real))); + memset(r_data, 0, + size_t(batch_size * k * n * sizeof(pten::funcs::Real))); // Implement QR by calling Eigen for (int i = 0; i < batch_size; ++i) { @@ -140,7 +143,7 @@ class QrGradKernel : public framework::OpKernel { // Use a different name dA instead of dX framework::Tensor& dA = *ctx.Output(framework::GradVarName("X")); - dA.mutable_data>(ctx.GetPlace()); + dA.mutable_data>(ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); pten::funcs::SetConstant()(dev_ctx, &dA, T(0)); @@ -222,7 +225,7 @@ class QrGradKernel : public framework::OpKernel { } else { // If m < n for input matrices A, we partition A = [X|Y] and R = [U|V] // Calculate dX and dY individually and concatenate them to get dA - dA.mutable_data>(ctx.GetPlace()); + dA.mutable_data>(ctx.GetPlace()); auto Y = dito.Slice(A, {-1}, {m}, {n}); auto U = dito.Slice(R, {-1}, {0}, {m}); diff --git a/paddle/fluid/operators/real_op.h b/paddle/fluid/operators/real_op.h index 6cc9065269c..41549393f57 100644 --- a/paddle/fluid/operators/real_op.h +++ b/paddle/fluid/operators/real_op.h @@ -16,8 +16,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -31,12 +31,13 @@ class RealKernel : public framework::OpKernel { auto numel = x->numel(); auto* x_data = x->data(); - auto* out_data = out->mutable_data>( - ctx.GetPlace(), static_cast(numel * sizeof(math::Real))); + auto* out_data = out->mutable_data>( + ctx.GetPlace(), + static_cast(numel * sizeof(pten::funcs::Real))); auto& dev_ctx = ctx.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::RealFunctor functor(x_data, out_data, numel); + pten::funcs::RealFunctor functor(x_data, out_data, numel); for_range(functor); } }; @@ -51,13 +52,13 @@ class RealGradKernel : public framework::OpKernel { ctx.Output(framework::GradVarName("X")); auto numel = d_out->numel(); - auto* dout_data = d_out->data>(); + auto* dout_data = d_out->data>(); auto* dx_data = d_x->mutable_data( ctx.GetPlace(), static_cast(numel * sizeof(T))); auto& dev_ctx = ctx.template device_context(); platform::ForRange for_range(dev_ctx, numel); - math::RealToComplexFunctor functor(dout_data, dx_data, numel); + pten::funcs::RealToComplexFunctor functor(dout_data, dx_data, numel); for_range(functor); } }; diff --git a/paddle/fluid/operators/renorm_op.h b/paddle/fluid/operators/renorm_op.h index 461f383ad25..753ed9e27ac 100644 --- a/paddle/fluid/operators/renorm_op.h +++ b/paddle/fluid/operators/renorm_op.h @@ -17,8 +17,8 @@ #include "math.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; diff --git 
a/paddle/fluid/operators/spectral_op.cu b/paddle/fluid/operators/spectral_op.cu index c932834db39..77703637db5 100644 --- a/paddle/fluid/operators/spectral_op.cu +++ b/paddle/fluid/operators/spectral_op.cu @@ -20,11 +20,11 @@ #include #include "paddle/fluid/operators/conj_op.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/spectral_helper.h" #include "paddle/fluid/operators/spectral_op.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -115,8 +115,8 @@ void exec_cufft_plan(const DeviceContext& ctx, const FFTConfig& config, framework::Tensor input_conj(input->type()); input_conj.mutable_data(input->dims(), ctx.GetPlace()); platform::ForRange for_range(ctx, input->numel()); - math::ConjFunctor functor(input->data(), input->numel(), - input_conj.data()); + pten::funcs::ConjFunctor functor(input->data(), input->numel(), + input_conj.data()); for_range(functor); exec_cufft_plan_raw(config, input_conj.data(), output->data(), forward); } else if (fft_type == FFTTransformType::R2C && !forward) { @@ -126,8 +126,8 @@ void exec_cufft_plan(const DeviceContext& ctx, const FFTConfig& config, exec_cufft_plan_raw(config, input->data(), out_conj.data(), forward); platform::ForRange for_range(ctx, output->numel()); - math::ConjFunctor functor(out_conj.data(), output->numel(), - output->data()); + pten::funcs::ConjFunctor functor(out_conj.data(), output->numel(), + output->data()); for_range(functor); } else { exec_cufft_plan_raw(config, input->data(), output->data(), forward); @@ -227,8 +227,8 @@ void exec_hipfft_plan(const DeviceContext& ctx, const FFTConfig& config, framework::Tensor input_conj(input->type()); input_conj.mutable_data(input->dims(), ctx.GetPlace()); platform::ForRange for_range(ctx, input->numel()); - math::ConjFunctor functor(input->data(), input->numel(), - 
input_conj.data()); + pten::funcs::ConjFunctor functor(input->data(), input->numel(), + input_conj.data()); for_range(functor); exec_hipfft_plan_raw(config, input_conj.data(), output->data(), forward); } else if (fft_type == FFTTransformType::R2C && !forward) { @@ -238,8 +238,8 @@ void exec_hipfft_plan(const DeviceContext& ctx, const FFTConfig& config, exec_hipfft_plan_raw(config, input->data(), out_conj.data(), forward); platform::ForRange for_range(ctx, output->numel()); - math::ConjFunctor functor(out_conj.data(), output->numel(), - output->data()); + pten::funcs::ConjFunctor functor(out_conj.data(), output->numel(), + output->data()); for_range(functor); } else { exec_hipfft_plan_raw(config, input->data(), output->data(), forward); diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index 3a57a7b3e54..4384e7152fa 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -25,9 +25,9 @@ #include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { @@ -105,7 +105,8 @@ struct RealMulComplexFunctor { "The image part of y must to be 0" "but got [%d]", y.imag)); - return platform::complex>(x.real * y.real, x.imag * y.real); + return platform::complex>(x.real * y.real, + x.imag * y.real); } }; @@ -390,11 +391,11 @@ struct DeviceIndependenceTensorOperations { // batch_diag for CPU only Tensor BatchDiag(const Tensor& x, int batch) { Tensor out; - auto* x_data = x.data>(); + auto* x_data = x.data>(); auto numel = x.numel(); - auto* out_data = out.mutable_data>( + auto* out_data = out.mutable_data>( x.dims(), 
context.GetPlace(), - static_cast(numel * sizeof(math::Real))); + static_cast(numel * sizeof(pten::funcs::Real))); auto x_dims = x.dims(); int num_dims = x_dims.size(); @@ -654,7 +655,7 @@ struct DeviceIndependenceTensorOperations { auto* out_data = out.mutable_data(x.dims(), context.GetPlace()); auto* x_data = x.data(); auto for_range = GetForRange(x.numel()); - math::ConjFunctor functor(x_data, x.numel(), out_data); + pten::funcs::ConjFunctor functor(x_data, x.numel(), out_data); for_range(functor); return out; } @@ -662,12 +663,12 @@ struct DeviceIndependenceTensorOperations { Tensor Real(const Tensor& x) { Tensor out; auto numel = x.numel(); - auto* out_data = out.mutable_data>( + auto* out_data = out.mutable_data>( x.dims(), context.GetPlace(), - static_cast(numel * sizeof(math::Real))); + static_cast(numel * sizeof(pten::funcs::Real))); auto* x_data = x.data(); auto for_range = GetForRange(numel); - math::RealFunctor functor(x_data, out_data, numel); + pten::funcs::RealFunctor functor(x_data, out_data, numel); for_range(functor); return out; } diff --git a/paddle/fluid/operators/svd_op.h b/paddle/fluid/operators/svd_op.h index f387dca7b7f..4042fcccf33 100644 --- a/paddle/fluid/operators/svd_op.h +++ b/paddle/fluid/operators/svd_op.h @@ -17,9 +17,9 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -46,14 +46,14 @@ class SvdCPUKernel : public framework::OpKernel { int col_u = full ? rows : k; int col_v = full ? 
cols : k; int batches = numel / (rows * cols); - auto* U_out = U->mutable_data>( + auto* U_out = U->mutable_data>( context.GetPlace(), - size_t(batches * rows * col_u * sizeof(math::Real))); - auto* VH_out = VH->mutable_data>( + size_t(batches * rows * col_u * sizeof(pten::funcs::Real))); + auto* VH_out = VH->mutable_data>( context.GetPlace(), - size_t(batches * col_v * cols * sizeof(math::Real))); - auto* S_out = S->mutable_data>( - context.GetPlace(), size_t(batches * k * sizeof(math::Real))); + size_t(batches * col_v * cols * sizeof(pten::funcs::Real))); + auto* S_out = S->mutable_data>( + context.GetPlace(), size_t(batches * k * sizeof(pten::funcs::Real))); /*SVD Use the Eigen Library*/ math::BatchSvd(x_data, U_out, VH_out, S_out, rows, cols, batches, full); } diff --git a/paddle/fluid/operators/triangular_solve_op.h b/paddle/fluid/operators/triangular_solve_op.h index f64b016366e..e892d258f3b 100644 --- a/paddle/fluid/operators/triangular_solve_op.h +++ b/paddle/fluid/operators/triangular_solve_op.h @@ -19,10 +19,10 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.h" #include "paddle/fluid/operators/solve_op.h" #include "paddle/fluid/operators/tril_triu_op.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { @@ -152,7 +152,7 @@ class TriangularSolveGradKernel : public framework::OpKernel { // calculate x's conjugate for complex Tensor x_conj(x->type()); platform::ForRange x_for_range(dev_ctx, x->numel()); - math::ConjFunctor x_functor( + pten::funcs::ConjFunctor x_functor( x->data(), x->numel(), x_conj.mutable_data(x->dims(), dev_ctx.GetPlace())); x_for_range(x_functor); @@ -179,7 +179,7 @@ class TriangularSolveGradKernel : public framework::OpKernel { // calculate out's conjugate for complex Tensor out_conj(out->type()); platform::ForRange out_for_range(dev_ctx, out->numel()); - math::ConjFunctor out_functor( + pten::funcs::ConjFunctor out_functor( out->data(), out->numel(), out_conj.mutable_data(out->dims(), dev_ctx.GetPlace())); out_for_range(out_functor); diff --git a/paddle/pten/kernels/cpu/abs_grad_kernel.cc b/paddle/pten/kernels/cpu/abs_grad_kernel.cc index a3f3aabd16c..9d6675aa7b3 100644 --- a/paddle/pten/kernels/cpu/abs_grad_kernel.cc +++ b/paddle/pten/kernels/cpu/abs_grad_kernel.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/pten/common/complex.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/kernels/impl/abs_grad_kernel_impl.h" using pten::dtype::complex; diff --git a/paddle/pten/kernels/cpu/abs_kernel.cc b/paddle/pten/kernels/cpu/abs_kernel.cc index 49094f5c64e..ee766a18d42 100644 --- a/paddle/pten/kernels/cpu/abs_kernel.cc +++ b/paddle/pten/kernels/cpu/abs_kernel.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "paddle/pten/kernels/abs_kernel.h" -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/common/complex.h" #include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace pten { @@ -25,12 +25,12 @@ template void AbsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { auto numel = x.numel(); auto* x_data = x.data(); - ctx.template Alloc>( - out, size_t(x.numel() * sizeof(paddle::operators::math::Real))); - auto* out_data = out->data>(); + ctx.template Alloc>( + out, size_t(x.numel() * sizeof(pten::funcs::Real))); + auto* out_data = out->data>(); paddle::platform::ForRange for_range(ctx, numel); - paddle::operators::math::AbsFunctor functor(x_data, out_data, numel); + pten::funcs::AbsFunctor functor(x_data, out_data, numel); for_range(functor); } diff --git a/paddle/fluid/operators/math/complex_functors.h b/paddle/pten/kernels/funcs/complex_functors.h similarity index 57% rename from paddle/fluid/operators/math/complex_functors.h rename to paddle/pten/kernels/funcs/complex_functors.h index 48f16b87cbd..b0eee3ac1fd 100644 --- a/paddle/fluid/operators/math/complex_functors.h +++ b/paddle/pten/kernels/funcs/complex_functors.h @@ -13,15 +13,17 @@ See the License for the specific language governing permissions and limitations 
under the License. */ #pragma once - +#ifndef _USE_MATH_DEFINES +#define _USE_MATH_DEFINES +#endif +#include #include -#include "paddle/fluid/platform/complex.h" +#include "paddle/pten/common/complex.h" #include "paddle/pten/core/hostdevice.h" -namespace paddle { -namespace operators { -namespace math { +namespace pten { +namespace funcs { template struct cond { @@ -64,8 +66,8 @@ using select_t = typename select::type; template using Real = - select_t>::value, float>, - cond>::value, double>, + select_t>::value, float>, + cond>::value, double>, T>; template @@ -77,13 +79,13 @@ using NoComplex = typename std::enable_if::value>::type; template using EnableComplex = typename std::enable_if< - std::is_same>::value || - std::is_same>::value>::type; + std::is_same>::value || + std::is_same>::value>::type; template using DisableComplex = typename std::enable_if< - !std::is_same>::value && - !std::is_same>::value>::type; + !std::is_same>::value && + !std::is_same>::value>::type; template struct RealFunctor; @@ -154,8 +156,7 @@ struct AbsFunctor>> { template struct AbsGradFunctor { - AbsGradFunctor(const math::Real* dout, const T* x, T* output, - int64_t numel) + AbsGradFunctor(const Real* dout, const T* x, T* output, int64_t numel) : dout_(dout), x_(x), output_(output), numel_(numel) {} HOSTDEVICE void operator()(int64_t idx) const { @@ -166,52 +167,55 @@ struct AbsGradFunctor { } } - const math::Real* dout_; + const Real* dout_; const T* x_; T* output_; int64_t numel_; }; template <> -struct AbsGradFunctor> { - AbsGradFunctor(const float* dout, const paddle::platform::complex* x, - paddle::platform::complex* output, int64_t numel) +struct AbsGradFunctor> { + AbsGradFunctor(const float* dout, + const pten::dtype::complex* x, + pten::dtype::complex* output, + int64_t numel) : dout_(dout), x_(x), output_(output), numel_(numel) {} HOSTDEVICE void operator()(int64_t idx) const { - if (x_[idx] == paddle::platform::complex(0)) { - output_[idx] = paddle::platform::complex(0); + 
if (x_[idx] == pten::dtype::complex(0)) { + output_[idx] = pten::dtype::complex(0); } else { - output_[idx] = paddle::platform::complex(dout_[idx]) * - (x_[idx] / paddle::platform::complex(abs(x_[idx]))); + output_[idx] = pten::dtype::complex(dout_[idx]) * + (x_[idx] / pten::dtype::complex(abs(x_[idx]))); } } const float* dout_; - const paddle::platform::complex* x_; - paddle::platform::complex* output_; + const pten::dtype::complex* x_; + pten::dtype::complex* output_; int64_t numel_; }; template <> -struct AbsGradFunctor> { - AbsGradFunctor(const double* dout, const paddle::platform::complex* x, - paddle::platform::complex* output, int64_t numel) +struct AbsGradFunctor> { + AbsGradFunctor(const double* dout, + const pten::dtype::complex* x, + pten::dtype::complex* output, + int64_t numel) : dout_(dout), x_(x), output_(output), numel_(numel) {} HOSTDEVICE void operator()(int64_t idx) const { - if (x_[idx] == paddle::platform::complex(0)) { - output_[idx] = paddle::platform::complex(0); + if (x_[idx] == pten::dtype::complex(0)) { + output_[idx] = pten::dtype::complex(0); } else { - output_[idx] = - paddle::platform::complex(dout_[idx]) * - (x_[idx] / paddle::platform::complex(abs(x_[idx]))); + output_[idx] = pten::dtype::complex(dout_[idx]) * + (x_[idx] / pten::dtype::complex(abs(x_[idx]))); } } const double* dout_; - const paddle::platform::complex* x_; - paddle::platform::complex* output_; + const pten::dtype::complex* x_; + pten::dtype::complex* output_; int64_t numel_; }; @@ -235,46 +239,48 @@ struct AbsGradGradFunctor { }; template <> -struct AbsGradGradFunctor> { - AbsGradGradFunctor(const paddle::platform::complex* ddx, - const paddle::platform::complex* x, - paddle::platform::complex* output, int64_t numel) +struct AbsGradGradFunctor> { + AbsGradGradFunctor(const pten::dtype::complex* ddx, + const pten::dtype::complex* x, + pten::dtype::complex* output, + int64_t numel) : ddx_(ddx), x_(x), output_(output), numel_(numel) {} HOSTDEVICE void operator()(int64_t 
idx) const { - if (x_[idx] == paddle::platform::complex(0)) { - output_[idx] = paddle::platform::complex(0); + if (x_[idx] == pten::dtype::complex(0)) { + output_[idx] = pten::dtype::complex(0); } else { - output_[idx] = paddle::platform::complex(ddx_[idx]) * x_[idx] / - paddle::platform::complex(abs(x_[idx])); + output_[idx] = pten::dtype::complex(ddx_[idx]) * x_[idx] / + pten::dtype::complex(abs(x_[idx])); } } - const paddle::platform::complex* ddx_; - const paddle::platform::complex* x_; - paddle::platform::complex* output_; + const pten::dtype::complex* ddx_; + const pten::dtype::complex* x_; + pten::dtype::complex* output_; int64_t numel_; }; template <> -struct AbsGradGradFunctor> { - AbsGradGradFunctor(const paddle::platform::complex* ddx, - const paddle::platform::complex* x, - paddle::platform::complex* output, int64_t numel) +struct AbsGradGradFunctor> { + AbsGradGradFunctor(const pten::dtype::complex* ddx, + const pten::dtype::complex* x, + pten::dtype::complex* output, + int64_t numel) : ddx_(ddx), x_(x), output_(output), numel_(numel) {} HOSTDEVICE void operator()(int64_t idx) const { - if (x_[idx] == paddle::platform::complex(0)) { - output_[idx] = paddle::platform::complex(0); + if (x_[idx] == pten::dtype::complex(0)) { + output_[idx] = pten::dtype::complex(0); } else { - output_[idx] = paddle::platform::complex(ddx_[idx]) * x_[idx] / - paddle::platform::complex(abs(x_[idx])); + output_[idx] = pten::dtype::complex(ddx_[idx]) * x_[idx] / + pten::dtype::complex(abs(x_[idx])); } } - const paddle::platform::complex* ddx_; - const paddle::platform::complex* x_; - paddle::platform::complex* output_; + const pten::dtype::complex* ddx_; + const pten::dtype::complex* x_; + pten::dtype::complex* output_; int64_t numel_; }; template @@ -318,8 +324,10 @@ struct RealImagToComplexFunctor; template struct RealImagToComplexFunctor>> { - RealImagToComplexFunctor(const Real* input_real, const Real* input_imag, - T* output, int64_t numel) + 
RealImagToComplexFunctor(const Real* input_real, + const Real* input_imag, + T* output, + int64_t numel) : input_real_(input_real), input_imag_(input_imag), output_(output), @@ -363,6 +371,84 @@ struct ConjFunctor> { T* output_; }; -} // namespace math -} // namespace operators -} // namespace paddle +template +struct AngleFunctor; + +// angel function for complex +template +struct AngleFunctor>> { + AngleFunctor(const T* input, pten::funcs::Real* output, int64_t numel) + : input_(input), output_(output), numel_(numel) {} + + HOSTDEVICE void operator()(int64_t idx) const { + output_[idx] = arg(input_[idx]); + } + + const T* input_; + pten::funcs::Real* output_; + int64_t numel_; +}; + +// angel function for real +template +struct AngleFunctor>> { + AngleFunctor(const T* input, T* output, int64_t numel) + : input_(input), output_(output), numel_(numel) {} + + HOSTDEVICE void operator()(int64_t idx) const { + output_[idx] = input_[idx] < static_cast(0) ? M_PI : 0; + } + + const T* input_; + T* output_; + int64_t numel_; +}; + +template +struct AngleGradFunctor; + +// angle grad for complex +template +struct AngleGradFunctor>> { + AngleGradFunctor(const pten::funcs::Real* dout, + const T* x, + T* dx, + int64_t numel) + : dout_(dout), x_(x), dx_(dx), numel_(numel) {} + + HOSTDEVICE void operator()(int64_t idx) const { + if (x_[idx] == T(0)) { + dx_[idx] = T(0); + } else { + const pten::funcs::Real r_square = + x_[idx].real * x_[idx].real + x_[idx].imag * x_[idx].imag; + dx_[idx] = T(-dout_[idx] * x_[idx].imag / r_square, + dout_[idx] * x_[idx].real / r_square); + } + } + + const pten::funcs::Real* dout_; + const T* x_; + T* dx_; + int64_t numel_; +}; + +// angle grad for real +template +struct AngleGradFunctor>> { + AngleGradFunctor(const pten::funcs::Real* dout, + const T* x, + T* dx, + int64_t numel) + : dout_(dout), x_(x), dx_(dx), numel_(numel) {} + + HOSTDEVICE void operator()(int64_t idx) const { dx_[idx] = 0; } + + const pten::funcs::Real* dout_; + const T* x_; 
+ T* dx_; + int64_t numel_; +}; + +} // namespace funcs +} // namespace pten diff --git a/paddle/pten/kernels/gpu/abs_kernel.cu b/paddle/pten/kernels/gpu/abs_kernel.cu index d97aa791053..06eff050674 100644 --- a/paddle/pten/kernels/gpu/abs_kernel.cu +++ b/paddle/pten/kernels/gpu/abs_kernel.cu @@ -14,11 +14,11 @@ #include #include -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/kernels/abs_kernel.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/kernels/funcs/elementwise_base.h" namespace pten { @@ -27,19 +27,14 @@ template struct CudaAbsFunctor; template -struct CudaAbsFunctor< - T, - paddle::operators::math::Complex>> { - __device__ __forceinline__ paddle::operators::math::Real operator()( - const T x) const { +struct CudaAbsFunctor>> { + __device__ __forceinline__ pten::funcs::Real operator()(const T x) const { return abs(x); } }; template -struct CudaAbsFunctor< - T, - paddle::operators::math::NoComplex>> { +struct CudaAbsFunctor>> { __device__ __forceinline__ T operator()(const T x) const { return std::abs(x); } @@ -47,12 +42,12 @@ struct CudaAbsFunctor< template void AbsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { - ctx.template Alloc>(out); + ctx.template Alloc>(out); std::vector ins = {&x}; std::vector outs = {out}; auto functor = CudaAbsFunctor(); - funcs::LaunchSameDimsElementwiseCudaKernel>( + funcs::LaunchSameDimsElementwiseCudaKernel>( ctx, ins, &outs, functor); } diff --git a/paddle/pten/kernels/impl/abs_grad_kernel_impl.h b/paddle/pten/kernels/impl/abs_grad_kernel_impl.h index c702a0042dc..ff829e10b2d 100644 --- a/paddle/pten/kernels/impl/abs_grad_kernel_impl.h +++ b/paddle/pten/kernels/impl/abs_grad_kernel_impl.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/fluid/operators/math/complex_functors.h" #include 
"paddle/fluid/platform/for_range.h" #include "paddle/pten/kernels/abs_grad_kernel.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace pten { @@ -26,15 +26,14 @@ void AbsGradKernel(const Context& ctx, const DenseTensor& dout, DenseTensor* dx) { auto numel = dout.numel(); - auto* dout_data = dout.data>(); + auto* dout_data = dout.data>(); auto* x_data = x.data(); ctx.template Alloc(dx, static_cast(numel * sizeof(T))); auto* dx_data = dx->data(); paddle::platform::ForRange for_range(ctx, numel); - paddle::operators::math::AbsGradFunctor functor( - dout_data, x_data, dx_data, numel); + pten::funcs::AbsGradFunctor functor(dout_data, x_data, dx_data, numel); for_range(functor); } @@ -50,7 +49,7 @@ void AbsDoubleGradKernel(const Context& ctx, auto* ddout_data = ddout->data(); paddle::platform::ForRange for_range(ctx, numel); - paddle::operators::math::AbsGradGradFunctor functor( + pten::funcs::AbsGradGradFunctor functor( ddx_data, x_data, ddout_data, numel); for_range(functor); } diff --git a/paddle/pten/kernels/impl/complex_kernel_impl.h b/paddle/pten/kernels/impl/complex_kernel_impl.h index 7e4c4f0d66d..17cfb886e57 100644 --- a/paddle/pten/kernels/impl/complex_kernel_impl.h +++ b/paddle/pten/kernels/impl/complex_kernel_impl.h @@ -15,8 +15,8 @@ #pragma once // See Note [ Why still include the fluid headers? 
] -#include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace pten { @@ -29,7 +29,7 @@ void ConjKernel(const Context& dev_ctx, auto* out_data = dev_ctx.template Alloc(out); paddle::platform::ForRange for_range(dev_ctx, numel); - paddle::operators::math::ConjFunctor functor(x_data, numel, out_data); + pten::funcs::ConjFunctor functor(x_data, numel, out_data); for_range(functor); } diff --git a/paddle/pten/kernels/impl/dot_grad_kernel_impl.h b/paddle/pten/kernels/impl/dot_grad_kernel_impl.h index d4ea9fc9445..4ed47bd69dd 100644 --- a/paddle/pten/kernels/impl/dot_grad_kernel_impl.h +++ b/paddle/pten/kernels/impl/dot_grad_kernel_impl.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/pten/kernels/complex_kernel.h" #include "paddle/fluid/operators/eigen/eigen_function.h" -#include "paddle/fluid/operators/math/complex_functors.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" namespace pten { @@ -35,9 +35,7 @@ struct DotGradFunction { }; template -struct DotGradFunction> { +struct DotGradFunction> { void operator()(const DeviceContext& ctx, const DenseTensor* tensor_x, const DenseTensor* tensor_y, @@ -133,9 +131,7 @@ struct DotGradFunction -struct DotGradFunction> { +struct DotGradFunction> { void operator()(const DeviceContext& ctx, const DenseTensor* tensor_x, const DenseTensor* tensor_y, @@ -221,9 +217,7 @@ struct DotDoubleGradFunction { }; template -struct DotDoubleGradFunction> { +struct DotDoubleGradFunction> { void operator()(const DeviceContext& ctx, const DenseTensor* tensor_x, const DenseTensor* tensor_y, @@ -334,9 +328,7 @@ struct DotDoubleGradFunction -struct DotDoubleGradFunction> { +struct DotDoubleGradFunction> { void operator()(const DeviceContext& ctx, const DenseTensor* tensor_x, const DenseTensor* tensor_y, @@ -461,9 +453,7 @@ struct DotTripleGradFunction { // TODO(wuweilong): enable this function when the 
unittests framewark for multi // grad is ok (dtype: complex64 or complex128). template -struct DotTripleGradFunction> { +struct DotTripleGradFunction> { void operator()(const DeviceContext& ctx, const DenseTensor* in_tensor_x, const DenseTensor* in_tensor_y, @@ -656,9 +646,7 @@ struct DotTripleGradFunction -struct DotTripleGradFunction> { +struct DotTripleGradFunction> { void operator()(const DeviceContext& ctx, const DenseTensor* in_tensor_x, const DenseTensor* in_tensor_y, diff --git a/paddle/pten/kernels/impl/matmul_kernel_impl.h b/paddle/pten/kernels/impl/matmul_kernel_impl.h index 858807a1d4d..addea622f14 100644 --- a/paddle/pten/kernels/impl/matmul_kernel_impl.h +++ b/paddle/pten/kernels/impl/matmul_kernel_impl.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/complex_functors.h" +#include "paddle/pten/kernels/funcs/complex_functors.h" #include "paddle/pten/core/dense_tensor.h" -- GitLab