diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu index 375063813ede8addc095c7d8a32d429740446e94..243c945d3c410059085c44bffd57897cdf30116d 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu @@ -17,6 +17,10 @@ limitations under the License. */ #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/float16.h" +// only include headers from the paddle/pten/api and paddle/pten/include dirs +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/include/core.h" +#include "paddle/pten/include/math.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -28,15 +32,39 @@ class ElementwiseMulKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - framework::Tensor x_for_selectedrows; - std::vector ins; - std::vector outs; + auto x_var = ctx.InputVar("X"); + PADDLE_ENFORCE_EQ(x_var != nullptr, true, + platform::errors::InvalidArgument( + "Cannot get input Variable X, Variable name = %s.", + ctx.InputName("X"))); const auto& cuda_ctx = ctx.template device_context(); - - int axis = PackTensorsIntoVector(ctx, &ins, &outs, &x_for_selectedrows); - LaunchElementwiseCudaKernel( - cuda_ctx, ins, &outs, axis, MulFunctor()); + if (x_var->IsType()) { + framework::Tensor x_for_selectedrows; + std::vector ins; + std::vector outs; + int axis = + PackTensorsIntoVector(ctx, &ins, &outs, &x_for_selectedrows); + LaunchElementwiseCudaKernel( + cuda_ctx, ins, &outs, axis, MulFunctor()); + } else if (x_var->IsType()) { + auto* x_lod = ctx.Input("X"); + auto* y_lod = ctx.Input("Y"); + auto* z_lod = ctx.Output("Out"); + z_lod->mutable_data(ctx.GetPlace()); + + int axis = ctx.Attr("axis"); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod); + auto pt_y = paddle::experimental::MakePtenDenseTensor(*y_lod); + auto pt_z = 
paddle::experimental::MakePtenDenseTensor(*z_lod); + pten::ElementwiseMul(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "X's type[%s] is not supported by elementwise_op. X's type should be " + "LoDTensor or SelectedRows.", + framework::ToTypeName(x_var->Type()))); + } } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 211bf6e3fb539dc486a264b1774f6d320c6970d5..ba279e90a345d14d4a8aa8e09523201e2f57ae60 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -15,11 +15,16 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/platform/cpu_info.h" +// only include headers from the paddle/pten/api and paddle/pten/include dirs +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/include/core.h" +#include "paddle/pten/include/math.h" namespace paddle { namespace operators { @@ -106,24 +111,32 @@ class ElementwiseMulKernel : public framework::OpKernel { out_sele->mutable_value()->Resize(x_sele.value().dims()); out_sele->mutable_value()->mutable_data(ctx.GetPlace(), x.type()); z = ctx.Output("Out")->mutable_value(); + z->mutable_data(ctx.GetPlace()); + auto dims_equal = x.dims() == y->dims(); + if (dims_equal) { + SameDimsElemwiseMul same_dims_mul; + same_dims_mul(ctx, &x, y, z); + } else { + default_elementwise_mul(ctx, &x, y, z); + } } else if (x_var->IsType()) { - x = x_var->Get(); - z = ctx.Output("Out"); + auto* x_lod = ctx.Input("X"); + auto* z_lod = ctx.Output("Out"); + z_lod->mutable_data(ctx.GetPlace()); + + auto& dev_ctx = ctx.device_context(); + int axis = ctx.Attr("axis"); 
+ auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod); + auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); + auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod); + pten::ElementwiseMul(dev_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } else { PADDLE_THROW(platform::errors::InvalidArgument( "X's type[%s] is not supported by elementwise_op. X's type should be " "LoDTensor or SelectedRows.", framework::ToTypeName(x_var->Type()))); } - - z->mutable_data(ctx.GetPlace()); - auto dims_equal = x.dims() == y->dims(); - if (dims_equal) { - SameDimsElemwiseMul same_dims_mul; - same_dims_mul(ctx, &x, y, z); - } else { - default_elementwise_mul(ctx, &x, y, z); - } } }; template diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index be4c25ef4c56c3579784e728b02701899bf462a5..a52262f66de0dd7bc0720467d8d977e34ba2dd6f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -160,6 +160,12 @@ class ElementwiseOp : public framework::OperatorWithKernel { {"axis"}, {"Out"}); } } + if (Type() == "elementwise_mul") { + if (ctx.InputVar("X")->IsType()) { + return framework::KernelSignature("elementwise_mul", {"X", "Y"}, + {"axis"}, {"Out"}); + } + } return framework::KernelSignature("None", {"X"}, {}, {"Out"}); } }; diff --git a/paddle/pten/api/include/math.h b/paddle/pten/api/include/math.h index cdc9db55d952d34de77449066dfe24be06d3302a..ac37bc69ed1090ac2abc0fec69a21ef8f75b7727 100644 --- a/paddle/pten/api/include/math.h +++ b/paddle/pten/api/include/math.h @@ -28,5 +28,8 @@ PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y); PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y); PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y); + +PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y); + } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/math.cc 
b/paddle/pten/api/lib/math.cc index d85d5e66d03d9a0d3988750253e019e3ffc1a42f..56a2cddae2a06cbd07a50c354f1430c1343729f2 100644 --- a/paddle/pten/api/lib/math.cc +++ b/paddle/pten/api/lib/math.cc @@ -172,6 +172,41 @@ PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y) { return out; } + +PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y) { + // 1. Get kernel signature and kernel + auto kernel_key_set = ParseKernelKeyByInputArgs(x); + auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); + auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( + "elementwise_mul", kernel_key); + + // 2. Get Device Context + auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); + auto kernel_context = pten::KernelContext(dev_ctx); + + // 3. Auto data transform + auto dense_x = std::dynamic_pointer_cast(x.impl()); + kernel_context.EmplaceBackInput(dense_x); + auto dense_y = std::dynamic_pointer_cast(y.impl()); + kernel_context.EmplaceBackInput(dense_y); + kernel_context.EmplaceBackAttr(-1); + + // 4. InferShape + auto out_meta = ElementwiseInferShape(dense_x->meta(), dense_y->meta(), -1); + + // 5. Prepare outputs + Tensor out; + const auto allocator = std::make_shared( + pten::TransToFluidPlace(kernel_key.backend())); + auto dense_out = std::make_shared(allocator, out_meta); + kernel_context.EmplaceBackOutput(dense_out); + out.set_impl(dense_out); + + // 6. 
Call kernel + kernel(&kernel_context); + + return out; +} } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index d4be9574783fc3ccf9b1b594b916c11f6b160394..044946408242bbad07a2ce7a1ae63f6ca7dde539 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -234,9 +234,14 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, const pten::TensorArgDef& arg_def, pten::DenseTensor* dst) { auto expected_place = pten::TransToFluidPlace(arg_def.backend); - if (variable.IsType()) { const auto& tensor = variable.Get(); + // check input dtype before ReMakePtenDenseTensor + PADDLE_ENFORCE( + (arg_def.dtype == pten::TransToPtenDataType(tensor.type())), + paddle::platform::errors::InvalidArgument( + "The type of input data is diffrent from the type of the " + "argument's definition in kernel.")); if (!platform::is_same_place(tensor.place(), expected_place)) { framework::LoDTensor tmp_tensor; framework::TensorCopySync(tensor, expected_place, &tmp_tensor); @@ -248,6 +253,11 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, // TODO(chenweihang): now we don't deal with row and height // by xiaowei's advice const auto& tensor = variable.Get(); + PADDLE_ENFORCE( + (arg_def.dtype == pten::TransToPtenDataType(tensor.value().type())), + paddle::platform::errors::InvalidArgument( + "The type of input data is diffrent from the type of the " + "argument's definition in kernel.")); if (!platform::is_same_place(tensor.value().place(), expected_place)) { framework::Tensor tmp_tensor; TensorCopySync(tensor.value(), expected_place, &tmp_tensor); diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h index ec0bde161293d538dfe2b2aedd92f0e8290b3c43..cc4c4f33f74c929fdde0e17fe3c718219961f104 100644 --- a/paddle/pten/include/math.h +++ b/paddle/pten/include/math.h @@ -115,4 +115,18 @@ DenseTensor 
Divide(const ContextT& dev_ctx, ElementwiseDiv(dev_ctx, x, y, axis, &dense_out); return dense_out; } + +template +DenseTensor Multiply(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferShape(x.meta(), y.meta(), axis); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + ElementwiseMul(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} } // namespace pten diff --git a/paddle/pten/kernels/cpu/math.cc b/paddle/pten/kernels/cpu/math.cc index 68378170c45763842c94cb2337872cd992dff31c..0d6880ab373177422b63499fd2efb26b9bb12ce1 100644 --- a/paddle/pten/kernels/cpu/math.cc +++ b/paddle/pten/kernels/cpu/math.cc @@ -64,56 +64,6 @@ void ScaleHost(const CPUContext& dev_ctx, out); } -template -void ElementwiseAdd(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - out->mutable_data(); - - if (x.dims() == y.dims()) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::AddFunctor(), out); - } else { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::InverseAddFunctor(), out); - } - } -} - -template -void ElementwiseSub(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - out->mutable_data(); - - if (x.dims() == y.dims()) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::SubFunctor(), out); - } else { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::InverseSubFunctor(), out); - } - } -} - template void ElementwiseDiv(const 
CPUContext& dev_ctx, const DenseTensor& x, @@ -138,6 +88,15 @@ void ElementwiseDiv(const CPUContext& dev_ctx, } } +// Create the definition of ElementwiseAdd +DEFINE_CPU_ELEMENTWISE_OP(Add) + +// Create the definition of ElementwiseSub +DEFINE_CPU_ELEMENTWISE_OP(Sub) + +// Create the definition of ElementwiseMul +DEFINE_CPU_ELEMENTWISE_OP(Mul) + } // namespace pten // TODO(chenweihang): replace by better impl @@ -208,3 +167,14 @@ PT_REGISTER_KERNEL("elementwise_div", int64_t, complex64, complex128) {} +PT_REGISTER_KERNEL("elementwise_mul", + CPU, + ANY, + pten::ElementwiseMul, + float, + double, + int, + int64_t, + bool, + complex64, + complex128) {} diff --git a/paddle/pten/kernels/cpu/math.h b/paddle/pten/kernels/cpu/math.h index 7495b838ff4e52444fb6fbee007622e65e3e17db..c265403fde6733af253e2051f794a0bf9b48a87f 100644 --- a/paddle/pten/kernels/cpu/math.h +++ b/paddle/pten/kernels/cpu/math.h @@ -66,4 +66,36 @@ void ElementwiseDiv(const CPUContext& dev_ctx, const DenseTensor& y, int axis, DenseTensor* out); + +template +void ElementwiseMul(const CPUContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); } // namespace pten + +#define DEFINE_CPU_ELEMENTWISE_OP(name) \ + template \ + void Elementwise##name(const CPUContext& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + out->mutable_data(); \ + if (x.dims() == y.dims()) { \ + SameDimsElementwiseCompute< \ + general::SameDims##name##Functor>()( \ + dev_ctx, x, y, out); \ + } else { \ + auto x_dims = x.dims(); \ + auto y_dims = y.dims(); \ + if (x_dims.size() >= y_dims.size()) { \ + ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, general::name##Functor(), out); \ + } else { \ + ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, general::Inverse##name##Functor(), out); \ + } \ + } \ + } diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu index 
ca84e92c4c7e6ff680bcbc58e319c2330fd87e92..49b7f3c731c2d3212067eeede1ae8fe23dd49f4a 100644 --- a/paddle/pten/kernels/cuda/math.cu +++ b/paddle/pten/kernels/cuda/math.cu @@ -124,56 +124,14 @@ void ScaleHost(const CUDAContext& dev_ctx, out); } -template -void ElementwiseAdd(const CUDAContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - std::vector inputs; - std::vector outputs; - inputs.emplace_back(&x); - inputs.emplace_back(&y); - // allocate memory for out - out->mutable_data(); - outputs.emplace_back(out); - LaunchElementwiseCudaKernel( - dev_ctx, inputs, &outputs, axis, general::AddFunctor()); -} - -template -void ElementwiseSub(const CUDAContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - std::vector inputs; - std::vector outputs; - inputs.emplace_back(&x); - inputs.emplace_back(&y); - // allocate memory for out - out->mutable_data(); - outputs.emplace_back(out); - LaunchElementwiseCudaKernel( - dev_ctx, inputs, &outputs, axis, general::SubFunctor()); -} - -template -void ElementwiseDiv(const CUDAContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - std::vector inputs; - std::vector outputs; - inputs.emplace_back(&x); - inputs.emplace_back(&y); - // allocate memory for out - out->mutable_data(); - outputs.emplace_back(out); - LaunchElementwiseCudaKernel( - dev_ctx, inputs, &outputs, axis, general::DivFunctor()); -} +// Create the definition of ElementwiseAdd +DEFINE_CUDA_ELEMENTWISE_OP(Add) +// Create the definition of ElementwiseSub +DEFINE_CUDA_ELEMENTWISE_OP(Sub) +// Create the definition of ElementwiseMul +DEFINE_CUDA_ELEMENTWISE_OP(Mul) +// Create the definition of ElementwiseDiv +DEFINE_CUDA_ELEMENTWISE_OP(Div) } // namespace pten @@ -245,3 +203,15 @@ PT_REGISTER_KERNEL("elementwise_div", float16, complex64, complex128) {} +PT_REGISTER_KERNEL("elementwise_mul", + CUDA, + ANY, + pten::ElementwiseMul, + float, + 
double, + int, + int64_t, + bool, + float16, + complex64, + complex128) {} diff --git a/paddle/pten/kernels/cuda/math.h b/paddle/pten/kernels/cuda/math.h index 9f70edac968805057476e2e8d0f4cab8d5763cc2..f90a58d8ae3f8a5b085376cb307df5aae782a7a4 100644 --- a/paddle/pten/kernels/cuda/math.h +++ b/paddle/pten/kernels/cuda/math.h @@ -69,6 +69,29 @@ void ElementwiseDiv(const CUDAContext& dev_ctx, int axis, DenseTensor* out); +template +void ElementwiseMul(const CUDAContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); } // namespace pten +#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ + template \ + void Elementwise##name(const CUDAContext& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + std::vector inputs; \ + std::vector outputs; \ + inputs.emplace_back(&x); \ + inputs.emplace_back(&y); \ + outputs.emplace_back(out); \ + out->mutable_data(); \ + LaunchElementwiseCudaKernel( \ + dev_ctx, inputs, &outputs, axis, general::name##Functor()); \ + } + #endif diff --git a/paddle/pten/kernels/functions/blas/elementwise.h b/paddle/pten/kernels/functions/blas/elementwise.h index 34946dcbf8e015fec39cb3d382131aa846b86d8f..1a530c9f8e940dc8d48ef79a821ab6534a261d7c 100644 --- a/paddle/pten/kernels/functions/blas/elementwise.h +++ b/paddle/pten/kernels/functions/blas/elementwise.h @@ -47,5 +47,13 @@ void ElementwiseDiv(const DevCtx& dev_ctx, blas.VDIV(x.numel(), x.data(), y.data(), out->mutable_data()); } +template +void ElementwiseMul(const DevCtx& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + auto blas = paddle::operators::math::GetBlas(dev_ctx); + blas.VMUL(x.numel(), x.data(), y.data(), out->mutable_data()); +} } // namespace blas } // namespace pten diff --git a/paddle/pten/kernels/functions/eigen/elementwise.h b/paddle/pten/kernels/functions/eigen/elementwise.h index dd42234118c99397e97ede90c5e1534f1fd0a29e..369ff36c46e7f6c5f66d9edda23721943e97cad5 
100644 --- a/paddle/pten/kernels/functions/eigen/elementwise.h +++ b/paddle/pten/kernels/functions/eigen/elementwise.h @@ -45,5 +45,17 @@ void ElementwiseSub(const DevCtx& dev_ctx, eigen_z.device(place) = eigen_x - eigen_y; } +template +void ElementwiseMul(const DevCtx& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + auto eigen_x = pten::EigenVector::Flatten(x); + auto eigen_y = pten::EigenVector::Flatten(y); + auto eigen_z = pten::EigenVector::Flatten(*out); + auto& place = *dev_ctx.eigen_device(); + eigen_z.device(place) = eigen_x * eigen_y; +} + } // namespace eigen } // namespace pten diff --git a/paddle/pten/kernels/functions/general/elementwise_functor.h b/paddle/pten/kernels/functions/general/elementwise_functor.h index f0d4305ea6cd95e5f02261fbd5e9f07db5042790..973389f14797e97d976e095538e028e4a65df08a 100644 --- a/paddle/pten/kernels/functions/general/elementwise_functor.h +++ b/paddle/pten/kernels/functions/general/elementwise_functor.h @@ -174,5 +174,48 @@ struct InverseDivFunctor { inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; } }; +// Multiply +template +struct SameDimsMulFunctor { + void operator()(const DevCtx& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* z); +}; + +template +struct SameDimsMulFunctor< + DevCtx, + T, + typename std::enable_if::value>::type> { + void operator()(const DevCtx& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* z) { + blas::ElementwiseMul(dev_ctx, x, y, z); + } +}; + +template +struct SameDimsMulFunctor< + DevCtx, + T, + typename std::enable_if::value>::type> { + void operator()(const DevCtx& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* z) { + eigen::ElementwiseMul(dev_ctx, x, y, z); + } +}; +template +struct MulFunctor { + inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } +}; +template +struct InverseMulFunctor { + inline HOSTDEVICE T operator()(const T& a, 
const T& b) const { return b * a; } +}; + } // namespace general } // namespace pten diff --git a/paddle/pten/tests/api/test_elementwise_api.cc b/paddle/pten/tests/api/test_elementwise_api.cc index 795a0176c2902311936e1d730454e0e08ffaf0dd..44033f1c611c44fd4bcdc225d5142f567a929080 100644 --- a/paddle/pten/tests/api/test_elementwise_api.cc +++ b/paddle/pten/tests/api/test_elementwise_api.cc @@ -164,6 +164,7 @@ TEST(API, divide) { for (size_t i = 0; i < 10; ++i) { dense_y_data[i] = i * 2.0 + 1; } + paddle::experimental::Tensor x(dense_x); paddle::experimental::Tensor y(dense_y); @@ -189,5 +190,57 @@ TEST(API, divide) { ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f); } +TEST(API, multiply) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + auto dense_x = std::make_shared( + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x->mutable_data(); + + auto dense_y = std::make_shared( + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({10}), + pten::DataLayout::NCHW)); + auto* dense_y_data = dense_y->mutable_data(); + + float mul[3][10] = {0.0}; + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 10; ++j) { + dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0; + mul[i][j] = (i * 10 + j) * 1.0 * j * 2.0; + } + } + for (size_t i = 0; i < 10; ++i) { + dense_y_data[i] = i * 2.0; + } + paddle::experimental::Tensor x(dense_x); + paddle::experimental::Tensor y(dense_y); + + // 2. test API + auto out = paddle::experimental::multiply(x, y); + + // 3. 
check result + ASSERT_EQ(out.shape().size(), 2UL); + ASSERT_EQ(out.shape()[0], 3); + ASSERT_EQ(out.numel(), 30); + ASSERT_EQ(out.is_cpu(), true); + ASSERT_EQ(out.type(), pten::DataType::FLOAT32); + ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); + ASSERT_EQ(out.initialized(), true); + + auto expect_result = mul; + auto dense_out = std::dynamic_pointer_cast(out.impl()); + auto actual_result0 = dense_out->data()[0]; + auto actual_result1 = dense_out->data()[1]; + auto actual_result2 = dense_out->data()[10]; + ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f); + ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f); + ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f); +} } // namespace tests } // namespace paddle diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index b22a37f1bb20f99b96db93f85b5c20a1179d5d5a..f12a2d48e6b2b876fe0c8bba7de6461aa41313bd 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -68,8 +68,8 @@ TEST(DEV_API, add) { // 3. check result ASSERT_EQ(dense_out.dims().size(), 2); ASSERT_EQ(dense_out.dims()[0], 3); - ASSERT_EQ(dense_out.meta().dtype, pten::DataType::FLOAT32); - ASSERT_EQ(dense_out.meta().layout, pten::DataLayout::NCHW); + ASSERT_EQ(dense_out.dtype(), pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.layout(), pten::DataLayout::NCHW); auto expect_result = sum; auto actual_result0 = dense_out.data()[0]; @@ -174,8 +174,8 @@ TEST(DEV_API, divide) { // 3. 
check result ASSERT_EQ(dense_out.dims().size(), 2); ASSERT_EQ(dense_out.dims()[0], 3); - ASSERT_EQ(dense_out.meta().dtype, pten::DataType::FLOAT32); - ASSERT_EQ(dense_out.meta().layout, pten::DataLayout::NCHW); + ASSERT_EQ(dense_out.dtype(), pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.layout(), pten::DataLayout::NCHW); auto expect_result = div; auto actual_result0 = dense_out.data()[0]; @@ -186,5 +186,57 @@ TEST(DEV_API, divide) { ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f); } +TEST(DEV_API, multiply) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x.mutable_data(); + + pten::DenseTensor dense_y(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({10}), + pten::DataLayout::NCHW)); + auto* dense_y_data = dense_y.mutable_data(); + + float mul[3][10] = {0.0}; + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 10; ++j) { + dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0; + mul[i][j] = (i * 10 + j) * 1.0 * j * 2.0; + } + } + for (size_t i = 0; i < 10; ++i) { + dense_y_data[i] = i * 2.0; + } + int axis = 1; + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + + // 2. test API + auto dense_out = pten::Multiply( + *(static_cast(dev_ctx)), + dense_x, + dense_y, + axis); + + // 3. 
check result + ASSERT_EQ(dense_out.dims().size(), 2); + ASSERT_EQ(dense_out.dims()[0], 3); + ASSERT_EQ(dense_out.dtype(), pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.layout(), pten::DataLayout::NCHW); + + auto expect_result = mul; + auto actual_result0 = dense_out.data()[0]; + auto actual_result1 = dense_out.data()[1]; + auto actual_result2 = dense_out.data()[10]; + ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f); + ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f); + ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f); +} } // namespace tests } // namespace pten