From 93701dba50e2555c7bd9cb69efe38debd5441cb7 Mon Sep 17 00:00:00 2001
From: dengkaipeng
Date: Wed, 20 Mar 2019 03:27:35 +0000
Subject: [PATCH] add jit kernel for softmax axis. test=develop

---
 paddle/fluid/operators/jit/benchmark.cc          |  2 +-
 paddle/fluid/operators/jit/helper.cc             |  2 +
 paddle/fluid/operators/jit/kernel_base.h         | 24 ++++++-
 paddle/fluid/operators/jit/more/mix/mix.cc       | 18 +++--
 paddle/fluid/operators/jit/more/mix/mix.h        |  2 +-
 .../operators/jit/more/mkl/CMakeLists.txt        |  1 +
 paddle/fluid/operators/jit/more/mkl/mkl.cc       | 35 ++++++++++
 paddle/fluid/operators/jit/more/mkl/mkl.h        | 23 +++++--
 .../fluid/operators/jit/refer/CMakeLists.txt     |  2 +
 paddle/fluid/operators/jit/refer/refer.cc        |  2 +
 paddle/fluid/operators/jit/refer/refer.h         | 36 ++++++++--
 paddle/fluid/operators/jit/test.cc               | 67 ++++++++++---------
 paddle/fluid/operators/math/softmax_impl.h       |  7 +-
 paddle/fluid/operators/softmax_op.cc             | 15 ++++-
 paddle/fluid/operators/softmax_op.h              |  5 --
 .../fluid/tests/unittests/test_softmax_op.py     | 22 +-----
 16 files changed, 185 insertions(+), 78 deletions(-)

diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc
index fbb04a166e..9ff1fe478d 100644
--- a/paddle/fluid/operators/jit/benchmark.cc
+++ b/paddle/fluid/operators/jit/benchmark.cc
@@ -386,7 +386,7 @@ void BenchKernelSoftmax() {
     RandomVec<T>(bs * n, x.mutable_data<T>(PlaceType()), -2.f, 2.f);
     const T* x_data = x.data<T>();
     T* y_data = y.mutable_data<T>(PlaceType());
-    BenchAllImpls<KernelTuple, PlaceType>(n, x_data, y_data, n, bs);
+    BenchAllImpls<KernelTuple, PlaceType>(n, x_data, y_data, n, bs, 1);
   }
 }
diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc
index eb1c410b6f..fe508788ef 100644
--- a/paddle/fluid/operators/jit/helper.cc
+++ b/paddle/fluid/operators/jit/helper.cc
@@ -34,6 +34,7 @@ const char* to_string(KernelType kt) {
     ONE_CASE(kVAddRelu);
     ONE_CASE(kVSub);
     ONE_CASE(kVScal);
+    ONE_CASE(kStrideScal);
     ONE_CASE(kVAddBias);
     ONE_CASE(kVRelu);
     ONE_CASE(kVBroadcast);
@@ -55,6 +56,7 @@ const char* to_string(KernelType kt) {
     ONE_CASE(kMatMul);
     ONE_CASE(kHMax);
     ONE_CASE(kHSum);
+    ONE_CASE(kStrideSum);
     ONE_CASE(kSoftmax);
     ONE_CASE(kEmbSeqPool);
     ONE_CASE(kSgd);
diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h
index bd34d7dfc7..6fd8a59d55 100644
--- a/paddle/fluid/operators/jit/kernel_base.h
+++ b/paddle/fluid/operators/jit/kernel_base.h
@@ -53,6 +53,8 @@ typedef enum {
   kVSquare,
   kVSub,
   kVTanh,
+  kStrideSum,
+  kStrideScal,
 } KernelType;
 
 typedef enum {
@@ -74,6 +76,14 @@ struct XYZNTuple {
 template <typename T>
 struct AXYNTuple : public XYZNTuple<T> {};
 
+// a, x, y, n, stride
+template <typename T>
+struct AXYNSTuple {
+  typedef T data_type;
+  typedef int attr_type;
+  typedef void (*func_type)(const T*, const T*, T*, int, int);
+};
+
 // x, y, n
 template <typename T>
 struct XYNTuple {
   typedef T data_type;
   typedef int attr_type;
   typedef void (*func_type)(const T*, T*, int);
 };
@@ -86,6 +96,14 @@ struct XYNTuple {
 template <typename T>
 struct XRNTuple : public XYNTuple<T> {};
 
+// x, returned value, n, stride
+template <typename T>
+struct XRNSTuple {
+  typedef T data_type;
+  typedef int attr_type;
+  typedef void (*func_type)(const T*, T*, int, int);
+};
+
 #define DECLARE_KERNELTUPLE(kernel_tuple, type)        \
   template <typename T>                                \
   struct type##Tuple : public kernel_tuple<T> {        \
@@ -101,6 +119,8 @@ DECLARE_KERNELTUPLE(XYZNTuple, VSub);
 DECLARE_KERNELTUPLE(AXYNTuple, VScal);
 DECLARE_KERNELTUPLE(AXYNTuple, VAddBias);
 
+DECLARE_KERNELTUPLE(AXYNSTuple, StrideScal);
+
 DECLARE_KERNELTUPLE(XYNTuple, VRelu);
 DECLARE_KERNELTUPLE(XYNTuple, VIdentity);
 DECLARE_KERNELTUPLE(XYNTuple, VSquare);
@@ -112,6 +132,8 @@ DECLARE_KERNELTUPLE(XYNTuple, VCopy);
 DECLARE_KERNELTUPLE(XRNTuple, HMax);
 DECLARE_KERNELTUPLE(XRNTuple, HSum);
 
+DECLARE_KERNELTUPLE(XRNSTuple, StrideSum);
+
 typedef struct {
   void* gates;  // gates: x_ch, x_ih, x_fh, x_oh
   const void* ct_1;
@@ -285,7 +307,7 @@ struct SoftmaxTuple {
   static constexpr KernelType kernel_type = kSoftmax;
   typedef T data_type;
   typedef int attr_type;
-  typedef void (*func_type)(const T*, T*, int, int);
+  typedef void (*func_type)(const T*, T*, int, int, int);
 };
 
 // nChw16c = nChw16c .* NC
diff --git a/paddle/fluid/operators/jit/more/mix/mix.cc b/paddle/fluid/operators/jit/more/mix/mix.cc
index 6e709a16d2..58a44d4b55 100644
--- a/paddle/fluid/operators/jit/more/mix/mix.cc
+++ b/paddle/fluid/operators/jit/more/mix/mix.cc
@@ -50,10 +50,12 @@ void VTanh(const T* x, T* y, int n) {
   compute_addbias(&b, y, y, n);
 }
 
-void Softmax(const T* x, T* y, int n, int bs) {
+void Softmax(const T* x, T* y, int n, int bs, int m) {
   auto compute_hmax = KernelFuncs<HMaxTuple<T>, CPUPlace>::Cache().At(n);
   auto compute_hsum = KernelFuncs<HSumTuple<T>, CPUPlace>::Cache().At(n);
   auto compute_vscal = KernelFuncs<VScalTuple<T>, CPUPlace>::Cache().At(n);
+  auto compute_stridesum = KernelFuncs<StrideSumTuple<T>, CPUPlace>::Cache().At(n);
+  auto compute_stridescal = KernelFuncs<StrideScalTuple<T>, CPUPlace>::Cache().At(n);
   auto compute_vaddbias = KernelFuncs<VAddBiasTuple<T>, CPUPlace>::Cache().At(n);
   auto compute_vexp = KernelFuncs<VExpTuple<T>, CPUPlace>::Cache().At(n);
 
@@ -64,9 +66,17 @@ void Softmax(const T* x, T* y, int n, int bs) {
     scalar = static_cast<T>(0) - scalar;
     compute_vaddbias(&scalar, x, y, n);  // x - max
     compute_vexp(y, y, n);
-    compute_hsum(y, &scalar, n);
-    scalar = static_cast<T>(1) / scalar;
-    compute_vscal(&scalar, y, y, n);
+    if (m == 1) {
+      compute_hsum(y, &scalar, n);
+      scalar = static_cast<T>(1) / scalar;
+      compute_vscal(&scalar, y, y, n);
+    } else {
+      for (int j = 0; j < m; ++j) {
+        compute_stridesum(&y[j], &scalar, n, m);
+        scalar = static_cast<T>(1) / scalar;
+        compute_stridescal(&scalar, &y[j], &y[j], n, m);
+      }
+    }
     x += n;
     y += n;
   }
diff --git a/paddle/fluid/operators/jit/more/mix/mix.h b/paddle/fluid/operators/jit/more/mix/mix.h
index 994d485909..a0079506f8 100644
--- a/paddle/fluid/operators/jit/more/mix/mix.h
+++ b/paddle/fluid/operators/jit/more/mix/mix.h
@@ -26,7 +26,7 @@ using T = float;
 
 void VSigmoid(const T* x, T* y, int n);
 void VTanh(const T* x, T* y, int n);
-void Softmax(const T* x, T* y, int n, int bs);
+void Softmax(const T* x, T* y, int n, int bs, int m);
 
 void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr);
 void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr);
diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
index f69417c370..56f1a62ad4 100644
--- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
@@ -7,6 +7,7 @@ USE_JITKERNEL_MORE(kMatMul, mkl)
 USE_JITKERNEL_MORE(kVMul, mkl)
 USE_JITKERNEL_MORE(kVAdd, mkl)
 USE_JITKERNEL_MORE(kVScal, mkl)
+USE_JITKERNEL_MORE(kStrideScal, mkl)
 USE_JITKERNEL_MORE(kVExp, mkl)
 USE_JITKERNEL_MORE(kVSquare, mkl)
 USE_JITKERNEL_MORE(kVCopy, mkl)
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc
index 4f600b3814..2828d75815 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.cc
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc
@@ -78,6 +78,24 @@ void VScal<double>(const double* a, const double* x, double* y, int n) {
   }
 }
 
+template <>
+void StrideScal<float>(const float* a, const float* x, float* y, int n, int stride) {
+  if (x == y) {
+    platform::dynload::cblas_sscal(n, *a, y, stride);
+  } else {
+    refer::StrideScal<float>(a, x, y, n, stride);
+  }
+}
+
+template <>
+void StrideScal<double>(const double* a, const double* x, double* y, int n, int stride) {
+  if (x == y) {
+    platform::dynload::cblas_dscal(n, *a, y, stride);
+  } else {
+    refer::StrideScal<double>(a, x, y, n, stride);
+  }
+}
+
 template <>
 void VExp<float>(const float* x, float* y, int n) {
   platform::dynload::vsExp(n, x, y);
@@ -128,6 +146,16 @@ void ASum<double>(const double* x, double* res, int n) {
   res[0] = platform::dynload::cblas_dasum(n, x, 1);
 }
 
+template <>
+void StrideSum<float>(const float* x, float* res, int n, int stride) {
+  res[0] = platform::dynload::cblas_sasum(n, x, stride);
+}
+
+template <>
+void StrideSum<double>(const double* x, double* res, int n, int stride) {
+  res[0] = platform::dynload::cblas_dasum(n, x, stride);
+}
+
 // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
 template <>
 bool VMulKernel<float>::CanBeUsed(const int& d) const {
@@ -144,6 +172,11 @@ bool VScalKernel<float>::CanBeUsed(const int& d) const {
   return platform::MayIUse(platform::avx512f) && d > 512;
 }
 
+template <>
+bool StrideScalKernel<float>::CanBeUsed(const int& d) const {
+  return platform::MayIUse(platform::avx512f) && d > 512;
+}
+
 template <>
 bool VExpKernel<float>::CanBeUsed(const int& d) const {
   return d > 7;
@@ -235,6 +268,7 @@ bool SoftmaxKernel<float>::CanBeUsed(const int& d) const {
 AWALYS_USE_ME_WITH_DOUBLE(VMul);
 AWALYS_USE_ME_WITH_DOUBLE(VAdd);
 AWALYS_USE_ME_WITH_DOUBLE(VScal);
+AWALYS_USE_ME_WITH_DOUBLE(StrideScal);
 AWALYS_USE_ME_WITH_DOUBLE(VExp);
 AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
 AWALYS_USE_ME_WITH_DOUBLE(VTanh);
@@ -259,6 +293,7 @@ REGISTER_MKL_KERNEL(MatMul);
 REGISTER_MKL_KERNEL(VMul);
 REGISTER_MKL_KERNEL(VAdd);
 REGISTER_MKL_KERNEL(VScal);
+REGISTER_MKL_KERNEL(StrideScal);
 REGISTER_MKL_KERNEL(VExp);
 REGISTER_MKL_KERNEL(VSquare);
 REGISTER_MKL_KERNEL(VCopy);
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h
index f51dca654c..1e974c095f 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.h
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.h
@@ -129,7 +129,13 @@ template <typename T>
 void ASum(const T* x, T* res, int n);
 
 template <typename T>
-void Softmax(const T* x, T* y, int n, int bs) {
+void StrideSum(const T* x, T* res, int n, int stride);
+
+template <typename T>
+void StrideScal(const T* a, const T* x, T* y, int n, int stride);
+
+template <typename T>
+void Softmax(const T* x, T* y, int n, int bs, int m = 1) {
   std::vector<T> entities(bs);
   for (int i = 0; i < bs; ++i) {
     entities[i] = x[i * n];
@@ -143,9 +149,17 @@ void Softmax(const T* x, T* y, int n, int bs) {
   VExp(y, y, n * bs);
   for (int i = 0; i < bs; ++i) {
     T sum;
-    ASum(&y[i * n], &sum, n);
-    sum = static_cast<T>(1) / sum;
-    VScal(&sum, &y[i * n], &y[i * n], n);
+    if (m == 1) {
+      ASum(&y[i * n], &sum, n);
+      sum = static_cast<T>(1) / sum;
+      VScal(&sum, &y[i * n], &y[i * n], n);
+    } else {
+      for (int j = 0; j < m; ++j) {
+        StrideSum(&y[i * n + j], &sum, n / m, m);
+        sum = static_cast<T>(1) / sum;
+        StrideScal(&sum, &y[i * n + j], &y[i * n + j], n / m, m);
+      }
+    }
   }
 }
 
@@ -193,6 +207,7 @@ DECLARE_MKL_KERNEL(VAdd);
 
 // AXYN
 DECLARE_MKL_KERNEL(VScal);
+DECLARE_MKL_KERNEL(StrideScal);
 
 // XYN
 DECLARE_MKL_KERNEL(VExp);
diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt
index ffab9c1457..9a39809c93 100644
--- a/paddle/fluid/operators/jit/refer/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt
@@ -12,6 +12,7 @@ USE_JITKERNEL_REFER(kVAdd)
 USE_JITKERNEL_REFER(kVAddRelu)
 USE_JITKERNEL_REFER(kVSub)
 USE_JITKERNEL_REFER(kVScal)
+USE_JITKERNEL_REFER(kStrideScal)
 USE_JITKERNEL_REFER(kVAddBias)
 USE_JITKERNEL_REFER(kVCopy)
 USE_JITKERNEL_REFER(kVRelu)
@@ -32,6 +33,7 @@ USE_JITKERNEL_REFER(kMatMul)
 USE_JITKERNEL_REFER(kVSquare)
 USE_JITKERNEL_REFER(kHSum)
 USE_JITKERNEL_REFER(kHMax)
+USE_JITKERNEL_REFER(kStrideSum)
 USE_JITKERNEL_REFER(kSoftmax)
 USE_JITKERNEL_REFER(kEmbSeqPool)
 USE_JITKERNEL_REFER(kSgd)
diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc
index 0d1c477090..704124e805 100644
--- a/paddle/fluid/operators/jit/refer/refer.cc
+++ b/paddle/fluid/operators/jit/refer/refer.cc
@@ -27,6 +27,7 @@ REGISTER_REFER_KERNEL(VAddRelu);
 REGISTER_REFER_KERNEL(VSub);
 
 REGISTER_REFER_KERNEL(VScal);
+REGISTER_REFER_KERNEL(StrideScal);
 REGISTER_REFER_KERNEL(VAddBias);
 
 REGISTER_REFER_KERNEL(VRelu);
@@ -51,6 +52,7 @@ REGISTER_REFER_KERNEL(SeqPool);
 REGISTER_REFER_KERNEL(MatMul);
 REGISTER_REFER_KERNEL(HMax);
 REGISTER_REFER_KERNEL(HSum);
+REGISTER_REFER_KERNEL(StrideSum);
 REGISTER_REFER_KERNEL(Softmax);
 REGISTER_REFER_KERNEL(EmbSeqPool);
 REGISTER_REFER_KERNEL(Sgd);
diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h
index cac705a484..dee9245524 100644
--- a/paddle/fluid/operators/jit/refer/refer.h
+++ b/paddle/fluid/operators/jit/refer/refer.h
@@ -411,19 +411,42 @@ void HSum(const T* x, T* res, int n) {
   }
 }
 
+template <typename T>
+void StrideSum(const T* x, T* res, int n, int stride) {
+  res[0] = x[0];
+  for (int i = stride; i < n; i += stride) {
+    res[0] += x[i];
+  }
+}
+
+template <typename T>
+void StrideScal(const T* a, const T* x, T* y, int n, int stride) {
+  for (int i = 0; i < n; i += stride) {
+    y[i] = x[i] * a[0];
+  }
+}
+
 // y = e^(x - max(x))
 // y = y / sum(y)
 template <typename T>
-void Softmax(const T* x, T* y, int n, int bs = 1) {
+void Softmax(const T* x, T* y, int n, int bs = 1, int m = 1) {
   for (int i = 0; i < bs; ++i) {
     T scalar;
     HMax(x, &scalar, n);
     scalar = static_cast<T>(0) - scalar;
     VAddBias(&scalar, x, y, n);  // x - max
     VExp(y, y, n);
-    HSum(y, &scalar, n);
-    scalar = static_cast<T>(1) / scalar;
-    VScal(&scalar, y, y, n);
+    if (m == 1) {
+      HSum(y, &scalar, n);
+      scalar = static_cast<T>(1) / scalar;
+      VScal(&scalar, y, y, n);
+    } else {
+      for (int j = 0; j < m; j++) {
+        StrideSum(&y[j], &scalar, n, m);
+        scalar = static_cast<T>(1) / scalar;
+        StrideScal(&scalar, &y[j], &y[j], n, m);
+      }
+    }
     x += n;
     y += n;
   }
 }
@@ -507,6 +530,9 @@ DECLARE_REFER_KERNEL(VSub);
 DECLARE_REFER_KERNEL(VScal);
 DECLARE_REFER_KERNEL(VAddBias);
 
+// const T* a, const T* x, T* y, int n, int stride
+DECLARE_REFER_KERNEL(StrideScal);
+
 // const T* x, T* y, int n
 DECLARE_REFER_KERNEL(VRelu);
 DECLARE_REFER_KERNEL(VIdentity);
@@ -528,6 +554,8 @@ DECLARE_REFER_KERNEL(GRUHtPart2);
 DECLARE_REFER_KERNEL(HMax);
 DECLARE_REFER_KERNEL(HSum);
 
+DECLARE_REFER_KERNEL(StrideSum);
+
 // others
 DECLARE_REFER_KERNEL(CRFDecoding);
 DECLARE_REFER_KERNEL(LayerNorm);
diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc
index 6c099a7a06..93a448166f 100644
--- a/paddle/fluid/operators/jit/test.cc
+++ b/paddle/fluid/operators/jit/test.cc
@@ -723,39 +723,44 @@ void TestKernelSoftmax() {
   VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
   for (int bs : {1, 2, 10}) {
     for (int n : TestSizes()) {
-      auto ref = jit::GetReferFunc<KernelTuple>();
-      EXPECT_TRUE(ref != nullptr);
-      std::vector<T> x(bs * n), y(bs * n);
-      RandomVec<T>(bs * n, x.data());
-      const T* x_data = x.data();
-      T* y_data = y.data();
+      for (int m : {1, 2}) {
+        if (m > n || n % m != 0) {
+          continue;
+        }
+        auto ref = jit::GetReferFunc<KernelTuple>();
+        EXPECT_TRUE(ref != nullptr);
+        std::vector<T> x(bs * n), y(bs * n);
+        RandomVec<T>(bs * n, x.data());
+        const T* x_data = x.data();
+        T* y_data = y.data();
 
-      std::vector<T> xinp(x.size());  // inplace test
-      std::copy(x.begin(), x.end(), xinp.begin());
-      ref(x_data, y_data, n, bs);
-      T* xinp_data = xinp.data();
-      ref(xinp_data, xinp_data, n, bs);
-      ExpectEQ<T>(xinp_data, y_data, n * bs);
+        std::vector<T> xinp(x.size());  // inplace test
+        std::copy(x.begin(), x.end(), xinp.begin());
+        ref(x_data, y_data, n, bs, m);
+        T* xinp_data = xinp.data();
+        ref(xinp_data, xinp_data, n, bs, m);
+        ExpectEQ<T>(xinp_data, y_data, n * bs);
 
-      auto verifier = [](const typename KernelTuple::func_type tgt,
-                         const std::vector<T>& x, const std::vector<T>& yref,
-                         int n, int bs) {
-        EXPECT_TRUE(tgt != nullptr);
-        EXPECT_EQ(yref.size(), x.size());
-        EXPECT_EQ(x.size(), static_cast<size_t>(n * bs));
-        const T* x_data = x.data();
-        const T* yref_data = yref.data();
-        std::vector<T> ytgt(n * bs);
-        T* ytgt_data = ytgt.data();
-        // test normal
-        tgt(x_data, ytgt_data, n, bs);
-        ExpectEQ<T>(ytgt_data, yref_data, n * bs);
-        // test inplace x
-        std::copy(x.begin(), x.end(), ytgt.begin());
-        tgt(ytgt_data, ytgt_data, n, bs);
-        ExpectEQ<T>(ytgt_data, yref_data, n * bs);
-      };
-      TestAllImpls<KernelTuple, PlaceType>(n, verifier, x, y, n, bs);
+        auto verifier = [](const typename KernelTuple::func_type tgt,
+                           const std::vector<T>& x, const std::vector<T>& yref,
+                           int n, int bs, int m) {
+          EXPECT_TRUE(tgt != nullptr);
+          EXPECT_EQ(yref.size(), x.size());
+          EXPECT_EQ(x.size(), static_cast<size_t>(n * bs));
+          const T* x_data = x.data();
+          const T* yref_data = yref.data();
+          std::vector<T> ytgt(n * bs);
+          T* ytgt_data = ytgt.data();
+          // test normal
+          tgt(x_data, ytgt_data, n, bs, m);
+          ExpectEQ<T>(ytgt_data, yref_data, n * bs);
+          // test inplace x
+          std::copy(x.begin(), x.end(), ytgt.begin());
+          tgt(ytgt_data, ytgt_data, n, bs, m);
+          ExpectEQ<T>(ytgt_data, yref_data, n * bs);
+        };
+        TestAllImpls<KernelTuple, PlaceType>(n, verifier, x, y, n, bs, m);
+      }
     }
   }
 }
diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h
index 9bcb272b93..dea8142cc8 100644
--- a/paddle/fluid/operators/math/softmax_impl.h
+++ b/paddle/fluid/operators/math/softmax_impl.h
@@ -76,8 +76,8 @@ using enable_if_CPU = typename std::enable_if<
 template <typename DeviceContext>
 class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
-  void operator()(const DeviceContext& context, const framework::Tensor* X,
-                  framework::Tensor* Y) {
+  void operator()(const DeviceContext& context, const int axis_dim,
+                  const framework::Tensor* X, framework::Tensor* Y) {
     auto in_dims = X->dims();
     const float* in_data = X->data<float>();
     float* out_data = Y->data<float>();
@@ -87,7 +87,8 @@ class SoftmaxFunctor<DeviceContext, float, true, enable_if_CPU<DeviceContext>> {
     auto compute_softmax =
         jit::KernelFuncs<jit::SoftmaxTuple<float>, platform::CPUPlace>::Cache()
             .At(in_dims[kClassDim]);
-    compute_softmax(in_data, out_data, in_dims[kClassDim], in_dims[kBatchDim]);
+    compute_softmax(in_data, out_data, in_dims[kClassDim], in_dims[kBatchDim],
+                    in_dims[kClassDim] / axis_dim);
   }
 };
 
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 578ab8eee3..9cbb6691f4 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -42,9 +42,18 @@ class SoftmaxOp : public framework::OperatorWithKernel {
     auto dim_x = ctx->GetInputDim("X");
     auto rank_x = dim_x.size();
     auto axis = ctx->Attrs().Get<int>("axis");
-    PADDLE_ENFORCE(axis >= -1 && axis < rank_x,
-                   "Attr(axis) value should larger equal then -1"
-                   "and less then the rank of Input(X)");
+    PADDLE_ENFORCE(axis >= -rank_x && axis < rank_x,
+                   "Attr(axis) value should be in range [-R, R-1], "
+                   "R is the rank of Input(X).");
+
+    auto use_cudnn = ctx->Attrs().Get<bool>("use_cudnn");
+    auto use_mkldnn = ctx->Attrs().Get<bool>("use_mkldnn");
+    if (axis != rank_x - 1 && axis != -1) {
+      PADDLE_ENFORCE(!use_cudnn,
+                     "CUDNN kernel only support axis as -1.");
+      PADDLE_ENFORCE(!use_mkldnn,
+                     "MKLDNN kernel only support axis as -1.");
+    }
 
     ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
     ctx->ShareLoD("X", /*->*/ "Out");
diff --git a/paddle/fluid/operators/softmax_op.h b/paddle/fluid/operators/softmax_op.h
index 76e8eeab08..bbea935101 100644
--- a/paddle/fluid/operators/softmax_op.h
+++ b/paddle/fluid/operators/softmax_op.h
@@ -63,8 +63,6 @@ class SoftmaxKernel : public framework::OpKernel<T> {
     Tensor X_2d, Out_2d;
     X_2d.ShareDataWith(*X).Resize({n, d});
     Out_2d.ShareDataWith(*Out).Resize({n, d});
-    // Tensor X_2d = framework::ReshapeToMatrix(*X, axis - 1);
-    // Tensor Out_2d = framework::ReshapeToMatrix(*Out, axis - 1);
 
 #ifdef PADDLE_ON_INFERENCE
     math::SoftmaxFunctor<DeviceContext, T, true>()(
@@ -96,9 +94,6 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
     dX_2d.ShareDataWith(*dX).Resize({n, d});
     Out_2d.ShareDataWith(*Out).Resize({n, d});
     dOut_2d.ShareDataWith(*dOut).Resize({n, d});
-    // Tensor Out_2d = framework::ReshapeToMatrix(*Out, axis - 1);
-    // Tensor dOut_2d = framework::ReshapeToMatrix(*dOut, axis - 1);
-    // Tensor dX_2d = framework::ReshapeToMatrix(*dX, axis - 1);
 
     math::SoftmaxGradFunctor<DeviceContext, T>()(
         context.template device_context<DeviceContext>(), axis_dim, &Out_2d, &dOut_2d,
diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py
index 2e779270f0..8b07126028 100644
--- a/python/paddle/fluid/tests/unittests/test_softmax_op.py
+++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py
@@ -125,26 +125,6 @@ class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp):
         return [2, 3, 4, 5]
 
 
-@unittest.skipIf(not core.is_compiled_with_cuda(),
-                 "core is not compiled with CUDA")
-class TestSoftmaxCUDNNOp3(TestSoftmaxCUDNNOp):
-    def get_x_shape(self):
-        return [2, 3, 4, 5]
-
-    def get_axis(self):
-        return 0
-
-
-@unittest.skipIf(not core.is_compiled_with_cuda(),
-                 "core is not compiled with CUDA")
-class TestSoftmaxCUDNNOp4(TestSoftmaxCUDNNOp):
-    def get_x_shape(self):
-        return [2, 3, 4, 5]
-
-    def get_axis(self):
-        return 1
-
-
 @unittest.skipIf(not core.is_compiled_with_cuda(),
                  "core is not compiled with CUDA")
 class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp):
@@ -152,7 +132,7 @@ class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp):
         return [2, 3, 4, 5]
 
     def get_axis(self):
-        return 2
+        return 3
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
-- 
GitLab
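
Contract introduced by this patch: Softmax(x, y, n, bs, m) treats the input as bs rows of n values, where each row interleaves m independent softmax groups (softmax_impl.h passes m = in_dims[kClassDim] / axis_dim), so group j of a row occupies elements j, j + m, j + 2m, ... and is normalized via StrideSum/StrideScal at stride m. Below is a minimal scalar sketch of that contract, assuming a hypothetical standalone function naive_softmax_axis that is not part of the patch or the Paddle JIT registry:

    #include <algorithm>
    #include <cmath>

    // Hypothetical plain-C++ reference for the patched Softmax(x, y, n, bs, m):
    // bs rows of n elements; each row holds m interleaved groups of n / m
    // elements, and each group is softmax-normalized independently.
    void naive_softmax_axis(const float* x, float* y, int n, int bs, int m) {
      for (int i = 0; i < bs; ++i, x += n, y += n) {
        for (int j = 0; j < m; ++j) {
          float max_v = x[j];
          for (int k = j; k < n; k += m) max_v = std::max(max_v, x[k]);
          float sum = 0.f;
          for (int k = j; k < n; k += m) {
            y[k] = std::exp(x[k] - max_v);  // shift by max for stability
            sum += y[k];
          }
          for (int k = j; k < n; k += m) y[k] /= sum;  // normalize the group
        }
      }
    }

Note that the refer kernel in the patch subtracts a single HMax taken over the whole row rather than a per-group maximum as in this sketch; the results are identical, since softmax is invariant to a constant shift within a group and the row maximum still keeps every exponent non-positive.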