From 7383eefd2db74a593563ea35bc5aeb831e557a32 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 28 Jan 2019 13:30:06 +0000 Subject: [PATCH] add softmax mix and mkl code test=develop --- .../operators/jit/more/mix/CMakeLists.txt | 1 + paddle/fluid/operators/jit/more/mix/mix.cc | 24 +++++++++++++++++ paddle/fluid/operators/jit/more/mix/mix.h | 4 +++ .../operators/jit/more/mkl/CMakeLists.txt | 1 + paddle/fluid/operators/jit/more/mkl/mkl.cc | 17 ++++++++++++ paddle/fluid/operators/jit/more/mkl/mkl.h | 27 +++++++++++++++++++ 6 files changed, 74 insertions(+) diff --git a/paddle/fluid/operators/jit/more/mix/CMakeLists.txt b/paddle/fluid/operators/jit/more/mix/CMakeLists.txt index e05f204b1..dd039d291 100644 --- a/paddle/fluid/operators/jit/more/mix/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mix/CMakeLists.txt @@ -12,3 +12,4 @@ USE_JITKERNEL_MORE(kLSTMC1H1, mix) USE_JITKERNEL_MORE(kGRUH1, mix) USE_JITKERNEL_MORE(kGRUHtPart1, mix) USE_JITKERNEL_MORE(kGRUHtPart2, mix) +USE_JITKERNEL_MORE(kSoftmax, mix) diff --git a/paddle/fluid/operators/jit/more/mix/mix.cc b/paddle/fluid/operators/jit/more/mix/mix.cc index df0a85256..2a75eb23c 100644 --- a/paddle/fluid/operators/jit/more/mix/mix.cc +++ b/paddle/fluid/operators/jit/more/mix/mix.cc @@ -48,6 +48,27 @@ void VTanh(const T* x, T* y, int n) { compute_addbias(&b, y, y, n); } +void Softmax(const T* x, T* y, int n, int bs) { + auto compute_hmax = Get, platform::CPUPlace>(n); + auto compute_hsum = Get, platform::CPUPlace>(n); + auto compute_vscal = Get, platform::CPUPlace>(n); + auto compute_vaddbias = Get, platform::CPUPlace>(n); + auto compute_vexp = + Get, platform::CPUPlace>(n); + for (int i = 0; i < bs; ++i) { + T scalar; + compute_hmax(x, &scalar, n); + scalar = static_cast(0) - scalar; + compute_vaddbias(&scalar, x, y, n); // x - max + compute_vexp(y, y, n); + compute_hsum(y, &scalar, n); + scalar = static_cast(1) / scalar; + compute_vscal(&scalar, y, y, n); + x += n; + y += n; + } +} + void (*getActFunc(KernelType type, int d))(const T*, T*, int) { // NOLINT if (type == kVSigmoid) { return Get, platform::CPUPlace>(d); @@ -184,6 +205,8 @@ bool VSigmoidKernel::UseMe(const int& d) const { return true; } bool VTanhKernel::UseMe(const int& d) const { return true; } +bool SoftmaxKernel::UseMe(const int& d) const { return true; } + bool LSTMCtHtKernel::UseMe(const lstm_attr_t& attr) const { return true; } bool LSTMC1H1Kernel::UseMe(const lstm_attr_t& attr) const { return true; } @@ -207,6 +230,7 @@ namespace mix = paddle::operators::jit::more::mix; REGISTER_MORE_KERNEL(kVSigmoid, VSigmoid); REGISTER_MORE_KERNEL(kVTanh, VTanh); +REGISTER_MORE_KERNEL(kSoftmax, Softmax); REGISTER_MORE_KERNEL(kLSTMCtHt, LSTMCtHt); REGISTER_MORE_KERNEL(kLSTMC1H1, LSTMC1H1); REGISTER_MORE_KERNEL(kGRUH1, GRUH1); diff --git a/paddle/fluid/operators/jit/more/mix/mix.h b/paddle/fluid/operators/jit/more/mix/mix.h index a70ecdf93..d64af1921 100644 --- a/paddle/fluid/operators/jit/more/mix/mix.h +++ b/paddle/fluid/operators/jit/more/mix/mix.h @@ -26,6 +26,7 @@ using T = float; void VSigmoid(const T* x, T* y, int n); void VTanh(const T* x, T* y, int n); +void Softmax(const T* x, T* y, int n, int bs); void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr); void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr); @@ -45,6 +46,9 @@ void GRUHtPart2(gru_t* step, const gru_attr_t* attr); DECLARE_MORE_KERNEL(VSigmoid, XYNTuples); DECLARE_MORE_KERNEL(VTanh, XYNTuples); +// XRN +DECLARE_MORE_KERNEL(Softmax, SoftmaxTuples); + DECLARE_MORE_KERNEL(LSTMCtHt, LSTMTuples); DECLARE_MORE_KERNEL(LSTMC1H1, LSTMTuples); diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt index 667c6dfad..f9e5aea32 100644 --- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt @@ -12,3 +12,4 @@ USE_JITKERNEL_MORE(kVSquare, mkl) USE_JITKERNEL_MORE(kVSigmoid, mkl) USE_JITKERNEL_MORE(kVTanh, mkl) USE_JITKERNEL_MORE(kSeqPool, mkl) +USE_JITKERNEL_MORE(kSoftmax, mkl) diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc index fccdc68f5..b13b8638e 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.cc +++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc @@ -116,6 +116,16 @@ void VAXPY(double a, const double* x, double* y, int n) { platform::dynload::cblas_daxpy(n, a, x, 1, y, 1); } +template <> +void ASum(const float* x, float* res, int n) { + res[0] = platform::dynload::cblas_sasum(n, x, 1); +} + +template <> +void ASum(const double* x, double* res, int n) { + res[0] = platform::dynload::cblas_dasum(n, x, 1); +} + // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512 template <> bool MatMulKernel::UseMe(const int& d) const { @@ -167,6 +177,11 @@ bool SeqPoolKernel::UseMe(const seq_pool_attr_t& attr) const { return true; } +template <> +bool SoftmaxKernel::UseMe(const int& d) const { + return true; +} + #define AWALYS_USE_ME_WITH_DOUBLE(func) \ template <> \ bool func##Kernel::UseMe(const int& d) const { \ @@ -181,6 +196,7 @@ AWALYS_USE_ME_WITH_DOUBLE(VExp); AWALYS_USE_ME_WITH_DOUBLE(VSigmoid); AWALYS_USE_ME_WITH_DOUBLE(VTanh); AWALYS_USE_ME_WITH_DOUBLE(VSquare); +AWALYS_USE_ME_WITH_DOUBLE(Softmax); #undef AWALYS_USE_ME_WITH_DOUBLE } // namespace mkl @@ -204,5 +220,6 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare); REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid); REGISTER_MKL_KERNEL(kVTanh, VTanh); REGISTER_MKL_KERNEL(kSeqPool, SeqPool); +REGISTER_MKL_KERNEL(kSoftmax, Softmax); #undef REGISTER_MKL_KERNEL diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index a27196fa1..6b95b9c87 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -16,6 +16,7 @@ #include #include +#include #include "paddle/fluid/operators/jit/kernel_base.h" namespace paddle { @@ -90,6 +91,30 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) { } } +template +void ASum(const T* x, T* res, int n); + +template +void Softmax(const T* x, T* y, int n, int bs) { + std::vector entities(bs); + for (int i = 0; i < bs; ++i) { + entities[i] = x[i * n]; + for (int c = 1; c < n; ++c) { + entities[i] = x[i * n + c] > entities[i] ? x[i * n + c] : entities[i]; + } + for (int c = 0; c < n; ++c) { + y[i * n + c] = x[i * n + c] - entities[i]; + } + } + VExp(y, y, n * bs); + for (int i = 0; i < bs; ++i) { + T sum; + ASum(&y[i * n], &sum, n); + sum = static_cast(1) / sum; + VScal(&sum, &y[i * n], &y[i * n], n); + } +} + #define DECLARE_MKL_KERNEL(name, tuples) \ template \ class name##Kernel : public KernelMore> { \ @@ -117,6 +142,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples); DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples); +DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples); + #undef DECLARE_MKL_KERNEL } // namespace mkl -- GitLab