From 5e97be7ba774c5b109c6674ba41d8ba1bfd18229 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 14 Dec 2018 10:10:41 +0000 Subject: [PATCH] enable jitkernel mkl vexp, vsigmoid and vtanh --- paddle/fluid/operators/jit/gen/jitcode.h | 4 -- paddle/fluid/operators/jit/helper.h | 4 -- paddle/fluid/operators/jit/kernel_base.h | 1 + paddle/fluid/operators/jit/macro.h | 32 +++++++++++++++ .../operators/jit/more/mkl/CMakeLists.txt | 3 ++ paddle/fluid/operators/jit/more/mkl/mkl.cc | 31 ++++++++++++++ paddle/fluid/operators/jit/more/mkl/mkl.h | 33 +++++++++++++++ paddle/fluid/operators/jit/test.cc | 2 +- paddle/fluid/operators/math/jit_kernel_exp.cc | 41 ------------------- 9 files changed, 101 insertions(+), 50 deletions(-) create mode 100644 paddle/fluid/operators/jit/macro.h diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h index 765952fc35..64126e3f61 100644 --- a/paddle/fluid/operators/jit/gen/jitcode.h +++ b/paddle/fluid/operators/jit/gen/jitcode.h @@ -56,10 +56,6 @@ typedef enum { identity } operand_type; -#define XMM_FLOAT_BLOCK 4 -#define YMM_FLOAT_BLOCK 8 -#define ZMM_FLOAT_BLOCK 16 - #define DECLARE_JIT_CODE(codename) \ const char* name() const override { return #codename; } diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index 3431c22111..44952fb907 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -27,10 +27,6 @@ namespace paddle { namespace operators { namespace jit { -#define SIGMOID_THRESHOLD_MIN -40.0 -#define SIGMOID_THRESHOLD_MAX 13.0 -#define EXP_MAX_INPUT 40.0 - template inline typename std::enable_if< std::is_same::value && diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index 00d583c60b..f10d9f3fdd 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -13,6 +13,7 @@ * limitations under the License. */ #pragma once +#include "paddle/fluid/operators/jit/macro.h" #include "paddle/fluid/platform/macros.h" namespace paddle { diff --git a/paddle/fluid/operators/jit/macro.h b/paddle/fluid/operators/jit/macro.h new file mode 100644 index 0000000000..b2622eba8b --- /dev/null +++ b/paddle/fluid/operators/jit/macro.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once +#include + +namespace paddle { +namespace operators { +namespace jit { + +#define SIGMOID_THRESHOLD_MIN -40.0 +#define SIGMOID_THRESHOLD_MAX 13.0 +#define EXP_MAX_INPUT 40.0 + +#define XMM_FLOAT_BLOCK 4 +#define YMM_FLOAT_BLOCK 8 +#define ZMM_FLOAT_BLOCK 16 + +} // namespace jit +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt index ffecb73297..3ecb520392 100644 --- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt @@ -6,3 +6,6 @@ set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl PARENT_SCOPE USE_JITKERNEL_MORE(vmul, mkl) USE_JITKERNEL_MORE(vadd, mkl) USE_JITKERNEL_MORE(vscal, mkl) +USE_JITKERNEL_MORE(vexp, mkl) +USE_JITKERNEL_MORE(vsigmoid, mkl) +USE_JITKERNEL_MORE(vtanh, mkl) diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc index 3d963cbf1d..42f6df576b 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.cc +++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc @@ -62,6 +62,16 @@ void VScal(const double* a, const double* x, double* y, int n) { } } +template <> +void VExp(const float* x, float* y, int n) { + platform::dynload::vsExp(n, x, y); +} + +template <> +void VExp(const double* x, double* y, int n) { + platform::dynload::vdExp(n, x, y); +} + // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512 template <> bool VMulKernel::UseMe(int d) const { @@ -78,6 +88,21 @@ bool VScalKernel::UseMe(int d) const { return platform::MayIUse(platform::avx512f) && d > 512; } +template <> +bool VExpKernel::UseMe(int d) const { + return d > 7; +} + +template <> +bool VSigmoidKernel::UseMe(int d) const { + return d > 7; +} + +template <> +bool VTanhKernel::UseMe(int d) const { + return d > 7; +} + #define AWALYS_USE_ME_WITH_DOUBLE(func) \ template <> \ bool func##Kernel::UseMe(int d) const { \ @@ -87,6 +112,9 @@ bool VScalKernel::UseMe(int d) const { AWALYS_USE_ME_WITH_DOUBLE(VMul); AWALYS_USE_ME_WITH_DOUBLE(VAdd); AWALYS_USE_ME_WITH_DOUBLE(VScal); +AWALYS_USE_ME_WITH_DOUBLE(VExp); +AWALYS_USE_ME_WITH_DOUBLE(VSigmoid); +AWALYS_USE_ME_WITH_DOUBLE(VTanh); #undef AWALYS_USE_ME_WITH_DOUBLE } // namespace mkl @@ -104,5 +132,8 @@ namespace mkl = paddle::operators::jit::more::mkl; REGISTER_MKL_KERNEL(vmul, VMul); REGISTER_MKL_KERNEL(vadd, VAdd); REGISTER_MKL_KERNEL(vscal, VScal); +REGISTER_MKL_KERNEL(vexp, VExp); +REGISTER_MKL_KERNEL(vsigmoid, VSigmoid); +REGISTER_MKL_KERNEL(vtanh, VTanh); #undef REGISTER_MKL_KERNEL diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index 84a93f408f..bf209d2f9d 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -32,6 +32,34 @@ void VAdd(const T* x, const T* y, T* z, int n); template void VScal(const T* a, const T* x, T* y, int n); +template +void VExp(const T* x, T* y, int n); + +template +void VSigmoid(const T* x, T* y, int n) { + const T min = SIGMOID_THRESHOLD_MIN; + const T max = SIGMOID_THRESHOLD_MAX; + for (int i = 0; i < n; ++i) { + y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]); + y[i] = static_cast(0) - y[i]; + } + VExp(y, y, n); + for (int i = 0; i < n; ++i) { + y[i] = static_cast(1) / (static_cast(1) + y[i]); + } +} + +template +void VTanh(const T* x, T* y, int n) { + for (int i = 0; i < n; ++i) { + y[i] = static_cast(2) * x[i]; + } + VSigmoid(y, y, n); + for (int i = 0; i < n; ++i) { + y[i] = static_cast(2) * y[i] - static_cast(1); + } +} + #define DECLARE_MKL_KERNEL(name, tuples) \ template \ class name##Kernel : public KernelImpl> { \ @@ -47,6 +75,11 @@ DECLARE_MKL_KERNEL(VAdd, XYZNTuples); // AXYN DECLARE_MKL_KERNEL(VScal, AXYNTuples); +// XYN +DECLARE_MKL_KERNEL(VExp, XYNTuples); +DECLARE_MKL_KERNEL(VSigmoid, XYNTuples); +DECLARE_MKL_KERNEL(VTanh, XYNTuples); + #undef DECLARE_MKL_KERNEL } // namespace mkl diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 62d4cdc19a..e211276d18 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -312,7 +312,7 @@ void TestXYNKernel() { std::vector x(d), yref(d); std::vector xinp(d); // inplace test - RandomVec(d, x.data()); + RandomVec(d, x.data(), -2.f, 2.f); std::copy(x.begin(), x.end(), xinp.begin()); const T* x_data = x.data(); diff --git a/paddle/fluid/operators/math/jit_kernel_exp.cc b/paddle/fluid/operators/math/jit_kernel_exp.cc index 7945cfb253..1f97ed1e62 100644 --- a/paddle/fluid/operators/math/jit_kernel_exp.cc +++ b/paddle/fluid/operators/math/jit_kernel_exp.cc @@ -30,47 +30,6 @@ namespace operators { namespace math { namespace jitkernel { -#ifdef PADDLE_WITH_MKLML -// try to use MKL to speedup -template -void VExpMKL(const T* x, T* y, int n); - -template <> -void VExpMKL(const float* x, float* y, int n) { - platform::dynload::vsExp(n, x, y); -} - -template <> -void VExpMKL(const double* x, double* y, int n) { - platform::dynload::vdExp(n, x, y); -} - -template -void VSigmoidMKL(const T* x, T* y, int n) { - const T min = SIGMOID_THRESHOLD_MIN; - const T max = SIGMOID_THRESHOLD_MAX; - for (int i = 0; i < n; ++i) { - y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]); - y[i] = static_cast(0) - y[i]; - } - VExpMKL(y, y, n); - for (int i = 0; i < n; ++i) { - y[i] = static_cast(1) / (static_cast(1) + y[i]); - } -} - -template -void VTanhMKL(const T* x, T* y, int n) { - for (int i = 0; i < n; ++i) { - y[i] = static_cast(2) * x[i]; - } - VSigmoidMKL(y, y, n); - for (int i = 0; i < n; ++i) { - y[i] = static_cast(2) * y[i] - static_cast(1); - } -} -#endif - /* VExp JitKernel */ template class VExpKernelImpl : public VExpKernel { -- GitLab