diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h
index 765952fc35276554fc8fcbf19208f81bedd611c6..64126e3f61a8bd63db9c68d6438386c68dd13859 100644
--- a/paddle/fluid/operators/jit/gen/jitcode.h
+++ b/paddle/fluid/operators/jit/gen/jitcode.h
@@ -56,10 +56,6 @@ typedef enum {
   identity
 } operand_type;
 
-#define XMM_FLOAT_BLOCK 4
-#define YMM_FLOAT_BLOCK 8
-#define ZMM_FLOAT_BLOCK 16
-
 #define DECLARE_JIT_CODE(codename) \
   const char* name() const override { return #codename; }
 
diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h
index 3431c22111f948b3d5261a59feb761668300ce24..44952fb90797f8bb51f891c4ea69fcf0b7f1dbe5 100644
--- a/paddle/fluid/operators/jit/helper.h
+++ b/paddle/fluid/operators/jit/helper.h
@@ -27,10 +27,6 @@ namespace paddle {
 namespace operators {
 namespace jit {
 
-#define SIGMOID_THRESHOLD_MIN -40.0
-#define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0
-
 template <typename T>
 inline typename std::enable_if<
     std::is_same<T, float>::value &&
diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h
index 00d583c60bf73582dab7df75ec8feac1b8f3c3c9..f10d9f3fdd65b280aa59694e6890448e5418cc2d 100644
--- a/paddle/fluid/operators/jit/kernel_base.h
+++ b/paddle/fluid/operators/jit/kernel_base.h
@@ -13,6 +13,7 @@
  * limitations under the License. */
 
 #pragma once
+#include "paddle/fluid/operators/jit/macro.h"
 #include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/jit/macro.h b/paddle/fluid/operators/jit/macro.h
new file mode 100644
index 0000000000000000000000000000000000000000..b2622eba8b70cc553a2da44638d577c9d7751b25
--- /dev/null
+++ b/paddle/fluid/operators/jit/macro.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#pragma once
+#include <type_traits>
+
+namespace paddle {
+namespace operators {
+namespace jit {
+
+#define SIGMOID_THRESHOLD_MIN -40.0
+#define SIGMOID_THRESHOLD_MAX 13.0
+#define EXP_MAX_INPUT 40.0
+
+#define XMM_FLOAT_BLOCK 4
+#define YMM_FLOAT_BLOCK 8
+#define ZMM_FLOAT_BLOCK 16
+
+}  // namespace jit
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
index ffecb732975a652456b154a61feb8a20a727d306..3ecb520392e59d995a68ee8d10022b53d1bb1ddd 100644
--- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
@@ -6,3 +6,6 @@ set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl PARENT_SCOPE
 USE_JITKERNEL_MORE(vmul, mkl)
 USE_JITKERNEL_MORE(vadd, mkl)
 USE_JITKERNEL_MORE(vscal, mkl)
+USE_JITKERNEL_MORE(vexp, mkl)
+USE_JITKERNEL_MORE(vsigmoid, mkl)
+USE_JITKERNEL_MORE(vtanh, mkl)
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc
index 3d963cbf1dd5468afc717178f1a53234d8e14a99..42f6df576b1b025ac8c42cc331993f56e9f42e1b 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.cc
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc
@@ -62,6 +62,16 @@ void VScal<double>(const double* a, const double* x, double* y, int n) {
   }
 }
 
+template <>
+void VExp<float>(const float* x, float* y, int n) {
+  platform::dynload::vsExp(n, x, y);
+}
+
+template <>
+void VExp<double>(const double* x, double* y, int n) {
+  platform::dynload::vdExp(n, x, y);
+}
+
 // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
 template <>
 bool VMulKernel<float>::UseMe(int d) const {
@@ -78,6 +88,21 @@ bool VScalKernel<float>::UseMe(int d) const {
   return platform::MayIUse(platform::avx512f) && d > 512;
 }
 
+template <>
+bool VExpKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
+template <>
+bool VSigmoidKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
+template <>
+bool VTanhKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
 #define AWALYS_USE_ME_WITH_DOUBLE(func)           \
   template <>                                     \
   bool func##Kernel<double>::UseMe(int d) const { \
@@ -87,6 +112,9 @@ bool VScalKernel<float>::UseMe(int d) const {
 AWALYS_USE_ME_WITH_DOUBLE(VMul);
 AWALYS_USE_ME_WITH_DOUBLE(VAdd);
 AWALYS_USE_ME_WITH_DOUBLE(VScal);
+AWALYS_USE_ME_WITH_DOUBLE(VExp);
+AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
+AWALYS_USE_ME_WITH_DOUBLE(VTanh);
 #undef AWALYS_USE_ME_WITH_DOUBLE
 
 }  // namespace mkl
@@ -104,5 +132,8 @@ namespace mkl = paddle::operators::jit::more::mkl;
 REGISTER_MKL_KERNEL(vmul, VMul);
 REGISTER_MKL_KERNEL(vadd, VAdd);
 REGISTER_MKL_KERNEL(vscal, VScal);
+REGISTER_MKL_KERNEL(vexp, VExp);
+REGISTER_MKL_KERNEL(vsigmoid, VSigmoid);
+REGISTER_MKL_KERNEL(vtanh, VTanh);
 #undef REGISTER_MKL_KERNEL
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h
index 84a93f408f51e444c62ea3b70fba8daab280fed0..bf209d2f9d23dbd1376b0ea2e2ac007280dded4c 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.h
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.h
@@ -32,6 +32,34 @@ void VAdd(const T* x, const T* y, T* z, int n);
 
 template <typename T>
 void VScal(const T* a, const T* x, T* y, int n);
+template <typename T>
+void VExp(const T* x, T* y, int n);
+
+template <typename T>
+void VSigmoid(const T* x, T* y, int n) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  for (int i = 0; i < n; ++i) {
+    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
+    y[i] = static_cast<T>(0) - y[i];
+  }
+  VExp(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
+  }
+}
+
+template <typename T>
+void VTanh(const T* x, T* y, int n) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * x[i];
+  }
+  VSigmoid(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
+  }
+}
+
 #define DECLARE_MKL_KERNEL(name, tuples)              \
   template <typename T>                               \
   class name##Kernel : public KernelImpl<tuples<T>> { \
@@ -47,6 +75,11 @@ DECLARE_MKL_KERNEL(VAdd, XYZNTuples);
 // AXYN
 DECLARE_MKL_KERNEL(VScal, AXYNTuples);
 
+// XYN
+DECLARE_MKL_KERNEL(VExp, XYNTuples);
+DECLARE_MKL_KERNEL(VSigmoid, XYNTuples);
+DECLARE_MKL_KERNEL(VTanh, XYNTuples);
+
 #undef DECLARE_MKL_KERNEL
 
 }  // namespace mkl
diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc
index 62d4cdc19ae05870789ca624a454ba9080d82e3b..e211276d189132b113c0ca531b590b31ac084812 100644
--- a/paddle/fluid/operators/jit/test.cc
+++ b/paddle/fluid/operators/jit/test.cc
@@ -312,7 +312,7 @@ void TestXYNKernel() {
 
     std::vector<T> x(d), yref(d);
     std::vector<T> xinp(d);  // inplace test
-    RandomVec<T>(d, x.data());
+    RandomVec<T>(d, x.data(), -2.f, 2.f);
     std::copy(x.begin(), x.end(), xinp.begin());
 
     const T* x_data = x.data();
diff --git a/paddle/fluid/operators/math/jit_kernel_exp.cc b/paddle/fluid/operators/math/jit_kernel_exp.cc
index 7945cfb253a61b7d1191c39537254126e2bb85dd..1f97ed1e62c506caa8edbb89b3b6c9b6a7032fb3 100644
--- a/paddle/fluid/operators/math/jit_kernel_exp.cc
+++ b/paddle/fluid/operators/math/jit_kernel_exp.cc
@@ -30,47 +30,6 @@ namespace operators {
 namespace math {
 namespace jitkernel {
 
-#ifdef PADDLE_WITH_MKLML
-// try to use MKL to speedup
-template <typename T>
-void VExpMKL(const T* x, T* y, int n);
-
-template <>
-void VExpMKL<float>(const float* x, float* y, int n) {
-  platform::dynload::vsExp(n, x, y);
-}
-
-template <>
-void VExpMKL<double>(const double* x, double* y, int n) {
-  platform::dynload::vdExp(n, x, y);
-}
-
-template <typename T>
-void VSigmoidMKL(const T* x, T* y, int n) {
-  const T min = SIGMOID_THRESHOLD_MIN;
-  const T max = SIGMOID_THRESHOLD_MAX;
-  for (int i = 0; i < n; ++i) {
-    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = static_cast<T>(0) - y[i];
-  }
-  VExpMKL(y, y, n);
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
-  }
-}
-
-template <typename T>
-void VTanhMKL(const T* x, T* y, int n) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(2) * x[i];
-  }
-  VSigmoidMKL(y, y, n);
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
-  }
-}
-#endif
-
 /* VExp JitKernel */
 template <typename T>
 class VExpKernelImpl : public VExpKernel<T> {