Commit 5e97be7b authored by tensor-tang

enable jitkernel mkl vexp, vsigmoid and vtanh

Parent ae179269
@@ -56,10 +56,6 @@ typedef enum {
   identity
 } operand_type;
-#define XMM_FLOAT_BLOCK 4
-#define YMM_FLOAT_BLOCK 8
-#define ZMM_FLOAT_BLOCK 16
 #define DECLARE_JIT_CODE(codename) \
   const char* name() const override { return #codename; }
...
@@ -27,10 +27,6 @@ namespace paddle {
 namespace operators {
 namespace jit {
-#define SIGMOID_THRESHOLD_MIN -40.0
-#define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0
 template <KernelType KT, typename KernelTuples, typename PlaceType>
 inline typename std::enable_if<
     std::is_same<typename KernelTuples::data_type, float>::value &&
...
@@ -13,6 +13,7 @@
  * limitations under the License. */
 #pragma once
+#include "paddle/fluid/operators/jit/macro.h"
 #include "paddle/fluid/platform/macros.h"
 namespace paddle {
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <type_traits>
namespace paddle {
namespace operators {
namespace jit {
#define SIGMOID_THRESHOLD_MIN -40.0
#define SIGMOID_THRESHOLD_MAX 13.0
#define EXP_MAX_INPUT 40.0
#define XMM_FLOAT_BLOCK 4
#define YMM_FLOAT_BLOCK 8
#define ZMM_FLOAT_BLOCK 16
} // namespace jit
} // namespace operators
} // namespace paddle
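Context for the new header (presumably the jit/macro.h referenced by the include added above): SIGMOID_THRESHOLD_MIN/MAX clip sigmoid inputs where the float result has already saturated to 0 or 1, EXP_MAX_INPUT caps exp inputs, and the *_FLOAT_BLOCK constants are the number of packed floats in an SSE XMM (128-bit), AVX YMM (256-bit), and AVX-512 ZMM (512-bit) register. A minimal sketch (ours, not part of the commit) of how a kernel might split a length-d loop using the YMM width:

#include <cstdio>

// Hypothetical illustration; the constant mirrors YMM_FLOAT_BLOCK above.
constexpr int kYmmFloatBlock = 8;  // 256-bit YMM register / 32-bit float

void SplitIntoBlocks(int d) {
  int full_blocks = d / kYmmFloatBlock;  // iterations done with vector code
  int tail = d % kYmmFloatBlock;         // remainder handled by scalar/mask code
  std::printf("d=%d -> %d YMM blocks + %d tail elements\n", d, full_blocks, tail);
}

int main() {
  SplitIntoBlocks(70);  // prints: d=70 -> 8 YMM blocks + 6 tail elements
  return 0;
}

JIT generators typically emit the full-block loop with vector instructions and a short scalar (or masked) epilogue for the tail.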
@@ -6,3 +6,6 @@ set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl PARENT_SCOPE
 USE_JITKERNEL_MORE(vmul, mkl)
 USE_JITKERNEL_MORE(vadd, mkl)
 USE_JITKERNEL_MORE(vscal, mkl)
+USE_JITKERNEL_MORE(vexp, mkl)
+USE_JITKERNEL_MORE(vsigmoid, mkl)
+USE_JITKERNEL_MORE(vtanh, mkl)
@@ -62,6 +62,16 @@ void VScal<double>(const double* a, const double* x, double* y, int n) {
   }
 }
+template <>
+void VExp<float>(const float* x, float* y, int n) {
+  platform::dynload::vsExp(n, x, y);
+}
+
+template <>
+void VExp<double>(const double* x, double* y, int n) {
+  platform::dynload::vdExp(n, x, y);
+}
 // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
 template <>
 bool VMulKernel<float>::UseMe(int d) const {
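Side note on the calls above: vsExp/vdExp are Intel MKL vector-math (VML) routines that compute y[i] = exp(x[i]) over a whole array in one call; Paddle reaches them through platform::dynload, which loads MKL at runtime. A standalone sketch assuming MKL is instead linked directly:

#include <mkl.h>  // MKL VML: vsExp / vdExp
#include <cstdio>

int main() {
  const int n = 4;
  float x[n] = {0.f, 1.f, 2.f, 3.f};
  float y[n];
  vsExp(n, x, y);  // y[i] = exp(x[i]), one vectorized library call
  for (int i = 0; i < n; ++i) std::printf("exp(%g) = %g\n", x[i], y[i]);
  return 0;
}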
@@ -78,6 +88,21 @@ bool VScalKernel<float>::UseMe(int d) const {
   return platform::MayIUse(platform::avx512f) && d > 512;
 }
+template <>
+bool VExpKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
+template <>
+bool VSigmoidKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
+template <>
+bool VTanhKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
 #define AWALYS_USE_ME_WITH_DOUBLE(func)           \
   template <>                                     \
   bool func##Kernel<double>::UseMe(int d) const { \
@@ -87,6 +112,9 @@ bool VScalKernel<float>::UseMe(int d) const {
 AWALYS_USE_ME_WITH_DOUBLE(VMul);
 AWALYS_USE_ME_WITH_DOUBLE(VAdd);
 AWALYS_USE_ME_WITH_DOUBLE(VScal);
+AWALYS_USE_ME_WITH_DOUBLE(VExp);
+AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
+AWALYS_USE_ME_WITH_DOUBLE(VTanh);
 #undef AWALYS_USE_ME_WITH_DOUBLE
 }  // namespace mkl
@@ -104,5 +132,8 @@ namespace mkl = paddle::operators::jit::more::mkl;
 REGISTER_MKL_KERNEL(vmul, VMul);
 REGISTER_MKL_KERNEL(vadd, VAdd);
 REGISTER_MKL_KERNEL(vscal, VScal);
+REGISTER_MKL_KERNEL(vexp, VExp);
+REGISTER_MKL_KERNEL(vsigmoid, VSigmoid);
+REGISTER_MKL_KERNEL(vtanh, VTanh);
 #undef REGISTER_MKL_KERNEL
@@ -32,6 +32,34 @@ void VAdd(const T* x, const T* y, T* z, int n);
 template <typename T>
 void VScal(const T* a, const T* x, T* y, int n);
+template <typename T>
+void VExp(const T* x, T* y, int n);
+
+template <typename T>
+void VSigmoid(const T* x, T* y, int n) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  for (int i = 0; i < n; ++i) {
+    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
+    y[i] = static_cast<T>(0) - y[i];
+  }
+  VExp(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
+  }
+}
+
+template <typename T>
+void VTanh(const T* x, T* y, int n) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * x[i];
+  }
+  VSigmoid(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
+  }
+}
 #define DECLARE_MKL_KERNEL(name, tuples)              \
   template <typename T>                               \
   class name##Kernel : public KernelImpl<tuples<T>> { \
@@ -47,6 +75,11 @@ DECLARE_MKL_KERNEL(VAdd, XYZNTuples);
 // AXYN
 DECLARE_MKL_KERNEL(VScal, AXYNTuples);
+
+// XYN
+DECLARE_MKL_KERNEL(VExp, XYNTuples);
+DECLARE_MKL_KERNEL(VSigmoid, XYNTuples);
+DECLARE_MKL_KERNEL(VTanh, XYNTuples);
 #undef DECLARE_MKL_KERNEL
 }  // namespace mkl
...
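The VSigmoid/VTanh templates above lean on two facts worth spelling out: inputs are clipped to [SIGMOID_THRESHOLD_MIN, SIGMOID_THRESHOLD_MAX] because float sigmoid has already saturated to 0 or 1 outside that range, and tanh is recovered through the identity tanh(x) = 2·sigmoid(2x) − 1. A quick standalone check of that identity (our sketch, independent of the Paddle sources):

#include <cmath>
#include <cstdio>

// Verify tanh(x) == 2 * sigmoid(2x) - 1 at a few sample points.
float SigmoidRef(float x) { return 1.0f / (1.0f + std::exp(-x)); }

int main() {
  for (float x : {-3.0f, -0.5f, 0.0f, 0.5f, 3.0f}) {
    float via_sigmoid = 2.0f * SigmoidRef(2.0f * x) - 1.0f;
    std::printf("x=%+.1f  tanh=%.6f  2*sigmoid(2x)-1=%.6f\n",
                x, std::tanh(x), via_sigmoid);
  }
  return 0;
}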
@@ -312,7 +312,7 @@ void TestXYNKernel() {
   std::vector<T> x(d), yref(d);
   std::vector<T> xinp(d);  // inplace test
-  RandomVec<T>(d, x.data());
+  RandomVec<T>(d, x.data(), -2.f, 2.f);
   std::copy(x.begin(), x.end(), xinp.begin());
   const T* x_data = x.data();
...
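On the test change above (our reading): the new XYN kernels route through exp, which overflows single precision for inputs above ln(FLT_MAX) ≈ 88.7 and loses relative precision long before that, so bounding the random test inputs to [-2, 2] keeps the reference and MKL results comparable within float tolerance. An illustration of the limit:

#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  // Largest x for which exp(x) is still finite in single precision.
  float limit = std::log(std::numeric_limits<float>::max());
  std::printf("ln(FLT_MAX) = %.4f\n", limit);       // ~88.7228
  std::printf("expf(89) = %g\n", std::exp(89.0f));  // inf (overflow)
  return 0;
}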
@@ -30,47 +30,6 @@ namespace operators {
 namespace math {
 namespace jitkernel {
-#ifdef PADDLE_WITH_MKLML
-// try to use MKL to speedup
-template <typename T>
-void VExpMKL(const T* x, T* y, int n);
-
-template <>
-void VExpMKL<float>(const float* x, float* y, int n) {
-  platform::dynload::vsExp(n, x, y);
-}
-
-template <>
-void VExpMKL<double>(const double* x, double* y, int n) {
-  platform::dynload::vdExp(n, x, y);
-}
-
-template <typename T>
-void VSigmoidMKL(const T* x, T* y, int n) {
-  const T min = SIGMOID_THRESHOLD_MIN;
-  const T max = SIGMOID_THRESHOLD_MAX;
-  for (int i = 0; i < n; ++i) {
-    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = static_cast<T>(0) - y[i];
-  }
-  VExpMKL(y, y, n);
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
-  }
-}
-
-template <typename T>
-void VTanhMKL(const T* x, T* y, int n) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(2) * x[i];
-  }
-  VSigmoidMKL(y, y, n);
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
-  }
-}
-#endif
 /* VExp JitKernel */
 template <typename T>
 class VExpKernelImpl : public VExpKernel<T> {
...