Commit 31676583 authored by dzhwinter

Add back JIT SIMD instructions (staged).

Parent bf2e4cb1
@@ -181,10 +181,10 @@ include(external/eigen) # download eigen3
 include(external/pybind11) # download pybind11
 include(external/cares)
 include(external/cub)
+include(external/xxhash) # download xxhash
 if (NOT WIN32)
 # there is no official support of snappystream, warpctc, nccl, cupti in windows
-include(external/xxhash) # download xxhash
 include(external/snappy) # download snappy
 include(external/snappystream) # download snappystream
 include(external/warpctc) # download, build, install warpctc
...
@@ -87,13 +87,14 @@ copy(boost_lib
 DSTS ${dst_dir}
 DEPS boost
 )
+if(NOT WIN32)
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
 copy(xxhash_lib
 SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
 DSTS ${dst_dir} ${dst_dir}/lib
 DEPS xxhash
 )
+endif(NOT WIN32)
 if(NOT PROTOBUF_FOUND)
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
...
@@ -20,9 +20,6 @@
 #include "paddle/fluid/inference/paddle_inference_api.h"
 namespace paddle {
-// DEFINE_string(dirname, "./lb",
-//               "Directory of the inference model.");
 NativeConfig GetConfig() {
 NativeConfig config;
...
@@ -86,7 +86,7 @@ function(op_library TARGET)
 # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
 foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op" "hierarchical_sigmoid_op"
 "crf_decoding_op" "select_op" "lstmp_op" "gru_op" "fusion_gru_op" "lstm_op" "fusion_lstm_op" "cumsum_op"
-"fusion_seqconv_eltadd_relu_op" "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op")
+"fusion_seqconv_eltadd_relu_op" "hash_op")
 if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
 return()
 endif()
...
@@ -74,8 +74,8 @@ cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split)
 cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
 if (NOT WIN32)
 math_library(matrix_bit_code)
-cc_library(jit_kernel
+endif (NOT WIN32)
+cc_library(jit_kernel
 SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc
 DEPS cpu_info cblas)
 cc_test(jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel)
-endif (NOT WIN32)
@@ -18,10 +18,6 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
-#ifdef __AVX__
-#include <immintrin.h>
-#endif
 #ifdef PADDLE_WITH_MKLML
 #include "paddle/fluid/platform/dynload/mklml.h"
 #endif
...
@@ -15,13 +15,10 @@ limitations under the License. */
 #pragma once
 #include <math.h>
 #include <string>
+#include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/hostdevice.h"
-#ifdef __AVX__
-#include <immintrin.h>
-#endif
 namespace paddle {
 namespace operators {
 namespace math {
...
@@ -19,10 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/mklml.h"
 #endif
-#ifdef __AVX__
-#include <immintrin.h>
-#endif
 namespace paddle {
 namespace operators {
 namespace math {
...
@@ -16,9 +16,6 @@ limitations under the License. */
 #include <limits>
 #include <string>
 #include "paddle/fluid/operators/math/jit_kernel_macro.h"
-#ifdef __AVX__
-#include <immintrin.h>
-#endif
 namespace paddle {
 namespace operators {
@@ -263,6 +260,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
 } \
 }
+#ifndef _WIN32  // commented out crf decoding
 #ifdef __AVX__
 INTRIAVX_FLOAT(kEQ8);
 INTRIAVX_FLOAT(kGT8LT16);
@@ -275,6 +273,7 @@ INTRIAVX2_FLOAT(jit::avx2, kGT8LT16);
 INTRIAVX2_FLOAT(jit::avx2, kEQ16);
 INTRIAVX2_FLOAT(jit::avx2, kGT16);
 #endif
+#endif  // WIN32
 #ifdef __AVX512F__
 INTRIAVX2_FLOAT(jit::avx512f, kEQ8);
 INTRIAVX2_FLOAT(jit::avx512f, kGT8LT16);
...
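The last two hunks above fence the AVX/AVX2 CRF-decode specializations off from the Windows build with a `#ifndef _WIN32` / `#endif` pair, leaving the generic template implementation as the fallback. A minimal sketch of that guard-the-specialization pattern, under assumed names (the kernel below is illustrative, not the actual CRFDecodeKernel):

```cpp
#include <cstddef>

#ifndef _WIN32
#ifdef __AVX__
#include <immintrin.h>
#endif
#endif

// Generic implementation: compiled on every platform.
template <typename T>
struct VScaleKernel {
  void Compute(T a, const T* x, T* y, std::size_t n) const {
    for (std::size_t i = 0; i < n; ++i) y[i] = a * x[i];
  }
};

#ifndef _WIN32  // intrinsic specialization excluded from the Windows build for now
#ifdef __AVX__
// Float specialization using 256-bit AVX registers; this sketch assumes
// n is a multiple of 8 to keep the loop simple.
template <>
struct VScaleKernel<float> {
  void Compute(float a, const float* x, float* y, std::size_t n) const {
    const __m256 va = _mm256_set1_ps(a);
    for (std::size_t i = 0; i < n; i += 8) {
      _mm256_storeu_ps(y + i, _mm256_mul_ps(va, _mm256_loadu_ps(x + i)));
    }
  }
};
#endif
#endif
```

When the guard compiles the specialization out, callers silently fall back to the scalar template, so the Windows build keeps working while the intrinsic path is staged.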
@@ -20,10 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/mklml.h"
 #endif
-#ifdef __AVX__
-#include <immintrin.h>
-#endif
 namespace paddle {
 namespace operators {
 namespace math {
@@ -66,14 +62,18 @@ namespace detail {
 #ifdef __AVX__
+#if defined(_WIN32)
+#define ALIGN32 __declspec(align(32))
+#else
 #define ALIGN32 __attribute__((aligned(32)))
+#endif  // _WIN32
 #define _PS256_CONST(Name, Val) \
-static const float _ps256_##Name[8] ALIGN32 = {Val, Val, Val, Val, \
+static const float ALIGN32 _ps256_##Name[8] = {Val, Val, Val, Val, \
 Val, Val, Val, Val}
 #define _PI256_CONST(Name, Val) \
-static const int _pi256_##Name[8] ALIGN32 = {Val, Val, Val, Val, \
+static const int ALIGN32 _pi256_##Name[8] = {Val, Val, Val, Val, \
 Val, Val, Val, Val}
 _PI256_CONST(0x7f, 0x7f);
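This hunk adds an MSVC spelling for the 32-byte alignment attribute and moves ALIGN32 ahead of the array name, since `__declspec(align(32))` has to appear among the declaration specifiers rather than after the declarator. A minimal standalone sketch of the same pattern (the constant name is illustrative, not from the Paddle sources; C++11 code could simply use `alignas(32)` instead):

```cpp
// Portable 32-byte alignment for AVX-sized constants:
// MSVC uses __declspec(align(N)), GCC/Clang use __attribute__((aligned(N))).
#if defined(_WIN32)
#define ALIGN32 __declspec(align(32))
#else
#define ALIGN32 __attribute__((aligned(32)))
#endif

// Placing ALIGN32 before the identifier compiles with both toolchains;
// the trailing-attribute form only works with GCC/Clang.
static const float ALIGN32 kOnes8[8] = {1.f, 1.f, 1.f, 1.f,
                                        1.f, 1.f, 1.f, 1.f};
```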
@@ -98,7 +98,7 @@ typedef union imm_xmm_union {
 #define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) \
 { \
-imm_xmm_union u ALIGN32; \
+imm_xmm_union ALIGN32 u; \
 u.imm = imm_; \
 xmm0_ = u.xmm[0]; \
 xmm1_ = u.xmm[1]; \
@@ -106,7 +106,7 @@ typedef union imm_xmm_union {
 #define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) \
 { \
-imm_xmm_union u ALIGN32; \
+imm_xmm_union ALIGN32 u; \
 u.xmm[0] = xmm0_; \
 u.xmm[1] = xmm1_; \
 imm_ = u.imm; \
@@ -508,12 +508,14 @@ class VTanhKernelImpl : public VTanhKernel<T> {
 vaddbias_->Compute(-1.f, y, y); \
 }
+#ifndef __WIN32
 #ifdef __AVX__
 INTRI8_FLOAT(jit::avx, detail::ExpAVX);
 INTRI16_FLOAT(jit::avx, detail::ExpAVX);
 INTRI_GT8LT16_FLOAT(jit::avx, detail::ExpAVX);
 INTRI_GT16_FLOAT(jit::avx, detail::ExpAVX);
-#endif
+#endif  // AVX
+#endif  // WIN32
 #ifdef __AVX2__
 INTRI8_FLOAT(jit::avx2, detail::ExpAVX2);
 INTRI16_FLOAT(jit::avx2, detail::ExpAVX2);
...
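The guard added around the ExpAVX specializations tests `__WIN32`, whereas the rest of the commit tests `_WIN32`; MSVC predefines only `_WIN32` (with `_WIN64` additionally set on 64-bit targets), while `__WIN32` is a MinGW-specific spelling. A tiny reference sketch of the conventional check, purely as an illustration:

```cpp
#include <cstdio>

int main() {
  // _WIN32 is predefined by MSVC (and MinGW) for every Windows target,
  // 32-bit and 64-bit alike; _WIN64 is additionally defined on 64-bit builds.
#if defined(_WIN32)
  std::puts("building for Windows");
#else
  std::puts("building for a non-Windows platform");
#endif
  return 0;
}
```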
@@ -18,10 +18,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/macros.h"
-#ifdef __AVX__
-#include <immintrin.h>
-#endif
 namespace paddle {
 namespace operators {
 namespace math {
...
@@ -16,6 +16,18 @@ limitations under the License. */
 #include <stddef.h>
+#ifdef _WIN32
+#if defined(__AVX2__)
+#include <immintrin.h>  //avx2
+#elif defined(__AVX__)
+#include <intrin.h>  //avx
+#endif  // AVX
+#else  // WIN32
+#ifdef __AVX__
+#include <immintrin.h>
+#endif
+#endif  // WIN32
 namespace paddle {
 namespace platform {
...
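This hunk is what lets the kernel sources above drop their local `#ifdef __AVX__` include blocks: cpu_info.h now selects the intrinsic header once per toolchain, `<intrin.h>`/`<immintrin.h>` under MSVC and `<immintrin.h>` under GCC/Clang. A hedged, self-contained sketch of the same idea outside the Paddle tree (file and function names are illustrative):

```cpp
// simd_includes_example.cc -- illustrative only, not a Paddle source file.
// The conditional block mirrors the one added to cpu_info.h: pick the
// intrinsic header per toolchain, then let every consumer rely on it.
#ifdef _WIN32
#if defined(__AVX2__)
#include <immintrin.h>  // AVX2 intrinsics on MSVC
#elif defined(__AVX__)
#include <intrin.h>     // AVX intrinsics on MSVC
#endif
#else
#ifdef __AVX__
#include <immintrin.h>  // AVX/AVX2 intrinsics on GCC/Clang
#endif
#endif

#include <cstddef>

// Element-wise add with an AVX fast path and a scalar tail/fallback;
// no translation unit needs its own intrinsic-header boilerplate.
void AddArrays(const float* a, const float* b, float* out, std::size_t n) {
  std::size_t i = 0;
#ifdef __AVX__
  for (; i + 8 <= n; i += 8) {
    _mm256_storeu_ps(out + i, _mm256_add_ps(_mm256_loadu_ps(a + i),
                                            _mm256_loadu_ps(b + i)));
  }
#endif
  for (; i < n; ++i) out[i] = a[i] + b[i];
}
```

Note that `__AVX__` is only defined by MSVC when the target is built with /arch:AVX or higher, so the scalar loop remains the default on plain Windows builds.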
@@ -62,7 +62,6 @@ static void *dlopen(const char *filename, int flag) {
 }
 return reinterpret_cast<void *>(hModule);
 }
 #endif  // !_WIN32
 static void ExecShellCommand(const std::string &cmd, std::string *message) {
...