diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt index 0f213c5898fa6558af4bb80b38a0420f3acd2638..ced29741253e72a17413de51fb2c24a7fb1257d3 100644 --- a/paddle/fluid/operators/jit/CMakeLists.txt +++ b/paddle/fluid/operators/jit/CMakeLists.txt @@ -20,4 +20,6 @@ endif() cc_library(jit_kernel_helper SRCS ${jit_kernel_cc_srcs} DEPS ${JIT_KERNEL_DEPS}) cc_test(jit_kernel_test SRCS test.cc DEPS jit_kernel_helper) -cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper) +if(NOT WIN32) + cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper) +endif() diff --git a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc index f06892a40775d37ae88bed9f396427d8d7b8e599..f446226c9354899d4fda885c42082d434acdaae6 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc +++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc @@ -22,11 +22,16 @@ namespace operators { namespace jit { namespace more { namespace intrinsic { +// Note: the SIMD block size used by this intrinsic code is fixed at compile time, not chosen at runtime. +// For example, code built with only AVX enabled still uses the AVX block size when it runs on an AVX512 machine. void CRFDecoding(const int seq_len, const float* x, const float* w, float* alpha, int* track, int tag_num) { - const int step_size = - platform::MayIUse(platform::avx512f) ? 
ZMM_FLOAT_BLOCK : YMM_FLOAT_BLOCK; +#ifdef __AVX512F__ + const int step_size = ZMM_FLOAT_BLOCK; +#else + const int step_size = YMM_FLOAT_BLOCK; +#endif const int end = tag_num / step_size; const int rest = tag_num % step_size; /* Setup the alpha initial value.*/ @@ -157,7 +162,12 @@ void CRFDecoding(const int seq_len, const float* x, const float* w, } bool CRFDecodingKernel::UseMe(const int& d) const { - return platform::MayIUse(platform::avx); +#ifdef __AVX512F__ + constexpr int block = ZMM_FLOAT_BLOCK; +#else + constexpr int block = YMM_FLOAT_BLOCK; +#endif + return platform::MayIUse(platform::avx) && d >= block; } } // namespace intrinsic diff --git a/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc b/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc index bac709bc9eaeb8ae4e0da0f0f991ea7f6a3a8004..4b502fbf6bca85e4eb5faea2523f0323c1f20a8f 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc +++ b/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc @@ -154,7 +154,7 @@ void LayerNorm(float* x, float* out, float* mean, float* var, } bool LayerNormKernel::UseMe(const int& d) const { - return platform::MayIUse(platform::avx); + return platform::MayIUse(platform::avx) && d >= YMM_FLOAT_BLOCK; } } // namespace intrinsic diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 5be7cc5d1c873eab2723600bda84d31c4507ac3f..c9194ec6b1fafea5dfd8a42e0cf3545a8bc8a7b9 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -37,7 +37,7 @@ template void ExpectEQ(const T* target, const T* refer, int n) { if (std::is_floating_point::value) { for (int i = 0; i < n; ++i) { - EXPECT_NEAR(target[i], refer[i], 1e-3); + EXPECT_NEAR(target[i], refer[i], 1e-5); } } else { for (int i = 0; i < n; ++i) {