Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
31676583
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
31676583
编写于
10月 31, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add back jit simd instructions. stage.
上级
bf2e4cb1
变更
13
隐藏空白更改
内联
并排
Showing
13 changed files
with
34 additions
and
39 deletions
+34
-39
CMakeLists.txt
CMakeLists.txt
+1
-1
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+2
-1
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
+0
-3
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-1
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+4
-4
paddle/fluid/operators/math/cpu_vec.h
paddle/fluid/operators/math/cpu_vec.h
+0
-4
paddle/fluid/operators/math/detail/activation_functions.h
paddle/fluid/operators/math/detail/activation_functions.h
+1
-4
paddle/fluid/operators/math/jit_kernel_blas.cc
paddle/fluid/operators/math/jit_kernel_blas.cc
+0
-4
paddle/fluid/operators/math/jit_kernel_crf_decode.cc
paddle/fluid/operators/math/jit_kernel_crf_decode.cc
+2
-3
paddle/fluid/operators/math/jit_kernel_exp.cc
paddle/fluid/operators/math/jit_kernel_exp.cc
+11
-9
paddle/fluid/operators/math/jit_kernel_rnn.cc
paddle/fluid/operators/math/jit_kernel_rnn.cc
+0
-4
paddle/fluid/platform/cpu_info.h
paddle/fluid/platform/cpu_info.h
+12
-0
paddle/fluid/platform/port.h
paddle/fluid/platform/port.h
+0
-1
未找到文件。
CMakeLists.txt
浏览文件 @
31676583
...
@@ -181,10 +181,10 @@ include(external/eigen) # download eigen3
...
@@ -181,10 +181,10 @@ include(external/eigen) # download eigen3
include
(
external/pybind11
)
# download pybind11
include
(
external/pybind11
)
# download pybind11
include
(
external/cares
)
include
(
external/cares
)
include
(
external/cub
)
include
(
external/cub
)
include
(
external/xxhash
)
# download xxhash
if
(
NOT WIN32
)
if
(
NOT WIN32
)
# there is no official support of snappystream, warpctc, nccl, cupti in windows
# there is no official support of snappystream, warpctc, nccl, cupti in windows
include
(
external/xxhash
)
# download xxhash
include
(
external/snappy
)
# download snappy
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
include
(
external/snappystream
)
# download snappystream
include
(
external/warpctc
)
# download, build, install warpctc
include
(
external/warpctc
)
# download, build, install warpctc
...
...
cmake/inference_lib.cmake
浏览文件 @
31676583
...
@@ -87,13 +87,14 @@ copy(boost_lib
...
@@ -87,13 +87,14 @@ copy(boost_lib
DSTS
${
dst_dir
}
DSTS
${
dst_dir
}
DEPS boost
DEPS boost
)
)
if
(
NOT WIN32
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/xxhash"
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/xxhash"
)
copy
(
xxhash_lib
copy
(
xxhash_lib
SRCS
${
XXHASH_INCLUDE_DIR
}
${
XXHASH_LIBRARIES
}
SRCS
${
XXHASH_INCLUDE_DIR
}
${
XXHASH_LIBRARIES
}
DSTS
${
dst_dir
}
${
dst_dir
}
/lib
DSTS
${
dst_dir
}
${
dst_dir
}
/lib
DEPS xxhash
DEPS xxhash
)
)
endif
(
NOT WIN32
)
if
(
NOT PROTOBUF_FOUND
)
if
(
NOT PROTOBUF_FOUND
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/protobuf"
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/protobuf"
)
...
...
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
浏览文件 @
31676583
...
@@ -20,9 +20,6 @@
...
@@ -20,9 +20,6 @@
#include "paddle/fluid/inference/paddle_inference_api.h"
#include "paddle/fluid/inference/paddle_inference_api.h"
namespace
paddle
{
namespace
paddle
{
// DEFINE_string(dirname, "./lb",
// "Directory of the inference model.");
NativeConfig
GetConfig
()
{
NativeConfig
GetConfig
()
{
NativeConfig
config
;
NativeConfig
config
;
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
31676583
...
@@ -86,7 +86,7 @@ function(op_library TARGET)
...
@@ -86,7 +86,7 @@ function(op_library TARGET)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
"hierarchical_sigmoid_op"
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
"hierarchical_sigmoid_op"
"crf_decoding_op"
"select_op"
"lstmp_op"
"gru_op"
"fusion_gru_op"
"lstm_op"
"fusion_lstm_op"
"cumsum_op"
"crf_decoding_op"
"select_op"
"lstmp_op"
"gru_op"
"fusion_gru_op"
"lstm_op"
"fusion_lstm_op"
"cumsum_op"
"fusion_seqconv_eltadd_relu_op"
"
channel_send_op"
"channel_create_op"
"channel_close_op"
"channel_recv
_op"
)
"fusion_seqconv_eltadd_relu_op"
"
hash
_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
return
()
endif
()
endif
()
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
31676583
...
@@ -74,8 +74,8 @@ cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split)
...
@@ -74,8 +74,8 @@ cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
math_library
(
matrix_bit_code
)
math_library
(
matrix_bit_code
)
cc_library
(
jit_kernel
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc
DEPS cpu_info cblas
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
cc_library
(
jit_kernel
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc
DEPS cpu_info cblas
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
paddle/fluid/operators/math/cpu_vec.h
浏览文件 @
31676583
...
@@ -18,10 +18,6 @@ limitations under the License. */
...
@@ -18,10 +18,6 @@ limitations under the License. */
#include <string>
#include <string>
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef __AVX__
#include <immintrin.h>
#endif
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#endif
...
...
paddle/fluid/operators/math/detail/activation_functions.h
浏览文件 @
31676583
...
@@ -15,13 +15,10 @@ limitations under the License. */
...
@@ -15,13 +15,10 @@ limitations under the License. */
#pragma once
#pragma once
#include <math.h>
#include <math.h>
#include <string>
#include <string>
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/platform/hostdevice.h"
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
...
paddle/fluid/operators/math/jit_kernel_blas.cc
浏览文件 @
31676583
...
@@ -19,10 +19,6 @@ limitations under the License. */
...
@@ -19,10 +19,6 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#endif
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
...
paddle/fluid/operators/math/jit_kernel_crf_decode.cc
浏览文件 @
31676583
...
@@ -16,9 +16,6 @@ limitations under the License. */
...
@@ -16,9 +16,6 @@ limitations under the License. */
#include <limits>
#include <limits>
#include <string>
#include <string>
#include "paddle/fluid/operators/math/jit_kernel_macro.h"
#include "paddle/fluid/operators/math/jit_kernel_macro.h"
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -263,6 +260,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
...
@@ -263,6 +260,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
} \
} \
}
}
#ifndef _WIN32 // commented out crf decoding
#ifdef __AVX__
#ifdef __AVX__
INTRIAVX_FLOAT
(
kEQ8
);
INTRIAVX_FLOAT
(
kEQ8
);
INTRIAVX_FLOAT
(
kGT8LT16
);
INTRIAVX_FLOAT
(
kGT8LT16
);
...
@@ -275,6 +273,7 @@ INTRIAVX2_FLOAT(jit::avx2, kGT8LT16);
...
@@ -275,6 +273,7 @@ INTRIAVX2_FLOAT(jit::avx2, kGT8LT16);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kEQ16
);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kEQ16
);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kGT16
);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kGT16
);
#endif
#endif
#endif // WIN32
#ifdef __AVX512F__
#ifdef __AVX512F__
INTRIAVX2_FLOAT
(
jit
::
avx512f
,
kEQ8
);
INTRIAVX2_FLOAT
(
jit
::
avx512f
,
kEQ8
);
INTRIAVX2_FLOAT
(
jit
::
avx512f
,
kGT8LT16
);
INTRIAVX2_FLOAT
(
jit
::
avx512f
,
kGT8LT16
);
...
...
paddle/fluid/operators/math/jit_kernel_exp.cc
浏览文件 @
31676583
...
@@ -20,10 +20,6 @@ limitations under the License. */
...
@@ -20,10 +20,6 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#endif
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
@@ -66,14 +62,18 @@ namespace detail {
...
@@ -66,14 +62,18 @@ namespace detail {
#ifdef __AVX__
#ifdef __AVX__
#if defined(_WIN32)
#define ALIGN32 __declspec(align(32))
#else
#define ALIGN32 __attribute__((aligned(32)))
#define ALIGN32 __attribute__((aligned(32)))
#endif // _WIN32
#define _PS256_CONST(Name, Val) \
#define _PS256_CONST(Name, Val) \
static const float
_ps256_##Name[8] ALIGN32
= {Val, Val, Val, Val, \
static const float
ALIGN32 _ps256_##Name[8]
= {Val, Val, Val, Val, \
Val, Val, Val, Val}
Val, Val, Val, Val}
#define _PI256_CONST(Name, Val) \
#define _PI256_CONST(Name, Val) \
static const int
_pi256_##Name[8] ALIGN32
= {Val, Val, Val, Val, \
static const int
ALIGN32 _pi256_##Name[8]
= {Val, Val, Val, Val, \
Val, Val, Val, Val}
Val, Val, Val, Val}
_PI256_CONST
(
0x7f
,
0x7f
);
_PI256_CONST
(
0x7f
,
0x7f
);
...
@@ -98,7 +98,7 @@ typedef union imm_xmm_union {
...
@@ -98,7 +98,7 @@ typedef union imm_xmm_union {
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) \
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) \
{ \
{ \
imm_xmm_union
u ALIGN32
; \
imm_xmm_union
ALIGN32 u
; \
u.imm = imm_; \
u.imm = imm_; \
xmm0_ = u.xmm[0]; \
xmm0_ = u.xmm[0]; \
xmm1_ = u.xmm[1]; \
xmm1_ = u.xmm[1]; \
...
@@ -106,7 +106,7 @@ typedef union imm_xmm_union {
...
@@ -106,7 +106,7 @@ typedef union imm_xmm_union {
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) \
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) \
{ \
{ \
imm_xmm_union
u ALIGN32
; \
imm_xmm_union
ALIGN32 u
; \
u.xmm[0] = xmm0_; \
u.xmm[0] = xmm0_; \
u.xmm[1] = xmm1_; \
u.xmm[1] = xmm1_; \
imm_ = u.imm; \
imm_ = u.imm; \
...
@@ -508,12 +508,14 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -508,12 +508,14 @@ class VTanhKernelImpl : public VTanhKernel<T> {
vaddbias_->Compute(-1.f, y, y); \
vaddbias_->Compute(-1.f, y, y); \
}
}
#ifndef __WIN32
#ifdef __AVX__
#ifdef __AVX__
INTRI8_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI8_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT8LT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT8LT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
#endif
#endif // AVX
#endif // WIN32
#ifdef __AVX2__
#ifdef __AVX2__
INTRI8_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
INTRI8_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
...
...
paddle/fluid/operators/math/jit_kernel_rnn.cc
浏览文件 @
31676583
...
@@ -18,10 +18,6 @@ limitations under the License. */
...
@@ -18,10 +18,6 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
#ifdef __AVX__
#include <immintrin.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
...
paddle/fluid/platform/cpu_info.h
浏览文件 @
31676583
...
@@ -16,6 +16,18 @@ limitations under the License. */
...
@@ -16,6 +16,18 @@ limitations under the License. */
#include <stddef.h>
#include <stddef.h>
#ifdef _WIN32
#if defined(__AVX2__)
#include <immintrin.h> //avx2
#elif defined(__AVX__)
#include <intrin.h> //avx
#endif // AVX
#else // WIN32
#ifdef __AVX__
#include <immintrin.h>
#endif
#endif // WIN32
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
...
...
paddle/fluid/platform/port.h
浏览文件 @
31676583
...
@@ -62,7 +62,6 @@ static void *dlopen(const char *filename, int flag) {
...
@@ -62,7 +62,6 @@ static void *dlopen(const char *filename, int flag) {
}
}
return
reinterpret_cast
<
void
*>
(
hModule
);
return
reinterpret_cast
<
void
*>
(
hModule
);
}
}
#endif // !_WIN32
#endif // !_WIN32
static
void
ExecShellCommand
(
const
std
::
string
&
cmd
,
std
::
string
*
message
)
{
static
void
ExecShellCommand
(
const
std
::
string
&
cmd
,
std
::
string
*
message
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录