Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2f9b5f23
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2f9b5f23
编写于
11月 21, 2018
作者:
T
tensor-tang
提交者:
GitHub
11月 21, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into fea/jit/rnn
上级
b4c826c5
1d9b2a45
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
685 addition
and
53 deletion
+685
-53
.gitignore
.gitignore
+1
-0
AUTHORS.md
AUTHORS.md
+1
-0
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+1
-0
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+4
-3
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+2
-0
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+2
-0
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
...le/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+18
-6
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
+5
-3
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+4
-5
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+2
-2
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+5
-1
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+10
-6
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+27
-10
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+0
-2
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
.../fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+201
-0
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+14
-0
paddle/fluid/operators/math/jit_code.h
paddle/fluid/operators/math/jit_code.h
+36
-0
paddle/fluid/operators/math/jit_kernel.h
paddle/fluid/operators/math/jit_kernel.h
+9
-0
paddle/fluid/operators/math/jit_kernel_blas.cc
paddle/fluid/operators/math/jit_kernel_blas.cc
+41
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-1
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+3
-1
python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py
...e/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py
+263
-0
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
+31
-13
未找到文件。
.gitignore
浏览文件 @
2f9b5f23
python/paddle/fluid/tests/unittests/reader_reset_test.recordio
paddle/operators/check_t.save
paddle/operators/check_t.save
paddle/operators/check_tensor.ls
paddle/operators/check_tensor.ls
paddle/operators/tensor.save
paddle/operators/tensor.save
...
...
AUTHORS.md
浏览文件 @
2f9b5f23
...
@@ -42,6 +42,7 @@
...
@@ -42,6 +42,7 @@
| QiJune | Jun Qi |
| QiJune | Jun Qi |
| qingqing01 | Qing-Qing Dang |
| qingqing01 | Qing-Qing Dang |
| reyoung | Yang Yu |
| reyoung | Yang Yu |
| Sand3r- | Michal Gallus |
| Superjom | Chun-Wei Yan |
| Superjom | Chun-Wei Yan |
| tensor-tang | Jian Tang |
| tensor-tang | Jian Tang |
| tianbingsz | Tian-Bing Xu |
| tianbingsz | Tian-Bing Xu |
...
...
paddle/fluid/framework/operator.h
浏览文件 @
2f9b5f23
...
@@ -100,6 +100,7 @@ class OperatorBase {
...
@@ -100,6 +100,7 @@ class OperatorBase {
const
std
::
string
&
Type
()
const
{
return
type_
;
}
const
std
::
string
&
Type
()
const
{
return
type_
;
}
bool
HasAttr
(
const
std
::
string
&
name
)
const
{
return
attrs_
.
count
(
name
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
const
T
&
Attr
(
const
std
::
string
&
name
)
const
{
inline
const
T
&
Attr
(
const
std
::
string
&
name
)
const
{
PADDLE_ENFORCE
(
attrs_
.
count
(
name
)
!=
0
,
"%s should be in AttributeMap"
,
PADDLE_ENFORCE
(
attrs_
.
count
(
name
)
!=
0
,
"%s should be in AttributeMap"
,
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
2f9b5f23
...
@@ -7,16 +7,17 @@ set(analysis_deps # analysis_deps can be extended accross the project
...
@@ -7,16 +7,17 @@ set(analysis_deps # analysis_deps can be extended accross the project
add_subdirectory
(
ir_passes
)
add_subdirectory
(
ir_passes
)
add_subdirectory
(
passes
)
add_subdirectory
(
passes
)
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
${
INFER_IR_PASSES
}
)
cc_library
(
analysis_helper SRCS helper.cc DEPS framework_proto proto_desc graph paddle_fluid_api
)
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
${
INFER_IR_PASSES
}
analysis_helper
)
cc_library
(
argument SRCS argument.cc DEPS scope proto_desc
)
cc_library
(
argument SRCS argument.cc DEPS scope proto_desc
)
cc_library
(
analysis_pass SRCS analysis_pass.cc DEPS proto_desc
)
cc_library
(
analysis_pass SRCS analysis_pass.cc DEPS proto_desc
)
cc_library
(
analysis SRCS
cc_library
(
analysis SRCS
analyzer.cc
analyzer.cc
helper.cc
analysis_pass
analysis_pass
DEPS
${
analysis_deps
}
DEPS
${
analysis_deps
}
analysis_helper
)
)
cc_test
(
test_dot SRCS dot_tester.cc DEPS analysis
)
cc_test
(
test_dot SRCS dot_tester.cc DEPS analysis
)
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
2f9b5f23
...
@@ -30,6 +30,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
...
@@ -30,6 +30,7 @@ TEST(Analyzer, analysis_without_tensorrt) {
Argument
argument
;
Argument
argument
;
argument
.
SetModelDir
(
FLAGS_inference_model_dir
);
argument
.
SetModelDir
(
FLAGS_inference_model_dir
);
argument
.
SetIrAnalysisPasses
({
"infer_clean_graph_pass"
});
argument
.
SetIrAnalysisPasses
({
"infer_clean_graph_pass"
});
argument
.
SetUseGPU
(
false
);
Analyzer
analyser
;
Analyzer
analyser
;
analyser
.
Run
(
&
argument
);
analyser
.
Run
(
&
argument
);
...
@@ -41,6 +42,7 @@ TEST(Analyzer, analysis_with_tensorrt) {
...
@@ -41,6 +42,7 @@ TEST(Analyzer, analysis_with_tensorrt) {
argument
.
SetTensorRtWorkspaceSize
(
1
<<
20
);
argument
.
SetTensorRtWorkspaceSize
(
1
<<
20
);
argument
.
SetModelDir
(
FLAGS_inference_model_dir
);
argument
.
SetModelDir
(
FLAGS_inference_model_dir
);
argument
.
SetIrAnalysisPasses
({
"infer_clean_graph_pass"
});
argument
.
SetIrAnalysisPasses
({
"infer_clean_graph_pass"
});
argument
.
SetUseGPU
(
false
);
Analyzer
analyser
;
Analyzer
analyser
;
analyser
.
Run
(
&
argument
);
analyser
.
Run
(
&
argument
);
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
2f9b5f23
...
@@ -116,6 +116,7 @@ struct Argument {
...
@@ -116,6 +116,7 @@ struct Argument {
std
::
vector
<
std
::
string
>
);
std
::
vector
<
std
::
string
>
);
DECL_ARGUMENT_FIELD
(
use_gpu
,
UseGPU
,
bool
);
DECL_ARGUMENT_FIELD
(
use_gpu
,
UseGPU
,
bool
);
DECL_ARGUMENT_FIELD
(
gpu_device_id
,
GPUDeviceId
,
int
);
DECL_ARGUMENT_FIELD
(
use_tensorrt
,
UseTensorRT
,
bool
);
DECL_ARGUMENT_FIELD
(
use_tensorrt
,
UseTensorRT
,
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_node_teller
,
TensorRtNodeTeller
,
DECL_ARGUMENT_FIELD
(
tensorrt_node_teller
,
TensorRtNodeTeller
,
std
::
function
<
bool
(
const
framework
::
ir
::
Node
*
)
>
);
std
::
function
<
bool
(
const
framework
::
ir
::
Node
*
)
>
);
...
...
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
浏览文件 @
2f9b5f23
...
@@ -4,4 +4,6 @@ set(analysis_deps ${analysis_deps}
...
@@ -4,4 +4,6 @@ set(analysis_deps ${analysis_deps}
subgraph_detector tensorrt_subgraph_pass
subgraph_detector tensorrt_subgraph_pass
CACHE INTERNAL
""
)
CACHE INTERNAL
""
)
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
APPEND
${
pass_file
}
"USE_PASS(tensorrt_subgraph_pass);
\n
"
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
tensorrt_subgraph_pass CACHE INTERNAL
""
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
tensorrt_subgraph_pass CACHE INTERNAL
""
)
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
浏览文件 @
2f9b5f23
...
@@ -30,15 +30,28 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
...
@@ -30,15 +30,28 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
if
(
!
argument
->
scope_valid
())
{
if
(
!
argument
->
scope_valid
())
{
argument
->
SetScope
(
new
framework
::
Scope
);
argument
->
SetScope
(
new
framework
::
Scope
);
}
}
PADDLE_ENFORCE
(
argument
->
use_gpu_valid
());
// The load program should run on the same device as the inference program,
// so that the parameters end up on that device; otherwise they would keep
// being copied between different devices.
platform
::
Place
place
;
if
(
argument
->
use_gpu
())
{
PADDLE_ENFORCE
(
argument
->
gpu_device_id_valid
());
place
=
platform
::
CUDAPlace
(
argument
->
gpu_device_id
());
}
else
{
place
=
platform
::
CPUPlace
();
}
if
(
argument
->
model_dir_valid
())
{
if
(
argument
->
model_dir_valid
())
{
auto
program
=
LoadModel
(
argument
->
model_dir
(),
argument
->
scope_ptr
());
auto
program
=
LoadModel
(
argument
->
model_dir
(),
argument
->
scope_ptr
(),
place
);
argument
->
SetMainProgram
(
program
.
release
());
argument
->
SetMainProgram
(
program
.
release
());
}
else
if
(
argument
->
model_program_path_valid
()
&&
}
else
if
(
argument
->
model_program_path_valid
()
&&
argument
->
model_params_path_valid
())
{
argument
->
model_params_path_valid
())
{
auto
program
=
auto
program
=
LoadModel
(
argument
->
model_program_path
(),
argument
->
model_params_path
(),
LoadModel
(
argument
->
model_program_path
(),
argument
->
model_params_path
(),
argument
->
scope_ptr
());
argument
->
scope_ptr
()
,
place
);
argument
->
SetMainProgram
(
program
.
release
());
argument
->
SetMainProgram
(
program
.
release
());
}
else
{
}
else
{
PADDLE_THROW
(
PADDLE_THROW
(
...
@@ -52,16 +65,15 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
...
@@ -52,16 +65,15 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
}
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
IrGraphBuildPass
::
LoadModel
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
IrGraphBuildPass
::
LoadModel
(
const
std
::
string
&
path
,
framework
::
Scope
*
scope
)
{
const
std
::
string
&
path
,
framework
::
Scope
*
scope
,
platform
::
CPUPlace
place
;
const
platform
::
Place
&
place
)
{
framework
::
Executor
exe
(
place
);
framework
::
Executor
exe
(
place
);
return
Load
(
&
exe
,
scope
,
path
);
return
Load
(
&
exe
,
scope
,
path
);
}
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
IrGraphBuildPass
::
LoadModel
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
IrGraphBuildPass
::
LoadModel
(
const
std
::
string
&
program_path
,
const
std
::
string
&
params_path
,
const
std
::
string
&
program_path
,
const
std
::
string
&
params_path
,
framework
::
Scope
*
scope
)
{
framework
::
Scope
*
scope
,
const
platform
::
Place
&
place
)
{
platform
::
CPUPlace
place
;
framework
::
Executor
exe
(
place
);
framework
::
Executor
exe
(
place
);
return
Load
(
&
exe
,
scope
,
program_path
,
params_path
);
return
Load
(
&
exe
,
scope
,
program_path
,
params_path
);
}
}
...
...
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
浏览文件 @
2f9b5f23
...
@@ -17,6 +17,7 @@
...
@@ -17,6 +17,7 @@
#include <string>
#include <string>
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -32,11 +33,12 @@ class IrGraphBuildPass : public AnalysisPass {
...
@@ -32,11 +33,12 @@ class IrGraphBuildPass : public AnalysisPass {
std
::
string
repr
()
const
override
;
std
::
string
repr
()
const
override
;
private:
private:
std
::
unique_ptr
<
framework
::
ProgramDesc
>
LoadModel
(
const
std
::
string
&
path
,
std
::
unique_ptr
<
framework
::
ProgramDesc
>
LoadModel
(
framework
::
Scope
*
scope
);
const
std
::
string
&
path
,
framework
::
Scope
*
scope
,
const
platform
::
Place
&
place
);
std
::
unique_ptr
<
framework
::
ProgramDesc
>
LoadModel
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
LoadModel
(
const
std
::
string
&
program_path
,
const
std
::
string
&
params_path
,
const
std
::
string
&
program_path
,
const
std
::
string
&
params_path
,
framework
::
Scope
*
scope
);
framework
::
Scope
*
scope
,
const
platform
::
Place
&
place
);
std
::
string
model_binary_str_
;
std
::
string
model_binary_str_
;
};
};
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
2f9b5f23
...
@@ -27,11 +27,10 @@ endif()
...
@@ -27,11 +27,10 @@ endif()
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder
)
cc_library
(
analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder
)
cc_library
(
paddle_pass_builder SRCS paddle_pass_builder.cc
)
cc_library
(
paddle_pass_builder SRCS paddle_pass_builder.cc
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder ir_pass_manager
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS scope lod_tensor enforce
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder DEPS zero_copy_tensor
)
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
SRCS api_tester.cc
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
2f9b5f23
...
@@ -285,6 +285,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -285,6 +285,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
status_program_optimized_
=
true
;
status_program_optimized_
=
true
;
argument_
.
SetUseGPU
(
config_
.
use_gpu
);
argument_
.
SetUseGPU
(
config_
.
use_gpu
);
argument_
.
SetGPUDeviceId
(
config_
.
device
);
// Analyze inference_program
// Analyze inference_program
if
(
!
config_
.
model_dir
.
empty
())
{
if
(
!
config_
.
model_dir
.
empty
())
{
argument_
.
SetModelDir
(
config_
.
model_dir
);
argument_
.
SetModelDir
(
config_
.
model_dir
);
...
@@ -491,8 +492,7 @@ bool AnalysisPredictor::LoadParameters() {
...
@@ -491,8 +492,7 @@ bool AnalysisPredictor::LoadParameters() {
}
}
// Use NaiveExecutor to Load parameters.
// Use NaiveExecutor to Load parameters.
platform
::
CPUPlace
place
;
framework
::
NaiveExecutor
e
(
place_
);
framework
::
NaiveExecutor
e
(
place
);
e
.
Prepare
(
scope_
.
get
(),
*
load_program
,
0
,
false
);
e
.
Prepare
(
scope_
.
get
(),
*
load_program
,
0
,
false
);
e
.
Run
();
e
.
Run
();
VLOG
(
3
)
<<
"get "
<<
scope_
->
LocalVarNames
().
size
()
<<
" vars after load"
;
VLOG
(
3
)
<<
"get "
<<
scope_
->
LocalVarNames
().
size
()
<<
" vars after load"
;
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
2f9b5f23
...
@@ -116,8 +116,12 @@ class CpuPassStrategy : public PassStrategy {
...
@@ -116,8 +116,12 @@ class CpuPassStrategy : public PassStrategy {
class
GpuPassStrategy
:
public
PassStrategy
{
class
GpuPassStrategy
:
public
PassStrategy
{
public:
public:
GpuPassStrategy
()
:
PassStrategy
({})
{
GpuPassStrategy
()
:
PassStrategy
({})
{
// TODO(NHZlX) Problem with Data synchronization between GPU and CPU
// When running in GPU mode, the parameters are all on GPU. But the
// operations of "conv_bn_fuse_pass" are on CPU.
passes_
.
assign
({
passes_
.
assign
({
"infer_clean_graph_pass"
,
"conv_bn_fuse_pass"
,
"infer_clean_graph_pass"
,
// "infer_clean_graph_pass", "conv_bn_fuse_pass",
});
});
}
}
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
2f9b5f23
set
(
INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
)
set
(
INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
INFERENCE_EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
analysis
${
analysis_deps
}
ir_pass_manager analysis_predictor
)
endif
()
function
(
download_model install_dir model_name
)
function
(
download_model install_dir model_name
)
if
(
NOT EXISTS
${
install_dir
}
)
if
(
NOT EXISTS
${
install_dir
}
)
inference_download_and_uncompress
(
${
install_dir
}
${
INFERENCE_URL
}
${
model_name
}
)
inference_download_and_uncompress
(
${
install_dir
}
${
INFERENCE_URL
}
${
model_name
}
)
...
@@ -75,11 +79,11 @@ endif()
...
@@ -75,11 +79,11 @@ endif()
inference_analysis_api_test
(
test_analyzer_ocr
${
OCR_INSTALL_DIR
}
analyzer_vis_tester.cc
)
inference_analysis_api_test
(
test_analyzer_ocr
${
OCR_INSTALL_DIR
}
analyzer_vis_tester.cc
)
# resnet50
# resnet50
inference_analysis_api_test_with_fake_data
(
test_analyzer_resnet50
inference_analysis_api_test_with_fake_data
(
test_analyzer_resnet50
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/resnet50"
analyzer_resnet50_tester.cc
"resnet50_model.tar.gz"
)
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/resnet50"
analyzer_resnet50_tester.cc
"resnet50_model.tar.gz"
)
# mobilenet with depthwise_conv op
# mobilenet with depthwise_conv op
inference_analysis_api_test_with_fake_data
(
test_analyzer_mobilenet
inference_analysis_api_test_with_fake_data
(
test_analyzer_mobilenet
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/mobilenet_depthwise_conv"
analyzer_resnet50_tester.cc
"mobilenet_model.tar.gz"
)
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/mobilenet_depthwise_conv"
analyzer_resnet50_tester.cc
"mobilenet_model.tar.gz"
)
# anakin
# anakin
...
@@ -89,15 +93,15 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
...
@@ -89,15 +93,15 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
set
(
ANAKIN_RNN1_INSTALL_DIR
"
${
ANAKIN_INSTALL_DIR
}
/rnn1"
)
set
(
ANAKIN_RNN1_INSTALL_DIR
"
${
ANAKIN_INSTALL_DIR
}
/rnn1"
)
inference_download
(
${
ANAKIN_RNN1_INSTALL_DIR
}
${
INFERENCE_URL
}
"anakin_test%2Fditu_rnn.anakin2.model.bin"
)
inference_download
(
${
ANAKIN_RNN1_INSTALL_DIR
}
${
INFERENCE_URL
}
"anakin_test%2Fditu_rnn.anakin2.model.bin"
)
inference_download
(
${
ANAKIN_RNN1_INSTALL_DIR
}
${
INFERENCE_URL
}
"anakin_test%2Fditu_rnn_data.txt"
)
inference_download
(
${
ANAKIN_RNN1_INSTALL_DIR
}
${
INFERENCE_URL
}
"anakin_test%2Fditu_rnn_data.txt"
)
cc_test
(
test_anakin_rnn1 SRCS anakin_rnn1_tester.cc
cc_test
(
test_anakin_rnn1 SRCS anakin_rnn1_tester.cc
ARGS --model=
${
ANAKIN_RNN1_INSTALL_DIR
}
/anakin_test%2Fditu_rnn.anakin2.model.bin
ARGS --model=
${
ANAKIN_RNN1_INSTALL_DIR
}
/anakin_test%2Fditu_rnn.anakin2.model.bin
--datapath=
${
ANAKIN_RNN1_INSTALL_DIR
}
/anakin_test%2Fditu_rnn_data.txt
--datapath=
${
ANAKIN_RNN1_INSTALL_DIR
}
/anakin_test%2Fditu_rnn_data.txt
DEPS inference_anakin_api_shared SERIAL
)
DEPS inference_anakin_api_shared SERIAL
)
# anakin mobilenet
# anakin mobilenet
if
(
WITH_GPU
)
if
(
WITH_GPU
)
set
(
ANAKIN_MOBILENET_INSTALL_DIR
"
${
ANAKIN_INSTALL_DIR
}
/mobilenet"
)
set
(
ANAKIN_MOBILENET_INSTALL_DIR
"
${
ANAKIN_INSTALL_DIR
}
/mobilenet"
)
inference_download
(
${
ANAKIN_MOBILENET_INSTALL_DIR
}
${
INFERENCE_URL
}
"mobilenet_v2.anakin.bin"
)
inference_download
(
${
ANAKIN_MOBILENET_INSTALL_DIR
}
${
INFERENCE_URL
}
"mobilenet_v2.anakin.bin"
)
cc_test
(
test_anakin_mobilenet SRCS anakin_mobilenet_tester.cc
cc_test
(
test_anakin_mobilenet SRCS anakin_mobilenet_tester.cc
ARGS --model=
${
ANAKIN_MOBILENET_INSTALL_DIR
}
/mobilenet_v2.anakin.bin
ARGS --model=
${
ANAKIN_MOBILENET_INSTALL_DIR
}
/mobilenet_v2.anakin.bin
DEPS inference_anakin_api_shared dynload_cuda SERIAL
)
DEPS inference_anakin_api_shared dynload_cuda SERIAL
)
endif
()
endif
()
...
@@ -109,6 +113,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
...
@@ -109,6 +113,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_download_and_uncompress
(
${
TRT_MODEL_INSTALL_DIR
}
${
INFERENCE_URL
}
/tensorrt_test
"trt_test_models.tar.gz"
)
inference_download_and_uncompress
(
${
TRT_MODEL_INSTALL_DIR
}
${
INFERENCE_URL
}
/tensorrt_test
"trt_test_models.tar.gz"
)
endif
()
endif
()
inference_analysis_test
(
test_trt_models SRCS trt_models_tester.cc
inference_analysis_test
(
test_trt_models SRCS trt_models_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
analysis
${
analysis_deps
}
ir_pass_manager analysis_predictor
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS --infer_model=
${
TRT_MODEL_INSTALL_DIR
}
/trt_test_models SERIAL
)
ARGS --infer_model=
${
TRT_MODEL_INSTALL_DIR
}
/trt_test_models SERIAL
)
endif
()
endif
()
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
2f9b5f23
...
@@ -222,19 +222,36 @@ void TestMultiThreadPrediction(
...
@@ -222,19 +222,36 @@ void TestMultiThreadPrediction(
// The inputs of each thread are all the same.
// The inputs of each thread are all the same.
std
::
vector
<
PaddleTensor
>
outputs_tid
;
std
::
vector
<
PaddleTensor
>
outputs_tid
;
auto
&
predictor
=
predictors
[
tid
];
auto
&
predictor
=
predictors
[
tid
];
LOG
(
INFO
)
<<
"running thread "
<<
tid
;
Timer
timer
;
// warmup run
timer
.
tic
();
LOG
(
INFO
)
<<
"Running thread "
<<
tid
<<
", warm up run..."
;
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
{
for
(
const
auto
&
input
:
inputs
)
{
Timer
warmup_timer
;
ASSERT_TRUE
(
predictor
->
Run
(
input
,
&
outputs_tid
));
warmup_timer
.
tic
();
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
PrintTime
(
batch_size
,
1
,
num_threads
,
tid
,
warmup_timer
.
toc
(),
1
);
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
paddle
::
platform
::
ResetProfiler
();
}
}
#endif
}
}
auto
time
=
timer
.
toc
();
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
" run "
<<
num_times
<<
" times..."
;
total_time
+=
time
;
{
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
time
/
num_times
,
Timer
timer
;
inputs
.
size
());
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
const
auto
&
input
:
inputs
)
{
ASSERT_TRUE
(
predictor
->
Run
(
input
,
&
outputs_tid
));
}
}
auto
time
=
timer
.
toc
();
total_time
+=
time
;
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
time
/
num_times
,
inputs
.
size
());
}
});
});
}
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
2f9b5f23
...
@@ -145,5 +145,3 @@ TEST(TensorRT_mobilenet, analysis) {
...
@@ -145,5 +145,3 @@ TEST(TensorRT_mobilenet, analysis) {
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
tensorrt_subgraph_pass
);
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
0 → 100644
浏览文件 @
2f9b5f23
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <mkldnn/include/mkldnn.hpp>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#include "xbyak.h"
#include "xbyak_util.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
DataLayout
;
using
mkldnn
::
memory
;
// Maps a layout name to the matching MKL-DNN memory format.
//
// The comparison is case-insensitive. Recognized names are "nchw", "nchw16c",
// "nchw8c" and "nhwc"; every other value (including the empty string) maps to
// memory::format::any.
//
// NOTE: `format` is lower-cased in place, so the caller's string is modified.
static mkldnn::memory::format StringToMKLDNNFormat(std::string& format) {
  // tolower() has undefined behavior when handed a negative char value, so
  // every character is routed through unsigned char first.
  std::transform(
      format.begin(), format.end(), format.begin(),
      [](unsigned char c) { return static_cast<char>(::tolower(c)); });

  if (format == "nchw") {
    return memory::format::nchw;
  }
  if (format == "nchw16c") {
    return memory::format::nChw16c;
  }
  if (format == "nchw8c") {
    return memory::format::nChw8c;
  }
  if (format == "nhwc") {
    return memory::format::nhwc;
  }
  return memory::format::any;
}
// Applies an optional per-input format attribute to `tensor`.
//
// When the operator has the string attribute named `attribute`, its value is
// translated with StringToMKLDNNFormat and, unless the result is
// memory::format::any, stored on the tensor via set_format(). In every other
// case the tensor is left untouched.
static void UpdateDataFormat(const framework::ExecutionContext& ctx,
                             framework::Tensor* tensor,
                             const char* attribute) {
  if (!ctx.op().HasAttr(attribute)) {
    return;
  }

  // A mutable copy is required: StringToMKLDNNFormat takes a non-const
  // reference to its argument.
  std::string requested = ctx.Attr<std::string>(attribute);
  const auto parsed = StringToMKLDNNFormat(requested);
  if (parsed == memory::format::any) {
    return;
  }
  tensor->set_format(parsed);
}
// Reorders the contents of `tensor` into a plain MKL-DNN format.
//
// The target format is nchw when `isFourDim` is true and nc otherwise. The
// data is reordered into a local temporary tensor of the same dims and layout
// (its buffer comes from mutable_data<T>(place)), and `tensor` is then pointed
// at the reordered data through ShareDataWith().
// NOTE(review): whether ShareDataWith() also carries the new format over to
// `tensor` is not visible here -- confirm against framework::Tensor.
template <typename T>
static void ReorderInput(framework::Tensor* tensor,
                         const platform::Place& place,
                         const mkldnn::engine& engine, bool isFourDim) {
  using platform::to_void_cast;

  auto shape = paddle::framework::vectorize2int(tensor->dims());
  const auto dtype = platform::MKLDNNGetDataType<T>();

  // Destination tensor: same dims/layout as the source, plain format.
  framework::Tensor reordered;
  reordered.Resize(tensor->dims());
  if (isFourDim) {
    reordered.set_format(memory::format::nchw);
  } else {
    reordered.set_format(memory::format::nc);
  }
  reordered.set_layout(tensor->layout());

  // Wrap both buffers as MKL-DNN memory objects on the given engine.
  mkldnn::memory src_memory = {{{shape, dtype, tensor->format()}, engine},
                               to_void_cast<T>(tensor->data<T>())};
  mkldnn::memory dst_memory = {
      {{shape, dtype, reordered.format()}, engine},
      to_void_cast<T>(reordered.mutable_data<T>(place))};

  platform::Reorder(src_memory, dst_memory);

  tensor->ShareDataWith(reordered);
}
// MKL-DNN flavoured kernel for elementwise_mul: Out = X .* Y.
//
// Two execution paths exist:
//   * A JIT fast path, taken when X is in nChw16c format, Y is in nc format,
//     X's second dimension is a multiple of 16 and the CPU reports AVX-512F.
//     It multiplies each 16-channel block of X by the matching 16 values
//     of Y.
//   * A fallback path that reorders blocked inputs to a plain format when the
//     two inputs disagree on format, and then runs the generic
//     TransformFunctor broadcast multiply.
// In both paths the output is tagged with the kMKLDNN layout and X's format.
template <typename T>
class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using Tensor = framework::Tensor;

    int axis = ctx.Attr<int>("axis");
    auto* x = ctx.Input<Tensor>("X");
    auto* y = ctx.Input<Tensor>("Y");
    auto* z = ctx.Output<Tensor>("Out");
    const T* x_data = x->data<T>();
    const T* y_data = y->data<T>();
    T* z_data = z->mutable_data<T>(ctx.GetPlace());

    auto x_dims = x->dims();
    auto y_dims_untrimmed = y->dims();
    auto x_int_dims = paddle::framework::vectorize2int(x_dims);

    // The inputs are handed out as const, but their format tag may have to be
    // overridden from the op attributes; const_cast makes that explicit.
    UpdateDataFormat(ctx, const_cast<Tensor*>(x), "x_data_format");
    UpdateDataFormat(ctx, const_cast<Tensor*>(y), "y_data_format");

    Xbyak::util::Cpu cpu;
    const bool is_avx512_enabled = cpu.has(Xbyak::util::Cpu::tAVX512F);
    // Guard the index: X may have fewer than two dimensions, in which case
    // there is no channel dimension to inspect and the fast path is not
    // applicable.
    const bool are_dims_divisable =
        x_int_dims.size() > 1 && x_int_dims[1] % 16 == 0;
    const bool is_x_format_correct = x->format() == memory::format::nChw16c;
    const bool is_y_format_correct = y->format() == memory::format::nc;
    if (is_x_format_correct && is_y_format_correct && are_dims_divisable &&
        is_avx512_enabled) {
      int pre, n, post;
      get_mid_dims(x_dims, y_dims_untrimmed, axis, &pre, &n, &post);

      if (post == 1) {
        PADDLE_THROW("Not implemented when post is 1");
      } else {
        // Just check whether it works for RE-Resnext.
        PADDLE_ENFORCE_EQ(x_dims.size(), 4, "X should have 4 dimensions");

        // Named `batch` (not `n`) so it no longer shadows the `n` filled in
        // by get_mid_dims above.
        int batch = x_dims[0];
        int c = x_dims[1];
        int h = x_dims[2];
        int w = x_dims[3];

        PADDLE_ENFORCE(
            y_dims_untrimmed[0] == batch && y_dims_untrimmed[1] == c,
            "Y should be in nc format");

        constexpr int simd_width = 16;
        // Number of 16-channel blocks per image.
        int C = c / simd_width;

        const auto& multiply =
            math::jitkernel::KernelPool::Instance()
                .template Get<math::jitkernel::EltwiseMulnChw16cNCKernel<T>>(
                    batch);

#pragma omp parallel for collapse(2)
        for (int ni = 0; ni < batch; ni++) {
          for (int ci = 0; ci < C; ci++) {
            // X/Out: one (h * w * 16) plane per (image, channel block).
            auto ptr_x =
                x_data + ni * C * h * w * simd_width + ci * h * w * simd_width;
            // Y: 16 consecutive values per (image, channel block).
            auto ptr_y = y_data + ni * C * simd_width + ci * simd_width;
            auto ptr_z =
                z_data + ni * C * h * w * simd_width + ci * h * w * simd_width;

            multiply->Compute(ptr_x, ptr_y, ptr_z, h, w);
          }
        }
      }

      z->set_layout(DataLayout::kMKLDNN);
      z->set_format(x->format());
    } else {
      // Fallback to naive version:
      const bool are_inputs_in_same_format = x->format() == y->format();
      const bool is_x_nchw = x->format() == memory::format::nchw;
      const bool is_x_nc = x->format() == memory::format::nc;
      const bool is_y_nchw = y->format() == memory::format::nchw;
      const bool is_y_nc = y->format() == memory::format::nc;
      if (!are_inputs_in_same_format) {
        using platform::MKLDNNDeviceContext;
        auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
        const auto& mkldnn_engine = dev_ctx.GetEngine();
        // Bring any input that is not already nchw/nc into a plain format.
        if (!(is_x_nchw || is_x_nc)) {
          ReorderInput<T>(const_cast<Tensor*>(x), ctx.GetPlace(),
                          mkldnn_engine, x->dims().size() == 4);
        }
        if (!(is_y_nchw || is_y_nc)) {
          ReorderInput<T>(const_cast<Tensor*>(y), ctx.GetPlace(),
                          mkldnn_engine, y->dims().size() == 4);
        }
      }

      auto mul_func = [](T a, T b) -> T { return a * b; };

      TransformFunctor<decltype(mul_func), T,
                       paddle::platform::CPUDeviceContext, T>
          functor(
              x, y, z,
              ctx.template device_context<paddle::platform::CPUDeviceContext>(),
              mul_func);

      // axis == -1 means "align Y with the trailing dimensions of X".
      axis = (axis == -1 ? x_dims.size() - y_dims_untrimmed.size() : axis);
      PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                     "Axis should be in range [0, x_dims)");

      auto y_dims = trim_trailing_singular_dims(y_dims_untrimmed);
      axis = (y_dims.size() == 0) ? x_dims.size() : axis;

      int pre, n, post;
      get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);

      if (post == 1) {
        functor.RunRowWise(n, pre);
      } else {
        functor.RunMidWise(n, pre, post);
      }
      z->set_layout(DataLayout::kMKLDNN);
      z->set_format(x->format());
    }
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
elementwise_mul
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
ElementwiseMulMKLDNNKernel
<
float
>
)
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
2f9b5f23
...
@@ -97,6 +97,20 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -97,6 +97,20 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
.
EqualGreaterThan
(
-
1
);
.
EqualGreaterThan
(
-
1
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false). Used by MKLDNN."
)
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false). Used by MKLDNN."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"x_data_format"
,
"(string, default NCHW) Only used in mkldnn"
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
,
\"
NCHW16C
\"
,
\"
NCHW8C
\"
. "
"Defaults to
\"\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
""
);
AddAttr
<
std
::
string
>
(
"y_data_format"
,
"(string, default
\"\"
) Only used in mkldnn"
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
,
\"
NCHW16C
\"
,
\"
NCHW8C
\"
. "
"Defaults to
\"\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
""
);
AddComment
(
string
::
Sprintf
(
R"DOC(
AddComment
(
string
::
Sprintf
(
R"DOC(
Elementwise %s Operator
Elementwise %s Operator
...
...
paddle/fluid/operators/math/jit_code.h
浏览文件 @
2f9b5f23
...
@@ -408,6 +408,42 @@ class LSTMJitCode : public VActJitCode {
...
@@ -408,6 +408,42 @@ class LSTMJitCode : public VActJitCode {
}
}
};
};
#ifdef PADDLE_WITH_MKLDNN
// JIT code generator for "nChw16c = nChw16c .* NC".
//
// The generated function has the signature
//   void(const float* x, const float* y, float* out, int height, int width)
// and uses the System V AMD64 argument registers listed below. It loads one
// 64-byte vector (16 floats) from y once, then for height * width iterations
// loads 64 bytes from x, multiplies them by that vector and stores the
// product to out, advancing a shared byte offset by 64 each time.
//
// The code uses zmm registers, so the host CPU must support AVX-512.
// Both loops test their condition after the body, so height and width are
// expected to be at least 1.
struct EltwiseMulnChw16cNC : public Xbyak::CodeGenerator {
  // code_size: capacity, in bytes, of the buffer the code is emitted into.
  explicit EltwiseMulnChw16cNC(size_t code_size = 256 * 1024)
      : Xbyak::CodeGenerator(code_size) {
    // RDI is ptr x_input
    // RSI is ptr y_input
    // RDX is ptr output
    // RCX is height
    // r8 is width

    // rbx is callee-saved and is used as the inner loop counter.
    push(rbx);

    xor_(rax, rax);  // rax: byte offset into x and out
    xor_(r10, r10);  // r10: outer (height) counter
    // The 16 y values are loop-invariant; keep them in zmm3.
    vmovups(zmm3, ptr[rsi]);

    L("h_loop");
    xor_(rbx, rbx);  // restart the inner (width) counter
    L("w_loop");
    vmovups(zmm2, ptr[rdi + rax]);
    vmulps(zmm1, zmm2, zmm3);
    vmovups(ptr[rdx + rax], zmm1);
    add(rax, 64);
    inc(rbx);
    // height/width are passed as 32-bit ints; the upper halves of rcx/r8 are
    // not defined by the calling convention, so compare the 32-bit views.
    cmp(r8d, ebx);
    jnz("w_loop");
    inc(r10);
    cmp(r10d, ecx);
    jnz("h_loop");

    pop(rbx);
    ret();
  }
};
#endif
}
// namespace gen
}
// namespace gen
}
// namespace jitkernel
}
// namespace jitkernel
}
// namespace math
}
// namespace math
...
...
paddle/fluid/operators/math/jit_kernel.h
浏览文件 @
2f9b5f23
...
@@ -89,6 +89,15 @@ class VAddBiasKernel : public Kernel {
...
@@ -89,6 +89,15 @@ class VAddBiasKernel : public Kernel {
void
(
*
Compute
)(
const
T
*
,
const
T
*
,
T
*
,
int
);
void
(
*
Compute
)(
const
T
*
,
const
T
*
,
T
*
,
int
);
};
};
#ifdef PADDLE_WITH_MKLDNN
// Kernel interface for the elementwise multiply of an nChw16c tensor by an NC
// tensor. Implementations fill in Compute.
// NOTE(review): the class is templated on T but Compute is declared with
// float pointers regardless of T.
template <typename T>
class EltwiseMulnChw16cNCKernel : public Kernel {
 public:
  // nChw16c = nChw16c .* NC
  // Arguments: two const float inputs, one float output, then two ints
  // (height and width according to the comments in the JIT generator).
  void (*Compute)(const float*, const float*, float*, int, int);
};
#endif
template
<
typename
T
>
template
<
typename
T
>
class
VActKernel
:
public
Kernel
{
class
VActKernel
:
public
Kernel
{
public:
public:
...
...
paddle/fluid/operators/math/jit_kernel_blas.cc
浏览文件 @
2f9b5f23
...
@@ -184,6 +184,44 @@ bool VAddKernelImpl<double>::useMKL(int d) {
...
@@ -184,6 +184,44 @@ bool VAddKernelImpl<double>::useMKL(int d) {
}
}
#endif
#endif
#ifdef PADDLE_WITH_MKLDNN
/* EltwiseMul for nChw16c & NC inputs JitKernel */
// When Xbyak is available and useJIT(d) returns true, the constructor
// generates the kernel with gen::EltwiseMulnChw16cNC and points Compute at
// it. In every other configuration construction throws, because there is no
// non-JIT fallback for this kernel.
template <typename T>
class EltwiseMulnChw16cNCKernelImpl
    : public math::jitkernel::EltwiseMulnChw16cNCKernel<T> {
 public:
  JITKERNEL_DECLARE_STATIC_FUNC;
  // d is forwarded to useJIT and used for the code-size estimate.
  explicit EltwiseMulnChw16cNCKernelImpl(int d)
      : EltwiseMulnChw16cNCKernel<T>() {
#ifdef PADDLE_WITH_XBYAK
    using mul_func_t = void (*)(const float*, const float*, float*, int, int);
    if (useJIT(d)) {
      // roughly estimate the size of code
      size_t sz = 96 + d / YMM_FLOAT_BLOCK * 4 * 8;
      sz = sz > 4096 ? sz : 4096;
      jitcode_.reset(new gen::EltwiseMulnChw16cNC(sz));
      this->Compute = jitcode_->getCode<mul_func_t>();
      return;
    }
#endif
    PADDLE_THROW(
        "This kernel shouldn't be used in Non-Xbyak, Non-MKL-DNN "
        "environemnt");
  }

#ifdef PADDLE_WITH_XBYAK

 private:
  // Owns the generated code that Compute points into.
  std::unique_ptr<gen::EltwiseMulnChw16cNC> jitcode_{nullptr};
#endif
  // The closing brace stays outside the XBYAK conditional so the class is
  // well-formed when MKL-DNN is enabled but Xbyak is not.
};

#ifdef PADDLE_WITH_XBYAK
// NOTE(review): the JIT is enabled unconditionally for float, yet the
// generated code uses AVX-512 registers — confirm callers check CPU support.
template <>
bool EltwiseMulnChw16cNCKernelImpl<float>::useJIT(int d) {
  return true;
}
#endif
#endif
/* VAddRelu JitKernel */
/* VAddRelu JitKernel */
template
<
typename
T
>
template
<
typename
T
>
class
VAddReluKernelImpl
:
public
VAddReluKernel
<
T
>
{
class
VAddReluKernelImpl
:
public
VAddReluKernel
<
T
>
{
...
@@ -349,6 +387,9 @@ REGISTER_JITKERNEL(vscal, VScalKernel);
...
@@ -349,6 +387,9 @@ REGISTER_JITKERNEL(vscal, VScalKernel);
REGISTER_JITKERNEL
(
vaddbias
,
VAddBiasKernel
);
REGISTER_JITKERNEL
(
vaddbias
,
VAddBiasKernel
);
REGISTER_JITKERNEL
(
vrelu
,
VReluKernel
);
REGISTER_JITKERNEL
(
vrelu
,
VReluKernel
);
REGISTER_JITKERNEL
(
videntity
,
VIdentityKernel
);
REGISTER_JITKERNEL
(
videntity
,
VIdentityKernel
);
#ifdef PADDLE_WITH_MKLDNN
REGISTER_JITKERNEL
(
eltwise_mul_nchw16c
,
EltwiseMulnChw16cNCKernel
);
#endif
}
// namespace jitkernel
}
// namespace jitkernel
}
// namespace math
}
// namespace math
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
2f9b5f23
...
@@ -359,6 +359,9 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -359,6 +359,9 @@ All parameter, weight, gradient are variables in Paddle.
return
self
.
GetMutable
<
platform
::
Communicator
>
();
return
self
.
GetMutable
<
platform
::
Communicator
>
();
},
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
#endif
#ifndef _WIN32
.
def
(
"get_reader"
,
.
def
(
"get_reader"
,
[](
Variable
&
self
)
->
framework
::
ReaderHolder
*
{
[](
Variable
&
self
)
->
framework
::
ReaderHolder
*
{
PADDLE_ENFORCE
(
self
.
IsType
<
framework
::
ReaderHolder
>
());
PADDLE_ENFORCE
(
self
.
IsType
<
framework
::
ReaderHolder
>
());
...
@@ -366,7 +369,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -366,7 +369,7 @@ All parameter, weight, gradient are variables in Paddle.
},
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
#endif
#endif
;
;
// NOLINT
#if !defined(_WIN32)
#if !defined(_WIN32)
py
::
class_
<
framework
::
ReaderHolder
>
(
m
,
"Reader"
,
""
)
py
::
class_
<
framework
::
ReaderHolder
>
(
m
,
"Reader"
,
""
)
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
2f9b5f23
...
@@ -362,7 +362,9 @@ class OpTest(unittest.TestCase):
...
@@ -362,7 +362,9 @@ class OpTest(unittest.TestCase):
else
:
else
:
return
[]
return
[]
places
=
[
fluid
.
CPUPlace
()]
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
):
cpu_only
=
self
.
_cpu_only
if
hasattr
(
self
,
'_cpu_only'
)
else
False
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
)
\
and
not
cpu_only
:
places
.
append
(
core
.
CUDAPlace
(
0
))
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
return
places
...
...
python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py
0 → 100644
浏览文件 @
2f9b5f23
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
from
test_elementwise_mul_op
import
*
class TestElementwiseMulMKLDNNOp_BroadcastNCHW16c(ElementwiseMulOp):
    """elementwise_mul with use_mkldnn on, X tagged nchw16c and Y tagged nc.

    Y has shape (1, 16) and is broadcast over the spatial dims of the
    (1, 16, 2, 2) input X.
    """

    def init_input_output(self):
        """Build X, Y and the expected output with X/out in blocked order."""
        shape = (1, 16, 2, 2)
        channels_last = (0, 2, 3, 1)

        logical_x = np.random.rand(*shape).astype(self.dtype)
        # Reorder to channels-last and reshape back: the declared shape is
        # unchanged, only the element order differs. With exactly 16 channels
        # this is the element order of a single 16-channel block.
        self.x = np.transpose(logical_x, channels_last).reshape(shape)
        self.y = np.random.rand(1, 16).astype(self.dtype)

        # The reference result is computed on the logical NCHW data and then
        # put through the same reordering as X.
        logical_out = logical_x * self.y.reshape(1, 16, 1, 1)
        self.out = np.transpose(logical_out, channels_last).reshape(shape)

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_BroadcastNCHW16c, self).setUp()
        self.attrs.update({"x_data_format": "nchw16c", "y_data_format": "nc"})
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Broadcast Y starting at axis 0 of X."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
@unittest.skip(
    "Not implemented yet.")  # TODO(mgallus): enable when implemented.
class TestElementwiseMulMKLDNNOp_BroadcastNCHW8c(ElementwiseMulOp):
    """Skipped case: X tagged nchw8c, Y tagged nc, use_mkldnn on.

    Y has shape (1, 8) and is broadcast over the spatial dims of the
    (1, 8, 2, 2) input X.
    """

    def init_input_output(self):
        """Build X, Y and the expected output with X/out in blocked order."""
        shape = (1, 8, 2, 2)
        channels_last = (0, 2, 3, 1)

        logical_x = np.random.rand(*shape).astype(self.dtype)
        # Channels-last reordering, reshaped back to the declared shape; with
        # exactly 8 channels this is the order of a single 8-channel block.
        self.x = np.transpose(logical_x, channels_last).reshape(shape)
        self.y = np.random.rand(1, 8).astype(self.dtype)

        # Reference result on logical data, reordered the same way as X.
        logical_out = logical_x * self.y.reshape(1, 8, 1, 1)
        self.out = np.transpose(logical_out, channels_last).reshape(shape)

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_BroadcastNCHW8c, self).setUp()
        self.attrs.update({"x_data_format": "nchw8c", "y_data_format": "nc"})
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Broadcast Y starting at axis 0 of X."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
class TestElementwiseMulMKLDNNOp_FallbackNCHW(ElementwiseMulOp):
    """use_mkldnn on, with no data-format attrs set by this class.

    X is (1, 16, 2, 2) and Y is (1, 16), broadcast from axis 0.
    """

    def init_input_output(self):
        """Build plain inputs and the broadcast product as expected output."""
        x_shape = (1, 16, 2, 2)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(1, 16).astype(self.dtype)

        # Give Y trailing singleton dims so it broadcasts over H and W.
        y_broadcastable = self.y.reshape(1, 16, 1, 1)
        self.out = self.x * y_broadcastable

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Broadcast Y starting at axis 0 of X."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
class TestElementwiseMulMKLDNNOp_FallbackNCHW16C(ElementwiseMulOp):
    """use_mkldnn on, X and Y both (1, 16, 2, 2) and both tagged nchw16c."""

    def init_input_output(self):
        """Build both inputs in blocked order; expected output is their product."""
        shape = (1, 16, 2, 2)
        channels_last = (0, 2, 3, 1)

        # X is drawn before Y, each reordered to channels-last and reshaped
        # back so that only the element order changes.
        logical_x = np.random.rand(*shape).astype(self.dtype)
        self.x = np.transpose(logical_x, channels_last).reshape(shape)

        logical_y = np.random.rand(*shape).astype(self.dtype)
        self.y = np.transpose(logical_y, channels_last).reshape(shape)

        # Same shape and same ordering on both sides: plain elementwise product.
        self.out = self.x * self.y

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_FallbackNCHW16C, self).setUp()
        self.attrs.update({
            "x_data_format": "nchw16c",
            "y_data_format": "nchw16c"
        })
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Use axis 0."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
class TestElementwiseMulMKLDNNOp_FallbackNoReorders(ElementwiseMulOp):
    """use_mkldnn on, X and Y both (1, 16, 2, 2) and both tagged nchw16c.

    Inputs and attrs match TestElementwiseMulMKLDNNOp_FallbackNCHW16C.
    """

    def init_input_output(self):
        """Build both inputs in blocked order; expected output is their product."""
        shape = (1, 16, 2, 2)
        channels_last = (0, 2, 3, 1)

        # X is drawn before Y, each reordered to channels-last and reshaped
        # back so that only the element order changes.
        logical_x = np.random.rand(*shape).astype(self.dtype)
        self.x = np.transpose(logical_x, channels_last).reshape(shape)

        logical_y = np.random.rand(*shape).astype(self.dtype)
        self.y = np.transpose(logical_y, channels_last).reshape(shape)

        # Same shape and same ordering on both sides: plain elementwise product.
        self.out = self.x * self.y

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_FallbackNoReorders, self).setUp()
        self.attrs.update({
            "x_data_format": "nchw16c",
            "y_data_format": "nchw16c"
        })
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Use axis 0."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
class TestElementwiseMulMKLDNNOp_FallbackWithReorder1(ElementwiseMulOp):
    """use_mkldnn on, X tagged nchw and Y tagged nchw16c, same shape."""

    def init_input_output(self):
        """X stays in plain order; only Y is fed in blocked order."""
        shape = (1, 16, 2, 2)

        self.x = np.random.rand(*shape).astype(self.dtype)

        logical_y = np.random.rand(*shape).astype(self.dtype)
        # Channels-last reordering of Y, reshaped back to the declared shape.
        self.y = np.transpose(logical_y, (0, 2, 3, 1)).reshape(shape)

        # The expected output uses Y before reordering, i.e. it is expressed
        # in the same plain order as X.
        self.out = self.x * logical_y

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_FallbackWithReorder1, self).setUp()
        self.attrs.update({
            "x_data_format": "nchw",
            "y_data_format": "nchw16c"
        })
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Use axis 0."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
class TestElementwiseMulMKLDNNOp_FallbackWithReorder2(ElementwiseMulOp):
    """use_mkldnn on, X tagged nchw16c and Y tagged nchw, same shape."""

    def init_input_output(self):
        """Y stays in plain order; only X is fed in blocked order."""
        shape = (1, 16, 2, 2)

        # Y is drawn first, then X.
        self.y = np.random.rand(*shape).astype(self.dtype)

        logical_x = np.random.rand(*shape).astype(self.dtype)
        # Channels-last reordering of X, reshaped back to the declared shape.
        self.x = np.transpose(logical_x, (0, 2, 3, 1)).reshape(shape)

        # The expected output uses X before reordering, i.e. it is expressed
        # in the same plain order as Y.
        self.out = logical_x * self.y

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_FallbackWithReorder2, self).setUp()
        self.attrs.update({
            "x_data_format": "nchw16c",
            "y_data_format": "nchw"
        })
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Use axis 0."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
class TestElementwiseMulMKLDNNOp_FallbackNoReorders2(ElementwiseMulOp):
    """use_mkldnn on, X and Y both of shape (1, 16) and both tagged nc."""

    def init_input_output(self):
        """Two same-shaped 2-D inputs; expected output is their product."""
        shape = (1, 16)
        self.x = np.random.rand(*shape).astype(self.dtype)
        self.y = np.random.rand(*shape).astype(self.dtype)
        self.out = self.x * self.y

    def setUp(self):
        """Run the base setup, then add the format attrs and CPU-only flag."""
        super(TestElementwiseMulMKLDNNOp_FallbackNoReorders2, self).setUp()
        self.attrs.update({"x_data_format": "nc", "y_data_format": "nc"})
        self._cpu_only = True

    def init_kernel_type(self):
        """Select the MKL-DNN kernel."""
        self.use_mkldnn = True

    def init_axis(self):
        """Use axis 0."""
        self.axis = 0

    # The three gradient checks are overridden as no-ops for this case.
    def test_check_grad_normal(self):
        pass

    def test_check_grad_ingore_x(self):
        pass

    def test_check_grad_ingore_y(self):
        pass
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
浏览文件 @
2f9b5f23
...
@@ -21,13 +21,24 @@ from paddle.fluid.op import Operator
...
@@ -21,13 +21,24 @@ from paddle.fluid.op import Operator
class
ElementwiseMulOp
(
OpTest
):
class
ElementwiseMulOp
(
OpTest
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
False
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"elementwise_mul"
self
.
op_type
=
"elementwise_mul"
self
.
dtype
=
np
.
float32
self
.
axis
=
-
1
self
.
init_dtype
()
self
.
init_input_output
()
self
.
init_kernel_type
()
self
.
init_axis
()
self
.
inputs
=
{
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float64"
),
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
self
.
x
),
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float64"
)
'Y'
:
OpTest
.
np_dtype_to_fluid_dtype
(
self
.
y
)
}
}
self
.
outputs
=
{
'Out'
:
np
.
multiply
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
self
.
outputs
=
{
'Out'
:
self
.
out
}
self
.
attrs
=
{
'axis'
:
self
.
axis
,
'use_mkldnn'
:
self
.
use_mkldnn
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
...
@@ -41,6 +52,17 @@ class ElementwiseMulOp(OpTest):
...
@@ -41,6 +52,17 @@ class ElementwiseMulOp(OpTest):
def
test_check_grad_ingore_y
(
self
):
def
test_check_grad_ingore_y
(
self
):
self
.
check_grad
([
'X'
],
'Out'
,
no_grad_set
=
set
(
'Y'
))
self
.
check_grad
([
'X'
],
'Out'
,
no_grad_set
=
set
(
'Y'
))
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
)
self
.
out
=
np
.
multiply
(
self
.
x
,
self
.
y
)
def
init_dtype
(
self
):
pass
def
init_axis
(
self
):
pass
class
TestElementwiseMulOp_scalar
(
ElementwiseMulOp
):
class
TestElementwiseMulOp_scalar
(
ElementwiseMulOp
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -63,17 +85,13 @@ class TestElementwiseMulOp_Vector(ElementwiseMulOp):
...
@@ -63,17 +85,13 @@ class TestElementwiseMulOp_Vector(ElementwiseMulOp):
class
TestElementwiseMulOp_broadcast_0
(
ElementwiseMulOp
):
class
TestElementwiseMulOp_broadcast_0
(
ElementwiseMulOp
):
def
setUp
(
self
):
def
init_input_output
(
self
):
self
.
op_type
=
"elementwise_mul"
self
.
x
=
np
.
random
.
rand
(
2
,
3
,
4
).
astype
(
self
.
dtype
)
self
.
inputs
=
{
self
.
y
=
np
.
random
.
rand
(
2
).
astype
(
self
.
dtype
)
'X'
:
np
.
random
.
rand
(
2
,
3
,
4
).
astype
(
np
.
float64
),
self
.
out
=
self
.
x
*
self
.
y
.
reshape
(
2
,
1
,
1
)
'Y'
:
np
.
random
.
rand
(
2
).
astype
(
np
.
float64
)
}
self
.
attrs
=
{
'axis'
:
0
}
def
init_axis
(
self
):
self
.
outputs
=
{
self
.
axis
=
0
'Out'
:
self
.
inputs
[
'X'
]
*
self
.
inputs
[
'Y'
].
reshape
(
2
,
1
,
1
)
}
class
TestElementwiseMulOp_broadcast_1
(
ElementwiseMulOp
):
class
TestElementwiseMulOp_broadcast_1
(
ElementwiseMulOp
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录