Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e1b0d7cb
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e1b0d7cb
编写于
2月 04, 2020
作者:
石
石晓伟
提交者:
GitHub
2月 04, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove anakin from code, test=develop (#22420)
上级
0a678ca0
变更
114
显示空白变更内容
内联
并排
Showing
114 changed files
with
10 additions
and
7532 deletions
+10
-7532
CMakeLists.txt
CMakeLists.txt
+0
-1
cmake/anakin_subgraph.cmake
cmake/anakin_subgraph.cmake
+0
-45
cmake/configure.cmake
cmake/configure.cmake
+0
-10
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+0
-7
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+0
-5
paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc
...le/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc
+0
-83
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+0
-167
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+0
-31
paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc
...amework/ir/simplify_anakin_priorbox_detection_out_pass.cc
+0
-233
paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h
...ramework/ir/simplify_anakin_priorbox_detection_out_pass.h
+0
-39
paddle/fluid/framework/ir/subgraph_detector.h
paddle/fluid/framework/ir/subgraph_detector.h
+1
-1
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+1
-9
paddle/fluid/inference/anakin/CMakeLists.txt
paddle/fluid/inference/anakin/CMakeLists.txt
+0
-5
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+0
-23
paddle/fluid/inference/anakin/convert/activation.cc
paddle/fluid/inference/anakin/convert/activation.cc
+0
-64
paddle/fluid/inference/anakin/convert/activation.h
paddle/fluid/inference/anakin/convert/activation.h
+0
-72
paddle/fluid/inference/anakin/convert/affine_channel.cc
paddle/fluid/inference/anakin/convert/affine_channel.cc
+0
-55
paddle/fluid/inference/anakin/convert/affine_channel.h
paddle/fluid/inference/anakin/convert/affine_channel.h
+0
-40
paddle/fluid/inference/anakin/convert/batch_norm.cc
paddle/fluid/inference/anakin/convert/batch_norm.cc
+0
-85
paddle/fluid/inference/anakin/convert/batch_norm.h
paddle/fluid/inference/anakin/convert/batch_norm.h
+0
-37
paddle/fluid/inference/anakin/convert/concat.cc
paddle/fluid/inference/anakin/convert/concat.cc
+0
-41
paddle/fluid/inference/anakin/convert/concat.h
paddle/fluid/inference/anakin/convert/concat.h
+0
-39
paddle/fluid/inference/anakin/convert/conv2d.cc
paddle/fluid/inference/anakin/convert/conv2d.cc
+0
-109
paddle/fluid/inference/anakin/convert/conv2d.h
paddle/fluid/inference/anakin/convert/conv2d.h
+0
-37
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+0
-115
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+0
-37
paddle/fluid/inference/anakin/convert/density_prior_box.cc
paddle/fluid/inference/anakin/convert/density_prior_box.cc
+0
-112
paddle/fluid/inference/anakin/convert/density_prior_box.h
paddle/fluid/inference/anakin/convert/density_prior_box.h
+0
-40
paddle/fluid/inference/anakin/convert/detection_out.cc
paddle/fluid/inference/anakin/convert/detection_out.cc
+0
-69
paddle/fluid/inference/anakin/convert/detection_out.h
paddle/fluid/inference/anakin/convert/detection_out.h
+0
-39
paddle/fluid/inference/anakin/convert/dropout.cc
paddle/fluid/inference/anakin/convert/dropout.cc
+0
-55
paddle/fluid/inference/anakin/convert/dropout.h
paddle/fluid/inference/anakin/convert/dropout.h
+0
-39
paddle/fluid/inference/anakin/convert/elementwise.cc
paddle/fluid/inference/anakin/convert/elementwise.cc
+0
-75
paddle/fluid/inference/anakin/convert/elementwise.h
paddle/fluid/inference/anakin/convert/elementwise.h
+0
-55
paddle/fluid/inference/anakin/convert/fc.cc
paddle/fluid/inference/anakin/convert/fc.cc
+0
-122
paddle/fluid/inference/anakin/convert/fc.h
paddle/fluid/inference/anakin/convert/fc.h
+0
-51
paddle/fluid/inference/anakin/convert/flatten.cc
paddle/fluid/inference/anakin/convert/flatten.cc
+0
-48
paddle/fluid/inference/anakin/convert/flatten.h
paddle/fluid/inference/anakin/convert/flatten.h
+0
-37
paddle/fluid/inference/anakin/convert/helper.cc
paddle/fluid/inference/anakin/convert/helper.cc
+0
-32
paddle/fluid/inference/anakin/convert/helper.h
paddle/fluid/inference/anakin/convert/helper.h
+0
-95
paddle/fluid/inference/anakin/convert/im2sequence.cc
paddle/fluid/inference/anakin/convert/im2sequence.cc
+0
-58
paddle/fluid/inference/anakin/convert/im2sequence.h
paddle/fluid/inference/anakin/convert/im2sequence.h
+0
-39
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+0
-237
paddle/fluid/inference/anakin/convert/pool2d.cc
paddle/fluid/inference/anakin/convert/pool2d.cc
+0
-74
paddle/fluid/inference/anakin/convert/pool2d.h
paddle/fluid/inference/anakin/convert/pool2d.h
+0
-39
paddle/fluid/inference/anakin/convert/relu.cc
paddle/fluid/inference/anakin/convert/relu.cc
+0
-61
paddle/fluid/inference/anakin/convert/relu.h
paddle/fluid/inference/anakin/convert/relu.h
+0
-51
paddle/fluid/inference/anakin/convert/reshape.cc
paddle/fluid/inference/anakin/convert/reshape.cc
+0
-49
paddle/fluid/inference/anakin/convert/reshape.h
paddle/fluid/inference/anakin/convert/reshape.h
+0
-37
paddle/fluid/inference/anakin/convert/roi_align.cc
paddle/fluid/inference/anakin/convert/roi_align.cc
+0
-54
paddle/fluid/inference/anakin/convert/roi_align.h
paddle/fluid/inference/anakin/convert/roi_align.h
+0
-39
paddle/fluid/inference/anakin/convert/scale.cc
paddle/fluid/inference/anakin/convert/scale.cc
+0
-52
paddle/fluid/inference/anakin/convert/scale.h
paddle/fluid/inference/anakin/convert/scale.h
+0
-39
paddle/fluid/inference/anakin/convert/shuffle_channel.cc
paddle/fluid/inference/anakin/convert/shuffle_channel.cc
+0
-47
paddle/fluid/inference/anakin/convert/shuffle_channel.h
paddle/fluid/inference/anakin/convert/shuffle_channel.h
+0
-38
paddle/fluid/inference/anakin/convert/softmax.cc
paddle/fluid/inference/anakin/convert/softmax.cc
+0
-47
paddle/fluid/inference/anakin/convert/softmax.h
paddle/fluid/inference/anakin/convert/softmax.h
+0
-37
paddle/fluid/inference/anakin/convert/split.cc
paddle/fluid/inference/anakin/convert/split.cc
+0
-59
paddle/fluid/inference/anakin/convert/split.h
paddle/fluid/inference/anakin/convert/split.h
+0
-39
paddle/fluid/inference/anakin/convert/sum.cc
paddle/fluid/inference/anakin/convert/sum.cc
+0
-50
paddle/fluid/inference/anakin/convert/sum.h
paddle/fluid/inference/anakin/convert/sum.h
+0
-39
paddle/fluid/inference/anakin/convert/test_activation_op.cc
paddle/fluid/inference/anakin/convert/test_activation_op.cc
+0
-92
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
.../fluid/inference/anakin/convert/test_affine_channel_op.cc
+0
-72
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
+0
-87
paddle/fluid/inference/anakin/convert/test_concat_op.cc
paddle/fluid/inference/anakin/convert/test_concat_op.cc
+0
-67
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
+0
-75
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
+0
-69
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
+0
-81
paddle/fluid/inference/anakin/convert/test_fc_op.cc
paddle/fluid/inference/anakin/convert/test_fc_op.cc
+0
-64
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
+0
-64
paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
+0
-55
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
+0
-119
paddle/fluid/inference/anakin/convert/test_relu_op.cc
paddle/fluid/inference/anakin/convert/test_relu_op.cc
+0
-70
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
+0
-102
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
+0
-63
paddle/fluid/inference/anakin/convert/test_split_op.cc
paddle/fluid/inference/anakin/convert/test_split_op.cc
+0
-119
paddle/fluid/inference/anakin/convert/test_sum_op.cc
paddle/fluid/inference/anakin/convert/test_sum_op.cc
+0
-64
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
+0
-100
paddle/fluid/inference/anakin/convert/transpose.cc
paddle/fluid/inference/anakin/convert/transpose.cc
+0
-52
paddle/fluid/inference/anakin/convert/transpose.h
paddle/fluid/inference/anakin/convert/transpose.h
+0
-37
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+0
-228
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+0
-207
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+0
-168
paddle/fluid/inference/anakin/op_teller.cc
paddle/fluid/inference/anakin/op_teller.cc
+0
-80
paddle/fluid/inference/anakin/op_teller.h
paddle/fluid/inference/anakin/op_teller.h
+0
-70
paddle/fluid/inference/anakin/test_anakin_engine.cc
paddle/fluid/inference/anakin/test_anakin_engine.cc
+0
-92
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+0
-14
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+0
-18
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+0
-12
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+0
-284
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
...fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
+0
-51
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+0
-18
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+1
-47
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+0
-46
paddle/fluid/inference/api/api_anakin_engine.cc
paddle/fluid/inference/api/api_anakin_engine.cc
+0
-462
paddle/fluid/inference/api/api_anakin_engine.h
paddle/fluid/inference/api/api_anakin_engine.h
+0
-126
paddle/fluid/inference/api/high_level_api.md
paddle/fluid/inference/api/high_level_api.md
+5
-6
paddle/fluid/inference/api/high_level_api_cn.md
paddle/fluid/inference/api/high_level_api_cn.md
+1
-3
paddle/fluid/inference/api/paddle_anakin_config.h
paddle/fluid/inference/api/paddle_anakin_config.h
+0
-49
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+1
-22
paddle/fluid/inference/api/paddle_api.h
paddle/fluid/inference/api/paddle_api.h
+0
-1
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+0
-3
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+0
-13
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+0
-1
paddle/fluid/inference/capi/c_api.h
paddle/fluid/inference/capi/c_api.h
+0
-11
paddle/fluid/inference/capi/pd_config.cc
paddle/fluid/inference/capi/pd_config.cc
+0
-36
paddle/fluid/inference/tests/api/anakin_mlu_tester.cc
paddle/fluid/inference/tests/api/anakin_mlu_tester.cc
+0
-98
paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc
paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc
+0
-261
paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
+0
-3
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+0
-4
paddle/fluid/operators/anakin/CMakeLists.txt
paddle/fluid/operators/anakin/CMakeLists.txt
+0
-2
paddle/fluid/operators/anakin/anakin_engine_op.cc
paddle/fluid/operators/anakin/anakin_engine_op.cc
+0
-53
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+0
-138
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+0
-9
未找到文件。
CMakeLists.txt
浏览文件 @
e1b0d7cb
...
...
@@ -151,7 +151,6 @@ if(NOT WIN32)
include
(
cupti
)
endif
()
include
(
anakin_subgraph
)
include
(
flags
)
# set paddle compile flags
include
(
cudnn
)
# set cudnn libraries, must before configure
...
...
cmake/anakin_subgraph.cmake
已删除
100644 → 0
浏览文件 @
0a678ca0
# Locates an Anakin installation and, when one is found, wires it into the
# build.
#
# Reads:   ANAKIN_ROOT (cache path, defaults to "/usr") and the ANAKIN_ROOT
#          environment variable as search locations; ANAKIN_MLU, ANAKIN_BM,
#          ANAKIN_X86, WITH_GPU and WITH_DSO as feature switches.
# Sets:    ANAKIN_INCLUDE_DIR, ANAKIN_LIBRARY, ANAKIN_FOUND and, for GPU + DSO
#          builds with Anakin present, ANAKIN_SUBGRAPH.
# Defines: PADDLE_WITH_ANAKIN plus at most one of ANAKIN_MLU_PLACE,
#          ANAKIN_BM_PLACE or ANAKIN_X86_PLACE.
set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")

# Only the locations listed here are searched (NO_DEFAULT_PATH).
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
  NO_DEFAULT_PATH)

find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
  PATHS ${ANAKIN_ROOT}
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
  NO_DEFAULT_PATH
  DOC "Path to ANAKIN library.")

# Anakin counts as found only when both the header and the library resolved.
set(ANAKIN_FOUND OFF)
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
  set(ANAKIN_FOUND ON)
endif()

if(ANAKIN_FOUND)
  message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
  include_directories(${ANAKIN_ROOT})
  include_directories(${ANAKIN_ROOT}/include)
  include_directories(${ANAKIN_ROOT}/saber)
  link_directories(${ANAKIN_ROOT})
  add_definitions(-DPADDLE_WITH_ANAKIN)

  # Pick the Anakin place. MLU and BM are considered only for builds without
  # GPU and without ANAKIN_X86; the first matching branch wins.
  if(ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86)
    message(STATUS "Compile with anakin mlu place.")
    add_definitions(-DANAKIN_MLU_PLACE)
  elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86)
    message(STATUS "Compile with anakin bm place.")
    add_definitions(-DANAKIN_BM_PLACE)
  elseif(ANAKIN_X86)
    message(STATUS "Compile with anakin x86 place.")
    add_definitions(-DANAKIN_X86_PLACE)
  endif()
endif()

# The subgraph engine is switched on only for GPU builds that also use DSO.
if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO)
  message(STATUS "Compile with anakin subgraph.")
  set(ANAKIN_SUBGRAPH ON)
endif()
cmake/configure.cmake
浏览文件 @
e1b0d7cb
...
...
@@ -109,16 +109,6 @@ if(WITH_GPU)
endif
()
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
endif
()
if
(
ANAKIN_FOUND
)
if
(
${
CUDA_VERSION_MAJOR
}
VERSION_LESS 8
)
message
(
WARNING
"Anakin needs CUDA >= 8.0 to compile. Force ANAKIN_FOUND = OFF"
)
set
(
ANAKIN_FOUND OFF CACHE STRING
"Anakin is valid only when CUDA >= 8.0."
FORCE
)
endif
()
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
message
(
WARNING
"Anakin needs CUDNN >= 7.0 to compile. Force ANAKIN_FOUND = OFF"
)
set
(
ANAKIN_FOUND OFF CACHE STRING
"Anakin is valid only when CUDNN >= 7.0."
FORCE
)
endif
()
endif
()
elseif
(
WITH_AMD_GPU
)
add_definitions
(
-DPADDLE_WITH_HIP
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-D__HIP_PLATFORM_HCC__"
)
...
...
cmake/inference_lib.cmake
浏览文件 @
e1b0d7cb
...
...
@@ -137,13 +137,6 @@ function(copy_part_of_thrid_party TARGET DST)
SRCS
${
LITE_BINARY_DIR
}
/inference_lite_lib/*
DSTS
${
dst_dir
}
)
endif
()
if
(
ANAKIN_FOUND
)
set
(
dst_dir
"
${
DST
}
/third_party/install/anakin"
)
copy
(
${
TARGET
}
SRCS
${
ANAKIN_ROOT
}
/*
DSTS
${
dst_dir
}
)
endif
()
endfunction
()
# inference library for only inference
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
e1b0d7cb
...
...
@@ -71,7 +71,6 @@ pass_library(identity_scale_op_clean_pass base)
pass_library
(
sync_batch_norm_pass base
)
pass_library
(
runtime_context_cache_pass base
)
pass_library
(
quant_conv2d_dequant_fuse_pass inference
)
pass_library
(
fillconstant_elementwisemul_fuse inference
)
pass_library
(
shuffle_channel_detect_pass inference
)
pass_library
(
delete_quant_dequant_op_pass inference
)
pass_library
(
simplify_with_basic_ops_pass base
)
...
...
@@ -81,10 +80,6 @@ if(WITH_GPU)
pass_library
(
cudnn_placement_pass base DEPS placement_pass_base
)
endif
()
if
(
ANAKIN_SUBGRAPH
)
pass_library
(
simplify_anakin_priorbox_detection_out_pass inference
)
endif
()
if
(
WITH_MKLDNN
)
pass_library
(
mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn
)
pass_library
(
depthwise_conv_mkldnn_pass base DIR mkldnn
)
...
...
paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
#define GET_NODES \
GET_IR_NODE(fill_constant); \
GET_IR_NODE(fill_constant_out); \
GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out);
// Rewrites every matched `elementwise_mul(X, fill_constant(...))` subgraph
// into a single `scale` op.
//
// The new `scale` op reads the original "X" input of elementwise_mul, writes
// to the original elementwise_mul output variable, uses the fill_constant
// "value" attribute as its "scale" attribute, a bias of 0 and
// bias_after_scale = true. The fill_constant op, its output variable and the
// elementwise_mul op are removed from the graph afterwards.
//
// @param graph  Graph that is searched and modified in place.
void FillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
  const std::string pattern_name = "fillconstant_elementwisemul_fuse";
  FusePassBase::Init(pattern_name, graph);

  GraphPatternDetector detector;

  // Pattern entry point: the variable feeding the "X" slot of elementwise_mul.
  auto* x = detector.mutable_pattern()->NewNode("x");
  x = x->assert_is_op_input("elementwise_mul", "X")->AsInput();

  patterns::FillConstantElementWiseMulFuse pattern(detector.mutable_pattern(),
                                                   pattern_name);
  pattern(x);

  // Invoked once per match. Note that it operates on the captured `graph`
  // rather than on its own `g` argument.
  auto fuse_into_scale = [&](const GraphPatternDetector::subgraph_t& subgraph,
                             Graph* g) {
    // Bind the matched IR nodes to locals named after the pattern nodes.
    GET_IR_NODE_FROM_SUBGRAPH(fill_constant, fill_constant, pattern);
    GET_IR_NODE_FROM_SUBGRAPH(fill_constant_out, fill_constant_out, pattern);
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul, elementwise_mul, pattern);
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_out, elementwise_mul_out,
                              pattern);

    PADDLE_ENFORCE(subgraph.count(x));
    auto* scale_input = subgraph.at(x);

    // The constant that was multiplied in becomes the scale factor.
    const float scale_factor =
        boost::get<float>(fill_constant->Op()->GetAttr("value"));

    framework::OpDesc scale_desc;
    scale_desc.SetType("scale");
    scale_desc.SetInput("X", {scale_input->Name()});
    scale_desc.SetAttr("scale", scale_factor);
    scale_desc.SetAttr("bias", 0.0f);
    scale_desc.SetAttr("bias_after_scale", true);
    scale_desc.SetOutput("Out", {elementwise_mul_out->Name()});
    scale_desc.Flush();

    // Materialize the fused op and splice it between the surviving variables.
    auto* scale_node = graph->CreateOpNode(&scale_desc);
    IR_NODE_LINK_TO(scale_input, scale_node);          // input edge
    IR_NODE_LINK_TO(scale_node, elementwise_mul_out);  // output edge

    // Drop the nodes that the scale op replaces.
    GraphSafeRemoveNodes(graph,
                         {fill_constant, fill_constant_out, elementwise_mul});
  };

  detector(graph, fuse_into_scale);
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
fillconstant_elementwisemul_fuse
,
paddle
::
framework
::
ir
::
FillconstantElementwisemulFuse
);
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
e1b0d7cb
...
...
@@ -1878,173 +1878,6 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
return
concat_out
;
}
// Builds the detection-head pattern consumed by the Anakin simplification
// pass and returns its output node (the multiclass_nms output).
//
// For each of the `times` branches the pattern contains, in this order:
//   prior-box op -> "Boxes" output     -> reshape/flatten op -> its output
//                -> "Variances" output -> reshape/flatten op -> its output
// The i-th outputs of the two reshape/flatten chains are required to be the
// i-th "X" input of two `concat` ops. Both concat results, together with
// conv_in[times], feed `box_coder`; conv_in[times + 1] feeds `transpose2`;
// the box_coder and transpose2 results feed `multiclass_nms`.
//
// @param conv_in        Pattern inputs. Entries [0, times) feed the prior-box
//                       ops; entries `times` and `times + 1` are indexed as
//                       well, so at least `times + 2` entries are needed.
// @param times          Number of prior-box branches.
// @param priorbox_type  Op type asserted for the prior-box nodes.
// @param is_reshape     Selects "reshape2" (true) or "flatten2" (false) as the
//                       op type following the prior-box outputs.
// @return The pattern node for the multiclass_nms output.
PDNode *patterns::AnakinDetectionPattern::operator()(
    std::vector<PDNode *> conv_in, int times, std::string priorbox_type,
    bool is_reshape) {
  // Each branch contributes kNumFields consecutive entries to `branch_nodes`;
  // the offsets below address the entries inside one such group.
  const int kNumFields = 7;
  const int kPriorBoxLocOffset = 1;
  const int kReshape1Offset = 2;
  const int kReshape1OutOffset = 3;
  const int kPriorBoxVarOffset = 4;
  const int kReshape2Offset = 5;
  const int kReshape2OutOffset = 6;

  // Positions of the two extra inputs that follow the per-branch inputs.
  const int kBoxCoderThirdInputOffset = times;
  const int kMultiClassSecondInputNmsOffset = times + 1;

  const std::string op_after_priorbox = is_reshape ? "reshape2" : "flatten2";

  // Creates a pattern node whose name is scoped through GetNodeName().
  auto make_node = [this](const std::string &tag) {
    return pattern->NewNode(GetNodeName(tag));
  };

  std::vector<PDNode *> branch_nodes;
  for (int i = 0; i < times; ++i) {
    const std::string idx = std::to_string(i);

    branch_nodes.push_back(
        make_node("prior_box" + idx)->assert_is_op(priorbox_type));
    branch_nodes.push_back(make_node("box_out" + idx)
                               ->assert_is_op_output(priorbox_type, "Boxes")
                               ->assert_is_op_input(op_after_priorbox, "X")
                               ->AsIntermediate());
    branch_nodes.push_back(
        make_node("reshape1" + idx)->assert_is_op(op_after_priorbox));
    branch_nodes.push_back(make_node("reshape1_out" + idx)
                               ->assert_is_op_output(op_after_priorbox)
                               ->assert_is_op_nth_input("concat", "X", i)
                               ->AsIntermediate());
    branch_nodes.push_back(make_node("box_var_out" + idx)
                               ->assert_is_op_output(priorbox_type, "Variances")
                               ->assert_is_op_input(op_after_priorbox, "X")
                               ->AsIntermediate());
    branch_nodes.push_back(
        make_node("reshape2" + idx)->assert_is_op(op_after_priorbox));
    branch_nodes.push_back(make_node("reshape2_out" + idx)
                               ->assert_is_op_output(op_after_priorbox)
                               ->assert_is_op_nth_input("concat", "X", i)
                               ->AsIntermediate());
  }

  // Nodes shared by all branches, created in a fixed order.
  auto *concat_op1 = make_node("concat1")
                         ->assert_is_op("concat")
                         ->assert_op_has_n_inputs("concat", times);
  auto *concat_out1 = make_node("concat1_out")
                          ->assert_is_op_output("concat")
                          ->AsIntermediate();
  auto *concat_op2 = make_node("concat2")
                         ->assert_is_op("concat")
                         ->assert_op_has_n_inputs("concat", times);
  auto *concat_out2 = make_node("concat2_out")
                          ->assert_is_op_output("concat")
                          ->AsIntermediate();

  auto *box_coder_op = make_node("box_coder")
                           ->assert_is_op("box_coder")
                           ->assert_op_has_n_inputs("box_coder", 3);
  auto *box_coder_out = make_node("box_coder_out")
                            ->assert_is_op_output("box_coder")
                            ->AsIntermediate();

  auto *transpose_before_nms =
      make_node("transpose_before_nms")->assert_is_op("transpose2");
  auto *transpose_before_nms_out =
      make_node("transpose_before_nms_out")
          ->assert_is_op_output("transpose2")
          ->assert_is_op_input("multiclass_nms", "Scores")
          ->AsIntermediate();

  auto *multiclass_nms_op = make_node("multiclass_nms")
                                ->assert_is_op("multiclass_nms")
                                ->assert_op_has_n_inputs("multiclass_nms", 2);
  auto *multiclass_nms_out = make_node("multiclass_nms_out")
                                 ->assert_is_op_output("multiclass_nms")
                                 ->AsOutput();

  // Wire every branch and collect the inputs of the two concat ops.
  std::vector<PDNode *> reshape1_outs;
  std::vector<PDNode *> reshape2_outs;
  for (int i = 0; i < times; ++i) {
    const int base = i * kNumFields;
    PDNode *prior_box = branch_nodes[base];
    PDNode *boxes = branch_nodes[base + kPriorBoxLocOffset];
    PDNode *reshape1 = branch_nodes[base + kReshape1Offset];
    PDNode *reshape1_out = branch_nodes[base + kReshape1OutOffset];
    PDNode *variances = branch_nodes[base + kPriorBoxVarOffset];
    PDNode *reshape2 = branch_nodes[base + kReshape2Offset];
    PDNode *reshape2_out = branch_nodes[base + kReshape2OutOffset];

    conv_in[i]->AsInput();
    prior_box->LinksFrom({conv_in[i]});

    // "Boxes" chain: prior-box op -> boxes -> reshape/flatten -> output.
    boxes->LinksFrom({prior_box});
    reshape1->LinksFrom({boxes});
    reshape1_out->LinksFrom({reshape1});

    // "Variances" chain: prior-box op -> variances -> reshape/flatten -> output.
    variances->LinksFrom({prior_box});
    reshape2->LinksFrom({variances});
    reshape2_out->LinksFrom({reshape2});

    reshape1_outs.push_back(reshape1_out);
    reshape2_outs.push_back(reshape2_out);
  }

  concat_op1->LinksFrom(reshape1_outs);
  concat_op2->LinksFrom(reshape2_outs);
  concat_out1->LinksFrom({concat_op1});
  concat_out2->LinksFrom({concat_op2});

  PDNode *box_coder_third_input = conv_in[kBoxCoderThirdInputOffset];
  PDNode *nms_scores_source = conv_in[kMultiClassSecondInputNmsOffset];
  box_coder_third_input->AsInput();
  nms_scores_source->AsInput();

  box_coder_op->LinksFrom({concat_out1, concat_out2, box_coder_third_input});
  box_coder_out->LinksFrom({box_coder_op});

  transpose_before_nms->LinksFrom({nms_scores_source});
  transpose_before_nms_out->LinksFrom({transpose_before_nms});

  multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
      .LinksTo({multiclass_nms_out});

  return multiclass_nms_out;
}
// Builds the pattern `elementwise_mul(elementwise_op_input, fill_constant())`.
//
// The fill_constant output is asserted to be the "Y" input of elementwise_mul
// and is marked as an intermediate node; the elementwise_mul output is the
// pattern's output node.
//
// @param elementwise_op_input  Pattern node linked as the other input of the
//                              elementwise_mul op.
// @return The pattern node for the elementwise_mul output.
PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
    PDNode *elementwise_op_input) {
  auto *constant_op = pattern->NewNode(fill_constant_repr());
  constant_op = constant_op->assert_is_op("fill_constant");

  auto *constant_var = pattern->NewNode(fill_constant_out_repr())
                           ->assert_is_op_output("fill_constant")
                           ->assert_is_op_input("elementwise_mul", "Y")
                           ->AsIntermediate();

  auto *mul_op = pattern->NewNode(elementwise_mul_repr());
  mul_op = mul_op->assert_is_op("elementwise_mul");

  auto *mul_result = pattern->NewNode(elementwise_mul_out_repr())
                         ->assert_is_op_output("elementwise_mul")
                         ->AsOutput();

  // fill_constant -> constant_var -> elementwise_mul -> mul_result
  constant_var->LinksFrom({constant_op});
  mul_op->LinksFrom({elementwise_op_input, constant_var});
  mul_result->LinksFrom({mul_op});

  return mul_result;
}
void
patterns
::
QuantDequantOpFuse
::
operator
()(
PDNode
*
quant_op_input
,
const
std
::
string
&
op_type
,
const
std
::
string
&
weight_name
,
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
e1b0d7cb
...
...
@@ -1093,37 +1093,6 @@ struct TransposeFlattenConcat : public PatternBase {
}
};
// Pattern description for the detection head handled by the Anakin
// simplification pass. Node names are derived from a caller-supplied tag via
// GetNodeName(), so nodes can be retrieved again by the same tag.
struct AnakinDetectionPattern : PatternBase {
  // Registers the pattern under `name_scope` with the fixed representation
  // name "anakin_detect_pattern".
  AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "anakin_detect_pattern") {}

  // Builds the pattern; defined in the corresponding source file.
  PDNode* operator()(std::vector<PDNode*> conv_inputs, int times,
                     std::string priorbox_type, bool is_reshape);

  // Returns the full node name that PDNodeName() produces for `op_type`
  // within this pattern's scope, representation name and id.
  std::string GetNodeName(const std::string& op_type) {
    return PDNodeName(name_scope_, repr_, id_, op_type);
  }

  // Retrieves the pattern node that was registered under the name
  // GetNodeName(op_type).
  PDNode* GetPDNode(const std::string& op_type) {
    const std::string full_name = GetNodeName(op_type);
    return pattern->RetrieveNode(full_name);
  }
};
// Pattern description for a fill_constant op whose output feeds an
// elementwise_mul op.
struct FillConstantElementWiseMulFuse : PatternBase {
  // Registers the pattern under `name_scope` with the fixed representation
  // name "anakin_fillconstant_elementwisemul_fuse".
  FillConstantElementWiseMulFuse(PDPattern* pattern,
                                 const std::string& name_scope)
      : PatternBase(pattern, name_scope,
                    "anakin_fillconstant_elementwisemul_fuse") {}

  // Builds the pattern; defined in the corresponding source file.
  PDNode* operator()(PDNode* elementwise_op_input);

  // Named pattern nodes: the two ops and their output variables.
  PATTERN_DECL_NODE(fill_constant);
  PATTERN_DECL_NODE(fill_constant_out);
  PATTERN_DECL_NODE(elementwise_mul);
  PATTERN_DECL_NODE(elementwise_mul_out);
};
struct
QuantDequantOpFuse
:
public
PatternBase
{
QuantDequantOpFuse
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"quant_dequant_fuse"
)
{}
...
...
paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Collapses a matched detection head into one `concat` op followed by one
// `detection_out` op.
//
// For every match of patterns::AnakinDetectionPattern:
//   * a new `concat` op (axis = 2) is created whose inputs are the prior-box
//     "Boxes" outputs of all branches and whose output is the original
//     concat1 output variable;
//   * a new `detection_out` op is created that reads that variable, the
//     box_coder "TargetBox" input and the transpose2 input, copies the
//     box_coder / multiclass_nms attributes listed below, and writes to the
//     original multiclass_nms output variable;
//   * the reshape/flatten chains, the prior-box "Variances" outputs, both
//     original concat ops, the concat2 output, box_coder, transpose2 and
//     multiclass_nms (with their intermediate outputs) are removed.
//
// @param graph       Graph that is searched and modified in place.
// @param times       Number of prior-box branches the pattern must contain.
// @param is_density  Match "density_prior_box" ops (true) or "prior_box" ops.
// @param is_reshape  Forwarded to the pattern; selects the op type expected
//                    after the prior-box outputs.
void RunSimplifyAnakinDetection(ir::Graph *graph, int times, bool is_density,
                                bool is_reshape) {
  const std::string pattern_name =
      "simplify_anakin_detection_pattern_pass" + std::to_string(times);
  const std::string priorbox_type =
      is_density ? "density_prior_box" : "prior_box";

  GraphPatternDetector detector;

  // Pattern inputs: one per prior-box branch, then the box_coder "TargetBox"
  // input, then the transpose2 input.
  std::vector<PDNode *> input_nodes;
  for (int i = 0; i < times; ++i) {
    PDNode *branch_input = detector.mutable_pattern()
                               ->NewNode("x" + std::to_string(i))
                               ->assert_is_op_input(priorbox_type, "Input")
                               ->AsInput();
    input_nodes.push_back(branch_input);
  }
  PDNode *target_box_input = detector.mutable_pattern()
                                 ->NewNode("x" + std::to_string(times))
                                 ->assert_is_op_input("box_coder", "TargetBox")
                                 ->AsInput();
  input_nodes.push_back(target_box_input);
  PDNode *transpose_input = detector.mutable_pattern()
                                ->NewNode("x" + std::to_string(times + 1))
                                ->assert_is_op_input("transpose2")
                                ->AsInput();
  input_nodes.push_back(transpose_input);

  patterns::AnakinDetectionPattern detection_pattern(detector.mutable_pattern(),
                                                     pattern_name);
  detection_pattern(input_nodes, times, priorbox_type, is_reshape);

  // Invoked once per match. Note that it operates on the captured `graph`
  // rather than on its own `g` argument.
  auto simplify = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
    // Each branch contributes kNumFields consecutive entries to `matched`;
    // the offsets address the entries inside one such group.
    const int kNumFields = 7;
    const int kPriorBoxLocOffset = 1;
    const int kReshape1Offset = 2;
    const int kReshape1OutOffset = 3;
    const int kPriorBoxVarOffset = 4;
    const int kReshape2Offset = 5;
    const int kReshape2OutOffset = 6;

    // Maps a pattern node tag to the graph node it matched.
    auto lookup = [&](const std::string &tag) -> Node * {
      return subgraph.at(detection_pattern.GetPDNode(tag));
    };

    // Order in which the per-branch nodes are validated ...
    const char *const kCheckOrder[] = {"prior_box",    "box_out",
                                       "reshape1",     "reshape1_out",
                                       "reshape2",     "reshape2_out",
                                       "box_var_out"};
    // ... and the order in which they are stored (must agree with the
    // offsets above).
    const char *const kStoreOrder[] = {"prior_box",    "box_out",
                                       "reshape1",     "reshape1_out",
                                       "box_var_out",  "reshape2",
                                       "reshape2_out"};

    std::vector<Node *> matched;
    for (int i = 0; i < times; ++i) {
      const std::string idx = std::to_string(i);
      for (const char *tag : kCheckOrder) {
        PADDLE_ENFORCE(lookup(tag + idx));
      }
      for (const char *tag : kStoreOrder) {
        matched.push_back(lookup(tag + idx));
      }
    }

    Node *concat_op1 = lookup("concat1");
    Node *concat_out1 = lookup("concat1_out");
    Node *concat_op2 = lookup("concat2");
    Node *concat_out2 = lookup("concat2_out");
    Node *box_coder_third_input = subgraph.at(input_nodes[times]);
    Node *box_coder_op = lookup("box_coder");
    Node *box_coder_out = lookup("box_coder_out");

    Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
    Node *transpose_before_nms = lookup("transpose_before_nms");
    Node *transpose_before_nms_out = lookup("transpose_before_nms_out");

    Node *multiclass_nms = lookup("multiclass_nms");
    Node *multiclass_nms_out = lookup("multiclass_nms_out");

    // Attributes carried over to the fused detection_out op.
    auto *box_coder_desc = box_coder_op->Op();
    const std::string code_type =
        boost::get<std::string>(box_coder_desc->GetAttr("code_type"));
    const bool box_normalized =
        boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));

    const int background_label =
        boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
    const float score_threshold =
        boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
    const int nms_top_k =
        boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
    const float nms_threshold =
        boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
    const float nms_eta =
        boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
    const int keep_top_k =
        boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));

    // New concat over the raw prior-box "Boxes" outputs of all branches.
    std::vector<std::string> concat1_input_names;
    for (int i = 0; i < times; ++i) {
      Node *boxes = matched[i * kNumFields + kPriorBoxLocOffset];
      concat1_input_names.push_back(boxes->Name());
    }

    framework::OpDesc concat1_desc;
    concat1_desc.SetType("concat");
    concat1_desc.SetInput("X", concat1_input_names);
    concat1_desc.SetAttr("axis", 2);
    concat1_desc.SetOutput("Out", {concat_out1->Name()});

    auto *new_concat_op = graph->CreateOpNode(&concat1_desc);

    for (int i = 0; i < times; ++i) {
      Node *boxes = matched[i * kNumFields + kPriorBoxLocOffset];
      boxes->outputs.push_back(new_concat_op);
      new_concat_op->inputs.push_back(boxes);
    }

    // Description of the fused detection_out op.
    framework::OpDesc detection_desc;
    detection_desc.SetType("detection_out");
    detection_desc.SetInput("PriorBox", {concat_out1->Name()});
    detection_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
    detection_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
    detection_desc.SetAttr("code_type", code_type);
    detection_desc.SetAttr("box_normalized", box_normalized);
    detection_desc.SetAttr("background_label", background_label);
    detection_desc.SetAttr("score_threshold", score_threshold);
    detection_desc.SetAttr("nms_top_k", nms_top_k);
    detection_desc.SetAttr("nms_threshold", nms_threshold);
    detection_desc.SetAttr("nms_eta", nms_eta);
    detection_desc.SetAttr("keep_top_k", keep_top_k);
    detection_desc.SetOutput("Out", {multiclass_nms_out->Name()});
    detection_desc.Flush();

    // Create a new node for the fused op.
    auto *detection_out_op = graph->CreateOpNode(&detection_desc);

    // Everything between the prior-box "Boxes" outputs and the final output
    // that the two new ops replace.
    std::unordered_set<const Node *> obsolete;
    for (int i = 0; i < times; ++i) {
      const int base = i * kNumFields;
      matched[base + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
      obsolete.insert(matched[base + kReshape1Offset]);
      obsolete.insert(matched[base + kReshape1OutOffset]);
      obsolete.insert(matched[base + kPriorBoxVarOffset]);
      obsolete.insert(matched[base + kReshape2Offset]);
      obsolete.insert(matched[base + kReshape2OutOffset]);
    }
    obsolete.insert(concat_op1);
    obsolete.insert(concat_op2);
    obsolete.insert(concat_out2);
    obsolete.insert(box_coder_op);
    obsolete.insert(box_coder_out);
    obsolete.insert(transpose_before_nms);
    obsolete.insert(transpose_before_nms_out);
    obsolete.insert(multiclass_nms);

    // new concat -> concat_out1
    new_concat_op->outputs.push_back(concat_out1);
    concat_out1->inputs.push_back(new_concat_op);

    // Edges recorded on the detection_out op itself ...
    detection_out_op->inputs.push_back(concat_out1);
    detection_out_op->inputs.push_back(box_coder_third_input);
    detection_out_op->inputs.push_back(multiclass_nms_second_input);
    detection_out_op->outputs.push_back(multiclass_nms_out);

    // ... and the matching edges on its neighbours.
    concat_out1->outputs.push_back(detection_out_op);
    box_coder_third_input->outputs.push_back(detection_out_op);
    multiclass_nms_second_input->outputs.push_back(detection_out_op);
    multiclass_nms_out->inputs.push_back(detection_out_op);

    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph, obsolete);
  };

  detector(graph, simplify);
}
// Applies the detection-simplification rewrite to `graph`.
//
// The fuse-pass bookkeeping is first initialised under the name
// "simplify_anakin_detection_pattern_pass". RunSimplifyAnakinDetection is then
// invoked once for every combination of the density flag, the reshape flag
// (each tried as true, then false) and a count running from 1 to 6.
// NOTE(review): the count is presumably the number of repeated prior-box
// branches that the pattern matches -- confirm against
// RunSimplifyAnakinDetection.
void SimplifyAnakinDetectionPatternPass::ApplyImpl(ir::Graph *graph) const {
  const std::string pass_name = "simplify_anakin_detection_pattern_pass";
  FusePassBase::Init(pass_name, graph);

  const int max_count = 6;
  const bool flag_values[] = {true, false};
  for (const bool is_density : flag_values) {
    for (const bool is_reshape : flag_values) {
      int count = 1;
      while (count <= max_count) {
        RunSimplifyAnakinDetection(graph, count, is_density, is_reshape);
        ++count;
      }
    }
  }
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Short alias for the pass class, used as the class argument of REGISTER_PASS
// below; the pass is registered under the name
// simplify_anakin_priorbox_detection_out_pass.
using priorbox_pattern =
    paddle::framework::ir::SimplifyAnakinDetectionPatternPass;
REGISTER_PASS(simplify_anakin_priorbox_detection_out_pass, priorbox_pattern);
paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <unordered_set>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Fuse pass for detection models. Its implementation replaces a matched
// detection sub-graph (reshape / concat / box_coder / transpose /
// multiclass_nms nodes) with one "concat" op feeding a single "detection_out"
// op, and tries the match for several repeat counts of the per-branch
// structure.
class SimplifyAnakinDetectionPatternPass : public FusePassBase {
 public:
  virtual ~SimplifyAnakinDetectionPatternPass() = default;

 protected:
  // Entry point invoked by the pass framework on the graph to rewrite.
  void ApplyImpl(ir::Graph *graph) const override;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/subgraph_detector.h
浏览文件 @
e1b0d7cb
...
...
@@ -65,7 +65,7 @@ class SubGraphFuser {
using
NodeInsideSubgraphTeller
=
SubgraphDetector
::
NodeInsideSubgraphTeller
;
SubGraphFuser
(
Graph
*
graph
,
const
NodeInsideSubgraphTeller
&
teller
,
int
min_subgraph_size
,
std
::
string
name
=
"
anakin
_engine"
)
int
min_subgraph_size
,
std
::
string
name
=
"
tensorrt
_engine"
)
:
graph_
(
graph
),
node_inside_subgraph_teller_
(
teller
),
min_subgraph_size_
{
min_subgraph_size
},
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
e1b0d7cb
...
...
@@ -30,10 +30,6 @@ if (TENSORRT_FOUND)
add_subdirectory
(
tensorrt
)
endif
()
if
(
ANAKIN_SUBGRAPH
)
add_subdirectory
(
anakin
)
endif
()
if
(
WITH_LITE
)
add_subdirectory
(
lite
)
endif
()
...
...
@@ -68,9 +64,6 @@ if(NOT APPLE)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
if
(
ANAKIN_FOUND
)
set
(
ANAKIN_SHARED_INFERENCE_SRCS
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_anakin_engine.cc
)
endif
()
set
(
SHARED_INFERENCE_SRCS
io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/../framework/data_feed.cc
...
...
@@ -80,8 +73,7 @@ set(SHARED_INFERENCE_SRCS
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/details/zero_copy_tensor.cc
${
mkldnn_quantizer_src_file
}
${
ANAKIN_SHARED_INFERENCE_SRCS
}
)
${
mkldnn_quantizer_src_file
}
)
# Create shared inference library defaultly
cc_library
(
paddle_fluid_shared SHARED SRCS
${
SHARED_INFERENCE_SRCS
}
...
...
paddle/fluid/inference/anakin/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
0a678ca0
# Libraries for the Anakin engine wrapper and the Anakin op teller.
cc_library(anakin_engine
  SRCS engine.cc
  DEPS framework_proto boost)
cc_library(anakin_op_teller
  SRCS op_teller.cc
  DEPS framework_proto device_context boost)

# The engine links against the external Anakin libraries.
target_link_libraries(anakin_engine anakin anakin_saber_common)

cc_test(test_anakin_engine
  SRCS test_anakin_engine.cc
  DEPS anakin_engine)

# Op converters live in their own sub-directory.
add_subdirectory(convert)
paddle/fluid/inference/anakin/convert/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
0a678ca0
# Library bundling every Anakin op converter.
cc_library(anakin_op_converter
  SRCS fc.cc conv2d.cc conv2d_fusion.cc
       elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
       batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
       detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
       roi_align.cc shuffle_channel.cc helper.cc
  DEPS anakin_engine framework_proto
       scope op_registry gtest gflags)

# One unit test per converter, each linked with the Paddle op it converts.
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor)
cc_test(test_anakin_affine_channel SRCS test_affine_channel_op.cc DEPS anakin_op_converter affine_channel_op)
paddle/fluid/inference/anakin/convert/activation.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/activation.h"
#include <algorithm>
#include <map>
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Builds a converter for the Paddle activation named `op_type`.
//
// The name is looked up in anakin_op_types_ and the mapped Anakin activation
// name is cached in anakin_op_type_. An op type that is missing from the
// table fails the PADDLE_ENFORCE check.
template <typename TargetT, ::anakin::Precision PrecisionT>
ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
    const std::string &op_type)
    : op_type_(op_type) {
  const auto it = anakin_op_types_.find(op_type_);
  PADDLE_ENFORCE(it != anakin_op_types_.end(),
                 "activation op type is not support");
  anakin_op_type_ = it->second;
}
// Adds an Anakin "Activation" op for the given Paddle activation op.
//
// The op must have exactly one "X" input and one "Out" output. The Anakin op
// is named "<paddle type>:<output name>" and receives the cached Anakin
// activation name as its "type" attribute. For "swish" the Paddle "beta"
// attribute, and for "relu6" the Paddle "threshold" attribute, is forwarded
// as the Anakin "clip_relu_num" attribute.
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void ActivationOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  const std::string src_name = op_desc.Input("X").front();
  const std::string dst_name = op_desc.Output("Out").front();
  const std::string anakin_name = op_desc.Type() + ":" + dst_name;

  this->engine_->AddOp(anakin_name, "Activation", {src_name}, {dst_name});
  this->engine_->AddOpAttr(anakin_name, "type", anakin_op_type_);

  // op_type_ holds a single value, so at most one branch below can run.
  if (op_type_ == "swish") {
    float beta = boost::get<float>(op_desc.GetAttr("beta"));
    this->engine_->AddOpAttr(anakin_name, "clip_relu_num", beta);
  } else if (op_type_ == "relu6") {
    float threshold = boost::get<float>(op_desc.GetAttr("threshold"));
    this->engine_->AddOpAttr(anakin_name, "clip_relu_num", threshold);
  }
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Converter registrations for the activation ops handled in this file.
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(swish, SwishOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(relu6, Relu6OpConverter);
paddle/fluid/inference/anakin/convert/activation.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Shared converter for Paddle activation ops. A concrete converter passes its
// Paddle op type to the constructor, which resolves it to the matching Anakin
// activation name through anakin_op_types_.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  // `op_type` is the Paddle activation name; it must be a key of
  // anakin_op_types_.
  explicit ActivationOpConverter(const std::string &op_type);

  // Emits the Anakin op(s) corresponding to `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ActivationOpConverter() = default;

 private:
  // Paddle op type this instance converts.
  std::string op_type_;
  // Anakin activation name resolved from op_type_.
  std::string anakin_op_type_;
  // Paddle activation name -> Anakin activation name.
  std::map<std::string, std::string> anakin_op_types_ = {
      {"tanh", "TanH"},
      {"sigmoid", "Sigmoid"},
      {"relu6", "ClippedRelu"},
      {"swish", "Swish"}};
};
// Activation converter bound to the Paddle "tanh" op type.
template <typename TargetT, ::anakin::Precision PrecisionT>
class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  using Base = ActivationOpConverter<TargetT, PrecisionT>;

 public:
  TanhOpConverter() : Base("tanh") {}
};
// Activation converter bound to the Paddle "sigmoid" op type.
template <typename TargetT, ::anakin::Precision PrecisionT>
class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  using Base = ActivationOpConverter<TargetT, PrecisionT>;

 public:
  SigmoidOpConverter() : Base("sigmoid") {}
};
// Activation converter bound to the Paddle "relu6" op type.
template <typename TargetT, ::anakin::Precision PrecisionT>
class Relu6OpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  using Base = ActivationOpConverter<TargetT, PrecisionT>;

 public:
  Relu6OpConverter() : Base("relu6") {}
};
// Activation converter bound to the Paddle "swish" op type.
template <typename TargetT, ::anakin::Precision PrecisionT>
class SwishOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  using Base = ActivationOpConverter<TargetT, PrecisionT>;

 public:
  SwishOpConverter() : Base("swish") {}
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/affine_channel.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Adds an Anakin "AffineChannel" op for a Paddle affine_channel op.
//
// The op must have exactly one "X" input and one "Out" output. The variables
// named by the "Scale" and "Bias" inputs are looked up in `scope` (both must
// exist) and attached to the Anakin op as "weight_1" and "weight_2".
//
// `block_desc` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  const std::string src_name = op_desc.Input("X").front();
  const std::string dst_name = op_desc.Output("Out").front();
  const std::string anakin_name = op_desc.Type() + ":" + dst_name;
  this->engine_->AddOp(anakin_name, "AffineChannel", {src_name}, {dst_name});

  // Scale -> "weight_1". pblock_from_var builds the Anakin block from the
  // scope variable (NOTE(review): presumably via a CPU copy -- confirm in
  // helper.h).
  auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
  PADDLE_ENFORCE_NOT_NULL(scale_v);
  auto scale_block =
      pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
  this->engine_->AddOpAttr(anakin_name, "weight_1", *scale_block);

  // Bias -> "weight_2", handled the same way.
  auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(bias_v);
  auto bias_block =
      pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
  this->engine_->AddOpAttr(anakin_name, "weight_2", *bias_block);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Converter registration for the affine_channel op.
REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
paddle/fluid/inference/anakin/convert/affine_channel.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter from the Paddle affine_channel op to its Anakin counterpart.
template <typename TargetT, ::anakin::Precision PrecisionT>
class AffineChannelOpConverter
    : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  AffineChannelOpConverter() = default;

  // Emits the Anakin op(s) corresponding to `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~AffineChannelOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/batch_norm.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/batch_norm.h"
#include <math.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a Paddle batch_norm op into two chained Anakin ops.
//
// The op must have exactly one "Y" output and exactly one of each of the
// inputs "X", "Scale", "Bias", "Mean" and "Variance". With
// op_name = "<paddle type>:<Y name>", the converter adds:
//   * "<op_name>:bn"    -- an Anakin "BatchNorm" op reading X and writing the
//     intermediate tensor "<op_name>:bn_output". It gets the Paddle "epsilon"
//     attribute, a momentum of 1.0, and weights built from Mean ("weight_1"),
//     Variance ("weight_2") and the one-element vector {1} ("weight_3").
//   * "<op_name>:scale" -- an Anakin "Scale" op reading the intermediate
//     tensor and writing Y, with axis = 1, num_axes = 1, bias_term = true and
//     weights built from Scale ("weight_1") and Bias ("weight_2").
//
// Every variable looked up in `scope` must exist. `block_desc` and
// `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
  for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
    PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
  }

  auto input = op_desc.Input("X").front();
  auto output = op_desc.Output("Y").front();
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
  auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));

  // First op: BatchNorm, writing to an intermediate tensor.
  auto bn_op_name = op_name + ":bn";
  auto bn_output = bn_op_name + "_output";
  this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
  this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
  this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));

  // Second op: Scale, consuming the intermediate tensor and producing Y.
  auto scale_op_name = op_name + ":scale";
  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);

  // BatchNorm weights: Mean, Variance and the constant vector {1}.
  auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
  PADDLE_ENFORCE_NOT_NULL(mean_v);
  auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
  this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);

  auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
  PADDLE_ENFORCE_NOT_NULL(variance_v);
  auto weight2 =
      pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
  this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);

  auto *weight3 = pblock_from_vector<TargetT, PrecisionT>(
      std::vector<float>({1}), this->engine_);
  this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);

  // Scale weights: the learned Scale and Bias parameters.
  auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
  PADDLE_ENFORCE_NOT_NULL(scale_v);
  auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
  this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);

  auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(bias_v);
  auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
  this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Converter registration for the batch_norm op.
REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
paddle/fluid/inference/anakin/convert/batch_norm.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter from the Paddle batch_norm op to its Anakin counterpart.
template <typename TargetT, ::anakin::Precision PrecisionT>
class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  BatchNormOpConverter() = default;

  // Emits the Anakin op(s) corresponding to `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~BatchNormOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/concat.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/concat.h"
#include <algorithm>
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Adds an Anakin "Concat" op for a Paddle concat op.
//
// All names listed under the "X" input become inputs of the Anakin op, the
// first "Out" name becomes its output, and the Paddle "axis" attribute is
// forwarded unchanged. The Anakin op is named "<paddle type>:<output name>".
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void ConcatOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);

  const int concat_axis = boost::get<int>(op_desc.GetAttr("axis"));
  const auto sources = op_desc.Input("X");
  const std::string dst_name = op_desc.Output("Out").front();
  const std::string anakin_name = op_desc.Type() + ":" + dst_name;

  this->engine_->AddOp(anakin_name, "Concat", sources, {dst_name});
  this->engine_->AddOpAttr(anakin_name, "axis", concat_axis);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Converter registration for the concat op.
REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
paddle/fluid/inference/anakin/convert/concat.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter from the Paddle concat op to its Anakin counterpart.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ConcatOpConverter() = default;

  // Emits the Anakin op(s) corresponding to `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ConcatOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/conv2d.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Adds an Anakin "Convolution" op for a Paddle conv2d op.
//
// The op must have exactly one "Input", one "Filter" and one "Output". The
// filter variable is fetched from `scope` and must have four dimensions;
// dims [0], [2] and [3] provide filter_num and the kernel height / width.
// Strides, paddings, dilations and groups are forwarded from the Paddle
// attributes; axis is fixed to 1 and bias_term to false.
//
// Weights are attached as "weight_1" in one of two ways:
//   * float path -- the filter tensor is wrapped by pblock_from_tensor;
//   * int8 path  -- each float weight must already lie in [-128, 127]; the
//     values are cast to char and copied into an AK_INT8 block, the op
//     precision is set to AK_INT8, and the weight / input scales are
//     registered after division by 127.
//
// `block_desc` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

  const std::string src_name = op_desc.Input("Input").front();
  const std::string dst_name = op_desc.Output("Output").front();
  const std::string anakin_name = op_desc.Type() + ":" + dst_name;
  this->engine_->AddOp(anakin_name, "Convolution", {src_name}, {dst_name});

  // Fetch the filter weights and record their shape.
  auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
  PADDLE_ENFORCE_NOT_NULL(filter_v);
  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
  auto filter_dims = framework::vectorize<int>(weight_tensor->dims());

  PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);

  // Geometry attributes.
  const int kernel_h = weight_tensor->dims()[2];
  const int kernel_w = weight_tensor->dims()[3];
  auto out_channels = weight_tensor->dims()[0];
  this->engine_->template AddOpAttr<int>(anakin_name, "filter_num",
                                         out_channels);
  this->engine_->template AddOpAttr<PTuple<int>>(anakin_name, "kernel_size",
                                                 {kernel_h, kernel_w});

  auto stride_attr = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  this->engine_->template AddOpAttr<PTuple<int>>(anakin_name, "strides",
                                                 stride_attr);

  auto padding_attr =
      boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  this->engine_->template AddOpAttr<PTuple<int>>(anakin_name, "padding",
                                                 padding_attr);

  auto dilation_attr =
      boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
  this->engine_->template AddOpAttr<PTuple<int>>(anakin_name, "dilation_rate",
                                                 dilation_attr);

  const int group_count = boost::get<int>(op_desc.GetAttr("groups"));
  this->engine_->AddOpAttr(anakin_name, "group", group_count);
  this->engine_->AddOpAttr(anakin_name, "axis", 1);
  this->engine_->AddOpAttr(anakin_name, "bias_term", false);

  ::anakin::saber::Shape anakin_shape(filter_dims);
  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));

  if (!enable_int8) {
    // Float path: wrap the filter tensor directly.
    auto *float_block = pblock_from_tensor<TargetT, PrecisionT>(
        *weight_tensor, filter_dims, this->engine_);
    this->engine_->AddOpAttr(anakin_name, "weight_1", *float_block);
    return;
  }

  // Int8 path.
  const float int8_range = 127.;
  float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
  auto weight_scale =
      boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));

  // The block is handed to the engine through RegistBlock right after
  // allocation.
  PBlock<TargetT> *int8_block =
      new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
  this->engine_->RegistBlock(int8_block);

  float *float_weights = weight_tensor->data<float>();
  std::vector<char> packed;
  int weight_num = weight_tensor->numel();
  for (int i = 0; i < weight_tensor->numel(); ++i) {
    bool is_valid_int8 =
        ((float_weights[i] >= -128) && (float_weights[i] <= 127));
    PADDLE_ENFORCE(is_valid_int8,
                   "We are in anakin subgraph int8 mode, the weight of conv "
                   "should be in range [-128, 127]");
    packed.push_back(static_cast<char>(float_weights[i]));
  }

  // Fill the host tensor, then mirror it into the device tensor.
  memcpy(static_cast<void *>(int8_block->h_tensor().mutable_data()),
         static_cast<void *>(packed.data()), sizeof(char) * weight_num);
  int8_block->d_tensor().set_shape(anakin_shape);
  int8_block->d_tensor().copy_from(int8_block->h_tensor());

  this->engine_->AddOpAttr(anakin_name, "weight_1", *int8_block);
  this->engine_->Graph()->SetOpPrec(anakin_name, ::anakin::AK_INT8);
  this->engine_->Graph()->SetWeightsScale(
      anakin_name, {weight_scale[0] / int8_range}, false);
  this->engine_->AddTensorScale(src_name, in_scale / int8_range);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Converter registration for the conv2d op.
REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
paddle/fluid/inference/anakin/convert/conv2d.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter from the Paddle conv2d op to its Anakin counterpart.
template <typename TargetT, ::anakin::Precision PrecisionT>
class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  Conv2dOpConverter() = default;

  // Emits the Anakin op(s) corresponding to `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~Conv2dOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using anakin::PTuple;

namespace paddle {
namespace inference {
namespace anakin {

// Translates a conv2d_fusion op description into an Anakin "Convolution" op.
//
// The op must have exactly one "Input", "Filter", "Bias" and "Output" entry,
// and the filter variable (looked up in `scope`) must be 4-D. Its dims 0, 2
// and 3 are forwarded as filter_num and kernel_size; strides, paddings,
// dilations and groups are copied from the op attributes.
//
// When the op carries an "enable_int8" attribute, the filter values (which
// must lie in [-128, 127]) are narrowed to char and stored in an AK_INT8
// block, and the input/weight scales are registered with the engine.
// Otherwise the filter and the bias are attached as "weight_1" and
// "weight_2".
//
// `block_desc` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

  auto input_name = op_desc.Input("Input").front();
  auto output_name = op_desc.Output("Output").front();
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});

  auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
  PADDLE_ENFORCE_NOT_NULL(filter_v);
  // Fetched once and shared by both the int8 and the float branch below.
  auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
  auto weight_shape = framework::vectorize<int>(weight_tensor->dims());

  auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(b_v);

  PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
  const int filter_h = weight_tensor->dims()[2];
  const int filter_w = weight_tensor->dims()[3];
  auto filter_num = weight_tensor->dims()[0];
  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
                                                 {filter_h, filter_w});
  auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
  auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
  auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
                                                 dilations);

  const int groups = boost::get<int>(op_desc.GetAttr("groups"));
  this->engine_->AddOpAttr(op_name, "group", groups);
  this->engine_->AddOpAttr(op_name, "axis", 1);
  this->engine_->AddOpAttr(op_name, "bias_term", true);

  ::anakin::saber::Shape anakin_shape(weight_shape);
  // Note: this keys on the presence of the attribute, not on its value.
  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
  if (enable_int8) {
    const float int8_range = 127.;
    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
    auto weight_scale =
        boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
    // The block is handed to the engine right away via RegistBlock.
    // NOTE(review): presumably the engine owns it from then on -- confirm.
    PBlock<TargetT> *weight1 =
        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
    this->engine_->RegistBlock(weight1);

    float *weight_data = weight_tensor->data<float>();
    const int weight_num = weight_tensor->numel();
    std::vector<char> weight_int8;
    // The element count is known up front; avoid repeated reallocation.
    weight_int8.reserve(weight_num);
    for (int i = 0; i < weight_num; i++) {
      bool is_valid_int8 =
          ((weight_data[i] >= -128) && (weight_data[i] <= 127));
      PADDLE_ENFORCE(is_valid_int8,
                     "We are in anakin subgraph int8 mode, the weight of conv "
                     "should be in range [-128, 127]");
      weight_int8.push_back(static_cast<char>(weight_data[i]));
    }
    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
    weight1->d_tensor().set_shape(anakin_shape);
    weight1->d_tensor().copy_from(weight1->h_tensor());
    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
    this->engine_->Graph()->SetWeightsScale(
        op_name, {weight_scale[0] / int8_range}, false);
    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
  } else {
    // Reuse the tensor and shape obtained above instead of fetching and
    // vectorizing the filter a second time.
    auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
        *weight_tensor, weight_shape, this->engine_);
    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
    auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
  }
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {

// Anakin converter for the conv2d_fusion op.
//
// Interface only: the call operator is implemented out of line. Both template
// parameters are passed straight through to the AnakinOpConverter base.
template <typename TargetT, ::anakin::Precision PrecisionT>
class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  Conv2dFusionOpConverter() = default;

  // Overrides the base-class conversion entry point. It receives the op's
  // protobuf description, the enclosing block description, the scope used for
  // variable lookup and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~Conv2dFusionOpConverter() = default;
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/density_prior_box.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/density_prior_box.h"
#include <algorithm>
#include <map>
#include <vector>
using anakin::PTuple;

namespace paddle {
namespace inference {
namespace anakin {

// Translates a density_prior_box or prior_box op into an Anakin "PriorBox"
// op with inputs {Input, Image} and output Boxes.
//
// Which attributes are read depends on the op type:
//   * density_prior_box: fixed_sizes, fixed_ratios, densities, clip
//   * prior_box:         min_sizes, max_sizes, aspect_ratios, clip, flip
// Attributes belonging to the other op type are forwarded as empty lists
// (or false for the flags). variances, step_h, step_w and offset are read
// for both op types.
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void DensityPriorBoxOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  auto input_name = op_desc.Input("Input").front();
  auto image_name = op_desc.Input("Image").front();
  auto output_name = op_desc.Output("Boxes").front();
  auto op_type = op_desc.Type();
  auto op_name = op_type + ":" + op_desc.Output("Boxes").front();

  // Filled only for density_prior_box.
  std::vector<float> fixed_sizes;
  std::vector<float> fixed_ratios;
  std::vector<int> densities;
  // Filled only for prior_box.
  std::vector<float> min_sizes;
  std::vector<float> max_sizes;
  std::vector<float> aspect_ratios;
  bool is_clip = false;
  bool is_flip = false;

  if (op_type == "density_prior_box") {
    fixed_sizes =
        boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes"));
    fixed_ratios =
        boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
    densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
    is_clip = boost::get<bool>(op_desc.GetAttr("clip"));
  } else if (op_type == "prior_box") {
    min_sizes = boost::get<std::vector<float>>(op_desc.GetAttr("min_sizes"));
    max_sizes = boost::get<std::vector<float>>(op_desc.GetAttr("max_sizes"));
    aspect_ratios =
        boost::get<std::vector<float>>(op_desc.GetAttr("aspect_ratios"));
    is_clip = boost::get<bool>(op_desc.GetAttr("clip"));
    is_flip = boost::get<bool>(op_desc.GetAttr("flip"));
  }

  // The "density" attribute is passed on as a float tuple, so convert the
  // integer densities element by element.
  std::vector<float> dens(densities.begin(), densities.end());

  auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));

  // img_h / img_w are not read from the op; they are forwarded as 0 below.
  auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
  auto step_w = boost::get<float>(op_desc.GetAttr("step_w"));
  auto offset = boost::get<float>(op_desc.GetAttr("offset"));

  PTuple<std::string> t_order;
  t_order.push_back("MIN");
  t_order.push_back("COM");
  t_order.push_back("MAX");

  this->engine_->AddOp(op_name, "PriorBox", {input_name, image_name},
                       {output_name});
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "min_size",
                                                   min_sizes);
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "max_size",
                                                   max_sizes);
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "aspect_ratio",
                                                   aspect_ratios);
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_size",
                                                   fixed_sizes);
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_ratio",
                                                   fixed_ratios);
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "density", dens);
  this->engine_->AddOpAttr(op_name, "is_flip", is_flip);
  this->engine_->AddOpAttr(op_name, "is_clip", is_clip);
  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "variance",
                                                   variances);
  this->engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
  this->engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
  this->engine_->AddOpAttr(op_name, "step_h", step_h);
  this->engine_->AddOpAttr(op_name, "step_w", step_w);
  this->engine_->AddOpAttr(op_name, "offset", offset);
  this->engine_->template AddOpAttr<PTuple<std::string>>(op_name, "order",
                                                         t_order);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
paddle/fluid/inference/anakin/convert/density_prior_box.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {

// Anakin converter shared by the density_prior_box and prior_box ops.
//
// Interface only: the call operator is implemented out of line. Both template
// parameters are passed straight through to the AnakinOpConverter base.
template <typename TargetT, ::anakin::Precision PrecisionT>
class DensityPriorBoxOpConverter
    : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  DensityPriorBoxOpConverter() = default;

  // Overrides the base-class conversion entry point. It receives the op's
  // protobuf description, the enclosing block description, the scope used for
  // variable lookup and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~DensityPriorBoxOpConverter() = default;
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/detection_out.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/detection_out.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {

// Translates a detection_out op into an Anakin "DetectionOutput" op.
//
// Inputs are wired in the order {TargetBox, Scores, PriorBox}; the single
// output is "Out". A code_type of "decode_center_size" is forwarded as
// "CENTER_SIZE", "encode_center_size" is rejected with PADDLE_THROW, and any
// other value leaves the forwarded code type empty.
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void DetectionOutOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc desc(op, nullptr);

  // Names of the tensors this op reads and writes.
  const auto target_box = desc.Input("TargetBox").front();
  const auto prior_box = desc.Input("PriorBox").front();
  const auto scores = desc.Input("Scores").front();
  const auto out = desc.Output("Out").front();
  const auto anakin_op = desc.Type() + ":" + desc.Output("Out").front();

  // Attributes taken from the op.
  const auto paddle_code_type =
      boost::get<std::string>(desc.GetAttr("code_type"));
  const auto background_id = boost::get<int>(desc.GetAttr("background_label"));
  const auto conf_thresh = boost::get<float>(desc.GetAttr("score_threshold"));
  const auto top_k_for_nms = boost::get<int>(desc.GetAttr("nms_top_k"));
  const auto nms_thresh = boost::get<float>(desc.GetAttr("nms_threshold"));
  const auto eta = boost::get<float>(desc.GetAttr("nms_eta"));
  const auto top_k_to_keep = boost::get<int>(desc.GetAttr("keep_top_k"));

  // Map the code type onto the name forwarded to the engine.
  std::string mapped_code_type;
  if (paddle_code_type == "decode_center_size") {
    mapped_code_type = "CENTER_SIZE";
  } else if (paddle_code_type == "encode_center_size") {
    PADDLE_THROW(
        "Not support encode_center_size code_type in DetectionOut of anakin");
  }

  this->engine_->AddOp(anakin_op, "DetectionOutput",
                       {target_box, scores, prior_box}, {out});

  // Fixed settings that do not come from the op.
  this->engine_->AddOpAttr(anakin_op, "share_location", true);
  this->engine_->AddOpAttr(anakin_op, "variance_encode_in_target", false);
  this->engine_->AddOpAttr(anakin_op, "class_num", static_cast<int>(0));

  // Settings forwarded from the op.
  this->engine_->AddOpAttr(anakin_op, "background_id", background_id);
  this->engine_->AddOpAttr(anakin_op, "keep_top_k", top_k_to_keep);
  this->engine_->AddOpAttr(anakin_op, "code_type", mapped_code_type);
  this->engine_->AddOpAttr(anakin_op, "conf_thresh", conf_thresh);
  this->engine_->AddOpAttr(anakin_op, "nms_top_k", top_k_for_nms);
  this->engine_->AddOpAttr(anakin_op, "nms_thresh", nms_thresh);
  this->engine_->AddOpAttr(anakin_op, "nms_eta", eta);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
paddle/fluid/inference/anakin/convert/detection_out.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {

// Anakin converter for the detection_out op.
//
// Interface only: the call operator is implemented out of line. Both template
// parameters are passed straight through to the AnakinOpConverter base.
template <typename TargetT, ::anakin::Precision PrecisionT>
class DetectionOutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  DetectionOutOpConverter() = default;

  // Overrides the base-class conversion entry point. It receives the op's
  // protobuf description, the enclosing block description, the scope used for
  // variable lookup and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~DetectionOutOpConverter() = default;
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/dropout.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/dropout.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Translates a dropout op into an Anakin "Scale" op (X -> Out, no bias).
//
// The op must have exactly one "X" input and one "Mask" and "Out" output;
// "Mask" is checked but not wired into the engine.
//
// The scale factor depends on the dropout implementation:
//   * "upscale_in_train": activations were already rescaled while training,
//     so inference is the identity and the factor is 1.
//   * otherwise (including ops without a "dropout_implementation"
//     attribute): the factor is 1 - dropout_prob, as before.
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void DropoutOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

  auto x_name = op_desc.Input("X").front();
  auto out_name = op_desc.Output("Out").front();
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  this->engine_->AddOp(op_name, "Scale", {x_name}, {out_name});

  auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));

  // Only query the attribute when it exists, so that op descriptions without
  // it keep the previous behaviour.
  std::string dropout_impl;
  if (op_desc.HasAttr("dropout_implementation")) {
    dropout_impl =
        boost::get<std::string>(op_desc.GetAttr("dropout_implementation"));
  }
  const float factor =
      (dropout_impl == "upscale_in_train") ? 1.0f : 1.0f - dropout_prob;

  auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(
      std::vector<float>({factor}), this->engine_);

  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
  this->engine_->AddOpAttr(op_name, "axis", 0);
  this->engine_->AddOpAttr(op_name, "num_axes", 0);
  this->engine_->AddOpAttr(op_name, "bias_term", false);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
paddle/fluid/inference/anakin/convert/dropout.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {

// Anakin converter for the dropout op.
//
// Interface only: the call operator is implemented out of line. Both template
// parameters are passed straight through to the AnakinOpConverter base.
template <typename TargetT, ::anakin::Precision PrecisionT>
class DropoutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  DropoutOpConverter() = default;

  // Overrides the base-class conversion entry point. It receives the op's
  // protobuf description, the enclosing block description, the scope used for
  // variable lookup and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~DropoutOpConverter() = default;
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/elementwise.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
void
ElementwiseAddOpConverter
<
TargetT
,
PrecisionT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
y_name
=
op_desc
.
Input
(
"Y"
).
front
();
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
this
->
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
{
x_name
,
y_name
},
{
out_name
});
std
::
string
elementwise_type
=
"Add"
;
this
->
engine_
->
template
AddOpAttr
<
std
::
string
>(
op_name
,
"type"
,
elementwise_type
);
std
::
vector
<
float
>
coeff
=
{
1.0
,
1.0
};
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"coeff"
,
coeff
);
}
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionT
>
void
ElementwiseMulOpConverter
<
TargetT
,
PrecisionT
>::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
BlockDesc
&
block_desc
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
y_name
=
op_desc
.
Input
(
"Y"
).
front
();
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
this
->
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
{
x_name
,
y_name
},
{
out_name
});
std
::
string
elementwise_type
=
"Mul"
;
this
->
engine_
->
template
AddOpAttr
<
std
::
string
>(
op_name
,
"type"
,
elementwise_type
);
std
::
vector
<
float
>
coeff
=
{
1.0
,
1.0
};
this
->
engine_
->
template
AddOpAttr
<
PTuple
<
float
>
>
(
op_name
,
"coeff"
,
coeff
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_mul
,
ElementwiseMulOpConverter
);
paddle/fluid/inference/anakin/convert/elementwise.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {

// Anakin converter for the elementwise_add op.
//
// Interface only: the call operator is implemented out of line. Both template
// parameters are passed straight through to the AnakinOpConverter base.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ElementwiseAddOpConverter
    : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ElementwiseAddOpConverter() = default;

  // Overrides the base-class conversion entry point. It receives the op's
  // protobuf description, the enclosing block description, the scope used for
  // variable lookup and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ElementwiseAddOpConverter() = default;
};

// Anakin converter for the elementwise_mul op; same shape as the add
// converter above, with its own out-of-line call operator.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ElementwiseMulOpConverter
    : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ElementwiseMulOpConverter() = default;

  // Overrides the base-class conversion entry point; see the add converter
  // for the meaning of the arguments.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ElementwiseMulOpConverter() = default;
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/anakin/convert/fc.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Translates a mul or fc op into an Anakin "Dense" op.
//
// An op with three or more input slots is treated as having a bias: its
// weight is read from "W" and its input from "Input", and the "Bias"
// variable is attached as "weight_2". Otherwise the weight is read from "Y"
// and the input from "X".
//
// The weight (looked up in `scope`) must have at least two dimensions whose
// first two extents multiply to its element count. It is transposed from
// (w_m, w_k) row-major to (w_k, w_m) row-major before being handed to the
// engine. When the op carries an "enable_int8" attribute the transposed
// values (which must lie in [-128, 127]) are narrowed to char and stored in
// an AK_INT8 block together with the input/weight scales.
//
// `block_desc` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  auto input_names = op_desc.InputNames();
  bool with_bias = input_names.size() >= 3;

  std::string w_name = "Y";
  std::string i_name = "X";
  if (with_bias) {
    w_name = "W";
    i_name = "Input";
  }

  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  // get weights
  auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
  PADDLE_ENFORCE_NOT_NULL(y_v);
  auto weight_tensor = tensor_from_var(*y_v, platform::CPUPlace());
  auto weight_shape = framework::vectorize<int>(weight_tensor->dims());

  // Indices 0 and 1 are read below; reject lower-rank weights instead of
  // reading past the end of the shape vector.
  PADDLE_ENFORCE(weight_shape.size() >= 2UL,
                 "The weight of fc/mul in anakin should have at least 2 "
                 "dimensions");

  int out_dim = weight_shape[1];
  const int w_m = weight_shape[0];
  const int w_k = weight_shape[1];

  auto input_name = op_desc.Input(i_name).front();
  auto output_name = op_desc.Output("Out").front();

  this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
  this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
  this->engine_->AddOpAttr(op_name, "axis", 1);
  this->engine_->AddOpAttr(op_name, "out_dim", out_dim);

  auto *weight_data = weight_tensor->data<float>();
  PADDLE_ENFORCE(w_m * w_k == weight_tensor->numel());

  // Transpose: element (i, j) of the w_m x w_k source lands at (j, i).
  std::vector<float> trans_weight_data(weight_tensor->numel());
  for (int i = 0; i < w_m; i++) {
    for (int j = 0; j < w_k; j++) {
      trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
    }
  }

  int weight_num = weight_tensor->numel();
  // Note: this keys on the presence of the attribute, not on its value.
  bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
  if (enable_int8) {
    // Left-pad the shape with 1s up to four dimensions.
    if (weight_shape.size() < 4UL) {
      weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
    }
    ::anakin::saber::Shape anakin_shape(weight_shape);
    const float int8_range = 127.;
    float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
    auto weight_scale =
        boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
    // The block is handed to the engine right away via RegistBlock.
    // NOTE(review): presumably the engine owns it from then on -- confirm.
    PBlock<TargetT> *weight1 =
        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
    this->engine_->RegistBlock(weight1);

    std::vector<char> weight_int8;
    // The element count is known up front; avoid repeated reallocation.
    weight_int8.reserve(weight_num);
    for (int i = 0; i < weight_num; i++) {
      bool is_valid_int8 =
          ((trans_weight_data[i] >= -128) && (trans_weight_data[i] <= 127));
      PADDLE_ENFORCE(is_valid_int8,
                     "We are in anakin subgraph int8 mode, the weight of fc "
                     "should be in range [-128, 127]");
      weight_int8.push_back(static_cast<char>(trans_weight_data[i]));
    }
    memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
           static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
    weight1->d_tensor().set_shape(anakin_shape);
    weight1->d_tensor().copy_from(weight1->h_tensor());
    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
    this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
    this->engine_->Graph()->SetWeightsScale(
        op_name, {weight_scale[0] / int8_range}, false);
    this->engine_->AddTensorScale(input_name, in_scale / int8_range);
  } else {
    auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(trans_weight_data,
                                                            this->engine_);
    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
  }

  // get bias
  if (with_bias) {
    auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
    PADDLE_ENFORCE_NOT_NULL(b_v);
    auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
  }
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
paddle/fluid/inference/anakin/convert/fc.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Common base of the `fc` and `mul` converters. The conversion logic lives in
// operator(), which is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class FcBaseOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  FcBaseOpConverter() = default;
  virtual ~FcBaseOpConverter() = default;

  // Converts the Paddle op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;
};
// with bias
// Converter type used for the Paddle `fc` op; all behaviour is inherited from
// FcBaseOpConverter.
template <typename TargetT, ::anakin::Precision PrecisionT>
class FcOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
 public:
  FcOpConverter() = default;
};
// without bias
// Converter type used for the Paddle `mul` op; all behaviour is inherited from
// FcBaseOpConverter.
template <typename TargetT, ::anakin::Precision PrecisionT>
class MulOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
 public:
  MulOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/flatten.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/flatten.h"
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a Paddle `flatten` op into an Anakin "Reshape" op.
//
// The op must have exactly one "X" input and one "Out" output, and its "axis"
// attribute must be 1. The Reshape op is given dims {0, -1, 1, 1}.
// NOTE(review): presumably 0 keeps the input dimension and -1 is inferred by
// Anakin -- confirm against the Anakin Reshape documentation.
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void FlattenOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

  auto input = op_desc.Input("X").front();
  auto output = op_desc.Output("Out").front();

  int axis = boost::get<int>(op_desc.GetAttr("axis"));
  PADDLE_ENFORCE(axis == 1,
                 "the anakin flatten op converter now only support axis == 1.");

  std::vector<int> out_dims = {0, -1, 1, 1};
  // Anakin op names follow the "<paddle op type>:<output var>" convention.
  auto op_name = op_desc.Type() + ":" + output;
  this->engine_->AddOp(op_name, "Reshape", {input}, {output});
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Register the converter for the Paddle `flatten` op.
REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
paddle/fluid/inference/anakin/convert/flatten.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter for the Paddle `flatten` op. operator() is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class FlattenOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  FlattenOpConverter() = default;
  virtual ~FlattenOpConverter() = default;

  // Converts the Paddle op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/helper.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Returns a newly allocated LoDTensor that holds a synchronous copy of the
// LoDTensor stored in `var`, placed on `place`. The copy is resized to the
// source dims before the data is copied.
std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable &var, const platform::Place &place) {
  const auto &source = var.Get<framework::LoDTensor>();

  std::unique_ptr<framework::LoDTensor> copy(new framework::LoDTensor());
  copy->Resize(source.dims());
  TensorCopySync(source, place, copy.get());
  return copy;
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/helper.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"
using
anakin
::
saber
::
Shape
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
AK_INT8
;
using
anakin
::
PBlock
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Copies the LoDTensor held by `var` onto `place` and returns the copy.
// The caller owns the returned tensor.
std::unique_ptr<framework::LoDTensor> tensor_from_var(
    const framework::Variable &var, const platform::Place &place);
// Builds an AK_FLOAT Anakin PBlock from the float data of `tensor`.
//
// `shape_vec` is left-padded with 1s until it has four dimensions. The block
// is allocated with `new` and handed to `engine->RegistBlock()`.
// NOTE(review): presumably the engine takes ownership of the block there --
// confirm against AnakinEngine::RegistBlock.
// `tensor.numel()` floats are copied into the host tensor, which is then
// copied to the device tensor. No check is made that the padded shape matches
// `tensor.numel()`.
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT> *pblock_from_tensor(const framework::LoDTensor &tensor,
                                    std::vector<int> shape_vec,
                                    AnakinEngine<TargetT, PrecisionT> *engine) {
  if (shape_vec.size() < 4) {
    shape_vec.insert(shape_vec.begin(), 4 - shape_vec.size(), 1);
  }
  Shape block_shape(shape_vec);

  auto *block = new PBlock<TargetT>(block_shape, AK_FLOAT);
  engine->RegistBlock(block);

  auto *host_buffer = static_cast<float *>(block->h_tensor().mutable_data());
  std::copy_n(tensor.data<float>(), tensor.numel(), host_buffer);

  // Mirror the host data on the device side.
  block->d_tensor().set_shape(block_shape);
  block->d_tensor().copy_from(block->h_tensor());
  return block;
}
// Builds an AK_FLOAT Anakin PBlock from the floats in `vec`.
//
// `shape_vec` is left-padded with 1s until it has four dimensions. The block
// is allocated with `new` and handed to `engine->RegistBlock()`.
// NOTE(review): presumably the engine takes ownership of the block there --
// confirm against AnakinEngine::RegistBlock.
// All of `vec` is copied into the host tensor, which is then copied to the
// device tensor. No check is made that the padded shape matches `vec.size()`.
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT> *pblock_from_vector(const std::vector<float> &vec,
                                    std::vector<int> shape_vec,
                                    AnakinEngine<TargetT, PrecisionT> *engine) {
  if (shape_vec.size() < 4) {
    shape_vec.insert(shape_vec.begin(), 4 - shape_vec.size(), 1);
  }
  Shape block_shape(shape_vec);

  auto *block = new PBlock<TargetT>(block_shape, AK_FLOAT);
  engine->RegistBlock(block);

  float *host_buffer = static_cast<float *>(block->h_tensor().mutable_data());
  std::copy(vec.begin(), vec.end(), host_buffer);

  // Mirror the host data on the device side.
  block->d_tensor().set_shape(block_shape);
  block->d_tensor().copy_from(block->h_tensor());
  return block;
}
// Convenience overload: wraps `vec` in a PBlock of shape {1, 1, 1, vec.size()}
// by forwarding to the shape-taking overload.
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT> *pblock_from_vector(const std::vector<float> &vec,
                                    AnakinEngine<TargetT, PrecisionT> *engine) {
  const int length = static_cast<int>(vec.size());
  const std::vector<int> row_shape = {1, 1, 1, length};
  return pblock_from_vector<TargetT, PrecisionT>(vec, row_shape, engine);
}
// Builds a PBlock from the LoDTensor stored in `var`: the tensor is first
// copied to CPU, then passed to pblock_from_tensor together with its dims.
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT> *pblock_from_var(const framework::Variable &var,
                                 AnakinEngine<TargetT, PrecisionT> *engine) {
  auto host_tensor = tensor_from_var(var, platform::CPUPlace());
  auto dims = framework::vectorize<int>(host_tensor->dims());
  return pblock_from_tensor<TargetT, PrecisionT>(*host_tensor, dims, engine);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/im2sequence.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a Paddle `im2sequence` op into an Anakin "Im2Sequence" op.
//
// The op must have one "X" input, one "Out" output and no "Y" output. The
// Paddle attributes "paddings", "strides" and "kernels" are forwarded as the
// Anakin attributes "paddings", "strides" and "window_size"; "dilations" is
// always {1, 1}.
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void Im2SequenceConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc desc(op, nullptr);
  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(desc.Output("Y").size(), 0);
  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);

  auto in_var = desc.Input("X").front();
  auto out_var = desc.Output("Out").front();
  // Anakin op names follow the "<paddle op type>:<output var>" convention.
  auto node_name = desc.Type() + ":" + out_var;

  this->engine_->AddOp(node_name, "Im2Sequence", {in_var}, {out_var});

  // Read every attribute before any of them is attached to the new op.
  std::vector<int> unit_dilations = {1, 1};
  auto pad_attr = boost::get<std::vector<int>>(desc.GetAttr("paddings"));
  auto stride_attr = boost::get<std::vector<int>>(desc.GetAttr("strides"));
  auto kernel_attr = boost::get<std::vector<int>>(desc.GetAttr("kernels"));

  this->engine_->template AddOpAttr<PTuple<int>>(node_name, "paddings",
                                                 pad_attr);
  this->engine_->template AddOpAttr<PTuple<int>>(node_name, "strides",
                                                 stride_attr);
  this->engine_->template AddOpAttr<PTuple<int>>(node_name, "window_size",
                                                 kernel_attr);
  this->engine_->template AddOpAttr<PTuple<int>>(node_name, "dilations",
                                                 unit_dilations);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Register the converter for the Paddle `im2sequence` op.
REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter);
paddle/fluid/inference/anakin/convert/im2sequence.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter for the Paddle `im2sequence` op. operator() is defined out of
// line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class Im2SequenceConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  Im2SequenceConverter() = default;
  virtual ~Im2SequenceConverter() = default;

  // Converts the Paddle op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/op_converter.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "framework/core/types.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "saber/saber_types.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Base class of the converters that translate Paddle ops into Anakin ops.
//
// Concrete converters override operator(). The non-virtual members drive the
// conversion of a single op (ConvertOp), of every op in a block
// (ConvertBlock), and of a block followed by engine finalisation
// (ConvertBlockToAnakinEngine).
template <typename TargetT, ::anakin::Precision PrecisionT>
class AnakinOpConverter {
  using AnakinEngineT = AnakinEngine<TargetT, PrecisionT>;

 public:
  AnakinOpConverter() = default;

  // Per-op conversion hook. The base implementation does nothing.
  virtual void operator()(const framework::proto::OpDesc &op,
                          const framework::BlockDesc &block_desc,
                          const framework::Scope &scope, bool test_mode) {}

  // Looks up the converter registered for the type of `op`, attaches `engine`
  // to it and runs it. Fails via PADDLE_ENFORCE_NOT_NULL when no converter is
  // registered for the (possibly aliased) op type. `parameters` is accepted
  // for interface compatibility but is not used here.
  void ConvertOp(const framework::proto::OpDesc &op,
                 const framework::BlockDesc &block_desc,
                 const std::unordered_set<std::string> &parameters,
                 const framework::Scope &scope, AnakinEngineT *engine,
                 bool test_mode = false) {
    framework::OpDesc op_desc(op, nullptr);
    std::string op_type = op_desc.Type();

    // These op types are handled by the converter of another op type.
    if (op_type == "depthwise_conv2d") op_type = "conv2d";
    if (op_type == "reshape2") op_type = "reshape";
    if (op_type == "transpose2") op_type = "transpose";
    if (op_type == "flatten2") op_type = "flatten";

    AnakinOpConverter *it =
        Registry<AnakinOpConverter>::Global().Lookup(op_type);
    PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type);
    it->SetEngine(engine);
    (*it)(op, block_desc, scope, test_mode);
  }

  // Converts every op of `block_desc` in order, holding `mutex_` for the whole
  // duration.
  void ConvertBlock(framework::BlockDesc *block_desc,
                    const std::unordered_set<std::string> &parameters,
                    const framework::Scope &scope, AnakinEngineT *engine) {
    std::unique_lock<std::mutex> lock(mutex_);
    framework::proto::BlockDesc *block = block_desc->Proto();
    for (auto i = 0; i < block->ops_size(); i++) {
      auto &op = block->ops(i);
      ConvertOp(op, *block_desc, parameters, scope, engine);
    }
  }

  // The scope here should be inited with the parameter vars.
  //
  // Converts `block_desc`, registers `outputs` with the Anakin graph, freezes
  // the engine, applies the recorded tensor scales, sets a 4-D input shape for
  // every non-parameter entry of `inputs`, and finally optimizes and
  // initialises the engine.
  void ConvertBlockToAnakinEngine(
      framework::BlockDesc *block_desc, framework::Scope *scope,
      const std::vector<std::string> &inputs,
      const std::unordered_set<std::string> &parameters,
      const std::vector<std::string> &outputs, AnakinEngineT *engine) {
    ConvertBlock(block_desc, parameters, *scope, engine);

    // The batch dimension of every input is taken from the engine.
    int max_batch_size = engine->GetMaxBatchSize();
    PADDLE_ENFORCE(max_batch_size > 0,
                   "the max_batch_size setted from config->EnableAnakinEngine "
                   "must largger than 0");
    // If the user does not specify this variable, we use the input shape from
    // the block_desc.
    auto max_input_shape = engine->GetMaxInputShape();
    std::map<std::string, std::vector<int>> temp_max_input_shape;

    // Register outputs with anakin using the RegistVar interface before Freeze.
    // Note that RegistVar's parameters can only be outputs, not inputs.
    for (auto &output : outputs) {
      engine->Graph()->RegistVar(output);
    }
    engine->Freeze();

    // Add scale for tensor in int8 mode.
    auto tensor_scales = engine->GetTensorScales();
    for (auto &item : tensor_scales) {
      engine->Graph()->SetVarScale(item.first, item.second);
    }

    for (auto &input : inputs) {
      if (parameters.count(input)) continue;

      std::vector<int> input_shape(4);
      input_shape[0] = max_batch_size;
      if (max_input_shape.count(input)) {
        // Dimensions 1..3 come from the user-supplied maximum shape.
        PADDLE_ENFORCE(max_input_shape[input].size() == 4,
                       "the dimensions of max_input_shape setted from "
                       "config->EnableAnakinEngine must be 4");
        for (int i = 1; i < 4; i++) {
          input_shape[i] = max_input_shape[input][i];
        }
      } else {
        // Otherwise fall back to the shape recorded in the block description.
        auto *var = block_desc->FindVar(input);
        PADDLE_ENFORCE(var, "no variable called %s", input);

        auto var_shape = var->GetShape();
        // Diagnostic only: goes through the verbose log rather than stdout.
        VLOG(3) << "input :" << input;
        PADDLE_ENFORCE(var_shape.size() == 4);

        for (size_t i = 1; i < var_shape.size(); i++) {
          input_shape[i] = var_shape[i];
        }
      }
      temp_max_input_shape[input] = input_shape;
      engine->SetInputShape(input, input_shape);
    }
    engine->SetMaxInputShape(temp_max_input_shape);
    engine->Optimize();
    engine->InitNet();
  }

  // Attaches the engine that operator() implementations add ops to.
  void SetEngine(AnakinEngineT *engine) { engine_ = engine; }
  virtual ~AnakinOpConverter() {}

 protected:
  // Initialised so the member never holds an indeterminate value; nothing in
  // this class assigns it.
  bool test_mode_{false};
  AnakinEngineT *engine_{nullptr};

 private:
  std::unordered_map<std::string, AnakinOpConverter<TargetT, PrecisionT> *>
      converters_;
  framework::Scope *scope_{nullptr};
  // Serialises ConvertBlock.
  std::mutex mutex_;
};

// Explicit instantiations for the supported target/precision combinations.
template class AnakinOpConverter<::anakin::saber::NV,
                                 ::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::NV,
                                 ::anakin::Precision::INT8>;
#ifdef ANAKIN_X86_PLACE
template class AnakinOpConverter<::anakin::saber::X86,
                                 ::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::X86,
                                 ::anakin::Precision::INT8>;
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Defines a Registrar-derived struct whose constructor logs and registers
// Converter__ under the name #op_type__ in the Registry of
// AnakinOpConverter<place_class__, precision_class__>, then defines one global
// instance of it and a Touch_anakin_<op>_<place>_<precision>() function that
// calls Touch() on that instance and returns 0.
#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, \
place_type__, place_class__, \
precision_type__, precision_class__) \
struct anakin_##op_type__##_##place_type__##_##precision_type__##_converter \
: public ::paddle::framework::Registrar { \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter() { \
LOG(INFO) << "register convert " << #op_type__ << " "; \
::paddle::inference::Registry< \
::paddle::inference::anakin::AnakinOpConverter< \
place_class__, precision_class__>>::Global() \
.Register<Converter__>(#op_type__); \
} \
}; \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter__; \
int Touch_anakin_##op_type__##_##place_type__##_##precision_type__() { \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter__ \
.Touch(); \
return 0; \
}
// Expands to its arguments unchanged. Used below to wrap a comma-separated
// template argument list inside a macro argument.
#define WRAP(...) __VA_ARGS__
// Registers Converter__<NV, precision_type__> for op_type__ with place tag
// CUDA.
#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, \
precision_type__) \
REGISTER_ANAKIN_OP_CONVERTER_BASE( \
op_type__, \
::paddle::inference::anakin::Converter__<WRAP( \
::anakin::saber::NV, ::anakin::Precision::precision_type__)>, \
CUDA, ::anakin::saber::NV, precision_type__, \
::anakin::Precision::precision_type__)
// Registers Converter__<X86, precision_type__> for op_type__ with place tag
// CPU.
#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, \
precision_type__) \
REGISTER_ANAKIN_OP_CONVERTER_BASE( \
op_type__, \
::paddle::inference::anakin::Converter__<WRAP( \
::anakin::saber::X86, ::anakin::Precision::precision_type__)>, \
CPU, ::anakin::saber::X86, precision_type__, \
::anakin::Precision::precision_type__)
// REGISTER_ANAKIN_OP_CONVERTER registers a converter for FP32 and INT8: on
// CUDA and CPU when both PADDLE_WITH_CUDA and ANAKIN_X86_PLACE are defined, on
// CUDA only when just PADDLE_WITH_CUDA is defined.
// NOTE(review): the macro is left undefined when PADDLE_WITH_CUDA is not
// defined.
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#endif
// Declares the Touch_anakin_<op>_<place>_<precision>() function emitted by
// REGISTER_ANAKIN_OP_CONVERTER_BASE and defines an int initialised by calling
// it.
#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__ \
UNUSED = \
Touch_anakin_##op_type__##_##place_type__##_##precision_type__();
// USE_ANAKIN_CONVERTER references the FP32 converters of an op and
// USE_INT8_ANAKIN_CONVERTER the INT8 ones, for the same place combinations as
// REGISTER_ANAKIN_OP_CONVERTER above.
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)
#endif
paddle/fluid/inference/anakin/convert/pool2d.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/pool2d.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a Paddle `pool2d` op into an Anakin "Pooling" op.
//
// The op must have exactly one "X" input and one "Out" output. The Paddle
// "pooling_type" is mapped to the Anakin "method" attribute:
//   "max" -> "MAX"
//   "avg" -> "AVGEXC" when either of the first two paddings is non-zero,
//            "AVG" otherwise
// Any other pooling type raises via PADDLE_THROW.
// "ksize", "strides", "paddings" and "global_pooling" are forwarded, and
// "cmp_out_shape_floor_as_conv" is set to the negation of "ceil_mode".
//
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void Pool2dOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto x_name = op_desc.Input("X").front();
  auto y_name = op_desc.Output("Out").front();
  // Anakin op names follow the "<paddle op type>:<output var>" convention.
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  bool global_pooling = boost::get<bool>(op_desc.GetAttr("global_pooling"));
  std::string pool_type =
      boost::get<std::string>(op_desc.GetAttr("pooling_type"));
  std::vector<int> ksize =
      boost::get<std::vector<int>>(op_desc.GetAttr("ksize"));
  std::vector<int> strides =
      boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  std::vector<int> paddings =
      boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));

  std::string anakin_pool_type;
  if (pool_type == "max") {
    anakin_pool_type = "MAX";
  } else if (pool_type == "avg") {
    if (paddings[0] || paddings[1]) {
      anakin_pool_type = "AVGEXC";
    } else {
      anakin_pool_type = "AVG";
    }
  } else {
    // This is the Anakin converter; the message previously named TensorRT.
    PADDLE_THROW("Anakin unsupported pooling type!");
  }

  this->engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
  this->engine_->AddOpAttr(op_name, "method", anakin_pool_type);
  this->engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
  this->engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Register the converter for the Paddle `pool2d` op.
REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter);
paddle/fluid/inference/anakin/convert/pool2d.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter for the Paddle `pool2d` op. operator() is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class Pool2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  Pool2dOpConverter() = default;
  virtual ~Pool2dOpConverter() = default;

  // Converts the Paddle op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/relu.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include <algorithm>
#include <map>
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a Paddle `relu` op into an Anakin "ReLU" op whose "alpha"
// attribute is 0.
//
// The op must have exactly one "X" input and one "Out" output.
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void ReluOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc desc(op, nullptr);
  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);

  auto in_var = desc.Input("X").front();
  auto out_var = desc.Output("Out").front();
  // Anakin op names follow the "<paddle op type>:<output var>" convention.
  auto node_name = desc.Type() + ":" + out_var;

  this->engine_->AddOp(node_name, "ReLU", {in_var}, {out_var});
  this->engine_->AddOpAttr(node_name, "alpha", 0);
}
// Converts a Paddle `leaky_relu` op into an Anakin "ReLU" op whose "alpha"
// attribute is the float "alpha" attribute of the Paddle op.
//
// The op must have exactly one "X" input and one "Out" output.
// `block_desc`, `scope` and `test_mode` are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void LeakyReluOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc desc(op, nullptr);
  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);

  auto in_var = desc.Input("X").front();
  auto out_var = desc.Output("Out").front();
  // Anakin op names follow the "<paddle op type>:<output var>" convention.
  auto node_name = desc.Type() + ":" + out_var;

  // The attribute is read before the op is added to the engine.
  float negative_slope = boost::get<float>(desc.GetAttr("alpha"));

  this->engine_->AddOp(node_name, "ReLU", {in_var}, {out_var});
  this->engine_->AddOpAttr(node_name, "alpha", negative_slope);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Register the converters for the Paddle `relu` and `leaky_relu` ops.
REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter);
paddle/fluid/inference/anakin/convert/relu.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converter for the Paddle `relu` op. operator() is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ReluOpConverter() = default;
  virtual ~ReluOpConverter() = default;

  // Converts the Paddle op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;
};
// Converter for the Paddle `leaky_relu` op. operator() is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class LeakyReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  LeakyReluOpConverter() = default;
  virtual ~LeakyReluOpConverter() = default;

  // Converts the Paddle op described by `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/reshape.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/reshape.h"
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid reshape op description into an Anakin "Reshape" op.
//
// The op must have exactly one "X" input and exactly one "Out" output. The
// Anakin op is named "<op type>:<output name>". The "shape" attribute is
// padded with trailing 1s until it has at least four entries and is then
// attached to the Anakin op as its "dims" attribute.
//
// block_desc, scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void ReshapeOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

  auto in_name = op_desc.Input("X").front();
  auto out_name = op_desc.Output("Out").front();
  auto anakin_op_name = op_desc.Type() + ":" + out_name;

  this->engine_->AddOp(anakin_op_name, "Reshape", {in_name}, {out_name});

  // Pad the requested shape with 1s so that it has at least four dimensions.
  auto dims = boost::get<std::vector<int>>(op_desc.GetAttr("shape"));
  while (dims.size() < 4) {
    dims.push_back(1);
  }
  this->engine_->template AddOpAttr<PTuple<int>>(anakin_op_name, "dims", dims);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `reshape` and the ReshapeOpConverter class template. The macro is defined
// outside this file; presumably it makes the converter discoverable by op
// name -- confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
reshape
,
ReshapeOpConverter
);
paddle/fluid/inference/anakin/convert/reshape.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares ReshapeOpConverter, an AnakinOpConverter<TargetT, PrecisionT>
// subclass. Only the interface is declared here: the call operator that
// overrides the base-class entry point is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ReshapeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ReshapeOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ReshapeOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/roi_align.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/roi_align.h"
#include <algorithm>
#include <map>
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid roi_align op description into an Anakin "RoiAlign" op.
//
// The op must have exactly one "X" input, one "ROIs" input and one "Out"
// output. The Anakin op is named "<op type>:<output name>" and takes the X
// and ROIs tensors as its two inputs. The spatial_scale (float),
// pooled_height, pooled_width and sampling_ratio (int) attributes are copied
// onto the Anakin op under the same names.
//
// block_desc, scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void RoiAlignOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Input("ROIs").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto feature_name = op_desc.Input("X").front();
  auto rois_name = op_desc.Input("ROIs").front();
  auto result_name = op_desc.Output("Out").front();
  auto anakin_op_name = op_desc.Type() + ":" + result_name;

  // All attributes are read before the engine is touched.
  float spatial_scale = boost::get<float>(op_desc.GetAttr("spatial_scale"));
  int pooled_height = boost::get<int>(op_desc.GetAttr("pooled_height"));
  int pooled_width = boost::get<int>(op_desc.GetAttr("pooled_width"));
  int sampling_ratio = boost::get<int>(op_desc.GetAttr("sampling_ratio"));

  this->engine_->AddOp(anakin_op_name, "RoiAlign", {feature_name, rois_name},
                       {result_name});
  this->engine_->AddOpAttr(anakin_op_name, "spatial_scale", spatial_scale);
  this->engine_->AddOpAttr(anakin_op_name, "pooled_height", pooled_height);
  this->engine_->AddOpAttr(anakin_op_name, "pooled_width", pooled_width);
  this->engine_->AddOpAttr(anakin_op_name, "sampling_ratio", sampling_ratio);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `roi_align` and the RoiAlignOpConverter class template. The macro is
// defined outside this file; presumably it makes the converter discoverable
// by op name -- confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
roi_align
,
RoiAlignOpConverter
);
paddle/fluid/inference/anakin/convert/roi_align.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares RoiAlignOpConverter, an AnakinOpConverter<TargetT, PrecisionT>
// subclass. Only the interface is declared here: the call operator that
// overrides the base-class entry point is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class RoiAlignOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  RoiAlignOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~RoiAlignOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/scale.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/scale.h"
#include <algorithm>
#include <map>
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid scale op description into an Anakin "Power" op.
//
// The op must have exactly one "X" input and exactly one "Out" output. The
// Anakin op is named "<op type>:<output name>". The fluid attributes map onto
// the Anakin op as follows:
//   "bias"  -> "shift"
//   "scale" -> "scale"
//   "power" is always set to 1.0f
// Conversion is rejected (PADDLE_ENFORCE) unless "bias_after_scale" is true.
//
// block_desc, scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void ScaleOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
  auto input_name = op_desc.Input("X").front();
  auto output_name = op_desc.Output("Out").front();

  float scale = boost::get<float>(op_desc.GetAttr("scale"));
  float bias = boost::get<float>(op_desc.GetAttr("bias"));
  // The attribute is a bool; keep it a bool rather than widening it to float
  // before using it as a condition.
  bool bias_after_scale =
      boost::get<bool>(op_desc.GetAttr("bias_after_scale"));
  PADDLE_ENFORCE(bias_after_scale,
                 "The anakin scale layer only support bias after scale now.");

  this->engine_->AddOp(op_name, "Power", {input_name}, {output_name});
  this->engine_->AddOpAttr(op_name, "shift", bias);
  this->engine_->AddOpAttr(op_name, "scale", scale);
  this->engine_->AddOpAttr(op_name, "power", static_cast<float>(1.0));
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `scale` and the ScaleOpConverter class template. The macro is defined
// outside this file; presumably it makes the converter discoverable by op
// name -- confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
scale
,
ScaleOpConverter
);
paddle/fluid/inference/anakin/convert/scale.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares ScaleOpConverter, an AnakinOpConverter<TargetT, PrecisionT>
// subclass. Only the interface is declared here: the call operator that
// overrides the base-class entry point is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ScaleOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ScaleOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ScaleOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/shuffle_channel.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/shuffle_channel.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid shuffle_channel op description into an Anakin
// "ShuffleChannel" op.
//
// The op must have exactly one "X" input and exactly one "Out" output. The
// Anakin op is named "<op type>:<output name>", and the integer "group"
// attribute is copied onto it under the same name.
//
// block_desc, scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void ShuffleChannelOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto in_name = op_desc.Input("X").front();
  auto out_name = op_desc.Output("Out").front();
  auto anakin_op_name = op_desc.Type() + ":" + out_name;

  this->engine_->AddOp(anakin_op_name, "ShuffleChannel", {in_name},
                       {out_name});

  int group = boost::get<int>(op_desc.GetAttr("group"));
  this->engine_->AddOpAttr(anakin_op_name, "group", group);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `shuffle_channel` and the ShuffleChannelOpConverter class template. The
// macro is defined outside this file; presumably it makes the converter
// discoverable by op name -- confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
shuffle_channel
,
ShuffleChannelOpConverter
);
paddle/fluid/inference/anakin/convert/shuffle_channel.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares ShuffleChannelOpConverter, an
// AnakinOpConverter<TargetT, PrecisionT> subclass. Only the interface is
// declared here: the call operator that overrides the base-class entry point
// is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class ShuffleChannelOpConverter
    : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  ShuffleChannelOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ShuffleChannelOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/softmax.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/softmax.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid softmax op description into an Anakin "Softmax" op.
//
// The op must have exactly one "X" input and exactly one "Out" output. The
// input variable is looked up in block_desc to obtain its shape; the Anakin
// op's "axis" attribute is set to the index of the last dimension of that
// shape (rank - 1). The Anakin op is named "<op type>:<output name>".
//
// Fails via PADDLE_ENFORCE if the input variable cannot be found in
// block_desc. scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void SoftMaxOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  // Validate the output list as well: front() on an empty vector is
  // undefined behaviour.
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

  auto input = op_desc.Input("X").front();
  auto output = op_desc.Output("Out").front();
  auto op_name = op_desc.Type() + ":" + output;

  auto input_var_desc = block_desc.FindVar(input);
  PADDLE_ENFORCE(input_var_desc,
                 "Can't find %s variable when running Anakin Softmax "
                 "converter.",
                 input);
  auto input_shape_in_fluid = input_var_desc->GetShape();
  // Convert to int before subtracting so an empty shape cannot wrap around
  // in unsigned arithmetic.
  const int last_axis = static_cast<int>(input_shape_in_fluid.size()) - 1;

  this->engine_->AddOp(op_name, "Softmax", {input}, {output});
  this->engine_->AddOpAttr(op_name, "axis", last_axis);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `softmax` and the SoftMaxOpConverter class template. The macro is defined
// outside this file; presumably it makes the converter discoverable by op
// name -- confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
softmax
,
SoftMaxOpConverter
);
paddle/fluid/inference/anakin/convert/softmax.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares SoftMaxOpConverter, an AnakinOpConverter<TargetT, PrecisionT>
// subclass. Only the interface is declared here: the call operator that
// overrides the base-class entry point is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class SoftMaxOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  SoftMaxOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~SoftMaxOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/split.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/split.h"
#include <algorithm>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid split op description into an Anakin "Slice" op.
//
// The op must have exactly one "X" input and at least one "Out" output; all
// "Out" names become outputs of the Anakin op, which is named
// "<op type>:<first output name>". The "sections" attribute must contain more
// than one entry. Its running sums, excluding the final total, are attached
// as the "slice_point" attribute; "axis" is copied through unchanged and
// "slice_dim" is always set to 4.
//
// block_desc, scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void SplitOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  // Validate before calling front(): front() on an empty vector is undefined
  // behaviour.
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  auto input_name = op_desc.Input("X").front();
  auto y_names = op_desc.Output("Out");
  PADDLE_ENFORCE(!y_names.empty(),
                 "anakin split op converter: the op should have at least one "
                 "output");
  auto op_name = op_desc.Type() + ":" + y_names.front();

  int axis = boost::get<int>(op_desc.GetAttr("axis"));
  std::vector<int> output_lengths =
      boost::get<std::vector<int>>(op_desc.GetAttr("sections"));

  const int split_num = static_cast<int>(output_lengths.size());
  PADDLE_ENFORCE(split_num > 1,
                 "anakin split op converter: the split num should > 1");

  // Cumulative section lengths mark where each slice ends; the last section
  // needs no explicit end point.
  int num_sum = 0;
  std::vector<int> slice_point;
  slice_point.reserve(static_cast<size_t>(split_num - 1));
  for (int i = 0; i + 1 < split_num; ++i) {
    num_sum += output_lengths[i];
    slice_point.push_back(num_sum);
  }

  this->engine_->AddOp(op_name, "Slice", {input_name}, y_names);
  this->engine_->AddOpAttr(op_name, "axis", axis);
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "slice_point",
                                                 slice_point);
  // slice_dim is useless in anakin
  this->engine_->AddOpAttr(op_name, "slice_dim", 4);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `split` and the SplitOpConverter class template. The macro is defined
// outside this file; presumably it makes the converter discoverable by op
// name -- confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
split
,
SplitOpConverter
);
paddle/fluid/inference/anakin/convert/split.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares SplitOpConverter, an AnakinOpConverter<TargetT, PrecisionT>
// subclass. Only the interface is declared here: the call operator that
// overrides the base-class entry point is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class SplitOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  SplitOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~SplitOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/sum.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/sum.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Translates a fluid sum op description into an Anakin "Eltwise" op.
//
// The op must have exactly two "X" inputs and exactly one "Out" output. The
// Anakin op is named "<op type>:<output name>", receives both inputs, and is
// configured with "coeff" = {1, 1} and "type" = "Add".
//
// block_desc, scope and test_mode are not used by this converter.
template <typename TargetT, ::anakin::Precision PrecisionT>
void SumOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto addend_names = op_desc.Input("X");
  auto result_name = op_desc.Output("Out").front();
  auto anakin_op_name = op_desc.Type() + ":" + result_name;

  // One unit coefficient per input.
  std::vector<float> unit_coeffs(2, 1.0f);
  std::string eltwise_kind("Add");

  this->engine_->AddOp(anakin_op_name, "Eltwise", addend_names,
                       {result_name});
  this->engine_->template AddOpAttr<PTuple<float>>(anakin_op_name, "coeff",
                                                   unit_coeffs);
  this->engine_->template AddOpAttr<std::string>(anakin_op_name, "type",
                                                 eltwise_kind);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Invokes the project's converter-registration macro with the op type name
// `sum` and the SumOpConverter class template. The macro is defined outside
// this file; presumably it makes the converter discoverable by op name --
// confirm against the macro definition.
REGISTER_ANAKIN_OP_CONVERTER
(
sum
,
SumOpConverter
);
paddle/fluid/inference/anakin/convert/sum.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares SumOpConverter, an AnakinOpConverter<TargetT, PrecisionT>
// subclass. Only the interface is declared here: the call operator that
// overrides the base-class entry point is defined out of line.
template <typename TargetT, ::anakin::Precision PrecisionT>
class SumOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  SumOpConverter() = default;

  // Conversion entry point; receives the op's proto description, the
  // enclosing block description, a scope and a test-mode flag.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~SumOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/test_activation_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/activation.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Shared driver for the activation converter tests.
//
// Builds an AnakinConvertValidation<TargetT, FP32> with no parameters,
// declares a {10, 6, 1, 1} input "act-X" and a same-shaped output "act-Out",
// creates an op of type `op_type` wired X -> Out, and runs the validator with
// Execute(5). "swish" additionally gets beta = 1.0f and "relu6" gets
// threshold = 6.0f.
//
// op_type: fluid op type to test.
// context: device context handed to the validator.
// use_gpu: forwarded unchanged to the validator.
template <typename TargetT>
static void test_activation_op(const std::string &op_type,
                               const platform::DeviceContext &context,
                               bool use_gpu) {
  std::unordered_set<std::string> no_params;
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> checker(
      no_params, &scope, context, use_gpu);
  checker.DeclInputVar("act-X", {10, 6, 1, 1});
  checker.DeclOutputVar("act-Out", {10, 6, 1, 1});

  framework::OpDesc op_desc;
  op_desc.SetType(op_type);
  op_desc.SetInput("X", {"act-X"});
  op_desc.SetOutput("Out", {"act-Out"});

  // Op-specific attributes; the two op types are mutually exclusive.
  if (op_type == "swish") {
    op_desc.SetAttr("beta", 1.0f);
  } else if (op_type == "relu6") {
    op_desc.SetAttr("threshold", 6.0f);
  }

  LOG(INFO) << "set OP";
  checker.SetOp(*op_desc.Proto());

  LOG(INFO) << "execute";
  checker.Execute(5);
}
#ifdef PADDLE_WITH_CUDA
// Runs the shared activation driver for "sigmoid" on the NV target, using a
// CUDA device context built from CUDAPlace(0).
TEST(sigm_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_activation_op<::anakin::saber::NV>("sigmoid", dev_ctx, true);
}
// Runs the shared activation driver for "tanh" on the NV target, using a
// CUDA device context built from CUDAPlace(0).
TEST(tanh_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_activation_op<::anakin::saber::NV>("tanh", dev_ctx, true);
}
// Runs the shared activation driver for "relu6" on the NV target, using a
// CUDA device context built from CUDAPlace(0).
TEST(relu6_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_activation_op<::anakin::saber::NV>("relu6", dev_ctx, true);
}
// Runs the shared activation driver for "swish" on the NV target, using a
// CUDA device context built from CUDAPlace(0).
TEST(swish_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_activation_op<::anakin::saber::NV>("swish", dev_ctx, true);
}
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Project macros invoked for each op type exercised by the tests above
// (sigmoid, tanh, relu6, swish): first USE_OP, then USE_ANAKIN_CONVERTER.
// The macros are defined outside this file; presumably they pull the op and
// converter registrations into this test binary -- confirm against their
// definitions.
USE_OP
(
sigmoid
);
USE_OP
(
tanh
);
USE_OP
(
relu6
);
USE_OP
(
swish
);
USE_ANAKIN_CONVERTER
(
sigmoid
);
USE_ANAKIN_CONVERTER
(
tanh
);
USE_ANAKIN_CONVERTER
(
relu6
);
USE_ANAKIN_CONVERTER
(
swish
);
paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Shared driver for the affine_channel converter tests.
//
// Builds an AnakinConvertValidation<TargetT, FP32> whose parameter set is
// {"scale", "bias"}, declares a {1, 3, 5, 2} input "x" and output "out" plus
// {3}-shaped parameter variables "scale" and "bias", creates an
// affine_channel op with data_layout "NCHW", and runs the validator with
// Execute(1).
//
// context: device context handed to the validator.
// use_gpu: forwarded unchanged to the validator.
template <typename TargetT>
void test_affine_channel_op(const platform::DeviceContext &context,
                            bool use_gpu) {
  // Names of the variables that are parameters rather than plain inputs.
  std::unordered_set<std::string> param_names({"scale", "bias"});
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> checker(
      param_names, &scope, context, use_gpu);
  checker.DeclInputVar("x", {1, 3, 5, 2});
  checker.DeclOutputVar("out", {1, 3, 5, 2});
  checker.DeclParamVar("scale", {3});
  checker.DeclParamVar("bias", {3});

  // Describe the op under test.
  framework::OpDesc op_desc;
  op_desc.SetType("affine_channel");
  op_desc.SetInput("X", {"x"});
  op_desc.SetInput("Bias", {"bias"});
  op_desc.SetInput("Scale", {"scale"});
  op_desc.SetOutput("Out", {"out"});

  // The layout is set explicitly to NCHW.
  op_desc.SetAttr("data_layout", std::string("NCHW"));

  checker.SetOp(*op_desc.Proto());
  checker.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
// Runs the affine_channel driver on the NV target, using a CUDA device
// context built from CUDAPlace(0).
TEST(affine_channel_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_affine_channel_op<::anakin::saber::NV>(dev_ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// Runs the affine_channel driver on the X86 target, using a CPU device
// context.
TEST(affine_channel_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_affine_channel_op<::anakin::saber::X86>(dev_ctx, false);
}
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Project macros invoked for the affine_channel op exercised by the tests
// above. The macros are defined outside this file; presumably they pull the
// op and converter registrations into this test binary -- confirm against
// their definitions.
USE_OP
(
affine_channel
);
USE_ANAKIN_CONVERTER
(
affine_channel
);
paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Shared driver for the batch_norm converter tests.
//
// Builds an AnakinConvertValidation<TargetT, FP32> whose parameter set holds
// the scale, bias, mean and variance variable names, declares a {1, 2, 5, 5}
// input and output plus {2}-shaped parameter and saved-statistics variables,
// creates a batch_norm op with epsilon = 1e-5f and is_test = true, and runs
// the validator with Execute(1, ...), passing the mean/variance outputs as
// the second argument.
//
// context: device context handed to the validator.
// use_gpu: forwarded unchanged to the validator.
template <typename TargetT>
void test_batchnorm_op(const platform::DeviceContext &context, bool use_gpu) {
  std::unordered_set<std::string> param_names(
      {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
       "batch_norm_variance"});
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> checker(
      param_names, &scope, context, use_gpu);

  std::vector<int> stat_shape{2};
  checker.DeclInputVar("batch_norm_X", {1, 2, 5, 5});
  checker.DeclParamVar("batch_norm_scale", stat_shape);
  checker.DeclParamVar("batch_norm_bias", stat_shape);
  checker.DeclParamVar("batch_norm_mean", stat_shape);
  checker.DeclParamVar("batch_norm_variance", stat_shape);
  checker.DeclOutputVar("batch_norm_Y", {1, 2, 5, 5});
  checker.DeclOutputVar("batch_norm_save_mean", stat_shape);
  checker.DeclOutputVar("batch_norm_save_variance", stat_shape);

  // Describe the op under test.
  framework::OpDesc op_desc;
  op_desc.SetType("batch_norm");
  op_desc.SetInput("X", {"batch_norm_X"});
  op_desc.SetInput("Scale", {"batch_norm_scale"});
  op_desc.SetInput("Bias", {"batch_norm_bias"});
  op_desc.SetInput("Mean", {"batch_norm_mean"});
  op_desc.SetInput("Variance", {"batch_norm_variance"});
  op_desc.SetOutput("Y", {"batch_norm_Y"});
  op_desc.SetOutput("MeanOut", {"batch_norm_mean"});
  op_desc.SetOutput("VarianceOut", {"batch_norm_variance"});
  op_desc.SetOutput("SavedMean", {"batch_norm_save_mean"});
  op_desc.SetOutput("SavedVariance", {"batch_norm_save_variance"});

  float epsilon = 1e-5f;
  bool inference_mode = true;
  op_desc.SetAttr("epsilon", epsilon);
  op_desc.SetAttr("is_test", inference_mode);

  checker.SetOp(*op_desc.Proto());

  // Output names handed to Execute; presumably these are excluded from the
  // result comparison -- confirm against AnakinConvertValidation::Execute.
  std::unordered_set<std::string> skipped_outputs = {
      "batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean",
      "batch_norm_variance"};
  checker.Execute(1, skipped_outputs);
}
#ifdef PADDLE_WITH_CUDA
// Runs the batch_norm driver on the NV target, using a CUDA device context
// built from CUDAPlace(0).
TEST(batch_norm_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_batchnorm_op<::anakin::saber::NV>(dev_ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
// Runs the batch_norm driver on the X86 target, using a CPU device context.
TEST(batch_norm_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_batchnorm_op<::anakin::saber::X86>(dev_ctx, false);
}
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Project macros invoked for the batch_norm op exercised by the tests above.
// The macros are defined outside this file; presumably they pull the op and
// converter registrations into this test binary -- confirm against their
// definitions.
USE_OP
(
batch_norm
);
USE_ANAKIN_CONVERTER
(
batch_norm
);
paddle/fluid/inference/anakin/convert/test_concat_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/concat.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "concat" op that joins three inputs along axis 1 and hands it to
// AnakinConvertValidation for the requested saber target.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
// NOTE(review): the validator presumably checks the converted Anakin result
// against the native op — confirm in ut_helper.h.
template <typename TargetT>
void test_concat_op(const platform::DeviceContext& context, bool use_gpu) {
  std::unordered_set<std::string> param_names({""});
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  // The second-dimension extents 2 + 3 + 1 add up to the 6 declared for the
  // output.
  validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
  validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
  validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
  validator.DeclOutputVar("concat_out", {1, 6, 1, 1});

  // Op description: three inputs, one output, concatenation axis 1.
  framework::OpDesc op_desc;
  op_desc.SetType("concat");
  op_desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
  op_desc.SetOutput("Out", {"concat_out"});
  op_desc.SetAttr("axis", 1);

  validator.SetOp(*op_desc.Proto());
  validator.Execute(1);
}

#ifdef PADDLE_WITH_CUDA
// concat on CUDA device 0 with the saber NV target.
TEST(concat_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_concat_op<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// concat on the host with the saber X86 target.
TEST(concat_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_concat_op<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the concat operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(concat);
USE_ANAKIN_CONVERTER(concat);
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/conv2d.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "conv2d" op (filter shape {4, 3, 1, 1}, stride 1, no padding,
// dilation 1, one group) and hands it to AnakinConvertValidation for the
// requested saber target.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
void test_conv2d_op(const platform::DeviceContext& context, bool use_gpu) {
  // The filter is the only variable named in the parameter set.
  std::unordered_set<std::string> param_names({"conv2d-Y"});
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
  validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
  validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});

  // Op description.
  framework::OpDesc op_desc;
  op_desc.SetType("conv2d");
  op_desc.SetInput("Input", {"conv2d-X"});
  op_desc.SetInput("Filter", {"conv2d-Y"});
  op_desc.SetOutput("Output", {"conv2d-Out"});
  op_desc.SetAttr("strides", std::vector<int>{1, 1});
  op_desc.SetAttr("paddings", std::vector<int>{0, 0});
  op_desc.SetAttr("dilations", std::vector<int>{1, 1});
  op_desc.SetAttr("groups", 1);

  validator.SetOp(*op_desc.Proto());
  validator.Execute(3);
}

#ifdef PADDLE_WITH_CUDA
// conv2d on CUDA device 0 with the saber NV target.
TEST(conv2d_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_conv2d_op<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// conv2d on the host with the saber X86 target.
TEST(conv2d_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_conv2d_op<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the conv2d operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(conv2d);
USE_ANAKIN_CONVERTER(conv2d);
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/dropout.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "dropout" op (dropout_prob 0.5, is_test true) over a
// {1, 1, 2, 2} input and hands it to AnakinConvertValidation for the requested
// saber target.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
void test_dropout_op(const platform::DeviceContext& context, bool use_gpu) {
  std::unordered_set<std::string> param_names;
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  validator.DeclInputVar("x", {1, 1, 2, 2});
  validator.DeclOutputVar("out", {1, 1, 2, 2});
  validator.DeclOutputVar("mask", {1, 1, 2, 2});

  // Op description.
  framework::OpDesc op_desc;
  op_desc.SetType("dropout");
  op_desc.SetInput("X", {"x"});
  op_desc.SetOutput("Out", {"out"});
  op_desc.SetOutput("Mask", {"mask"});
  op_desc.SetAttr("dropout_prob", 0.5f);
  op_desc.SetAttr("is_test", true);
  validator.SetOp(*op_desc.Proto());

  // NOTE(review): "mask" is passed as a neglected output; presumably the
  // validator skips it when comparing results — confirm in ut_helper.h.
  std::unordered_set<std::string> skipped_outputs = {"mask"};
  validator.Execute(1, skipped_outputs);
}

#ifdef PADDLE_WITH_CUDA
// dropout on CUDA device 0 with the saber NV target.
TEST(dropout_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_dropout_op<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// dropout on the host with the saber X86 target.
TEST(dropout_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_dropout_op<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the dropout operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(dropout);
USE_ANAKIN_CONVERTER(dropout);
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a binary elementwise op of type `op_type` over two {1, 1, 2, 2}
// inputs (axis attribute -1) and hands it to AnakinConvertValidation for the
// requested saber target.
//   op_type: operator type string, e.g. "elementwise_add".
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
static void test_elementwise_op(const std::string& op_type,
                                const platform::DeviceContext& context,
                                bool use_gpu) {
  std::unordered_set<std::string> param_names;
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  validator.DeclInputVar("x", {1, 1, 2, 2});
  validator.DeclInputVar("y", {1, 1, 2, 2});
  validator.DeclOutputVar("out", {1, 1, 2, 2});

  // Op description.
  framework::OpDesc op_desc;
  op_desc.SetType(op_type);
  op_desc.SetInput("X", {"x"});
  op_desc.SetInput("Y", {"y"});
  op_desc.SetOutput("Out", {"out"});
  op_desc.SetAttr("axis", -1);

  validator.SetOp(*op_desc.Proto());
  validator.Execute(1);
}

#ifdef PADDLE_WITH_CUDA
// elementwise_add on CUDA device 0 with the saber NV target.
TEST(elementwise_op, native_add_gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_elementwise_op<::anakin::saber::NV>("elementwise_add", dev_ctx,
                                           /*use_gpu=*/true);
}

// elementwise_mul on CUDA device 0 with the saber NV target.
TEST(elementwise_op, native_mul_gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_elementwise_op<::anakin::saber::NV>("elementwise_mul", dev_ctx,
                                           /*use_gpu=*/true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// elementwise_add on the host with the saber X86 target.
TEST(elementwise_op, native_add_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_elementwise_op<::anakin::saber::X86>("elementwise_add", dev_ctx,
                                            /*use_gpu=*/false);
}

// elementwise_mul on the host with the saber X86 target.
TEST(elementwise_op, native_mul_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_elementwise_op<::anakin::saber::X86>("elementwise_mul", dev_ctx,
                                            /*use_gpu=*/false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the elementwise_add / elementwise_mul operators and their Anakin
// converters from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(elementwise_add);
USE_OP(elementwise_mul);
USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_mul);
paddle/fluid/inference/anakin/convert/test_fc_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "mul" op with input "mul_x" ({1, 1, 2, 2}) and parameter "mul_y"
// ({4, 2}) producing "mul_out" ({1, 2}), and hands it to
// AnakinConvertValidation for the requested saber target.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
void test_mul_op(const platform::DeviceContext& context, bool use_gpu) {
  // The weight is the only variable named in the parameter set.
  std::unordered_set<std::string> param_names({"mul_y"});
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  validator.DeclInputVar("mul_x", {1, 1, 2, 2});
  validator.DeclParamVar("mul_y", {4, 2});
  validator.DeclOutputVar("mul_out", {1, 2});

  // Op description: no attributes are set for this op.
  framework::OpDesc op_desc;
  op_desc.SetType("mul");
  op_desc.SetInput("X", {"mul_x"});
  op_desc.SetInput("Y", {"mul_y"});
  op_desc.SetOutput("Out", {"mul_out"});

  validator.SetOp(*op_desc.Proto());
  validator.Execute(10);
}

#ifdef PADDLE_WITH_CUDA
// mul on CUDA device 0 with the saber NV target.
TEST(mul_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_mul_op<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// mul on the host with the saber X86 target.
TEST(mul_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_mul_op<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the native "mul" operator together with the Anakin "fc" converter.
// NOTE(review): the op and converter names differ here; presumably the fc
// converter is the one that handles mul — confirm against the converter
// registration.
USE_OP(mul);
USE_ANAKIN_CONVERTER(fc);
paddle/fluid/inference/anakin/convert/test_flatten_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "flatten" op (axis 1) turning a {3, 10, 10, 4} input into a
// {3, 400, 1, 1} output and hands it to AnakinConvertValidation for the
// requested saber target.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
void test_flatten_op(const platform::DeviceContext& context, bool use_gpu) {
  std::unordered_set<std::string> param_names;
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  // 10 * 10 * 4 == 400, the second extent declared for the output.
  validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
  validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});

  framework::OpDesc op_desc;
  op_desc.SetType("flatten");
  op_desc.SetInput("X", {"flatten-X"});
  op_desc.SetOutput("Out", {"flatten-Out"});
  const int axis = 1;
  op_desc.SetAttr("axis", axis);

  LOG(INFO) << "set OP";
  validator.SetOp(*op_desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(5);
}

#ifdef PADDLE_WITH_CUDA
// flatten on CUDA device 0 with the saber NV target.
TEST(flatten_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_flatten_op<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// flatten on the host with the saber X86 target.
TEST(flatten_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_flatten_op<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the reshape and flatten operators and the Anakin flatten converter.
// NOTE(review): reshape is referenced although the test only builds a
// "flatten" op; presumably flatten depends on it — confirm before removing.
USE_OP(reshape);
USE_OP_ITSELF(flatten);
USE_ANAKIN_CONVERTER(flatten);
paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes an "im2sequence" op with kernels {6, 1}, strides {1, 1} and zero
// paddings over a {1, 1, 2, 2} input, then runs it through the validator.
TEST(im2sequence_op, native) {
  std::unordered_set<std::string> param_names;
  framework::Scope scope;
  // NOTE(review): the validator is used here without template arguments and
  // with only two constructor arguments, unlike the other converter tests,
  // which pass <TargetT, Precision>, a device context and a use_gpu flag.
  // This looks like a leftover from an older helper API — confirm whether
  // this test is still built.
  AnakinConvertValidation validator(param_names, &scope);

  std::vector<int> kernels{6, 1};
  std::vector<int> strides{1, 1};
  std::vector<int> paddings{0, 0, 0, 0};

  // The second output extent is the product of the two kernel extents.
  const int out_cols = 1 * kernels[0] * kernels[1];
  validator.DeclInputVar("x", {1, 1, 2, 2});
  validator.DeclOutputVar("out", {1, out_cols});

  // Op description.
  framework::OpDesc op_desc;
  op_desc.SetType("im2sequence");
  op_desc.SetInput("X", {"x"});
  op_desc.SetOutput("Out", {"out"});
  op_desc.SetAttr("kernels", kernels);
  op_desc.SetAttr("strides", strides);
  op_desc.SetAttr("paddings", paddings);

  validator.SetOp(*op_desc.Proto());
  validator.Execute(1);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the im2sequence operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(im2sequence);
USE_ANAKIN_CONVERTER(im2sequence);
paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "pool2d" op (2x2 window, stride 2, no padding) over a
// {1, 3, 6, 7} input and hands it to AnakinConvertValidation for the requested
// saber target.
//   context:        device context passed through to the validator.
//   use_gpu:        passed through to the validator unchanged.
//   global_pooling: value of the "global_pooling" attribute; also selects the
//                   {1, 3, 1, 1} output shape.
//   ceil_mode:      value of the "ceil_mode" attribute; selects the
//                   {1, 3, 3, 4} output shape when global_pooling is false.
//   pool_type:      value of the "pooling_type" attribute ("max" by default).
template <typename TargetT>
void test_pool2d(const platform::DeviceContext& context, bool use_gpu,
                 bool global_pooling, bool ceil_mode,
                 std::string pool_type = "max") {
  framework::Scope scope;
  std::unordered_set<std::string> param_names;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  validator.DeclInputVar("pool2d_x", {1, 3, 6, 7});

  // Pick the expected output shape: {1, 3, 3, 3} unless one of the two flags
  // overrides it, with global_pooling taking precedence over ceil_mode.
  std::vector<int> out_shape{1, 3, 3, 3};
  if (global_pooling) {
    out_shape = {1, 3, 1, 1};
  } else if (ceil_mode) {
    out_shape = {1, 3, 3, 4};
  }
  validator.DeclOutputVar("pool2d_out", out_shape);

  // Op description.
  framework::OpDesc op_desc;
  op_desc.SetType("pool2d");
  op_desc.SetInput("X", {"pool2d_x"});
  op_desc.SetOutput("Out", {"pool2d_out"});
  op_desc.SetAttr("pooling_type", pool_type);
  op_desc.SetAttr("ksize", std::vector<int>{2, 2});
  op_desc.SetAttr("strides", std::vector<int>{2, 2});
  op_desc.SetAttr("paddings", std::vector<int>{0, 0});
  op_desc.SetAttr("global_pooling", global_pooling);
  op_desc.SetAttr("ceil_mode", ceil_mode);

  LOG(INFO) << "set OP";
  validator.SetOp(*op_desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(1);
}

#ifdef PADDLE_WITH_CUDA
// Default max pooling on CUDA device 0.
TEST(Pool2dOpConverter, normal) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true,
                                   /*global_pooling=*/false,
                                   /*ceil_mode=*/false);
}

// Global max pooling on CUDA device 0.
TEST(Pool2dOpConverter, test_global_pooling) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true,
                                   /*global_pooling=*/true,
                                   /*ceil_mode=*/false);
}

// Max pooling with ceil_mode on CUDA device 0.
TEST(Pool2dOpConverter, max_ceil_test) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true,
                                   /*global_pooling=*/false,
                                   /*ceil_mode=*/true);
}

// Average pooling with ceil_mode on CUDA device 0.
TEST(Pool2dOpConverter, avg_ceil_test) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::NV>(dev_ctx, /*use_gpu=*/true,
                                   /*global_pooling=*/false,
                                   /*ceil_mode=*/true, "avg");
}
#endif

#ifdef ANAKIN_X86_PLACE
// Default max pooling on the host.
TEST(Pool2dOpConverter, normal_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false,
                                    /*global_pooling=*/false,
                                    /*ceil_mode=*/false);
}

// Global max pooling on the host.
TEST(Pool2dOpConverter, test_global_pooling_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false,
                                    /*global_pooling=*/true,
                                    /*ceil_mode=*/false);
}

// Max pooling with ceil_mode on the host.
TEST(Pool2dOpConverter, max_ceil_test_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false,
                                    /*global_pooling=*/false,
                                    /*ceil_mode=*/true);
}

// Average pooling with ceil_mode on the host.
TEST(Pool2dOpConverter, avg_ceil_test_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  test_pool2d<::anakin::saber::X86>(dev_ctx, /*use_gpu=*/false,
                                    /*global_pooling=*/false,
                                    /*ceil_mode=*/true, "avg");
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the pool2d operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(pool2d);
USE_ANAKIN_CONVERTER(pool2d);
paddle/fluid/inference/anakin/convert/test_relu_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a unary activation op of type `op_type` over a {10, 6, 1, 1} input
// and hands it to AnakinConvertValidation for the requested saber target.
//   op_type: operator type string; "leaky_relu" additionally gets an "alpha"
//            attribute of 0.1f.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
static void test_activation_op(const std::string& op_type,
                               const platform::DeviceContext& context,
                               bool use_gpu) {
  std::unordered_set<std::string> param_names;
  framework::Scope scope;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      param_names, &scope, context, use_gpu);

  validator.DeclInputVar("act-X", {10, 6, 1, 1});
  validator.DeclOutputVar("act-Out", {10, 6, 1, 1});

  framework::OpDesc op_desc;
  op_desc.SetType(op_type);
  op_desc.SetInput("X", {"act-X"});
  op_desc.SetOutput("Out", {"act-Out"});

  // Only the leaky variant carries a slope attribute.
  const bool is_leaky = (op_type == "leaky_relu");
  if (is_leaky) {
    op_desc.SetAttr("alpha", 0.1f);
  }

  LOG(INFO) << "set OP";
  validator.SetOp(*op_desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(5);
}

#ifdef PADDLE_WITH_CUDA
// relu on CUDA device 0 with the saber NV target.
TEST(relu_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_activation_op<::anakin::saber::NV>("relu", dev_ctx, /*use_gpu=*/true);
}

// leaky_relu on CUDA device 0 with the saber NV target.
TEST(leaky_relu_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  test_activation_op<::anakin::saber::NV>("leaky_relu", dev_ctx,
                                          /*use_gpu=*/true);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the relu / leaky_relu operators and their Anakin converters from
// this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(relu);
USE_OP(leaky_relu);
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(leaky_relu);
paddle/fluid/inference/anakin/convert/test_reshape_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Shared driver for the reshape cases: declares "reshape-X" / "reshape-Out"
// with the given shapes, builds a "reshape" op whose "shape" attribute is
// `target_shape`, and runs it through AnakinConvertValidation.
//   context:      device context passed through to the validator.
//   use_gpu:      passed through to the validator unchanged.
//   in_shape:     shape declared for the input variable.
//   out_shape:    shape declared for the output variable.
//   target_shape: value of the op's "shape" attribute.
template <typename TargetT>
static void run_reshape_case(const platform::DeviceContext& context,
                             bool use_gpu, const std::vector<int>& in_shape,
                             const std::vector<int>& out_shape,
                             const std::vector<int>& target_shape) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      parameters, &scope, context, use_gpu);

  validator.DeclInputVar("reshape-X", in_shape);
  validator.DeclOutputVar("reshape-Out", out_shape);

  framework::OpDesc desc;
  desc.SetType("reshape");
  desc.SetInput("X", {"reshape-X"});
  desc.SetOutput("Out", {"reshape-Out"});
  desc.SetAttr("shape", target_shape);

  LOG(INFO) << "set OP";
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(1);
}

// Case 1: 4-D input {1, 2, 4, 1} reshaped with a fully explicit target shape
// {1, 8, 1, 1}.
template <typename TargetT>
void test_reshape1_op(const platform::DeviceContext& context, bool use_gpu) {
  run_reshape_case<TargetT>(context, use_gpu, {1, 2, 4, 1}, {1, 8, 1, 1},
                            {1, 8, 1, 1});
}

// Case 2: 3-D input {1, 2, 4} reshaped with the target shape {0, -1, 2}; the
// declared output is {1, 4, 2}.
// NOTE(review): presumably 0 keeps the input extent and -1 is inferred, which
// matches the declared output — confirm against the reshape op definition.
template <typename TargetT>
void test_reshape2_op(const platform::DeviceContext& context, bool use_gpu) {
  run_reshape_case<TargetT>(context, use_gpu, {1, 2, 4}, {1, 4, 2},
                            {0, -1, 2});
}

#ifdef PADDLE_WITH_CUDA
// Case 1 on CUDA device 0 with the saber NV target.
TEST(reshape1_op, gpu) {
  platform::CUDAPlace gpu_place(0);
  platform::CUDADeviceContext ctx(gpu_place);
  test_reshape1_op<::anakin::saber::NV>(ctx, true);
}

// Case 2 on CUDA device 0 with the saber NV target.
TEST(reshape2_op, gpu) {
  platform::CUDAPlace gpu_place(0);
  platform::CUDADeviceContext ctx(gpu_place);
  test_reshape2_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// Case 1 on the host with the saber X86 target. This test previously invoked
// test_reshape2_op, so the reshape1 case was never exercised on X86.
TEST(reshape1_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_reshape1_op<::anakin::saber::X86>(ctx, false);
}

// Case 2 on the host with the saber X86 target.
TEST(reshape2_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_reshape2_op<::anakin::saber::X86>(ctx, false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the reshape operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(reshape);
USE_ANAKIN_CONVERTER(reshape);
paddle/fluid/inference/anakin/convert/test_softmax_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {

// Describes a "softmax" op over a {1, 10, 2} input (output of the same shape)
// and hands it to AnakinConvertValidation for the requested saber target.
//   context: device context passed through to the validator.
//   use_gpu: passed through to the validator unchanged.
template <typename TargetT>
void test_softmax_op(const platform::DeviceContext& context, bool use_gpu) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
  AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
      parameters, &scope, context, use_gpu);

  validator.DeclInputVar("softmax-X", {1, 10, 2});
  validator.DeclOutputVar("softmax-Out", {1, 10, 2});

  // Op description: no attributes are set for this op.
  framework::OpDesc desc;
  desc.SetType("softmax");
  desc.SetInput("X", {"softmax-X"});
  desc.SetOutput("Out", {"softmax-Out"});

  LOG(INFO) << "set OP";
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(1);
}

#ifdef PADDLE_WITH_CUDA
// softmax on CUDA device 0 with the saber NV target.
TEST(softmax_op, gpu) {
  platform::CUDAPlace gpu_place(0);
  platform::CUDADeviceContext ctx(gpu_place);
  test_softmax_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
// softmax on the host with the saber X86 target. The suite name was
// previously `relu_op`, which mislabelled this softmax run in test reports
// and --gtest_filter selections.
TEST(softmax_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_softmax_op<::anakin::saber::X86>(ctx, false);
}
#endif

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
// Reference the softmax operator and its Anakin converter from this file.
// NOTE(review): presumably these macros force the matching registrations to be
// linked into the test binary — confirm against the macro definitions.
USE_OP(softmax);
USE_ANAKIN_CONVERTER(softmax);
paddle/fluid/inference/anakin/convert/test_split_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/split.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
template
<
typename
TargetT
,
int
Axis
>
void
AnakinSliceTest
(
const
platform
::
DeviceContext
&
context
,
bool
use_gpu
,
const
std
::
vector
<
int
>
&
in_shape
,
const
std
::
vector
<
int
>
&
sections
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
framework
::
Scope
scope
;
AnakinConvertValidation
<
TargetT
,
::
anakin
::
Precision
::
FP32
>
validator
(
parameters
,
&
scope
,
context
,
use_gpu
);
validator
.
DeclInputVar
(
"split_input"
,
in_shape
);
std
::
vector
<
std
::
string
>
output_vars
;
for
(
size_t
i
=
0
;
i
<
sections
.
size
();
++
i
)
{
auto
out_shape
=
in_shape
;
out_shape
[
Axis
]
=
sections
[
i
];
std
::
string
output_name
=
"split_out"
+
std
::
to_string
(
i
);
validator
.
DeclOutputVar
(
output_name
,
out_shape
);
output_vars
.
push_back
(
output_name
);
}
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"split"
);
desc
.
SetInput
(
"X"
,
{
"split_input"
});
desc
.
SetOutput
(
"Out"
,
output_vars
);
desc
.
SetAttr
(
"axis"
,
Axis
);
desc
.
SetAttr
(
"num"
,
0
);
desc
.
SetAttr
(
"sections"
,
sections
);
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
);
}
// batch = 0, axis = 1, same shape
TEST
(
split_op
,
test_same_shape_axis1_batch1
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
1
>
(
ctx
,
true
,
{
1
,
4
,
2
,
2
},
{
2
,
2
});
}
// batch = 0, axis = 1, different shape
TEST
(
split_op
,
test_different_shape_axis1_batch1
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
1
>
(
ctx
,
true
,
{
1
,
3
,
2
,
2
},
{
2
,
1
});
}
// batch = 0, axis = 2, same shape
TEST
(
split_op
,
test_same_shape_axis2_batch1
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
2
>
(
ctx
,
true
,
{
1
,
3
,
4
,
2
},
{
2
,
2
});
}
// batch = 0, axis = 2, different shape
TEST
(
split_op
,
test_different_shape_axis2_batch1
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
2
>
(
ctx
,
true
,
{
1
,
3
,
3
,
2
},
{
2
,
1
});
}
// batch = 0, axis = 3, same shape
TEST
(
split_op
,
test_same_shape_axis3_batch1
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
3
>
(
ctx
,
true
,
{
1
,
3
,
2
,
4
},
{
2
,
2
});
}
// batch = 0, axis = 3, different shape
TEST
(
split_op
,
test_different_shape_axis3_batch1
)
{
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
CUDADeviceContext
ctx
(
gpu_place
);
AnakinSliceTest
<::
anakin
::
saber
::
NV
,
3
>
(
ctx
,
true
,
{
1
,
3
,
2
,
3
},
{
2
,
1
});
}
#ifdef ANAKIN_X86_PLACE
// X86 split test along axis 1: first list {1, 3, 2, 3}, sections {2, 1}.
TEST(split_op, test_different_shape_axis1_batch1_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  const bool use_gpu = false;
  AnakinSliceTest<::anakin::saber::X86, 1>(dev_ctx, use_gpu, {1, 3, 2, 3},
                                           {2, 1});
}
// X86 split test along axis 2: first list {1, 3, 4, 2}, sections {2, 2}.
TEST(split_op, test_different_shape_axis2_batch1_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  const bool use_gpu = false;
  AnakinSliceTest<::anakin::saber::X86, 2>(dev_ctx, use_gpu, {1, 3, 4, 2},
                                           {2, 2});
}
// X86 split test along axis 3: first list {1, 3, 2, 4}, sections {2, 2}.
TEST(split_op, test_different_shape_axis3_batch1_cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  const bool use_gpu = false;
  AnakinSliceTest<::anakin::saber::X86, 3>(dev_ctx, use_gpu, {1, 3, 2, 4},
                                           {2, 2});
}
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
split
);
USE_ANAKIN_CONVERTER
(
split
);
paddle/fluid/inference/anakin/convert/test_sum_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/sum.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
#include "paddle/fluid/operators/sum_op.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Declares two {1, 2, 1, 2} inputs and one {1, 2, 1, 2} output, describes a
// "sum" op over them and runs it through AnakinConvertValidation for the
// requested target with Execute(1).
template <typename TargetT>
static void test_sum_op(const platform::DeviceContext& context, bool use_gpu) {
  using ValidatorT =
      AnakinConvertValidation<TargetT, ::anakin::Precision::FP32>;

  std::unordered_set<std::string> parameters;
  framework::Scope scope;
  ValidatorT validator(parameters, &scope, context, use_gpu);

  // Both operands and the result share one shape.
  const std::vector<int> shape = {1, 2, 1, 2};
  validator.DeclInputVar("sum_x1", shape);
  validator.DeclInputVar("sum_x2", shape);
  validator.DeclOutputVar("sum_out", shape);

  // Prepare Op description
  framework::OpDesc sum_desc;
  sum_desc.SetType("sum");
  sum_desc.SetInput("X", {"sum_x1", "sum_x2"});
  sum_desc.SetOutput("Out", {"sum_out"});

  validator.SetOp(*sum_desc.Proto());
  validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
// Runs the sum converter check with the NV target on CUDA device 0.
TEST(sum_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  const bool use_gpu = true;
  test_sum_op<::anakin::saber::NV>(dev_ctx, use_gpu);
}
#endif
#ifdef ANAKIN_X86_PLACE
// Runs the sum converter check with the X86 target on the CPU.
TEST(sum_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  const bool use_gpu = false;
  test_sum_op<::anakin::saber::X86>(dev_ctx, use_gpu);
}
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
sum
);
USE_ANAKIN_CONVERTER
(
sum
);
paddle/fluid/inference/anakin/convert/test_transpose_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// 4-D transpose case: input {2, 3, 4, 5}, axis {2, 0, 3, 1}, output
// {4, 2, 5, 3}. The op is converted and validated with Execute(3).
template <typename TargetT>
void test_transpose1_op(const platform::DeviceContext& context, bool use_gpu) {
  using ValidatorT =
      AnakinConvertValidation<TargetT, ::anakin::Precision::FP32>;

  std::unordered_set<std::string> parameters;
  framework::Scope scope;
  ValidatorT validator(parameters, &scope, context, use_gpu);

  validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
  validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});

  // Prepare Op description
  const std::vector<int> permutation = {2, 0, 3, 1};
  framework::OpDesc transpose_desc;
  transpose_desc.SetType("transpose");
  transpose_desc.SetInput("X", {"transpose-X"});
  transpose_desc.SetOutput("Out", {"transpose-Out"});
  transpose_desc.SetAttr("axis", permutation);

  LOG(INFO) << "set OP";
  validator.SetOp(*transpose_desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(3);
}
// 3-D transpose case: input {3, 4, 5}, axis {0, 2, 1}, output {3, 5, 4}.
// The op is converted and validated with Execute(1).
template <typename TargetT>
void test_transpose2_op(const platform::DeviceContext& context, bool use_gpu) {
  using ValidatorT =
      AnakinConvertValidation<TargetT, ::anakin::Precision::FP32>;

  std::unordered_set<std::string> parameters;
  framework::Scope scope;
  ValidatorT validator(parameters, &scope, context, use_gpu);

  validator.DeclInputVar("transpose-X", {3, 4, 5});
  validator.DeclOutputVar("transpose-Out", {3, 5, 4});

  // Prepare Op description
  const std::vector<int> permutation = {0, 2, 1};
  framework::OpDesc transpose_desc;
  transpose_desc.SetType("transpose");
  transpose_desc.SetInput("X", {"transpose-X"});
  transpose_desc.SetOutput("Out", {"transpose-Out"});
  transpose_desc.SetAttr("axis", permutation);

  LOG(INFO) << "set OP";
  validator.SetOp(*transpose_desc.Proto());
  LOG(INFO) << "execute";
  validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
// 4-D transpose check with the NV target on CUDA device 0.
TEST(transpose1_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  const bool use_gpu = true;
  test_transpose1_op<::anakin::saber::NV>(dev_ctx, use_gpu);
}
// 3-D transpose check with the NV target on CUDA device 0.
TEST(transpose2_op, gpu) {
  platform::CUDAPlace place(0);
  platform::CUDADeviceContext dev_ctx(place);
  const bool use_gpu = true;
  test_transpose2_op<::anakin::saber::NV>(dev_ctx, use_gpu);
}
#endif
#ifdef ANAKIN_X86_PLACE
// 4-D transpose check with the X86 target on the CPU.
// The original body called test_transpose2_op here, which duplicated the
// transpose2_op CPU test and left the 4-D case untested on X86.
TEST(transpose1_op, cpu) {
  platform::CPUPlace cpu_place;
  platform::CPUDeviceContext ctx(cpu_place);
  test_transpose1_op<::anakin::saber::X86>(ctx, false);
}
// 3-D transpose check with the X86 target on the CPU.
TEST(transpose2_op, cpu) {
  platform::CPUPlace place;
  platform::CPUDeviceContext dev_ctx(place);
  const bool use_gpu = false;
  test_transpose2_op<::anakin::saber::X86>(dev_ctx, use_gpu);
}
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
transpose
);
USE_ANAKIN_CONVERTER
(
transpose
);
paddle/fluid/inference/anakin/convert/transpose.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/transpose.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Converts a Fluid transpose op into an Anakin "Permute" op.
// The op must have exactly one "X" input and one "Out" output. The "axis"
// attribute is padded with consecutive indices until it has four entries and
// is then attached to the new op as its "dims" attribute.
template <typename TargetT, ::anakin::Precision PrecisionT>
void TransposeOpConverter<TargetT, PrecisionT>::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto input = op_desc.Input("X").front();
  auto output = op_desc.Output("Out").front();
  // The Anakin op is named "<fluid op type>:<output variable>".
  auto op_name = op_desc.Type() + ":" + output;
  this->engine_->AddOp(op_name, "Permute", {input}, {output});

  auto permutation = boost::get<std::vector<int>>(op_desc.GetAttr("axis"));
  // Extend shorter permutations with the identity on the trailing axes.
  for (size_t next_axis = permutation.size(); permutation.size() < 4;
       ++next_axis) {
    permutation.push_back(next_axis);
  }
  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", permutation);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
transpose
,
TransposeOpConverter
);
paddle/fluid/inference/anakin/convert/transpose.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Anakin converter for the transpose op; the call operator is implemented in
// transpose.cc.
template <typename TargetT, ::anakin::Precision PrecisionT>
class TransposeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
 public:
  TransposeOpConverter() = default;

  // Performs the conversion of `op`.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~TransposeOpConverter() = default;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/ut_helper.h
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gtest/gtest.h>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
using
anakin
::
Precision
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
/*
 * Get a random float value between [low, high].
 *
 * The value is drawn in double precision from a uniform distribution seeded
 * once from std::random_device and then narrowed to float.
 *
 * Declared inline because this function is defined in a header: without it,
 * including the header from more than one translation unit of the same
 * binary produces multiple definitions.
 */
inline float random(float low, float high) {
  static std::random_device rd;
  static std::mt19937 mt(rd());
  std::uniform_real_distribution<double> dist(low, high);
  return static_cast<float>(dist(mt));
}
/*
 * Fill `tensor` with random floats drawn from random(0., 1.).
 *
 * The values are generated into a temporary CPU tensor with the same dims
 * and then copied into `tensor` on `place` with TensorCopySync. The tensor
 * must already be resized to a shape with at least one element.
 *
 * Declared inline because it is defined in a header; see random() above it
 * in this file for the same multiple-definition concern.
 */
inline void RandomizeTensor(framework::LoDTensor* tensor,
                            const platform::Place& place) {
  auto dims = tensor->dims();
  size_t num_elements = analysis::AccuDims(dims, dims.size());
  PADDLE_ENFORCE_GT(num_elements, 0);

  // Stage the random data on the host first.
  platform::CPUPlace cpu_place;
  framework::LoDTensor temp_tensor;
  temp_tensor.Resize(dims);
  auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
  for (size_t i = 0; i < num_elements; i++) {
    temp_data[i] = random(0., 1.);
  }

  TensorCopySync(temp_tensor, place, tensor);
}
/*
 * Help to validate the correctness between Fluid Op and the corresponding
 * anakin layer.
 *
 * Usage, as seen in the converter tests: declare the variables with
 * DeclInputVar / DeclParamVar / DeclOutputVar, pass the op description to
 * SetOp (which converts it and initialises the Anakin engine), then call
 * Execute, which runs both implementations and compares their outputs
 * element-wise with a tolerance of 1e-3.
 *
 * The object keeps references to `parameters` and `ctx` and a pointer to
 * `scope`; all three must outlive it.
 */
template <typename TargetT, ::anakin::Precision PrecisionT>
class AnakinConvertValidation {
  using AnakinNvEngineT = AnakinEngine<TargetT, PrecisionT>;

 public:
  AnakinConvertValidation() = delete;

  AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
                          framework::Scope* scope,
                          const platform::DeviceContext& ctx,
                          bool use_gpu = true)
      : parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) {
    engine_.reset(new AnakinEngine<TargetT, PrecisionT>(true));
  }

  // Declare a Variable as input with random initialization.
  void DeclInputVar(const std::string& name,
                    const std::vector<int>& tensor_dims) {
    DeclVar(name, tensor_dims);
    // should declare anakin input here.
  }

  // Declare a parameter Variable with random initialization.
  void DeclParamVar(const std::string& name, const std::vector<int>& dim_vec) {
    DeclVar(name, dim_vec);
  }

  // Declare an output Variable; it is randomly initialized like the inputs.
  void DeclOutputVar(const std::string& name,
                     const std::vector<int>& dim_vec) {
    DeclVar(name, dim_vec);
    // should declare anakin output here.
  }

  // Creates variable `name` in the scope as a randomly filled LoDTensor of
  // shape `dim_vec` and registers a var desc with the same shape in the root
  // block of the program description.
  void DeclVar(const std::string& name, const std::vector<int>& dim_vec) {
    auto* x = scope_->Var(name);
    auto* x_tensor = x->GetMutable<framework::LoDTensor>();
    x_tensor->Resize(framework::make_ddim(dim_vec));
    RandomizeTensor(x_tensor, ctx_.GetPlace());

    std::vector<int64_t> dim_vec_int64;
    for (auto& ele : dim_vec) {
      dim_vec_int64.push_back(static_cast<int64_t>(ele));
    }

    // Add var_desc to block_desc
    auto* block_desc = program_desc_.MutableBlock(framework::kRootBlockIndex);
    auto* var_desc = block_desc->Var(name);
    var_desc->SetShape(dim_vec_int64);
  }

  // Creates the Fluid operator from `desc`, converts the same description
  // into the Anakin engine, and then freezes, optimizes and initialises the
  // engine. Every non-parameter input gets its current shape (padded with
  // trailing 1s to four dims) as both input shape and max input shape.
  void SetOp(const framework::proto::OpDesc& desc) {
    op_ = framework::OpRegistry::CreateOp(desc);
    op_desc_.reset(new framework::OpDesc(desc, nullptr));
    // should init anakin engine here.

    auto& block_desc = program_desc_.Block(framework::kRootBlockIndex);
    Singleton<AnakinOpConverter<TargetT, PrecisionT>>::Global().ConvertOp(
        desc, block_desc, parameters_, *scope_, engine_.get(),
        true /*test_mode*/);
    engine_->Freeze();

    std::map<std::string, std::vector<int>> temp_max_input_shape;
    for (const auto& input : op_desc_->InputArgumentNames()) {
      if (parameters_.count(input)) continue;
      auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(*scope_,
                                                                        input);
      auto t_shape = framework::vectorize<int>(t.dims());
      while (t_shape.size() < 4) {
        t_shape.push_back(1);
      }
      engine_->SetInputShape(input, t_shape);
      temp_max_input_shape[input] = t_shape;
    }
    engine_->SetMaxInputShape(temp_max_input_shape);
    engine_->Optimize();
    engine_->InitNet();
  }

  // Runs the Fluid op and the Anakin engine and compares their outputs.
  //
  // We use the set 'neglected_output' here, because some Ops like batch norm,
  // the outputs specified in the op des are only used during training,
  // so we should neglect those output during inference.
  //
  // `batch_size` is not read by this function.
  void Execute(int batch_size,
               std::unordered_set<std::string> neglected_output = {}) {
    // Execute Fluid Op
    op_->Run(*scope_, ctx_.GetPlace());

    std::map<std::string, framework::LoDTensor*> inputs;
    for (const auto& input : op_desc_->InputArgumentNames()) {
      if (parameters_.count(input)) continue;
      auto* var = scope_->FindVar(input);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      inputs.insert({input, tensor});
    }

    // Snapshot the Fluid results now: the Anakin engine writes into the very
    // same output tensors below.
    std::map<std::string, framework::LoDTensor*> outputs;
    std::vector<std::vector<float>> fluid_outputs;
    for (const auto& output : op_desc_->OutputArgumentNames()) {
      if (neglected_output.count(output)) continue;
      std::vector<float> fluid_out;
      auto* var = scope_->FindVar(output);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      framework::TensorToVector(*tensor, ctx_, &fluid_out);
      fluid_outputs.push_back(fluid_out);
      outputs.insert({output, tensor});
    }

    if (!use_gpu_) {
      engine_->Execute(inputs, outputs);
    } else {
      cudaStream_t stream;
      PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream), 0);
      engine_->Execute(inputs, outputs, stream);
      // The stream is only needed for this one run; release it so repeated
      // Execute calls do not accumulate CUDA streams.
      PADDLE_ENFORCE_EQ(cudaStreamDestroy(stream), 0);
    }

    int i_output = 0;
    for (const auto& output : op_desc_->OutputArgumentNames()) {
      if (neglected_output.count(output)) continue;
      std::vector<float> anakin_out;
      auto* var = scope_->FindVar(output);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      framework::TensorToVector(*tensor, ctx_, &anakin_out);

      size_t anakin_out_size = anakin_out.size();
      const auto& fluid_out = fluid_outputs[i_output++];
      // Guard the indexed comparison below against reading past the end of
      // the Fluid result when the Anakin output is larger.
      ASSERT_LE(anakin_out_size, fluid_out.size());
      for (size_t i = 0; i < anakin_out_size; i++) {
        EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3);
      }
    }
  }

 private:
  std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
  std::unique_ptr<framework::OperatorBase> op_;
  std::unique_ptr<framework::OpDesc> op_desc_;
  framework::ProgramDesc program_desc_;
  const std::unordered_set<std::string>& parameters_;
  framework::Scope* scope_;
  const platform::DeviceContext& ctx_;
  bool use_gpu_{true};
};
// Explicit instantiations of the validation helper for the NV target in FP32
// and INT8 precision.
template class AnakinConvertValidation<::anakin::saber::NV,
                                       ::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::NV,
                                       ::anakin::Precision::INT8>;
// The X86 instantiations are only compiled when ANAKIN_X86_PLACE is defined.
#ifdef ANAKIN_X86_PLACE
template class AnakinConvertValidation<::anakin::saber::X86,
                                       ::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::X86,
                                       ::anakin::Precision::INT8>;
#endif
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/engine.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/engine.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <utility>
#include "paddle/fluid/framework/ddim.h"
using
anakin
::
Precision
;
using
anakin
::
OpRunType
;
using
paddle
::
framework
::
LoDTensor
;
// Shorthand for anakin::Net with the given target, precision and run type.
template <typename T, Precision P, OpRunType O>
using AnakinNetT = anakin::Net<T, P, O>;

// Shorthand for anakin::graph::Graph with the given target and precision.
template <typename T, Precision P>
using AnakinGraphT = anakin::graph::Graph<T, P>;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Out-of-class declaration of the static once_flag member of AnakinEngine.
// The constructor passes it to std::call_once so that
// ::anakin::Env<TargetT>::env_init() runs once per instantiation.
// NOTE(review): the `extern` specifier on a static data member declaration is
// unusual — confirm it is intentional and accepted by all supported compilers.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
extern std::once_flag
    AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;
// Builds an engine bound to `device`.
//
// Stores the configuration, selects the device through
// ::anakin::TargetWrapper, runs ::anakin::Env<TargetT>::env_init() once via
// init_anakin_, and creates an empty graph plus a net constructed with
// `need_summary`.
//
// `max_input_shape` and `program_inputs` are taken by value, so they are
// moved into the members instead of being copied a second time.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
    bool need_summary, int device, int max_batch_size,
    std::map<std::string, std::vector<int>> max_input_shape,
    std::vector<std::string> program_inputs, bool auto_config_layout)
    : device_(device),
      max_batch_size_(max_batch_size),
      max_input_shape_(std::move(max_input_shape)),
      program_inputs_(std::move(program_inputs)),
      auto_config_layout_(auto_config_layout) {
  ::anakin::TargetWrapper<TargetT>::set_device(device_);
  // The initializer uses no member state, so the lambda captures nothing.
  std::call_once(init_anakin_,
                 []() { ::anakin::Env<TargetT>::env_init(); });
  graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
  net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
}
// Nothing to release by hand: graph_ and net_ are owned by unique_ptr members.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::~AnakinEngine() = default;
// Stores `shape` as the "input_shape" attribute of graph node `name`.
// NOTE(review): unlike the AnakinEngine::AddOpAttr wrapper in engine.h, the
// result of graph_->AddOpAttr is not checked here — confirm that is intended.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
    const std::string &name, std::vector<int> shape) {
  graph_->AddOpAttr<::anakin::PTuple<int>>(name, "input_shape",
                                           std::move(shape));
}
// Initializes the net from the current graph, forwarding the
// auto_config_layout_ setting given at construction.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::InitNet() {
  net_->init(*graph_, auto_config_layout_);
}
// Adds an op called `name` of Anakin type `type` with the given input and
// output names to the graph; the result is checked with PADDLE_ENFORCE.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
    const std::string &name, const std::string &type,
    const std::vector<std::string> &inputs,
    const std::vector<std::string> &outputs) {
  PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs), "Add operation.");
}
// Copies every Fluid input tensor into the matching Anakin net input.
//
// Each input shape is padded with trailing 1s to four dims. If a tensor holds
// more elements than the recorded max input shape allows, the input must not
// be one of program_inputs_ (enforced); the graph is then reshaped and the
// net rebuilt before the data is copied.
//
// The element budget is accumulated in 64 bits with a plain loop: this avoids
// int overflow for large shapes, does not depend on <numeric> (which this
// file does not include), and the max shape is read by reference rather than
// copied for every input.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
    const std::map<std::string, framework::LoDTensor *> &inputs) {
#ifdef PADDLE_WITH_CUDA
  cudaDeviceSynchronize();
#endif
  for (const auto &input : inputs) {
    auto *tensor = input.second;
    auto *data = tensor->data<float>();

    auto fluid_input_shape = framework::vectorize<int>(tensor->dims());
    while (fluid_input_shape.size() < 4) {
      fluid_input_shape.push_back(1);
    }
    auto *anakin_input = net_->get_in(input.first);

    // As before, a missing entry is default-inserted and yields a budget of 1.
    const std::vector<int> &max_input_shape = max_input_shape_[input.first];
    int64_t max_shape_sum = 1;
    for (int dim : max_input_shape) {
      max_shape_sum *= dim;
    }

    if (tensor->numel() > max_shape_sum) {
      PADDLE_ENFORCE(std::find(program_inputs_.begin(), program_inputs_.end(),
                               input.first) == program_inputs_.end(),
                     "The anakin input max shape should be greater than"
                     " or equal to the real input shape, Please set the max "
                     "input shape using EnableAnakinEngine");
      VLOG(3) << "Anakin Net will be reset because of the inputs out of range: "
              << input.first;
      graph_->Reshape(input.first, fluid_input_shape);
      // NOTE(review): the rebuilt net is created with `true` and initialized
      // without auto_config_layout_, unlike the constructor and InitNet() —
      // confirm this difference is intended.
      net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(true));
      net_->init(*graph_);
      anakin_input = net_->get_in(input.first);
    }

    anakin_input->reshape(fluid_input_shape);
    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
                                                       fluid_input_shape);
    anakin_input->copy_from(tmp_anakin_tensor);
  }
}
// Host overload: binds the inputs, runs the net and copies every requested
// output into its Fluid tensor on the CPU. Each Fluid tensor is resized to
// the Anakin output's valid shape before the copy.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
    const std::map<std::string, framework::LoDTensor *> &inputs,
    const std::map<std::string, framework::LoDTensor *> &outputs) {
  BindInput(inputs);
  net_->prediction();

  platform::CPUPlace host;
  for (const auto &name_and_tensor : outputs) {
    auto *dst = name_and_tensor.second;
    auto *src = net_->get_out(name_and_tensor.first);
    auto *src_data = src->data();
    auto src_shape = src->valid_shape();

    dst->Resize(framework::make_ddim(src_shape));
    auto *dst_data = dst->mutable_data<float>(host);
    memory::Copy(host, static_cast<void *>(dst_data), host,
                 static_cast<void *>(src_data), dst->numel() * sizeof(float));
  }
}
#ifdef PADDLE_WITH_CUDA
// CUDA overload: binds the inputs, runs the net, then copies every requested
// output into its Fluid tensor on CUDAPlace(device_) using `stream`.
// cudaDeviceSynchronize() is called after the prediction and again after all
// copies have been issued.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
    const std::map<std::string, framework::LoDTensor *> &inputs,
    const std::map<std::string, framework::LoDTensor *> &outputs,
    cudaStream_t stream) {
  BindInput(inputs);
  net_->prediction();
  cudaDeviceSynchronize();

  platform::CUDAPlace device_place(device_);
  for (const auto &name_and_tensor : outputs) {
    auto *dst = name_and_tensor.second;
    auto *src = net_->get_out(name_and_tensor.first);
    auto *src_data = src->data();
    auto src_shape = src->valid_shape();

    dst->Resize(framework::make_ddim(src_shape));
    auto *dst_data = dst->mutable_data<float>(device_place);
    memory::Copy(device_place, static_cast<void *>(dst_data), device_place,
                 static_cast<void *>(src_data), dst->numel() * sizeof(float),
                 stream);
  }
  cudaDeviceSynchronize();
}
#endif
// Calls graph_->Freeze() and enforces that it succeeded.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
  PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");
}
// Calls graph_->Optimize() and enforces that it succeeded.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
  PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
}
// Registers `block_p` with the graph and enforces that the registration
// succeeded.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
    ::anakin::PBlock<TargetT> *block_p) {
  PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
}
// Returns a new engine whose net is a clone of this engine's net.
//
// The new engine is default-constructed, so only net_ is taken from this
// object; device, max shapes and the other settings keep the constructor
// defaults.
//
// The engine is held in a unique_ptr from the moment it is allocated so it is
// released if net_->Clone() throws.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
  std::unique_ptr<AnakinEngine> engine(new AnakinEngine());
  engine->net_ = net_->Clone();
  return engine;
}
#ifdef PADDLE_WITH_CUDA
template
class
AnakinEngine
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>;
template
class
AnakinEngineManager
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
FP32
>;
template
class
AnakinEngine
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
INT8
>;
template
class
AnakinEngineManager
<::
anakin
::
saber
::
NV
,
::
anakin
::
Precision
::
INT8
>;
#endif
#ifdef ANAKIN_X86_PLACE
template
class
AnakinEngine
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
FP32
>;
template
class
AnakinEngineManager
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
FP32
>;
template
class
AnakinEngine
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
INT8
>;
template
class
AnakinEngineManager
<::
anakin
::
saber
::
X86
,
::
anakin
::
Precision
::
INT8
>;
#endif
// template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/engine.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#ifdef EXIT // NOLINT
#undef EXIT // NOLINT
#endif // NOLINT
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"
using
anakin
::
Precision
;
namespace anakin {

// Forward declarations of the Anakin net and graph class templates that
// AnakinEngine is built on.
template <typename, Precision, OpRunType>
class Net;

namespace graph {
template <typename, Precision>
class Graph;
}
// namespace graph
}
// namespace anakin
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Owns one Anakin graph and the net built from it, and runs that net on
// Fluid LoDTensors.
//
// TargetT selects the Anakin target, PrecisionType the precision, and RunType
// the op run type (::anakin::OpRunType::ASYNC unless specified). The
// non-inline members are defined in engine.cc.
template <typename TargetT, ::anakin::Precision PrecisionType,
          ::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;

 public:
  // All arguments have defaults, so this also serves as the default
  // constructor (used by Clone()).
  explicit AnakinEngine(
      bool need_summary = false, int device = 0, int max_batch_size = 1,
      std::map<std::string, std::vector<int>> max_input_shape = {},
      std::vector<std::string> program_inputs = {},
      bool auto_config_layout = false);
  ~AnakinEngine();
  // Initializes the net from the graph.
  void InitNet();
  // Sets the "input_shape" attribute of graph node `name`.
  void SetInputShape(const std::string &name, std::vector<int> shape);
  // Adds an op of Anakin type `type` to the graph.
  void AddOp(const std::string &name, const std::string &type,
             const std::vector<std::string> &inputs,
             const std::vector<std::string> &outputs);

  // Sets attribute `attr_name` of op `op_name` on the graph and enforces that
  // the graph accepted it.
  template <typename T>
  void AddOpAttr(const std::string &op_name, const std::string &attr_name,
                 const T &attr_value) {
    PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
                   "Add operation's attribution.");
  }
  // Non-owning access to the underlying net and graph.
  NetT *Net() { return net_.get(); }
  GraphT *Graph() { return graph_.get(); }
  // Returns a new engine holding a clone of this engine's net.
  std::unique_ptr<AnakinEngine> Clone();
  const std::map<std::string, std::vector<int>> &GetMaxInputShape() {
    return max_input_shape_;
  }
  void SetMaxInputShape(std::map<std::string, std::vector<int>> shape) {
    max_input_shape_ = shape;
  }
  // The "scalable inputs" accessors read and write program_inputs_.
  const std::vector<std::string> &GetScalableInputs() {
    return program_inputs_;
  }
  void SetScalableInputs(std::vector<std::string> program_inputs) {
    program_inputs_ = program_inputs;
  }
  int GetMaxBatchSize() { return max_batch_size_; }
  void Freeze();
  void Optimize();
  void RegistBlock(::anakin::PBlock<TargetT> *block_p);
  // Saves the graph to `path`.
  void Save(std::string path) { graph_->save(path); }
  // NOTE(review): nothing visible in this header assigns initialized_, so
  // this appears to always return false — confirm.
  bool IsInit() { return initialized_; }
  int GetDevice() { return device_; }
  // Records `scale` for `tensor_name`, replacing any previous value.
  void AddTensorScale(const std::string &tensor_name, float scale) {
    tensor_scales_[tensor_name] = scale;
  }
  // Returns a copy of the recorded tensor scales.
  std::unordered_map<std::string, float> GetTensorScales() {
    return tensor_scales_;
  }
  // Runs the net on `inputs` and writes the results into `outputs`.
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
               const std::map<std::string, framework::LoDTensor *> &outputs);
#ifdef PADDLE_WITH_CUDA
  // CUDA overload taking the stream used for the output copies.
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
               const std::map<std::string, framework::LoDTensor *> &outputs,
               cudaStream_t stream);
#endif

 private:
  // Copies the Fluid input tensors into the net inputs.
  void BindInput(const std::map<std::string, framework::LoDTensor *> &inputs);

 private:
  bool initialized_{false};
  int device_;
  int max_batch_size_;
  std::map<std::string, std::vector<int>> max_input_shape_;
  std::vector<std::string> program_inputs_;
  std::unique_ptr<GraphT> graph_;
  std::unique_ptr<NetT> net_;
  // Guards the one-time Anakin environment initialization in the constructor.
  static std::once_flag init_anakin_;
  std::unordered_map<std::string, float> tensor_scales_;
  // Always be false in gpu mode but true in most cpu cases.
  bool auto_config_layout_;
};
// Registry of named AnakinEngine instances for one target/precision pair.
// The manager owns the engines; the pointers handed out by Get and Create are
// non-owning and stay valid until the entry is replaced or DeleteALL is
// called.
//
// Only Create takes the mutex; the other members do not lock.
template <typename TargetT, ::anakin::Precision PrecisionType>
class AnakinEngineManager {
  using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;

 public:
  // True when an engine is registered under `name` and has not been reset.
  // Uses a single lookup instead of count() followed by at().
  bool HasEngine(const std::string &name) const {
    const auto it = engines_.find(name);
    return it != engines_.end() && it->second != nullptr;
  }

  // Returns the engine registered under `name`; like
  // std::unordered_map::at, throws std::out_of_range for an unknown name.
  AnakinEngineT *Get(const std::string &name) const {
    return engines_.at(name).get();
  }

  // Creates an engine from the given settings and registers it under
  // `engine_name`, replacing any engine already stored under that name.
  // The new engine is owned by a unique_ptr before it is stored, so it is not
  // leaked if inserting into the map throws. The by-value shape map and input
  // list are moved on to the engine instead of copied again.
  AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
                        std::map<std::string, std::vector<int>> max_input_shape,
                        std::vector<std::string> program_inputs,
                        bool auto_config_layout, std::string engine_name) {
    std::unique_lock<std::mutex> lk(mut_);
    std::unique_ptr<AnakinEngineT> engine(new AnakinEngineT(
        need_summary, device, max_batch_size, std::move(max_input_shape),
        std::move(program_inputs), auto_config_layout));
    auto *p = engine.get();
    engines_[engine_name] = std::move(engine);
    return p;
  }

  // Destroys every engine but keeps the (now null) entries in the map.
  void DeleteALL() {
    for (auto &item : engines_) {
      item.second.reset(nullptr);
    }
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<AnakinEngineT>> engines_;
  std::mutex mut_;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/op_teller.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/op_teller.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Teller that decides purely from the op type name: an op is accepted when
// its type is in the fixed allow-list built in the constructor. The OpDesc
// argument is not inspected.
struct SimpleOpTypeSetTeller : public Teller {
  SimpleOpTypeSetTeller()
      : teller_set{"mul",
                   "fc",
                   "conv2d_fusion",
                   "split",
                   "relu",
                   "pool2d",
                   "elementwise_add",
                   "elementwise_mul",
                   "concat",
                   "tanh",
                   "conv2d",
                   "batch_norm",
                   "softmax",
                   "flatten2",
                   "reshape2",
                   "transpose2",
                   "density_prior_box",
                   "detection_out",
                   "dropout",
                   "sigmoid",
                   "sum",
                   "depthwise_conv2d",
                   "prior_box",
                   "leaky_relu",
                   "affine_channel",
                   "relu6",
                   "swish",
                   "shuffle_channel"} {}

  // Returns true when `op_type` is one of the allow-listed type names.
  bool operator()(const std::string& op_type,
                  const framework::OpDesc& desc) override {
    return teller_set.find(op_type) != teller_set.end();
  }

 private:
  // Allow-list of op type names.
  std::unordered_set<std::string> teller_set;
};
// Returns true when at least one registered teller accepts the op.
//
// pool2d / conv2d / depthwise_conv2d / conv2d_transpose ops whose "paddings"
// attribute has more than two entries are always rejected, regardless of what
// the tellers say.
bool OpTeller::Tell(const std::string& op_type,
                    const framework::OpDesc& desc) {
  // Without tellers nothing can be accepted; return before reading any
  // attribute so the behaviour for an empty teller list is unchanged.
  if (tellers_.empty()) return false;

  // The padding restriction depends only on the op, not on the teller, so it
  // is evaluated once instead of once per teller.
  const bool has_paddings_attr =
      op_type == "pool2d" || op_type == "conv2d" ||
      op_type == "depthwise_conv2d" || op_type == "conv2d_transpose";
  if (has_paddings_attr) {
    // Deliberately a copy: the attribute object returned by GetAttr may be a
    // temporary, so a reference into it is not kept -- TODO confirm.
    const std::vector<int> paddings =
        boost::get<std::vector<int>>(desc.GetAttr("paddings"));
    if (paddings.size() > 2) return false;
  }

  for (auto& teller : tellers_) {
    if ((*teller)(op_type, desc)) return true;
  }
  return false;
}
// Registers the built-in tellers; currently only the type allow-list teller.
OpTeller::OpTeller() {
  // Wrap the teller in its owning pointer before it reaches the vector, so it
  // cannot leak if growing the vector throws.
  tellers_.push_back(std::unique_ptr<Teller>(new SimpleOpTypeSetTeller()));
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/op_teller.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
/*
* Single Op teller definition.
* One can override this and define a more complex tell logic, considering more
* issues such as op_desc.
*/
// Interface of a single predicate over an operator.
// Implementations receive the op type name and its OpDesc and return a bool
// verdict; OpTeller stores tellers through pointers to this base.
struct Teller {
  // Verdict for the op `op_type` described by `desc`.
  virtual bool operator()(const std::string& op_type,
                          const framework::OpDesc& desc) = 0;

  // Virtual so that tellers can be destroyed through a Teller pointer.
  virtual ~Teller() = default;
};
/*
* A real example:
*
* struct SomeTeller : public Teller {
* bool operator()(const std::string& op_type,
* const framework::OpDesc& desc) override {
* return op_type == "fc" && desc.Inputs().size() == 2;
* }
*};
*/
/*
* class OpTeller helps to tell whether a fluid
* operator can be transformed to an Anakin layer.
*/
// Holds a list of Teller objects and exposes them through one Tell() query.
// The constructor is private; the only instance is reached via Global().
class OpTeller {
 public:
  // Returns the single shared instance, created on first call.
  static OpTeller& Global() {
    static std::unique_ptr<OpTeller> instance(new OpTeller);
    return *instance;
  }

  // Asks the registered tellers about the op `op_type` described by `desc`.
  bool Tell(const std::string& op_type, const framework::OpDesc& desc);

 private:
  OpTeller();

  // Registered tellers, owned by this object.
  std::vector<std::unique_ptr<Teller>> tellers_;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/test_anakin_engine.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <map>
#include "paddle/fluid/inference/anakin/engine.h"
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
class
TestAnakinEngine
:
public
::
testing
::
Test
{
protected:
void
SetUp
()
override
;
void
TearDown
()
override
{}
protected:
using
AnakinNvEngineT
=
AnakinEngine
<
NV
,
Precision
::
FP32
>
;
std
::
unique_ptr
<
AnakinNvEngineT
>
engine_
{
nullptr
};
};
void
TestAnakinEngine
::
SetUp
()
{
engine_
.
reset
(
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
true
));
}
// Builds a one-op ("Dense") network, runs it once on a dedicated CUDA stream
// with a single input value and logs the two output values.
TEST_F(TestAnakinEngine, Execute) {
  engine_->AddOp("op1", "Dense", {"x"}, {"y"});
  engine_->AddOpAttr("op1", "out_dim", 2);
  engine_->AddOpAttr("op1", "bias_term", false);
  engine_->AddOpAttr("op1", "axis", 1);

  std::vector<int> shape = {1, 1, 1, 2};
  Shape tmp_shape(shape);

  // Weight block of shape {1, 1, 1, 2}; it is handed to the engine through
  // RegistBlock().
  PBlock<NV> *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
  engine_->RegistBlock(weight1);
  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
  // NOTE(review): only element 0 of the two-element weight is written here;
  // element 1 keeps whatever mutable_data() returned -- confirm whether that
  // is intended.
  cpu_data[0] = 2.;
  weight1->d_tensor().set_shape(tmp_shape);
  weight1->d_tensor().copy_from(weight1->h_tensor());
  engine_->AddOpAttr("op1", "weight_1", *weight1);

  engine_->Freeze();
  // PTuple<int> input_shape = {1};
  // engine_->AddOpAttr("x", "input_shape", input_shape);
  engine_->SetInputShape("x", {1, 1, 1, 1});
  engine_->Optimize();
  engine_->InitNet();

  framework::LoDTensor x;
  framework::LoDTensor y;
  x.Resize({1, 1, 1, 1});
  y.Resize({1, 1, 1, 2});

  // Upload the single input value.
  auto *x_data = x.mutable_data<float>(platform::CUDAPlace());
  float x_data_cpu[] = {1.};
  cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice);
  std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}};

  auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
  std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};

  cudaStream_t stream;
  ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream));
  engine_->Execute(inputs, outputs, stream);
  // Wait for the work queued on `stream` before reading the result back.
  EXPECT_EQ(cudaSuccess, cudaStreamSynchronize(stream));

  float y_data_cpu[2];
  cudaMemcpy(y_data_cpu, y_data, sizeof(float) * 2, cudaMemcpyDeviceToHost);
  LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];

  // The stream was created by this test, so release it here.
  EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream));
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/argument.h
浏览文件 @
e1b0d7cb
...
...
@@ -59,7 +59,6 @@ struct Argument {
using
unique_ptr_t
=
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
;
using
fusion_statis_t
=
std
::
unordered_map
<
std
::
string
,
int
>
;
using
anakin_max_shape_t
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
;
// Reports whether `key` is present in valid_fields_.
bool Has(const std::string &key) const {
  return valid_fields_.count(key) != 0;
}
// If we set the model using config.SetModelBuffer,
...
...
@@ -184,19 +183,6 @@ struct Argument {
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_use_calib_mode
,
TensorRtUseCalibMode
,
bool
);
DECL_ARGUMENT_FIELD
(
anakin_max_input_shape
,
AnakinMaxInputShape
,
anakin_max_shape_t
);
DECL_ARGUMENT_FIELD
(
anakin_max_batch_size
,
AnakinMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
anakin_min_subgraph_size
,
AnakinMinSubgraphSize
,
int
);
DECL_ARGUMENT_FIELD
(
anakin_precision_mode
,
AnakinPrecisionMode
,
AnalysisConfig
::
Precision
);
DECL_ARGUMENT_FIELD
(
anakin_auto_config_layout
,
AnakinAutoConfigLayout
,
bool
);
DECL_ARGUMENT_FIELD
(
use_anakin
,
UseAnakin
,
bool
);
DECL_ARGUMENT_FIELD
(
anakin_passes_filter
,
AnakinPassesFilter
,
std
::
vector
<
std
::
string
>
);
DECL_ARGUMENT_FIELD
(
anakin_ops_filter
,
AnakinOpsFilter
,
std
::
vector
<
std
::
string
>
);
DECL_ARGUMENT_FIELD
(
lite_passes_filter
,
LitePassesFilter
,
std
::
vector
<
std
::
string
>
);
DECL_ARGUMENT_FIELD
(
lite_ops_filter
,
LiteOpsFilter
,
std
::
vector
<
std
::
string
>
);
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
e1b0d7cb
...
...
@@ -139,24 +139,6 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"enable_int8"
,
new
bool
(
enable_int8
));
pass
->
Set
(
"use_gpu"
,
new
bool
(
argument
->
use_gpu
()));
}
if
(
pass_name
==
"anakin_subgraph_pass"
)
{
pass
->
Set
(
"program"
,
new
framework
::
ProgramDesc
*
(
&
argument
->
main_program
()));
pass
->
Set
(
"use_gpu"
,
new
bool
(
argument
->
use_gpu
()));
pass
->
Set
(
"gpu_device_id"
,
new
int
(
argument
->
gpu_device_id
()));
pass
->
Set
(
"model_from_memory"
,
new
bool
(
argument
->
model_from_memory
()));
pass
->
Set
(
"predictor_id"
,
new
int
(
argument
->
predictor_id
()));
pass
->
Set
(
"max_input_shape"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(
argument
->
anakin_max_input_shape
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
anakin_max_batch_size
()));
bool
enable_int8
=
argument
->
anakin_precision_mode
()
==
AnalysisConfig
::
Precision
::
kInt8
;
pass
->
Set
(
"enable_int8"
,
new
bool
(
enable_int8
));
pass
->
Set
(
"anakin_ops_filter"
,
new
std
::
vector
<
std
::
string
>
(
argument
->
anakin_ops_filter
()));
pass
->
Set
(
"auto_config_layout"
,
new
bool
(
argument
->
anakin_auto_config_layout
()));
}
disable_logs_
=
argument
->
disable_logs
();
if
(
pass_name
==
"fc_fuse_pass"
)
{
pass
->
Set
(
"use_gpu"
,
new
bool
(
argument
->
use_gpu
()));
...
...
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
浏览文件 @
e1b0d7cb
...
...
@@ -12,18 +12,6 @@ if (WITH_GPU AND TENSORRT_FOUND)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
tensorrt_subgraph_pass CACHE INTERNAL
""
)
endif
()
if
(
ANAKIN_SUBGRAPH
)
cc_library
(
anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_util anakin_op_teller
)
set
(
analysis_deps
${
analysis_deps
}
subgraph_util anakin_subgraph_pass
CACHE INTERNAL
""
)
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
APPEND
${
pass_file
}
"USE_PASS(anakin_subgraph_pass);
\n
"
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
anakin_subgraph_pass CACHE INTERNAL
""
)
endif
()
if
(
WITH_LITE
)
cc_library
(
lite_subgraph_pass SRCS lite_subgraph_pass.cc DEPS
${
analysis_deps
}
subgraph_util lite_op_teller
)
set
(
analysis_deps
${
analysis_deps
}
subgraph_util lite_subgraph_pass CACHE INTERNAL
""
)
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/subgraph_detector.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/op_teller.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
using
framework
::
ir
::
Node
;
// Runs the pass on `graph`:
//   1. fuse ops accepted by the Anakin op teller (and not listed in the
//      "anakin_ops_filter" attribute) into subgraphs,
//   2. turn every op node that carries a non-empty subgraph into an
//      anakin_engine op and remove the nodes it absorbed,
//   3. remove the op nodes still marked as deleted,
//   4. store the collected parameter names on the graph under
//      kRepetitiveParamAttr.
void analysis::AnakinSubgraphPass::ApplyImpl(
    framework::ir::Graph *graph) const {
  framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);

  auto &anakin_ops_filter = Get<std::vector<std::string>>("anakin_ops_filter");

  // Predicate handed to the fuser: true for op nodes that may be fused.
  auto teller = [&anakin_ops_filter](const framework::ir::Node *node) {
    if (!node->IsOp() || !node->Op()) return false;
    const bool filtered_out =
        std::find(anakin_ops_filter.begin(), anakin_ops_filter.end(),
                  node->Op()->Type()) != anakin_ops_filter.end();
    if (filtered_out) return false;
    return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
  };

  framework::ir::SubGraphFuser fuser(graph, teller, 6 /* min_subgraph_size */);
  fuser();

  std::vector<std::string> graph_param_names =
      ExtractParameters(graph->Nodes());
  // Parameters that end up inside an Anakin engine; fluid should not keep a
  // second copy of them.
  std::vector<std::string> repetitive_params;

  // Looked up anew on every use, exactly as the original code did.
  auto fused_nodes_of = [](Node *n) {
    return framework::ir::Agent(n).subgraph();
  };

  for (auto *node : graph->Nodes()) {
    if (!node->IsOp()) continue;
    if (fused_nodes_of(node)->empty()) continue;

    CreateAnakinOp(node, graph, graph_param_names, &repetitive_params);
    std::unordered_set<const Node *> absorbed(fused_nodes_of(node)->begin(),
                                              fused_nodes_of(node)->end());
    framework::ir::GraphSafeRemoveNodes(graph, absorbed);
  }

  // Second sweep: drop the op nodes that are still flagged as deleted.
  std::unordered_set<const Node *> deleted_ops;
  for (auto *node : graph->Nodes()) {
    if (node->IsOp() && framework::ir::Agent(node).deleted()) {
      deleted_ops.insert(node);
    }
  }
  framework::ir::GraphSafeRemoveNodes(graph, deleted_ops);

  graph->Set(framework::ir::kRepetitiveParamAttr,
             new std::vector<std::string>(repetitive_params));
}
// Builds the lookup key of an engine.
//
// All input names, then all output names (each in the sets' sorted order),
// then `id` are concatenated, and the std::hash of that string is returned in
// decimal form.
// NOTE(review): the names are joined without a separator, so different name
// sets can produce the same concatenation (e.g. {"ab","c"} and {"a","bc"}).
std::string GenerateAnakinEngineKey(const std::set<std::string> &engine_inputs,
                                    const std::set<std::string> &engine_outputs,
                                    std::string id) {
  std::string engine_hash_key;
  // Iterate by const reference: the original copied every name.
  for (const auto &name : engine_inputs) {
    engine_hash_key += name;
  }
  for (const auto &name : engine_outputs) {
    engine_hash_key += name;
  }
  engine_hash_key += id;
  return std::to_string(std::hash<std::string>()(engine_hash_key));
}
// Rewrites the fused op node `node` into an "anakin_engine" op and creates
// the matching engine.
//
//   node              - op node whose Agent carries the fused subgraph.
//   graph             - graph that owns `node`; used to map variable names to
//                       variable nodes.
//   graph_params      - names of the graph's parameters.
//   repetitive_params - out: receives the names of this op's inputs that are
//                       parameters.
//
// The ops of the subgraph are copied into a new block appended to the
// program (attached as "sub_block") and into a stand-alone block desc that is
// serialized into the "subgraph" attribute and used to build the engine.
void AnakinSubgraphPass::CreateAnakinOp(
    framework::ir::Node *node, framework::ir::Graph *graph,
    const std::vector<std::string> &graph_params,
    std::vector<std::string> *repetitive_params) const {
  auto *op_desc = node->Op();
  auto &subgraph = *framework::ir::Agent(node).subgraph();
  PADDLE_ENFORCE(!subgraph.empty());

  framework::ProgramDesc *program_desc =
      Get<framework::ProgramDesc *>("program");
  // Add a new block for the anakin_engine op.
  const framework::BlockDesc &main_block =
      program_desc->Block(framework::kRootBlockIndex);
  framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);

  // A fake block desc that is not attached to any program.
  framework::proto::BlockDesc block_proto;
  framework::BlockDesc block_desc(nullptr, &block_proto);
  block_desc.Proto()->set_parent_idx(-1);
  block_desc.Proto()->set_idx(0);
  string::PrettyLogDetail("--- detect a sub-graph with %d nodes",
                          subgraph.size());

  // Copy every fused op into both blocks.
  for (auto *sub_node : subgraph) {
    auto *new_block_op = new_block->AppendOp();
    auto *op = block_desc.AppendOp();
    *new_block_op->Proto() = *sub_node->Op()->Proto();
    *op->Proto() = *sub_node->Op()->Proto();
  }

  // input_names_with_id and output_names_with_id are used to generate the
  // engine key. std::set (not unordered_set) keeps the iteration order, and
  // therefore the key, deterministic.
  std::set<std::string> input_names;
  std::set<std::string> input_names_with_id;
  std::vector<std::string> params;
  for (auto *x : node->inputs) {
    input_names.insert(x->Name());
    input_names_with_id.insert(x->Name() + std::to_string(x->id()));
    if (std::find(graph_params.begin(), graph_params.end(), x->Name()) !=
        graph_params.end()) {
      params.push_back(x->Name());
    }
  }
  repetitive_params->insert(repetitive_params->end(), params.begin(),
                            params.end());
  op_desc->SetInput(
      "Xs", std::vector<std::string>(input_names.begin(), input_names.end()));

  std::set<std::string> output_names;
  std::set<std::string> output_names_with_id;
  for (auto *x : node->outputs) {
    output_names.insert(x->Name());
    output_names_with_id.insert(x->Name() + std::to_string(x->id()));
  }
  op_desc->SetOutput(
      "Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
  op_desc->SetType("anakin_engine");

  std::unordered_map<std::string, std::string> output_name_map;
  std::unordered_map<std::string, framework::ir::Node *> graph_var_map;
  for (framework::ir::Node *graph_node : graph->Nodes()) {
    if (graph_node->IsVar() && graph_node->Var()) {
      graph_var_map[graph_node->Name()] = graph_node;
    }
  }

  // Rename all the intermediate variables and the output variables of the
  // subgraph.
  RenameAndGetOutputs(subgraph, &block_desc, input_names_with_id,
                      &output_names_with_id, &output_names, &output_name_map,
                      graph_var_map, false);

  // When the anakin engine finishes running, output_mapping is used to copy
  // the data from the renamed engine outputs back to the fluid tensors.
  std::vector<std::string> output_mapping;
  for (const auto &name : output_names) {
    auto mapped = output_name_map.find(name);
    PADDLE_ENFORCE(mapped != output_name_map.end());
    output_mapping.push_back(mapped->second);
  }

  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
                 "the block has no var-desc");
  PADDLE_ENFORCE(!output_mapping.empty());
  op_desc->SetBlockAttr("sub_block", new_block);
  SetAttr(op_desc->Proto(), "subgraph",
          block_desc.Proto()->SerializeAsString());
  // Set attrs
  SetAttr(op_desc->Proto(), "parameters", params);
  SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);

  int predictor_id = Get<int>("predictor_id");
  auto engine_key = GenerateAnakinEngineKey(
      input_names_with_id, output_names_with_id, std::to_string(predictor_id));
  SetAttr(op_desc->Proto(), "engine_key", engine_key);

  auto program_inputs = program_desc->GetFeedTargetNames();

  bool use_gpu = Get<bool>("use_gpu");
  SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
  bool enable_int8 = Get<bool>("enable_int8");
  SetAttr(op_desc->Proto(), "enable_int8", enable_int8);

  if (enable_int8) {
    CreateAnakinEngine<::anakin::Precision::INT8>(&block_desc, params,
                                                  input_names, output_mapping,
                                                  program_inputs, engine_key);
  } else {
    CreateAnakinEngine<::anakin::Precision::FP32>(&block_desc, params,
                                                  input_names, output_mapping,
                                                  program_inputs, engine_key);
  }
}
// Creates the engine registered under `engine_key` for the selected target
// and fills it from `block_desc`.
//
// With "use_gpu" set, the NV engine is built (only when compiled with
// PADDLE_WITH_CUDA); otherwise the X86 engine is built (only when compiled
// with ANAKIN_X86_PLACE). If the matching macro is not defined, nothing is
// created.
//
// The pointer returned by Create() is passed straight to the converter
// instead of being looked up again by key.
template <::anakin::Precision PrecisionT>
void AnakinSubgraphPass::CreateAnakinEngine(
    framework::BlockDesc *block_desc, const std::vector<std::string> &params,
    const std::set<std::string> &input_names,
    const std::vector<std::string> &output_mapping,
    const std::vector<std::string> &program_inputs,
    const std::string &engine_key) const {
  framework::BlockDesc block_desc_temp(nullptr, block_desc->Proto());
  bool use_gpu = Get<bool>("use_gpu");
  auto max_batch_size = Get<int>("max_batch_size");
  // Bound by reference; Create() takes its own copy of the map.
  const auto &max_input_shape =
      Get<std::map<std::string, std::vector<int>>>("max_input_shape");

  auto *scope = param_scope();
  std::unordered_set<std::string> param_set(params.begin(), params.end());

  if (use_gpu) {
#ifdef PADDLE_WITH_CUDA
    auto *anakin_engine =
        inference::Singleton<inference::anakin::AnakinEngineManager<
            ::anakin::saber::NV, PrecisionT>>::Global()
            .Create(true, Get<int>("gpu_device_id"), max_batch_size,
                    max_input_shape, program_inputs, false, engine_key);
    inference::Singleton<inference::anakin::AnakinOpConverter<
        ::anakin::saber::NV, PrecisionT>>::Global()
        .ConvertBlockToAnakinEngine(
            &block_desc_temp, scope,
            std::vector<std::string>(input_names.begin(), input_names.end()),
            param_set, output_mapping, anakin_engine);
#endif
  } else {
#ifdef ANAKIN_X86_PLACE
    bool auto_config_layout = Get<bool>("auto_config_layout");
    // The device argument is still read from "gpu_device_id" here, as in the
    // original code.
    auto *anakin_engine =
        inference::Singleton<inference::anakin::AnakinEngineManager<
            ::anakin::saber::X86, PrecisionT>>::Global()
            .Create(true, Get<int>("gpu_device_id"), max_batch_size,
                    max_input_shape, program_inputs, auto_config_layout,
                    engine_key);
    inference::Singleton<inference::anakin::AnakinOpConverter<
        ::anakin::saber::X86, PrecisionT>>::Global()
        .ConvertBlockToAnakinEngine(
            &block_desc_temp, scope,
            std::vector<std::string>(input_names.begin(), input_names.end()),
            param_set, output_mapping, anakin_engine);
#endif
  }
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
REGISTER_PASS
(
anakin_subgraph_pass
,
paddle
::
inference
::
analysis
::
AnakinSubgraphPass
);
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
class
AnakinSubgraphPass
:
public
framework
::
ir
::
FusePassBase
{
public:
void
ApplyImpl
(
framework
::
ir
::
Graph
*
graph
)
const
override
;
private:
void
CreateAnakinOp
(
framework
::
ir
::
Node
*
x
,
framework
::
ir
::
Graph
*
graph
,
const
std
::
vector
<
std
::
string
>
&
graph_params
,
std
::
vector
<
std
::
string
>
*
repetitive_params
)
const
;
void
CleanIntermediateOutputs
(
framework
::
ir
::
Node
*
node
);
template
<::
anakin
::
Precision
PrecisionT
>
void
CreateAnakinEngine
(
framework
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>
&
params
,
const
std
::
set
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_mapping
,
const
std
::
vector
<
std
::
string
>
&
program_inputs
,
const
std
::
string
&
engine_key
)
const
;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
e1b0d7cb
...
...
@@ -43,10 +43,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
set
(
inference_deps
${
inference_deps
}
tensorrt_engine tensorrt_converter
)
endif
()
if
(
ANAKIN_SUBGRAPH
)
set
(
inference_deps
${
inference_deps
}
anakin_op_converter anakin_engine
)
endif
()
if
(
WITH_NGRAPH
)
set
(
inference_deps
${
inference_deps
}
ngraph
)
endif
()
...
...
@@ -64,17 +60,3 @@ if(WITH_TESTING)
endif
()
cc_test
(
test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor benchmark
${
inference_deps
}
ARGS --dirname=
${
WORD2VEC_MODEL_DIR
}
)
if
(
ANAKIN_FOUND
)
# Do not turn warnings into errors.
set_source_files_properties
(
api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS
"-Wno-error"
)
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3
)
target_link_libraries
(
inference_anakin_api anakin anakin_saber_common
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3
)
target_link_libraries
(
inference_anakin_api_shared anakin anakin_saber_common
)
function
(
anakin_target target_name
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
endfunction
()
anakin_target
(
inference_anakin_api
)
anakin_target
(
inference_anakin_api_shared
)
endif
()
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
e1b0d7cb
...
...
@@ -22,7 +22,6 @@
namespace
paddle
{
extern
const
std
::
vector
<
std
::
string
>
kTRTSubgraphPasses
;
extern
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
;
extern
const
std
::
vector
<
std
::
string
>
kLiteSubgraphPasses
;
PassStrategy
*
AnalysisConfig
::
pass_builder
()
const
{
...
...
@@ -120,15 +119,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_mkldnn_quantizer_
);
CP_MEMBER
(
mkldnn_quantizer_config_
);
CP_MEMBER
(
use_anakin_
);
CP_MEMBER
(
anakin_max_batchsize_
);
CP_MEMBER
(
anakin_max_input_shape_
);
CP_MEMBER
(
anakin_min_subgraph_size_
);
CP_MEMBER
(
anakin_precision_mode_
);
CP_MEMBER
(
anakin_auto_config_layout_
);
CP_MEMBER
(
anakin_passes_filter_
);
CP_MEMBER
(
anakin_ops_filter_
);
CP_MEMBER
(
use_lite_
);
CP_MEMBER
(
lite_precision_mode_
);
CP_MEMBER
(
lite_passes_filter_
);
...
...
@@ -338,25 +328,6 @@ void AnalysisConfig::Update() {
pass_builder
()
->
AppendAnalysisPass
(
"memory_optimize_pass"
);
}
if
(
use_anakin_
)
{
PADDLE_ENFORCE
(
!
use_tensorrt_
,
"Anakin sub-graph and TensorRT sub-graph are not allowed to "
"run at the same time!"
);
if
(
use_gpu_
)
{
LOG
(
INFO
)
<<
"Run Anakin GPU mode"
;
}
else
{
LOG
(
INFO
)
<<
"Run Anakin CPU mode"
;
}
pass_builder
()
->
ClearPasses
();
for
(
const
auto
&
pass
:
kAnakinSubgraphPasses
)
{
if
(
std
::
find
(
anakin_passes_filter_
.
begin
(),
anakin_passes_filter_
.
end
(),
pass
)
==
anakin_passes_filter_
.
end
())
{
pass_builder
()
->
AppendPass
(
pass
);
}
}
}
if
(
use_lite_
)
{
#ifndef PADDLE_WITH_LITE
LOG
(
WARNING
)
<<
"You tried to enable the lite subgraph "
...
...
@@ -413,10 +384,9 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
specify_input_name_
;
ss
<<
cpu_math_library_num_threads_
;
ss
<<
use_anakin_
;
ss
<<
anakin_min_subgraph_size_
;
ss
<<
use_lite_
;
return
ss
.
str
();
}
...
...
@@ -490,22 +460,6 @@ void AnalysisConfig::DisableGlogInfo() {
Update
();
}
// Switches the Anakin sub-graph engine on: records every argument in the
// corresponding anakin_* member, sets use_anakin_ and then calls Update().
void AnalysisConfig::EnableAnakinEngine(
    int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
    int min_subgraph_size, AnalysisConfig::Precision precision_mode,
    bool auto_config_layout, std::vector<std::string> passes_filter,
    std::vector<std::string> ops_filter) {
  use_anakin_ = true;

  // Engine limits and numeric settings.
  anakin_max_batchsize_ = max_batch_size;
  anakin_max_input_shape_ = max_input_shape;
  anakin_min_subgraph_size_ = min_subgraph_size;
  anakin_precision_mode_ = precision_mode;
  anakin_auto_config_layout_ = auto_config_layout;

  // Passes and ops to leave out.
  anakin_passes_filter_ = passes_filter;
  anakin_ops_filter_ = ops_filter;

  Update();
}
void
AnalysisConfig
::
EnableLiteEngine
(
AnalysisConfig
::
Precision
precision_mode
,
const
std
::
vector
<
std
::
string
>
&
passes_filter
,
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
e1b0d7cb
...
...
@@ -50,10 +50,6 @@
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
#endif
#if PADDLE_WITH_ANAKIN
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#endif
namespace
paddle
{
using
inference
::
Singleton
;
...
...
@@ -390,7 +386,6 @@ void AnalysisPredictor::PrepareArgument() {
argument_
.
SetEnableMemoryOptim
(
config_
.
enable_memory_optim
());
argument_
.
SetModelFromMemory
(
config_
.
model_from_memory_
);
// Analyze inference_program
argument_
.
SetUseAnakin
(
config_
.
anakin_engine_enabled
());
argument_
.
SetPredictorID
(
predictor_id_
);
argument_
.
SetOptimCacheDir
(
config_
.
opt_cache_dir_
);
if
(
!
config_
.
model_dir
().
empty
())
{
...
...
@@ -417,17 +412,6 @@ void AnalysisPredictor::PrepareArgument() {
argument_
.
SetTensorRtUseCalibMode
(
config_
.
trt_use_calib_mode_
);
}
if
(
config_
.
anakin_engine_enabled
())
{
argument_
.
SetAnakinMaxBatchSize
(
config_
.
anakin_max_batchsize_
);
argument_
.
SetAnakinMaxInputShape
(
config_
.
anakin_max_input_shape_
);
argument_
.
SetAnakinMinSubgraphSize
(
config_
.
anakin_min_subgraph_size_
);
argument_
.
SetAnakinPrecisionMode
(
config_
.
anakin_precision_mode_
);
argument_
.
SetAnakinAutoConfigLayout
(
config_
.
anakin_auto_config_layout_
);
argument_
.
SetAnakinPassesFilter
(
config_
.
anakin_passes_filter_
);
argument_
.
SetAnakinOpsFilter
(
config_
.
anakin_ops_filter_
);
LOG
(
INFO
)
<<
"Anakin subgraph engine is enabled"
;
}
if
(
config_
.
lite_engine_enabled
())
{
argument_
.
SetLitePrecisionMode
(
config_
.
lite_precision_mode_
);
argument_
.
SetLitePassesFilter
(
config_
.
lite_passes_filter_
);
...
...
@@ -950,33 +934,3 @@ USE_TRT_CONVERTER(layer_norm);
USE_TRT_CONVERTER
(
gelu
);
USE_TRT_CONVERTER
(
multihead_matmul
);
#endif
#if PADDLE_WITH_ANAKIN
USE_ANAKIN_CONVERTER
(
mul
);
USE_ANAKIN_CONVERTER
(
fc
);
USE_ANAKIN_CONVERTER
(
conv2d
);
USE_ANAKIN_CONVERTER
(
conv2d_fusion
);
USE_ANAKIN_CONVERTER
(
concat
);
USE_ANAKIN_CONVERTER
(
split
);
USE_ANAKIN_CONVERTER
(
relu
);
USE_ANAKIN_CONVERTER
(
sigmoid
);
USE_ANAKIN_CONVERTER
(
tanh
);
USE_ANAKIN_CONVERTER
(
pool2d
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_mul
);
USE_ANAKIN_CONVERTER
(
batch_norm
);
USE_ANAKIN_CONVERTER
(
flatten
);
USE_ANAKIN_CONVERTER
(
reshape
);
USE_ANAKIN_CONVERTER
(
transpose
);
USE_ANAKIN_CONVERTER
(
softmax
);
USE_ANAKIN_CONVERTER
(
detection_out
);
USE_ANAKIN_CONVERTER
(
density_prior_box
);
USE_ANAKIN_CONVERTER
(
dropout
);
USE_ANAKIN_CONVERTER
(
sum
);
USE_ANAKIN_CONVERTER
(
prior_box
);
USE_ANAKIN_CONVERTER
(
leaky_relu
);
USE_ANAKIN_CONVERTER
(
affine_channel
);
USE_ANAKIN_CONVERTER
(
relu6
);
USE_ANAKIN_CONVERTER
(
swish
);
USE_ANAKIN_CONVERTER
(
shuffle_channel
);
#endif
paddle/fluid/inference/api/api_anakin_engine.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/inference/api/api_anakin_engine.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "framework/core/net/net.h"
#include "framework/operators/ops.h"
#include "saber/funcs/timer.h"
namespace
paddle
{
using
paddle
::
contrib
::
AnakinConfig
;
// Declarations of the predictor template's static data members:
//   - mutex_       : the lock taken by InitNet() while the executor is rebuilt.
//   - init_anakin_ : the once-flag handed to std::call_once in InitEnv().
// NOTE(review): both are spelled with `extern` and no out-of-class definition
// is visible in this translation unit -- confirm where the storage is defined.
template
<
typename
T
,
Precision
P
,
OpRunType
R
>
extern
std
::
mutex
PaddleInferenceAnakinPredictor
<
T
,
P
,
R
>::
mutex_
;
template
<
typename
T
,
Precision
P
,
OpRunType
R
>
extern
std
::
once_flag
PaddleInferenceAnakinPredictor
<
T
,
P
,
R
>::
init_anakin_
;
// Prepares the Anakin runtime for this predictor.
//
// The environment for target T is initialised exactly once (guarded by the
// shared once-flag `init_anakin_`) with `config_.max_stream`; the device
// selected by `config_.device_id` is then set on every call.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
  const auto init_env_once = [this]() {
    anakin::Env<T>::env_init(config_.max_stream);
  };
  std::call_once(init_anakin_, init_env_once);
  anakin::TargetWrapper<T>::set_device(config_.device_id);
}
// (Re)creates the executor from the currently loaded graph.
//
// The rebuild is serialised through the class-wide `mutex_`. The previous
// executor is destroyed before the replacement is constructed.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitNet() {
  std::unique_lock<std::mutex> lock(this->mutex_);
  delete this->executor_p_;
  // Reset the member before allocating: if constructing the new Net throws,
  // the destructor must not delete the already-freed executor a second time.
  this->executor_p_ = nullptr;
  // NOTE(review): the meaning of the trailing `true` argument is defined by
  // anakin::Net and is not visible here.
  this->executor_p_ = new anakin::Net<T, P, R>(*this->graph_p_, true);
}
// Creates the execution context for target T from the device id and the
// data/compute stream ids stored in the configuration.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::SetContext() {
  auto& cfg = config_;
  ctx_p_ = std::make_shared<anakin::Context<T>>(
      cfg.device_id, cfg.data_stream_id, cfg.compute_stream_id);
}
// Loads the Anakin graph and applies the configured initial input shapes.
//
// The model is read from `config_.model_file` when that path is non-empty,
// otherwise from the in-memory buffer `config_.model_buf_p` /
// `config_.model_buf_len`. When neither is provided the process aborts via
// LOG(FATAL). Every graph input must have an entry in
// `config_.init_inputs_shape`; a missing entry is fatal as well.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitGraph() {
  // Use the class's own precision parameter so the object matches the
  // declared type of `graph_p_` (Graph<T, P>) instead of hard-coding FP32.
  this->graph_p_ = std::make_shared<anakin::graph::Graph<T, P>>();

  if (!this->config_.model_file.empty()) {
    this->graph_p_->load(this->config_.model_file);
  } else if (this->config_.model_buf_p) {
    this->graph_p_->load(this->config_.model_buf_p,
                         this->config_.model_buf_len);
  } else {
    LOG(FATAL) << "Model load error.";
  }
  // NOTE(review): the result of load() is not inspected here -- confirm
  // whether Anakin reports load failures through its return value.

  this->input_names_ = this->graph_p_->get_ins();
  this->output_names_ = this->graph_p_->get_outs();

  const auto& init_shapes = this->config_.init_inputs_shape;
  for (const auto& input_str : this->input_names_) {
    // Single lookup: the iterator is reused for both the check and the value.
    const auto shape_it = init_shapes.find(input_str);
    if (shape_it == init_shapes.end()) {
      LOG(FATAL) << input_str << " should be set in init_inputs_shape.";
    }
    this->graph_p_->Reshape(input_str, shape_it->second);
  }
}
// Runs Anakin's graph optimisation on the loaded graph; a failure is fatal.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::OptimizeGraph() {
  const bool failed = !graph_p_->Optimize();
  if (failed) {
    LOG(FATAL) << "Graph optimization error.";
  }
}
// Full initialisation sequence of a predictor. The order matters: the
// environment and context come first, then the graph is loaded and optimised,
// and finally the executor is built from that graph.
//
// OptimizeGraph(), SetContext() and InitNet() are virtual, so which override
// runs depends on the dynamic type at the time of the call.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
  InitEnv();
  SetContext();
  InitGraph();
  OptimizeGraph();
  InitNet();
}
// Executes one forward pass, synchronising the device before and after the
// executor's prediction() call. `batch_size` is not used by this base
// implementation.
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::Predict(int batch_size) {
  using Target = anakin::TargetWrapper<T>;
  Target::device_sync();
  executor_p_->prediction();
  Target::device_sync();
}
// Runs inference on `inputs` and writes the results into `*output_data`.
//
// When `config_.re_allocable` is set, the call is forwarded to RunImpl()
// unchanged. Otherwise inputs whose batch size exceeds
// `config_.init_batch_size` are split along the level-0 LoD into slices, each
// slice is run separately, and the per-slice outputs are concatenated into
// contiguous buffers in `*output_data`.
//
// @param inputs       Input tensors. The splitting path requires each input
//                     to carry exactly one LoD level.
// @param output_data  Output tensors; their names select the graph outputs.
// @param batch_size   Number of sequences, or -1 to derive it from the first
//                     input (LoD length - 1, else shape[0]).
// @return true on success, false when the inputs cannot be split or a slice
//         fails to run.
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
    const std::vector<PaddleTensor>& inputs,
    std::vector<PaddleTensor>* output_data, int batch_size) {
  if (this->config_.re_allocable) {
    return this->RunImpl(inputs, output_data, batch_size);
  }

  // Run inputs data that exceeds batch size in batches.
  // 1. Reassign the batch size.
  if (batch_size == -1) {
    // The batch size is derived from the first input, so it must exist and
    // carry either a LoD or a shape.
    if (inputs.empty() ||
        (inputs[0].lod.empty() && inputs[0].shape.empty())) {
      LOG(ERROR) << "Cannot infer the batch size: there is no input, or the "
                    "first input has neither lod nor shape.";
      return false;
    }
    if (!inputs[0].lod.empty()) {
      batch_size = inputs[0].lod[0].size() - 1;
    } else {
      batch_size = inputs[0].shape[0];
    }
  }

  // 2. If the data don't need to be batched, run it directly.
  if (batch_size <= this->config_.init_batch_size) {
    return this->RunImpl(inputs, output_data);
  }
  // A zero step would never advance the slicing loop below.
  if (this->config_.init_batch_size == 0) {
    LOG(ERROR) << "init_batch_size must not be 0 when re_allocable is false.";
    return false;
  }

  // 3. Check the batch size and define temporary variables.
  const size_t total_batch = static_cast<size_t>(batch_size);
  std::vector<PaddleTensor> cur_inputs;
  std::vector<PaddleTensor> outputs_master;
  // Indexed as outputs_vec[slice][output].
  std::vector<std::vector<paddle::PaddleTensor>> outputs_vec;
  for (const auto& input : inputs) {
    if (input.lod.empty()) {
      LOG(INFO) << "Non-lod mode to be implemented.";
      return false;
    }
    if (input.lod.size() != 1) {
      return false;
    }
    if (input.lod[0].size() - 1 != total_batch) {
      return false;
    }
    PaddleTensor tensor;
    tensor.name = input.name;
    tensor.dtype = PaddleDType::FLOAT32;
    cur_inputs.push_back(tensor);
  }
  // Only the names are needed; iterate by reference to avoid copying the
  // callers' output buffers.
  for (const auto& output : *output_data) {
    PaddleTensor tensor;
    tensor.name = output.name;
    outputs_master.push_back(tensor);
  }

  // 4. Batch execution.
  for (size_t start_batch = 0; start_batch < total_batch;) {
    auto end_batch = start_batch + this->config_.init_batch_size;
    if (end_batch > total_batch) {
      end_batch = total_batch;
    }
    auto cur_outputs = outputs_master;
    for (size_t i = 0; i < inputs.size(); i++) {
      const auto& lod0 = inputs[i].lod[0];
      const auto start = lod0[start_batch];
      const auto end = lod0[end_batch];
      // Offsets of the slice, rebased so that the slice starts at 0.
      std::vector<size_t> offsets;
      for (size_t j = start_batch; j <= end_batch; j++) {
        offsets.push_back(lod0[j] - start);
      }
      // The slice aliases the caller's buffer; no data is copied here.
      auto mem_start = static_cast<float*>(inputs[i].data.data()) + start;
      cur_inputs[i].data =
          PaddleBuf(mem_start, (end - start) * sizeof(float));
      cur_inputs[i].lod = std::vector<std::vector<size_t>>({offsets});
      cur_inputs[i].shape =
          std::vector<int>({static_cast<int>(end - start), 1, 1, 1});
    }
    if (!this->RunImpl(cur_inputs, &cur_outputs)) {
      return false;
    }
    outputs_vec.push_back(std::move(cur_outputs));
    start_batch = end_batch;
  }

  // 5. Copy the results to contiguous memory.
  // Assume that each batch has the same final outputs size.
  const auto count = [](const std::vector<int>& dims) {
    int cnt = 1;
    for (const int d : dims) {
      cnt *= d;
    }
    return cnt;
  };
  if (outputs_vec.empty()) {
    return true;
  }
  for (size_t i = 0; i < output_data->size(); i++) {
    // Shape template comes from the i-th output of the FIRST slice. (The
    // indices were previously swapped, which read the wrong tensor and could
    // index past the end of outputs_vec.)
    std::vector<int> shape = outputs_vec[0][i].shape;
    shape[0] = batch_size;
    const int total_cnt = count(shape);
    (*output_data)[i].shape = shape;
    (*output_data)[i].data.Resize(total_cnt * sizeof(float));
    float* addr = static_cast<float*>((*output_data)[i].data.data());
    for (const auto& single_out : outputs_vec) {
      const int cnt = count(single_out[i].shape);
      memcpy(addr, single_out[i].data.data(), cnt * sizeof(float));
      addr += cnt;
    }
  }
  return true;
}
// Copies `inputs` into the executor's input tensors, runs one prediction and
// copies the requested outputs back into `*output_data`.
//
// Only FLOAT32 inputs are accepted, and each input's rank must match the rank
// of the corresponding net input; violations abort via LOG(FATAL). Unless
// built for MLU, an input with more elements than the net tensor triggers a
// graph reshape plus executor rebuild when `config_.re_allocable` is set, and
// is fatal otherwise. Every entry of `*output_data` must name a graph output.
//
// Always returns true; all error paths terminate through LOG(FATAL).
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
    const std::vector<PaddleTensor>& inputs,
    std::vector<PaddleTensor>* output_data, int batch_size) {
  using HostType = typename anakin::DefaultHostType<T>::Host_type;
  using HostTensor = anakin::saber::Tensor<HostType>;

  anakin::TargetWrapper<T>::set_device(config_.device_id);

  // ---- Feed the inputs. ----
  for (const auto& input : inputs) {
    if (input.dtype != PaddleDType::FLOAT32) {
      LOG(FATAL) << "Only support float type inputs. " << input.name
                 << "'s type is not float";
    }
    auto net_input = executor_p_->get_in(input.name);
    // For backward compatibility.
    auto net_shape = net_input->shape();
    if (net_shape.size() != input.shape.size()) {
      LOG(FATAL) << " input " << input.name
                 << "'s shape size should be equal to that of net";
    }
#ifndef ANAKIN_MLU_PLACE
    int requested_elems = 1;
    for (const int dim : input.shape) {
      requested_elems *= dim;
    }
    if (requested_elems > net_shape.count()) {
      if (!config_.re_allocable) {
        LOG(FATAL)
            << "Run failed because Anakin was expected not to reallocate "
               "memory.";
      } else {
        // Grow the graph input, rebuild the executor and re-fetch the tensor,
        // because the old pointer belonged to the destroyed executor.
        graph_p_->Reshape(input.name, input.shape);
        this->InitNet();
        net_input = executor_p_->get_in(input.name);
      }
    }
#endif
    std::vector<int> tmp_shape(input.shape.begin(), input.shape.end());
    auto* host_data = static_cast<float*>(input.data.data());
    // Host tensor wrapping the caller's buffer.
    HostTensor h_tensor(host_data, HostType(), 0, tmp_shape);
#ifndef ANAKIN_MLU_PLACE
    net_input->reshape(tmp_shape);
#endif
    if (!input.lod.empty()) {
      if (input.lod.size() > 1) {
        LOG(FATAL) << " input lod first dim should <=1, but you set "
                   << input.lod.size();
      }
      std::vector<int> lod(input.lod[0].begin(), input.lod[0].end());
      std::vector<std::vector<int>> offset({lod});
      net_input->set_seq_offset(offset);
      VLOG(3) << "offset.size(): " << offset[0].size();
      for (const int off : offset[0]) {
        VLOG(3) << off;
      }
    }
    net_input->copy_from(h_tensor);
  }

  // ---- Execute. ----
  this->Predict(batch_size);

  // ---- Fetch the outputs. ----
  if (output_data->empty()) {
    LOG(FATAL) << "The output param in the Run function is incorrect.";
  }
  for (auto& output : *output_data) {
    const auto names_end = output_names_.end();
    if (std::find(output_names_.begin(), names_end, output.name) ==
        names_end) {
      LOG(FATAL) << output.name << " is not in the outputs of the graph.";
    }
    auto* net_output = executor_p_->get_out(output.name);
    auto tmp_shape = net_output->valid_shape();
#ifdef ANAKIN_MLU_PLACE
    tmp_shape.set_num(batch_size);
#endif
    output.shape = tmp_shape;
    // Grow the caller's buffer only when it is too small for the result.
    const auto needed_bytes = tmp_shape.count() * sizeof(float);
    if (output.data.length() < needed_bytes) {
      output.data.Resize(needed_bytes);
    }
    auto* host_data = static_cast<float*>(output.data.data());
    HostTensor h_tensor(host_data, HostType(), 0, tmp_shape);
    h_tensor.copy_from(*net_output);
  }
  return true;
}
// Re-initialises this predictor from `predictor`: the configuration, graph
// pointer and input/output names are taken over, a fresh context is created
// from the copied configuration, and a new executor is built.
//
// NOTE(review): the context is constructed directly here rather than through
// the virtual SetContext(), so a subclass's extra context setup is not
// applied on this path -- confirm that this is intended.
//
// Always returns true.
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::Reset(
    PaddleInferenceAnakinPredictor<T, P, R>* predictor) {
  config_ = predictor->GetConfig();
  graph_p_ = predictor->GetGraph();
  input_names_ = predictor->GetInputNames();
  output_names_ = predictor->GetOutputNames();

  auto& cfg = config_;
  ctx_p_ = std::make_shared<anakin::Context<T>>(
      cfg.device_id, cfg.data_stream_id, cfg.compute_stream_id);

  this->InitNet();
  return true;
}
// Creates a default-constructed (uninitialised) predictor of this exact
// template instantiation, owned by the returned unique_ptr.
template <typename T, Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<T, P, R>::New() {
  using Self = PaddleInferenceAnakinPredictor<T, P, R>;
  return std::unique_ptr<PaddlePredictor>(new Self());
}
// Creates a new predictor that shares the graph (and therefore the net
// weights) with this one.
//
// A blank predictor is obtained from the virtual New(), so subclasses clone
// into their own type, and is then initialised from `this` via Reset().
// Aborts via LOG(FATAL) if New() returned an object that is not a
// PaddleInferenceAnakinPredictor<T, P, R>.
template <typename T, Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<T, P, R>::Clone() {
  VLOG(3) << "Anakin Predictor::clone";
  // New() already returns a prvalue; wrapping it in std::move is redundant.
  std::unique_ptr<PaddlePredictor> cls = this->New();
  auto* anakin_predictor_p =
      dynamic_cast<PaddleInferenceAnakinPredictor<T, P, R>*>(cls.get());
  if (!anakin_predictor_p) {
    LOG(FATAL) << "fail to call Init";
  }
  anakin_predictor_p->Reset(this);
  return cls;
}
#ifdef ANAKIN_MLU_PLACE
// Creates a default-constructed (uninitialised) MLU predictor.
template <Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinMLUPredictor<P, R>::New() {
  using Self = PaddleInferenceAnakinMLUPredictor<P, R>;
  return std::unique_ptr<PaddlePredictor>(new Self());
}
// Creates the MLU execution context and applies the MLU-specific options:
// model parallelism and op fusion from the configuration, plus the
// batch-changeable and channel-duplicate switches.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
  auto& cfg = this->config_;
  this->ctx_p_ = std::make_shared<anakin::Context<anakin::MLU>>(
      cfg.device_id, cfg.data_stream_id, cfg.compute_stream_id);

  auto& ctx = this->ctx_p_;
  ctx->set_model_parallel(cfg.model_parallel);
  ctx->set_fusion(cfg.op_fuse);
  ctx->enable_batch_changable();
  ctx->enable_channel_duplicate();
}
// Runs the fusion optimisation on the graph, passing the configured
// `op_fuse` switch; a failure is fatal.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
  const bool failed =
      !this->graph_p_->fusion_optimize(this->config_.op_fuse);
  if (failed) {
    LOG(FATAL) << "Graph optimization error.";
  }
}
// (Re)creates the MLU executor and initialises it from the graph and the
// context through fusion_init(). The rebuild is serialised through the
// class-wide `mutex_`.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::InitNet() {
  std::unique_lock<std::mutex> lock(this->mutex_);
  delete this->executor_p_;
  // Reset the member before allocating: if creating the new Net throws, the
  // destructor must not delete the already-freed executor a second time.
  this->executor_p_ = nullptr;
  this->executor_p_ = new anakin::Net<anakin::MLU, P, R>();
  this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
}
// Runs one fused prediction on the MLU executor for `batch_size`.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::Predict(int batch_size) {
  auto* net = this->executor_p_;
  net->fusion_prediction(batch_size);
}
#endif
#ifdef ANAKIN_BM_PLACE
// Creates a default-constructed (uninitialised) BM predictor.
template <Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinBMPredictor<P, R>::New() {
  using Self = PaddleInferenceAnakinBMPredictor<P, R>;
  return std::unique_ptr<PaddlePredictor>(new Self());
}
// Runs the fusion optimisation on the graph; a failure is fatal.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
  const bool failed = !this->graph_p_->fusion_optimize();
  if (failed) {
    LOG(FATAL) << "Graph optimization error.";
  }
}
// (Re)creates the BM executor and initialises it from the graph and the
// context through fusion_init(). The rebuild is serialised through the
// class-wide `mutex_`.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::InitNet() {
  std::unique_lock<std::mutex> lock(this->mutex_);
  delete this->executor_p_;
  // Reset the member before allocating: if creating the new Net throws, the
  // destructor must not delete the already-freed executor a second time.
  this->executor_p_ = nullptr;
  this->executor_p_ = new anakin::Net<anakin::BM, P, R>();
  this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
}
// Runs one fused prediction on the BM executor. `batch_size` is not
// forwarded to the executor.
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::Predict(int batch_size) {
  auto* net = this->executor_p_;
  net->fusion_prediction();
}
#endif
// Explicit instantiations of the predictor templates, one per backend that is
// enabled at build time. The member functions are defined in this translation
// unit, so these are the instantiations CreatePaddlePredictor constructs.
#ifdef PADDLE_WITH_CUDA
template class PaddleInferenceAnakinPredictor<
    anakin::NV, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>;
#endif
#ifdef ANAKIN_X86_PLACE
template class PaddleInferenceAnakinPredictor<
    anakin::X86, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>;
#endif
#ifdef ANAKIN_MLU_PLACE
template class PaddleInferenceAnakinMLUPredictor<anakin::Precision::FP32,
                                                 ::anakin::OpRunType::SYNC>;
#endif
#ifdef ANAKIN_BM_PLACE
template class PaddleInferenceAnakinBMPredictor<anakin::Precision::FP32,
                                                ::anakin::OpRunType::ASYNC>;
#endif
// Factory specialisation for the Anakin engine.
//
// Selects the predictor implementation from `config.target_type`. Only the
// backends compiled in (PADDLE_WITH_CUDA, ANAKIN_X86_PLACE, ANAKIN_MLU_PLACE,
// ANAKIN_BM_PLACE) are considered; any other target aborts via LOG(FATAL).
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
    const contrib::AnakinConfig& config) {
  using PredictorPtr = std::unique_ptr<PaddlePredictor>;
#ifdef PADDLE_WITH_CUDA
  if (config.target_type == contrib::AnakinConfig::NVGPU) {
    using NvPredictor =
        PaddleInferenceAnakinPredictor<anakin::NV, anakin::Precision::FP32,
                                       ::anakin::OpRunType::ASYNC>;
    return PredictorPtr(new NvPredictor(config));
  }
#endif
#ifdef ANAKIN_X86_PLACE
  if (config.target_type == contrib::AnakinConfig::X86) {
    using X86Predictor =
        PaddleInferenceAnakinPredictor<anakin::X86, anakin::Precision::FP32,
                                       ::anakin::OpRunType::ASYNC>;
    return PredictorPtr(new X86Predictor(config));
  }
#endif
#ifdef ANAKIN_MLU_PLACE
  if (config.target_type == contrib::AnakinConfig::MLU) {
    using MluPredictor =
        PaddleInferenceAnakinMLUPredictor<anakin::Precision::FP32,
                                          ::anakin::OpRunType::SYNC>;
    return PredictorPtr(new MluPredictor(config));
  }
#endif
#ifdef ANAKIN_BM_PLACE
  if (config.target_type == contrib::AnakinConfig::BM) {
    using BmPredictor =
        PaddleInferenceAnakinBMPredictor<anakin::Precision::FP32,
                                         ::anakin::OpRunType::ASYNC>;
    return PredictorPtr(new BmPredictor(config));
  }
#endif
  LOG(FATAL) << "Anakin Predictor create on unknown platform: "
             << config.target_type;
  return nullptr;
}
// Logs per-op timing collected by `net_executor`, averaged over `epoch`.
//
// Compiled to a no-op unless PADDLE_ANAKIN_ENABLE_OP_TIMER is defined. When
// enabled it prints one line per executed op, followed by the accumulated
// time per distinct op_param value.
template <typename T, Precision P, OpRunType R>
void DisplayOpTimer(anakin::Net<T, P, R>* net_executor, int epoch) {
#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER
  std::vector<float> op_time = net_executor->get_op_time();
  auto exec_funcs = net_executor->get_exec_funcs();
  auto op_param = net_executor->get_op_param();
  const size_t op_count = op_time.size();

  // Per-op report.
  for (size_t idx = 0; idx < op_count; ++idx) {
    LOG(INFO) << "name: " << exec_funcs[idx].name
              << " op_type: " << exec_funcs[idx].op_name
              << " op_param: " << op_param[idx] << " time "
              << op_time[idx] / epoch;
  }

  // Accumulate the time of ops that share the same op_param.
  std::map<std::string, float> op_map;
  for (size_t idx = 0; idx < op_count; ++idx) {
    const auto inserted = op_map.insert(
        std::pair<std::string, float>(op_param[idx], op_time[idx]));
    if (!inserted.second) {
      inserted.first->second += op_time[idx];
    }
  }
  for (const auto& entry : op_map) {
    LOG(INFO) << entry.first << " " << (entry.second) / epoch << " ms";
  }
#endif
}
// Reports the op timers (a no-op unless the op timer is compiled in) and
// releases the executor owned by this predictor.
template <typename T, Precision P, OpRunType R>
PaddleInferenceAnakinPredictor<T, P, R>::~PaddleInferenceAnakinPredictor() {
  DisplayOpTimer<T, P, R>(executor_p_, config_.init_batch_size);
  delete executor_p_;
  executor_p_ = nullptr;
}
}
// namespace paddle
paddle/fluid/inference/api/api_anakin_engine.h
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains the implementation of the inference API with the Anakin
* engine embedded; this API only supports Anakin models.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "framework/core/net/net.h"
#include "framework/graph/graph.h"
#include "paddle/fluid/inference/api/paddle_anakin_config.h"
#include "saber/core/shape.h"
#include "saber/saber_types.h"
namespace
paddle
{
using
contrib
::
AnakinConfig
;
using
anakin
::
Precision
;
using
anakin
::
OpRunType
;
// Predictor backed by the Anakin engine.
//
// Template parameters: T is the Anakin target, P the precision and R the op
// run type. The predictor owns its executor through a raw pointer
// (`executor_p_`) that is released in the destructor, while the graph and
// the context are held through shared_ptr.
template <typename T, Precision P, OpRunType R>
class PaddleInferenceAnakinPredictor : public PaddlePredictor {
 public:
  // Creates an uninitialised predictor; used by New()/Clone(), which then
  // call Reset().
  PaddleInferenceAnakinPredictor() = default;

  // Stores `config` and runs the whole initialisation sequence.
  explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config)
      : config_(config) {
    InitPredictor();
  }

  // NOTE Unlike the native engine, the buffers of anakin engine's output_data
  // should be allocated first.
  bool Run(const std::vector<PaddleTensor>& inputs,
           std::vector<PaddleTensor>* output_data,
           int batch_size = -1) override;

  std::unique_ptr<PaddlePredictor> Clone() override;

  // Re-initialises this predictor from `predictor`.
  bool Reset(PaddleInferenceAnakinPredictor<T, P, R>* predictor);

  void InitPredictor();

  std::shared_ptr<anakin::graph::Graph<T, P>> GetGraph() { return graph_p_; }

  std::vector<std::string> GetInputNames() override { return input_names_; }

  std::vector<std::string> GetOutputNames() override { return output_names_; }

  const AnakinConfig& GetConfig() const { return config_; }

  ~PaddleInferenceAnakinPredictor() override;

 protected:
  void InitEnv();
  void InitGraph();
  // Customisation points overridden by the backend-specific subclasses.
  virtual void OptimizeGraph();
  virtual void InitNet();
  virtual void SetContext();
  virtual void Predict(int batch_size);
  virtual std::unique_ptr<PaddlePredictor> New();

  // Taken while the executor is rebuilt in InitNet().
  static std::mutex mutex_;
  AnakinConfig config_;
  std::shared_ptr<anakin::Context<T>> ctx_p_;
  std::shared_ptr<anakin::graph::Graph<T, P>> graph_p_;
  // Owned; deleted in InitNet() on rebuild and in the destructor.
  anakin::Net<T, P, R>* executor_p_{nullptr};
  std::vector<std::string> input_names_;
  std::vector<std::string> output_names_;

 private:
  bool RunImpl(const std::vector<PaddleTensor>& inputs,
               std::vector<PaddleTensor>* output_data, int batch_size = -1);

  // Guards the one-time environment initialisation in InitEnv().
  static std::once_flag init_anakin_;
};
#ifdef ANAKIN_MLU_PLACE
// Anakin predictor for the MLU target. Overrides context creation, graph
// optimisation, executor construction and prediction of the generic
// predictor.
template <Precision P, OpRunType R>
class PaddleInferenceAnakinMLUPredictor final
    : public PaddleInferenceAnakinPredictor<anakin::MLU, P, R> {
 public:
  PaddleInferenceAnakinMLUPredictor() = default;

  // The base is default-constructed and InitPredictor() is invoked from this
  // constructor body, so the virtual calls it makes reach the overrides
  // declared below.
  explicit PaddleInferenceAnakinMLUPredictor(const AnakinConfig& config) {
    this->config_ = config;
    this->InitPredictor();
  }

  std::unique_ptr<PaddlePredictor> New() override;
  void SetContext() override;
  void OptimizeGraph() override;
  void InitNet() override;
  void Predict(int batch_size) override;
};
#endif
#ifdef ANAKIN_BM_PLACE
// Anakin predictor for the BM target. Overrides graph optimisation, executor
// construction and prediction of the generic predictor.
template <Precision P, OpRunType R>
class PaddleInferenceAnakinBMPredictor final
    : public PaddleInferenceAnakinPredictor<anakin::BM, P, R> {
 public:
  PaddleInferenceAnakinBMPredictor() = default;

  // The base is default-constructed and InitPredictor() is invoked from this
  // constructor body, so the virtual calls it makes reach the overrides
  // declared below.
  explicit PaddleInferenceAnakinBMPredictor(const AnakinConfig& config) {
    this->config_ = config;
    this->InitPredictor();
  }

  std::unique_ptr<PaddlePredictor> New() override;
  void OptimizeGraph() override;
  void InitNet() override;
  void Predict(int batch_size) override;
};
#endif
}
// namespace paddle
paddle/fluid/inference/api/high_level_api.md
浏览文件 @
e1b0d7cb
...
...
@@ -24,17 +24,16 @@ that is important when there are multiple inputs and need to distinguish which v
## engine
The inference API has two different underlying engines
-
the native engine
, which is consists of the native operators and framework,
-
the
Anakin engine, which has an Anakin library embedded.
-
the native engine
-
the
tensorrt engine
The native engine takes a native Paddle model as input, and supports any model that trained by Paddle,
the Anakin engine is faster for some model,
but it can only take the Anakin model as input(user need to transform the format first manually) and currently not all Paddle models are supported.
The native engine, which consists of the native operators and framework, takes a native Paddle model
as input, and supports any model trained by Paddle.
```
c++
enum
class
PaddleEngineKind
{
kNative
=
0
,
// Use the native Fluid facility.
kA
nakin
,
// Use Anakin for inference
.
kA
utoMixedTensorRT
// Automatically mixing TensorRT with the Fluid ops
.
};
```
...
...
paddle/fluid/inference/api/high_level_api_cn.md
浏览文件 @
e1b0d7cb
...
...
@@ -29,10 +29,9 @@ struct PaddleTensor {
## engine
高层 API 底层有多种优化实现,我们称之为 engine,目前有
三
种 engine
高层 API 底层有多种优化实现,我们称之为 engine,目前有
两
种 engine
-
原生 engine,由 paddle 原生的 forward operator 组成,可以天然支持所有paddle 训练出的模型,
-
Anakin engine,封装了
[
Anakin
](
https://github.com/PaddlePaddle/Anakin
)
,在某些模型上性能不错,但只能接受自带模型格式,无法支持所有 paddle 模型,
-
TensorRT mixed engine,用子图的方式支持了
[
TensorRT
](
https://developer.nvidia.com/tensorrt
)
,支持所有paddle 模型,并自动切割部分计算子图到 TensorRT 上加速(WIP)
其实现为
...
...
@@ -40,7 +39,6 @@ struct PaddleTensor {
```
c++
enum
class
PaddleEngineKind
{
kNative
=
0
,
// Use the native Fluid facility.
kAnakin
,
// Use Anakin for inference.
kAutoMixedTensorRT
// Automatically mixing TensorRT with the Fluid ops.
};
```
...
...
paddle/fluid/inference/api/paddle_anakin_config.h
已删除
100644 → 0
浏览文件 @
0a678ca0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle_api.h" // NOLINT
namespace
paddle
{
namespace
contrib
{
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
  // Backend the predictor is created for.
  enum TargetType { NVGPU = 0, X86, MLU, BM };

  // Device the predictor runs on.
  int device_id{0};
  // Path of the Anakin model file; when empty, the in-memory buffer below is
  // used instead.
  std::string model_file;
  // Initial shape for each graph input, keyed by input name.
  std::map<std::string, std::vector<int>> init_inputs_shape;
  // Batch size used to split oversized inputs when re_allocable is false.
  int init_batch_size{-1};
  // Whether the engine may reshape the graph and rebuild the net when an
  // input is larger than the current net input.
  bool re_allocable{true};
  int max_stream{4};
  int data_stream_id{0};
  int compute_stream_id{0};
  // Optional in-memory model (pointer and length in bytes).
  // NOTE(review): the pointer is only stored and read here; ownership is
  // presumably the caller's -- confirm.
  char* model_buf_p{nullptr};
  size_t model_buf_len{0};
  // Explicitly defaulted so that a config whose target was never set is read
  // as a well-defined value instead of an indeterminate one.
  TargetType target_type{NVGPU};
#ifdef ANAKIN_MLU_PLACE
  int model_parallel{8};
  int data_parallel{1};
  bool op_fuse{false};
  bool sparse{false};
#endif
};
}
// namespace contrib
}
// namespace paddle
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
e1b0d7cb
...
...
@@ -161,25 +161,13 @@ struct AnalysisConfig {
*/
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
/**
* \brief Turn on the usage of
Anakin
sub-graph engine.
* \brief Turn on the usage of
Lite
sub-graph engine.
*/
void
EnableAnakinEngine
(
int
max_batch_size
=
1
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{},
int
min_subgraph_size
=
6
,
Precision
precision
=
Precision
::
kFloat32
,
bool
auto_config_layout
=
false
,
std
::
vector
<
std
::
string
>
passes_filter
=
{},
std
::
vector
<
std
::
string
>
ops_filter
=
{});
void
EnableLiteEngine
(
AnalysisConfig
::
Precision
precision_mode
=
Precision
::
kFloat32
,
const
std
::
vector
<
std
::
string
>&
passes_filter
=
{},
const
std
::
vector
<
std
::
string
>&
ops_filter
=
{});
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
bool
anakin_engine_enabled
()
const
{
return
use_anakin_
;
}
/** A boolean state indicating whether the Lite sub-graph engine is used.
*/
bool
lite_engine_enabled
()
const
{
return
use_lite_
;
}
...
...
@@ -350,15 +338,6 @@ struct AnalysisConfig {
mutable
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
bool
use_anakin_
{
false
};
int
anakin_max_batchsize_
;
int
anakin_min_subgraph_size_
{
6
};
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
anakin_max_input_shape_
;
Precision
anakin_precision_mode_
;
bool
anakin_auto_config_layout_
{
false
};
std
::
vector
<
std
::
string
>
anakin_passes_filter_
;
std
::
vector
<
std
::
string
>
anakin_ops_filter_
;
bool
use_lite_
{
false
};
std
::
vector
<
std
::
string
>
lite_passes_filter_
;
std
::
vector
<
std
::
string
>
lite_ops_filter_
;
...
...
paddle/fluid/inference/api/paddle_api.h
浏览文件 @
e1b0d7cb
...
...
@@ -352,7 +352,6 @@ enum class PaddleEngineKind {
kNative
=
0
,
/*!< Use the native Fluid facility. */
kAutoMixedTensorRT
,
/*!< Automatically mix Fluid with TensorRT. */
kAnalysis
,
/*!< More optimization. */
kAnakin
/*!< Use Anakin for inference, not mature yet. */
};
template
<
typename
ConfigT
,
PaddleEngineKind
engine
>
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
e1b0d7cb
...
...
@@ -28,6 +28,3 @@ limitations under the License. */
#include "paddle_analysis_config.h" // NOLINT
#include "paddle_api.h" // NOLINT
#if (defined PADDLE_WITH_ANAKIN)
#include "paddle_anakin_config.h" // NOLINT
#endif
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
e1b0d7cb
...
...
@@ -92,19 +92,6 @@ const std::vector<std::string> kTRTSubgraphPasses({
"transpose_flatten_concat_fuse_pass"
,
});
// The following passes works for Anakin sub-graph engine.
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
({
"quant_conv2d_dequant_fuse_pass"
,
//
"simplify_anakin_priorbox_detection_out_pass"
,
//
"fillconstant_elementwisemul_fuse"
,
//
"fc_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"shuffle_channel_detect_pass"
,
//
"anakin_subgraph_pass"
,
//
"fc_gru_fuse_pass"
,
//
});
const
std
::
vector
<
std
::
string
>
kLiteSubgraphPasses
({
#ifdef PADDLE_WITH_LITE
"lite_subgraph_pass"
,
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
e1b0d7cb
...
...
@@ -162,7 +162,6 @@ class GpuPassStrategy : public PassStrategy {
};
extern
const
std
::
vector
<
std
::
string
>
kTRTSubgraphPasses
;
extern
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
;
extern
const
std
::
vector
<
std
::
string
>
kLiteSubgraphPasses
;
}
// namespace paddle
paddle/fluid/inference/capi/c_api.h
浏览文件 @
e1b0d7cb
...
...
@@ -188,17 +188,6 @@ typedef struct PD_MaxInputShape {
int
shape_size
;
}
PD_MaxInputShape
;
PADDLE_CAPI_EXPORT
extern
void
PD_EnableAnakinEngine
(
PD_AnalysisConfig
*
config
,
int
max_batch_size
=
1
,
PD_MaxInputShape
*
max_input_shape
=
NULL
,
int
max_input_shape_size
=
0
,
int
min_subgraph_size
=
6
,
Precision
precision
=
Precision
::
kFloat32
,
bool
auto_config_layout
=
false
,
char
**
passes_filter
=
NULL
,
int
passes_filter_size
=
0
,
char
**
ops_filter
=
NULL
,
int
ops_filter_size
=
0
);
PADDLE_CAPI_EXPORT
extern
bool
PD_AnakinEngineEnabled
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
void
PD_SwitchIrDebug
(
PD_AnalysisConfig
*
config
,
bool
x
=
true
);
...
...
paddle/fluid/inference/capi/pd_config.cc
浏览文件 @
e1b0d7cb
...
...
@@ -165,42 +165,6 @@ bool PD_TensorrtEngineEnabled(const PD_AnalysisConfig* config) {
return
config
->
config
.
tensorrt_engine_enabled
();
}
void
PD_EnableAnakinEngine
(
PD_AnalysisConfig
*
config
,
int
max_batch_size
,
PD_MaxInputShape
*
max_input_shape
,
int
max_input_shape_size
,
int
min_subgraph_size
,
Precision
precision
,
bool
auto_config_layout
,
char
**
passes_filter
,
int
passes_filter_size
,
char
**
ops_filter
,
int
ops_filter_size
)
{
PADDLE_ENFORCE_NOT_NULL
(
config
);
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
mis
;
if
(
max_input_shape
)
{
for
(
int
i
=
0
;
i
<
max_input_shape_size
;
++
i
)
{
std
::
vector
<
int
>
tmp_shape
;
tmp_shape
.
assign
(
max_input_shape
[
i
].
shape
,
max_input_shape
[
i
].
shape
+
max_input_shape
[
i
].
shape_size
);
mis
[
std
::
string
(
max_input_shape
[
i
].
name
)]
=
std
::
move
(
tmp_shape
);
}
}
std
::
vector
<
std
::
string
>
pf
;
std
::
vector
<
std
::
string
>
of
;
if
(
passes_filter
)
{
pf
.
assign
(
passes_filter
,
passes_filter
+
passes_filter_size
);
}
if
(
ops_filter
)
{
of
.
assign
(
ops_filter
,
ops_filter
+
ops_filter_size
);
}
config
->
config
.
EnableAnakinEngine
(
max_batch_size
,
mis
,
min_subgraph_size
,
paddle
::
ConvertToACPrecision
(
precision
),
auto_config_layout
,
pf
,
of
);
}
bool
PD_AnakinEngineEnabled
(
const
PD_AnalysisConfig
*
config
)
{
PADDLE_ENFORCE_NOT_NULL
(
config
);
return
config
->
config
.
anakin_engine_enabled
();
}
void
PD_SwitchIrDebug
(
PD_AnalysisConfig
*
config
,
bool
x
)
{
PADDLE_ENFORCE_NOT_NULL
(
config
);
config
->
config
.
SwitchIrDebug
(
x
);
...
...
paddle/fluid/inference/tests/api/anakin_mlu_tester.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string
(
model
,
""
,
"Directory of the inference model."
);
namespace
paddle
{
// Builds the AnakinConfig used by this tester: MLU target, model path taken
// from --model, a single input "input_0" sized {1, 3, 112, 112}, device and
// stream ids all 0, with re-allocation and op fusion switched on.
contrib::AnakinConfig Config() {
  contrib::AnakinConfig anakin_cfg;
  anakin_cfg.target_type = contrib::AnakinConfig::MLU;
  anakin_cfg.model_file = FLAGS_model;

  // Determine the use of memory here.
  anakin_cfg.init_inputs_shape = std::map<std::string, std::vector<int>>{
      {"input_0", std::vector<int>({1, 3, 112, 112})}};

  // Determine the device execution context.
  anakin_cfg.device_id = 0;
  anakin_cfg.data_stream_id = 0;
  anakin_cfg.compute_stream_id = 0;

  // Set re_allocable and op_fuse TRUE.
  anakin_cfg.re_allocable = true;
  anakin_cfg.op_fuse = true;
  return anakin_cfg;
}
void
single_test
()
{
// 1. Defining basic data structures.
auto
config
=
paddle
::
Config
();
auto
predictor
=
paddle
::
CreatePaddlePredictor
<
paddle
::
contrib
::
AnakinConfig
,
paddle
::
PaddleEngineKind
::
kAnakin
>
(
config
);
// 2. Define the data structure of the predictor inputs and outputs.
std
::
vector
<
paddle
::
PaddleTensor
>
input_tensors
;
std
::
vector
<
paddle
::
PaddleTensor
>
output_tensors
;
// 3. Define and fill the inputs tensor.
int
num
=
1
;
int
channel
=
3
;
int
height
=
112
;
int
width
=
112
;
std
::
vector
<
float
>
input
(
num
*
channel
*
height
*
width
,
1
);
std
::
vector
<
std
::
vector
<
float
>>
inputs
({
input
});
const
std
::
vector
<
std
::
string
>
input_names
{
"input_0"
};
for
(
auto
&
name
:
input_names
)
{
paddle
::
PaddleTensor
tensor
;
tensor
.
name
=
name
;
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
input_tensors
.
push_back
(
tensor
);
}
for
(
size_t
j
=
0
;
j
<
input_tensors
.
size
();
j
++
)
{
input_tensors
[
j
].
data
=
paddle
::
PaddleBuf
(
&
inputs
[
j
][
0
],
inputs
[
j
].
size
()
*
sizeof
(
float
));
// The shape of each execution can be changed.
input_tensors
[
j
].
shape
=
std
::
vector
<
int
>
({
num
,
channel
,
height
,
width
});
}
// 4. Set the output placeholder of predictor.
PaddleTensor
predict_out
,
score_out
;
predict_out
.
name
=
"landmark_predict_out"
;
score_out
.
name
=
"landmark_score_out"
;
output_tensors
.
push_back
(
predict_out
);
output_tensors
.
push_back
(
score_out
);
// 5. Execution predict.
predictor
->
Run
(
input_tensors
,
&
output_tensors
);
// 6. Take out the output data.
for
(
auto
out
:
output_tensors
)
{
float
*
data_o
=
static_cast
<
float
*>
(
out
.
data
.
data
());
LOG
(
INFO
)
<<
out
.
name
<<
" size = "
<<
out
.
data
.
length
()
/
sizeof
(
float
);
}
}
}
// namespace paddle
// Entry point of the MLU tester: parses the command-line flags, then runs
// the single inference test once.
int main(int argc, char** argv) {
  const bool remove_parsed_flags = true;
  google::ParseCommandLineFlags(&argc, &argv, remove_parsed_flags);

  paddle::single_test();
  return 0;
}
paddle/fluid/inference/tests/api/anakin_rnn2_tester.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <cmath>
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#define BUFFER_SIZE (10000)
#define COMPARE_OUTPUTS (1)
#define PRINT_INPUTS (0)
// Command-line flags of the RNN tester.
DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_string(datapath, "", "Path of the dataset.");
// Read line by line and compared against the predictor outputs when
// COMPARE_OUTPUTS is enabled.
DEFINE_string(truthpath, "",
              "Path of the ground-truth file compared against the outputs.");
DEFINE_int32(batch_size, 1, "Batch size per execution.");
DEFINE_int32(repeats, 1, "Number of iterations.");
DEFINE_int32(
    start_line, 0,
    "The starting line of the text file read (this line will be read).");
DEFINE_int32(end_line, 1000000,
             "The ending line of the text file read (this line will be read).");
DEFINE_int32(init_batch_size, 40,
             "Max batch size for Anakin memory allocation.");
DEFINE_int32(threads_num, 2, "Threads num for Anakin.");
// Line-oriented reader for the tester's text dataset.
//
// The constructor opens `file_name`, records its size, loads the lines whose
// zero-based index lies in [start, end] into memory and rewinds the batch
// cursor. get_next_batches() then hands the cached lines out batch by batch.
class Data {
 public:
  // file_name:  path of the text file to read.
  // batch_size: upper bound on the number of sequences per batch.
  // start/end:  inclusive range of zero-based line indices to keep.
  Data(std::string file_name, size_t batch_size, size_t start = 0,
       size_t end = 1000000)
      : _batch_size(static_cast<int>(batch_size)),
        _total_length(0),
        _inputs_size(6),
        _current_line(0) {
    // The file is only ever read, so open it read-only; the default fstream
    // mode (in | out) would fail on files without write permission.
    _file.open(file_name, std::ios::in);
    // Measure the file: seek relative to the end, read the position, rewind.
    // The two-argument seekg is required here; the one-argument overload
    // treats its argument as an absolute position.
    _file.seekg(0, std::ios::end);
    const std::streamoff length = _file.tellg();
    if (length > 0) {
      _total_length = static_cast<size_t>(length);
    }
    _file.seekg(0, std::ios::beg);
    read_file_to_vec(start, end);
    reset_current_line();
  }

  // Moves the batch cursor back to the first cached line.
  void reset_current_line();

  // Returns the cached lines.
  const std::vector<std::string>& get_lines();

  // Replaces the cache with the lines of the file whose index is within
  // [start, end].
  void read_file_to_vec(const size_t start, const size_t end);

  // Fills `inputs` and `seq_offsets` with the next batch and returns the
  // index of the last sequence in it, or -1 when no sequence was produced.
  int get_next_batches(std::vector<std::vector<float>>* inputs,
                       std::vector<std::vector<size_t>>* seq_offsets);

 private:
  std::fstream _file;
  int _batch_size;
  size_t _total_length;  // File size in bytes; 0 if it could not be measured.
  size_t _inputs_size;   // Number of input slots produced per batch.
  std::vector<std::string> _lines;
  size_t _current_line;  // Index of the next cached line to hand out.
};
void
Data
::
read_file_to_vec
(
const
size_t
start
,
const
size_t
end
)
{
std
::
string
line
;
size_t
count
=
0
;
_lines
.
clear
();
while
(
std
::
getline
(
_file
,
line
))
{
if
(
count
>=
start
&&
count
<=
end
)
{
_lines
.
push_back
(
line
);
}
count
++
;
}
}
// Gives read-only access to the cached lines.
const std::vector<std::string>& Data::get_lines() {
  const std::vector<std::string>& cached_lines = _lines;
  return cached_lines;
}
// Rewinds the batch cursor so the next get_next_batches() call starts again
// from the first cached line.
void Data::reset_current_line() {
  const size_t first_line = 0;
  _current_line = first_line;
}
// Assembles the next batch from the cached lines.
//
// Each line is split on ';'. The first field carries the query index; the
// following fields carry the space-separated float values of the input
// slots, in order. Lines are consumed until either `_batch_size` sequences
// were collected or the query index changes; a line with a different query
// index is left for the next call.
//
// data:    resized to `_inputs_size`; (*data)[k] receives the concatenated
//          values of input slot k for all sequences of the batch.
// offsets: resized to `_inputs_size`; (*offsets)[k] starts with 0 and gets
//          one running end-offset per sequence.
// Returns the zero-based index of the last sequence in the batch, or -1 if
// no sequence was produced (no lines left).
//
// Malformed lines are handled defensively:
//  - a line without a parsable query index is skipped (previously such a
//    line was never consumed, so the loop could not make progress);
//  - fields beyond `_inputs_size` are ignored instead of being written past
//    the end of `data` / `offsets`;
//  - a missing field is treated like an empty one (placeholder -1) so every
//    slot gets exactly one offset entry per sequence.
// NOTE(review): the exact splitting rules of paddle::inference::split /
// split_to_float are not visible here — confirm how they treat empty and
// trailing fields.
int Data::get_next_batches(std::vector<std::vector<float>>* data,
                           std::vector<std::vector<size_t>>* offsets) {
  data->clear();
  offsets->clear();
  data->resize(_inputs_size);
  offsets->resize(_inputs_size);
  for (auto& offset : *offsets) {
    offset.push_back(0);
  }

  int seq_num = -1;
  // -1 doubles as "no previous line in this batch".
  int pre_query_index = -1;
  while (_current_line < _lines.size()) {
    std::vector<std::string> fields;
    paddle::inference::split(_lines[_current_line], ';', &fields);

    std::vector<float> query_field;
    if (!fields.empty()) {
      paddle::inference::split_to_float(fields[0], ' ', &query_field);
    }
    if (query_field.empty()) {
      // No query index on this line: consume it and move on.
      ++_current_line;
      continue;
    }

    const int cur_query_index = static_cast<int>(query_field[0]);
    if (pre_query_index != -1 && cur_query_index != pre_query_index) {
      // The query changed: finish the batch without consuming this line.
      return seq_num;
    }
    ++seq_num;
    ++_current_line;

    for (size_t slot = 0; slot < _inputs_size; ++slot) {
      std::vector<float> values;
      const size_t field_idx = slot + 1;
      if (field_idx < fields.size()) {
        paddle::inference::split_to_float(fields[field_idx], ' ', &values);
      }
      if (values.empty()) {
        // Placeholder for an empty or missing field.
        values.push_back(-1);
      }
      (*data)[slot].insert((*data)[slot].end(), values.begin(), values.end());
      // The entry at `seq_num` is the end offset of the previous sequence
      // (or the initial 0), because exactly one entry is pushed per sequence.
      (*offsets)[slot].push_back((*offsets)[slot][seq_num] + values.size());
    }

    if (seq_num + 1 >= _batch_size) {
      return seq_num;
    }
    pre_query_index = cur_query_index;
  }
  return seq_num;
}
namespace
paddle
{
// Builds the AnakinConfig for the RNN tester: NVGPU target on device 0,
// model path from --model, initial batch size from --init_batch_size, the
// initial shapes of the six named inputs, and re-allocation disabled.
contrib::AnakinConfig GetConfig() {
  const std::map<std::string, std::vector<int>> initial_shapes = {
      {"q_basic", std::vector<int>({1000, 1, 1, 1})},
      {"q_bigram0", std::vector<int>({1000, 1, 1, 1})},
      {"pt_basic", std::vector<int>({2000, 1, 1, 1})},
      {"pa_basic", std::vector<int>({4000, 1, 1, 1})},
      {"pa_bigram0", std::vector<int>({4000, 1, 1, 1})},
      {"pt_bigram0", std::vector<int>({2000, 1, 1, 1})},
  };

  contrib::AnakinConfig anakin_cfg;
  // using AnakinConfig::X86 if you need to use cpu to do inference
  anakin_cfg.target_type = contrib::AnakinConfig::NVGPU;
  anakin_cfg.model_file = FLAGS_model;
  anakin_cfg.device_id = 0;
  anakin_cfg.init_batch_size = FLAGS_init_batch_size;
  anakin_cfg.init_inputs_shape = initial_shapes;
  anakin_cfg.re_allocable = false;
  return anakin_cfg;
}
// Per-thread test body: clones `predictor_master`, streams the dataset given
// by --datapath through the clone batch by batch for --repeats rounds, logs
// the time of every run and, when COMPARE_OUTPUTS is enabled, checks the
// collected outputs against the values in --truthpath (aborting on a
// difference above 1e-4).
//
// predictor_master: predictor to clone; it is not run directly.
void single_test(PaddlePredictor* predictor_master) {
  // Number of leading batches per round excluded from the average latency.
  const size_t kWarmupRuns = 10;

  auto predictor = predictor_master->Clone();
  Data data(FLAGS_datapath, FLAGS_batch_size, FLAGS_start_line,
            FLAGS_end_line);
  std::vector<std::vector<float>> inputs;
  std::vector<std::vector<size_t>> seq_offsets;
  std::vector<float> compare_outputs;
  const std::vector<std::string> input_names{"q_basic",  "q_bigram0",
                                             "pt_basic", "pt_bigram0",
                                             "pa_basic", "pa_bigram0"};
  std::vector<PaddleTensor> input_tensors;
  std::vector<PaddleTensor> output_tensors;
  for (const auto& name : input_names) {
    PaddleTensor tensor;
    tensor.name = name;
    tensor.dtype = PaddleDType::FLOAT32;
    input_tensors.push_back(tensor);
  }

  PaddleTensor tensor_out;
  tensor_out.name = "save_infer_model/scale_0";
  tensor_out.shape = std::vector<int>({});
  tensor_out.data = PaddleBuf();
  tensor_out.dtype = PaddleDType::FLOAT32;
  output_tensors.push_back(tensor_out);

  inference::Timer timer;
  for (int i = 0; i < FLAGS_repeats; i++) {
    data.reset_current_line();
    // Each round is compared on its own outputs, so start from a clean slate.
    compare_outputs.clear();
    size_t count = 0;
    float time_sum = 0;
    while (data.get_next_batches(&inputs, &seq_offsets) >= 0) {
#if PRINT_INPUTS
      for (size_t k = 0; k < inputs.size(); k++) {
        LOG(INFO) << "data " << k;
        for (size_t j = 0; j < inputs[k].size(); j++) {
          LOG(INFO) << j << ": " << inputs[k][j];
        }
        for (auto j : seq_offsets[k]) {
          LOG(INFO) << "offsets: " << k << ": " << j;
        }
      }
#endif
      for (size_t j = 0; j < input_tensors.size(); j++) {
        // The buffer is passed as pointer + byte length; `inputs` must stay
        // untouched until Run() returns.
        input_tensors[j].data =
            PaddleBuf(inputs[j].data(), inputs[j].size() * sizeof(float));
        input_tensors[j].lod =
            std::vector<std::vector<size_t>>({seq_offsets[j]});
        input_tensors[j].shape =
            std::vector<int>({static_cast<int>(inputs[j].size()), 1, 1, 1});
      }

      timer.tic();
      const bool run_ok = predictor->Run(input_tensors, &output_tensors);
      float time = timer.toc();
      if (!run_ok) {
        LOG(FATAL) << "Anakin predictor failed to run.";
      }
#if COMPARE_OUTPUTS
      float* data_o = static_cast<float*>(output_tensors[0].data.data());
      const size_t output_floats =
          output_tensors[0].data.length() / sizeof(float);
      LOG(INFO) << "outputs[0].data.size() = " << output_floats;
      // Element count implied by the output shape (1 for an empty shape).
      size_t sum = 1;
      for (int dim : output_tensors[0].shape) {
        sum *= dim;
      }
      // Never read past the end of the output buffer, whatever the shape says.
      if (sum > output_floats) {
        sum = output_floats;
      }
      for (size_t j = 0; j < sum; ++j) {
        LOG(INFO) << "output[" << j << "]: " << data_o[j];
        compare_outputs.push_back(data_o[j]);
      }
#endif
      LOG(INFO) << "Single Time: " << time;
      count++;
      if (count > kWarmupRuns) {
        // Accumulate the measured run time, not a later timer reading that
        // would also include the logging above.
        time_sum += time;
      }
    }

    if (count > kWarmupRuns) {
      inference::PrintTime(FLAGS_batch_size, FLAGS_repeats, 1, 0,
                           time_sum / (count - kWarmupRuns));
    } else {
      // All runs counted as warm-up; there is nothing to average.
      LOG(WARNING) << "Only " << count << " batches were run (warm-up is "
                   << kWarmupRuns << "); no average latency to report.";
    }
#if COMPARE_OUTPUTS
    Data truth_data(FLAGS_truthpath, 1);
    const std::vector<std::string>& truth_vals = truth_data.get_lines();
    if (compare_outputs.size() < truth_vals.size()) {
      LOG(FATAL) << "Got " << compare_outputs.size()
                 << " output values but " << truth_vals.size()
                 << " ground-truth values.";
    }
    for (size_t j = 0; j < truth_vals.size(); j++) {
      float truth = std::atof(truth_vals[j].c_str());
      float compa = compare_outputs[j];
      float diff = std::abs(truth - compa);
      LOG(INFO) << "[DIFF " << j << " ] " << diff;
      if (diff > 0.0001) {
        LOG(FATAL) << "The result is wrong!";
      }
    }
    LOG(INFO) << "The result is correct!";
#endif
  }
}
}
// namespace paddle
// Entry point of the RNN tester: parses the flags, creates one master Anakin
// predictor (data and compute stream ids 0) and runs paddle::single_test on
// --threads_num threads, each receiving the raw pointer to the master
// predictor. Returns after all threads have been joined.
int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);

  auto config = paddle::GetConfig();
  config.data_stream_id = 0;
  config.compute_stream_id = 0;
  std::unique_ptr<paddle::PaddlePredictor> predictor_master =
      paddle::CreatePaddlePredictor<paddle::contrib::AnakinConfig,
                                    paddle::PaddleEngineKind::kAnakin>(config);

  std::vector<std::thread> workers;
  int started = 0;
  while (started < FLAGS_threads_num) {
    workers.emplace_back(paddle::single_test, predictor_master.get());
    ++started;
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
  return 0;
}
paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
浏览文件 @
e1b0d7cb
...
...
@@ -92,9 +92,6 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
CHECK
(
quantizer_enable
)
<<
"NO"
;
PD_SetMkldnnCacheCapacity
(
config
,
0
);
PD_SetModel
(
config
,
prog_file
.
c_str
(),
params_file
.
c_str
());
PD_EnableAnakinEngine
(
config
);
bool
anakin_enable
=
PD_AnakinEngineEnabled
(
config
);
LOG
(
INFO
)
<<
anakin_enable
;
PD_DeleteAnalysisConfig
(
config
);
}
#endif
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
e1b0d7cb
...
...
@@ -34,10 +34,6 @@ if (WITH_GPU AND TENSORRT_FOUND)
add_subdirectory
(
tensorrt
)
endif
()
if
(
ANAKIN_SUBGRAPH
)
add_subdirectory
(
anakin
)
endif
()
if
(
WITH_LITE
)
add_subdirectory
(
lite
)
endif
()
...
...
paddle/fluid/operators/anakin/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
0a678ca0
op_library
(
anakin_engine_op DEPS anakin_engine anakin_op_converter
)
# file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(anakin_engine);\n")
paddle/fluid/operators/anakin/anakin_engine_op.cc
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <string>
#include <vector>
#include "paddle/fluid/operators/anakin/anakin_engine_op.h"
namespace
paddle
{
namespace
operators
{
// Declares the interface of the anakin_engine operator: duplicable inputs
// "Xs" and outputs "Ys" plus the attributes "subgraph", "engine_key" and
// "sub_block".
class AnakinEngineOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Xs", "A list of inputs.").AsDuplicable();
    AddOutput("Ys", "A list of outputs").AsDuplicable();
    AddAttr<std::string>("subgraph", "the subgraph.");
    // The descriptions below name Anakin; the previous wording referred to
    // TensorRT.
    AddAttr<std::string>(
        "engine_key",
        "The engine_key here is used to distinguish different Anakin Engines");
    AddAttr<framework::BlockDesc*>("sub_block", "the anakin block");
    AddComment("Anakin engine operator.");
  }
};
// Variable-type inference hook for the anakin_engine operator. The call
// operator has an empty body, so invoking it leaves `ctx` untouched.
class AnakinEngineInferVarType : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext* ctx) const override {
    // Intentionally empty.
  }
};
}
// namespace operators
}
// namespace paddle
// Registers the anakin_engine operator, passing its implementation class and
// its proto maker to REGISTER_OPERATOR.
// NOTE(review): AnakinEngineOpMaker is passed twice, while the
// AnakinEngineInferVarType class defined in this file is not passed at all —
// confirm whether the last argument was meant to be the var-type inference
// class.
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
anakin_engine
,
ops
::
AnakinEngineOp
,
ops
::
AnakinEngineOpMaker
,
ops
::
AnakinEngineOpMaker
);
#endif // PADDLE_WITH_CUDA
paddle/fluid/operators/anakin/anakin_engine_op.h
已删除
100644 → 0
浏览文件 @
0a678ca0
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
operators
{
using
inference
::
Singleton
;
using
inference
::
anakin
::
AnakinEngine
;
// Operator that forwards its inputs to an Anakin engine and writes the
// engine results into its output variables.
//
// At construction it caches the "Xs" input names, the "engine_key",
// "use_gpu" and "enable_int8" attributes and the set of names listed in the
// "parameters" attribute. At run time every "Xs" input that is not a
// parameter is looked up in the scope and passed to the engine registered
// under `engine_key_`; the i-th "Ys" output is bound to the i-th entry of
// the "output_name_mapping" attribute.
class AnakinEngineOp : public framework::OperatorBase {
 private:
  std::vector<std::string> input_names_;
  // Inputs listed here are skipped when collecting the run-time inputs.
  std::unordered_set<std::string> param_names_;
  std::string engine_key_;
  std::string engine_serialized_data_;
  bool use_gpu_;
  bool enable_int8_;

 public:
  AnakinEngineOp(const std::string& type,
                 const framework::VariableNameMap& inputs,
                 const framework::VariableNameMap& outputs,
                 const framework::AttributeMap& attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {
    input_names_ = Inputs("Xs");
    engine_key_ = Attr<std::string>("engine_key");
    const auto& params = Attr<std::vector<std::string>>("parameters");
    use_gpu_ = Attr<bool>("use_gpu");
    enable_int8_ = Attr<bool>("enable_int8");
    param_names_.insert(params.begin(), params.end());
  }

 protected:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& dev_place) const override {
    RunAnakin(scope, dev_place);
  }

  // Collects the input and output tensors from `scope` and executes the
  // engine with INT8 or FP32 precision depending on `enable_int8_`.
  void RunAnakin(const framework::Scope& scope,
                 const platform::Place& dev_place) const {
    PADDLE_ENFORCE(!input_names_.empty(), "should pass at least one input");

    const auto& output_maps =
        Attr<std::vector<std::string>>("output_name_mapping");

    std::map<std::string, framework::LoDTensor*> inputs;
    for (const auto& x : Inputs("Xs")) {
      if (param_names_.count(x)) continue;
      auto& t =
          inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
      inputs.insert({x, &t});
    }

    std::map<std::string, framework::LoDTensor*> outputs;
    size_t output_index = 0;
    for (const auto& y : Outputs("Ys")) {
      // Each output needs a name in the mapping; refuse to index past it.
      PADDLE_ENFORCE(output_index < output_maps.size(),
                     "output_name_mapping has fewer entries than outputs");
      auto* fluid_v = scope.FindVar(y);
      PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
      auto* fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
      outputs.insert({output_maps[output_index], fluid_t});
      output_index += 1;
    }

    if (enable_int8_) {
      Execute<::anakin::Precision::INT8>(inputs, outputs, dev_place);
    } else {
      Execute<::anakin::Precision::FP32>(inputs, outputs, dev_place);
    }
  }

  // Looks up the engine registered under `engine_key_` for the selected
  // target (NV when `use_gpu_`, otherwise X86 if ANAKIN_X86_PLACE is
  // defined) and executes it. On the GPU path the stream of the device
  // context obtained for `dev_place` is passed to the engine.
  template <::anakin::Precision PrecisionT>
  void Execute(const std::map<std::string, framework::LoDTensor*>& inputs,
               const std::map<std::string, framework::LoDTensor*>& outputs,
               const platform::Place& dev_place) const {
    if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
      platform::DeviceContextPool& pool =
          platform::DeviceContextPool::Instance();
      auto& dev_ctx = *pool.Get(dev_place);
      auto stream =
          reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx)
              .stream();
      auto* engine =
          inference::Singleton<inference::anakin::AnakinEngineManager<
              ::anakin::saber::NV, PrecisionT>>::Global()
              .Get(engine_key_);
      PADDLE_ENFORCE_NOT_NULL(engine, "no anakin engine called %s",
                              engine_key_);
      engine->Execute(inputs, outputs, stream);
#endif
    } else {
#ifdef ANAKIN_X86_PLACE
      auto* engine =
          inference::Singleton<inference::anakin::AnakinEngineManager<
              ::anakin::saber::X86, PrecisionT>>::Global()
              .Get(engine_key_);
      PADDLE_ENFORCE_NOT_NULL(engine, "no anakin engine called %s",
                              engine_key_);
      engine->Execute(inputs, outputs);
#else
      LOG(FATAL) << "Unknown Platform for AnakinEngine!";
#endif
    }
  }
};
}
// namespace operators
}
// namespace paddle
#endif // PADDLE_WITH_CUDA
paddle/fluid/pybind/inference_api.cc
浏览文件 @
e1b0d7cb
...
...
@@ -422,15 +422,6 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"min_subgraph_size"
)
=
3
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"use_static"
)
=
false
,
py
::
arg
(
"use_calib_mode"
)
=
true
)
.
def
(
"enable_anakin_engine"
,
&
AnalysisConfig
::
EnableAnakinEngine
,
py
::
arg
(
"max_batch_size"
)
=
1
,
py
::
arg
(
"max_input_shape"
)
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
py
::
arg
(
"min_subgraph_size"
)
=
6
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"auto_config_layout"
)
=
false
,
py
::
arg
(
"passes_filter"
)
=
std
::
vector
<
std
::
string
>
(),
py
::
arg
(
"ops_filter"
)
=
std
::
vector
<
std
::
string
>
())
.
def
(
"tensorrt_engine_enabled"
,
&
AnalysisConfig
::
tensorrt_engine_enabled
)
.
def
(
"switch_ir_debug"
,
&
AnalysisConfig
::
SwitchIrDebug
,
py
::
arg
(
"x"
)
=
true
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录