机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit cf5af3b5 (unverified)

Merge pull request #16847 from NHZlX/cherry_pick_anakin_to_1.4_2

Cherry-pick from 16837 & 16846 to 1.4

Authored by 石晓伟 on Apr 14, 2019; committed via GitHub on Apr 14, 2019.
Parents: 9f1927bd, 8121b3ec

Showing 36 changed files with 154 additions and 504 deletions (+154, -504)
paddle/fluid/inference/anakin/convert/activation.cc                +13  -38
paddle/fluid/inference/anakin/convert/activation.h                 +16   -1
paddle/fluid/inference/anakin/convert/affine_channel.cc             +1  -19
paddle/fluid/inference/anakin/convert/batch_norm.cc                 +1  -15
paddle/fluid/inference/anakin/convert/concat.cc                     +1  -19
paddle/fluid/inference/anakin/convert/conv2d.cc                     +1  -19
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc              +1  -19
paddle/fluid/inference/anakin/convert/density_prior_box.cc          +2  -22
paddle/fluid/inference/anakin/convert/detection_out.cc              +1  -19
paddle/fluid/inference/anakin/convert/dropout.cc                    +1  -19
paddle/fluid/inference/anakin/convert/elementwise.cc                +2  -29
paddle/fluid/inference/anakin/convert/fc.cc                         +2  -37
paddle/fluid/inference/anakin/convert/flatten.cc                    +1  -19
paddle/fluid/inference/anakin/convert/im2sequence.cc                +1  -15
paddle/fluid/inference/anakin/convert/op_converter.h               +31  -19
paddle/fluid/inference/anakin/convert/pool2d.cc                     +1  -19
paddle/fluid/inference/anakin/convert/relu.cc                       +2  -33
paddle/fluid/inference/anakin/convert/reshape.cc                    +1  -19
paddle/fluid/inference/anakin/convert/roi_align.cc                  +1  -19
paddle/fluid/inference/anakin/convert/scale.cc                      +1  -19
paddle/fluid/inference/anakin/convert/softmax.cc                    +1  -20
paddle/fluid/inference/anakin/convert/split.cc                      +1  -19
paddle/fluid/inference/anakin/convert/sum.cc                        +1  -19
paddle/fluid/inference/anakin/convert/test_activation_op.cc        +38   -0
paddle/fluid/inference/anakin/convert/transpose.cc                  +1  -15
paddle/fluid/inference/anakin/engine.cc                             +3   -2
paddle/fluid/inference/anakin/engine.h                              +7   -3
paddle/fluid/inference/anakin/op_teller.cc                          +2   -0
paddle/fluid/inference/analysis/argument.h                          +1   -0
paddle/fluid/inference/analysis/ir_pass_manager.cc                  +2   -0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc   +4   -2
paddle/fluid/inference/api/analysis_config.cc                       +3   -1
paddle/fluid/inference/api/analysis_predictor.cc                    +3   -0
paddle/fluid/inference/api/paddle_analysis_config.h                 +2   -0
paddle/fluid/inference/api/paddle_pass_builder.cc                   +3   -5
paddle/fluid/pybind/inference_api.cc                                +1   -0
paddle/fluid/inference/anakin/convert/activation.cc (+13, -38)

```diff
@@ -43,47 +43,22 @@ void ActivationOpConverter<TargetT, PrecisionT>::operator()(
   auto output_name = op_desc.Output("Out").front();
   this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
   this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
+  if (op_type_ == "swish") {
+    float beta = boost::get<float>(op_desc.GetAttr("beta"));
+    this->engine_->AddOpAttr(op_name, "clip_relu_num", beta);
+  }
+  if (op_type_ == "relu6") {
+    float threshold = boost::get<float>(op_desc.GetAttr("threshold"));
+    this->engine_->AddOpAttr(op_name, "clip_relu_num", threshold);
+  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using sigmoid_nv_fp32 = ::paddle::inference::anakin::SigmoidOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using sigmoid_nv_int8 = ::paddle::inference::anakin::SigmoidOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-using tanh_nv_fp32 = ::paddle::inference::anakin::TanhOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using tanh_nv_int8 = ::paddle::inference::anakin::TanhOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8);
-#endif
-
-using sigmoid_cpu_fp32 = ::paddle::inference::anakin::SigmoidOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using sigmoid_cpu_int8 = ::paddle::inference::anakin::SigmoidOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-using tanh_cpu_fp32 = ::paddle::inference::anakin::TanhOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using tanh_cpu_int8 = ::paddle::inference::anakin::TanhOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(swish, SwishOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(relu6, Relu6OpConverter);
```
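For context on the two newly supported activations: the sketch below is standalone illustrative code (not part of this patch) showing the element-wise math `relu6` and `swish` compute. The Paddle attributes `threshold` (relu6) and `beta` (swish) seen in the converter above are the values it forwards to Anakin, both under the attribute name `clip_relu_num`.

```cpp
#include <algorithm>
#include <cmath>

// relu6: clip the positive part of x at `threshold` (Paddle's default is 6).
float relu6(float x, float threshold = 6.0f) {
  return std::min(std::max(x, 0.0f), threshold);
}

// swish: x * sigmoid(beta * x).
float swish(float x, float beta = 1.0f) {
  return x / (1.0f + std::exp(-beta * x));
}
```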
paddle/fluid/inference/anakin/convert/activation.h (+16, -1)

```diff
@@ -37,7 +37,9 @@ class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
   std::string op_type_;
   std::string anakin_op_type_;
   std::map<std::string, std::string> anakin_op_types_{{"tanh", "TanH"},
-                                                      {"sigmoid", "Sigmoid"}};
+                                                      {"sigmoid", "Sigmoid"},
+                                                      {"relu6", "ClippedRelu"},
+                                                      {"swish", "Swish"}};
 };

 template <typename TargetT, ::anakin::Precision PrecisionT>
@@ -52,6 +54,19 @@ class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
   SigmoidOpConverter()
       : ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
 };

+template <typename TargetT, ::anakin::Precision PrecisionT>
+class Relu6OpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
+ public:
+  Relu6OpConverter() : ActivationOpConverter<TargetT, PrecisionT>("relu6") {}
+};
+
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SwishOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
+ public:
+  SwishOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("swish") {}
+};
+
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
```
paddle/fluid/inference/anakin/convert/affine_channel.cc (+1, -19)

```diff
@@ -52,22 +52,4 @@ void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using affine_channel_nv_fp32 = ::paddle::inference::anakin::AffineChannelOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using affine_channel_nv_int8 = ::paddle::inference::anakin::AffineChannelOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8);
-#endif
-
-using affine_channel_cpu_fp32 = ::paddle::inference::anakin::AffineChannelOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using affine_channel_cpu_int8 = ::paddle::inference::anakin::AffineChannelOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
```
paddle/fluid/inference/anakin/convert/batch_norm.cc (+1, -15)

```diff
@@ -82,18 +82,4 @@ void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using bn_nv_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using bn_nv_int8 = ::paddle::inference::anakin::BatchNormOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_int8);
-#endif
-using bn_cpu_fp32 = ::paddle::inference::anakin::BatchNormOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using bn_cpu_int8 = ::paddle::inference::anakin::BatchNormOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_int8);
+REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
```
paddle/fluid/inference/anakin/convert/concat.cc (+1, -19)

```diff
@@ -38,22 +38,4 @@ void ConcatOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using concat_nv_fp32 = ::paddle::inference::anakin::ConcatOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using concat_nv_int8 = ::paddle::inference::anakin::ConcatOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, concat_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(concat, concat_nv_int8);
-#endif
-
-using concat_cpu_fp32 = ::paddle::inference::anakin::ConcatOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using concat_cpu_int8 = ::paddle::inference::anakin::ConcatOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, concat_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(concat, concat_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
```
paddle/fluid/inference/anakin/convert/conv2d.cc (+1, -19)

```diff
@@ -105,22 +105,4 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using conv2d_nv_fp32 = ::paddle::inference::anakin::Conv2dOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using conv2d_nv_int8 = ::paddle::inference::anakin::Conv2dOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_int8);
-#endif
-
-using conv2d_cpu_fp32 = ::paddle::inference::anakin::Conv2dOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using conv2d_cpu_int8 = ::paddle::inference::anakin::Conv2dOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
```
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc (+1, -19)

```diff
@@ -111,22 +111,4 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using conv2d_fusion_nv_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using conv2d_fusion_nv_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_int8);
-#endif
-
-using conv2d_fusion_cpu_fp32 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using conv2d_fusion_cpu_int8 = ::paddle::inference::anakin::Conv2dFusionOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
```
paddle/fluid/inference/anakin/convert/density_prior_box.cc (+2, -22)

```diff
@@ -108,25 +108,5 @@ void DensityPriorBoxOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using ds_pr_nv_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using ds_pr_nv_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_fp32);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_int8);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_int8);
-#endif
-
-using ds_pr_cpu_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using ds_pr_cpu_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_fp32);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_int8);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
```
paddle/fluid/inference/anakin/convert/detection_out.cc (+1, -19)

```diff
@@ -66,22 +66,4 @@ void DetectionOutOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using detection_out_nv_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using detection_out_nv_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_int8);
-#endif
-
-using detection_out_cpu_fp32 = ::paddle::inference::anakin::DetectionOutOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using detection_out_cpu_int8 = ::paddle::inference::anakin::DetectionOutOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
```
paddle/fluid/inference/anakin/convert/dropout.cc (+1, -19)

```diff
@@ -52,22 +52,4 @@ void DropoutOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using dropout_nv_fp32 = ::paddle::inference::anakin::DropoutOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using dropout_nv_int8 = ::paddle::inference::anakin::DropoutOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, dropout_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_nv_int8);
-#endif
-
-using dropout_cpu_fp32 = ::paddle::inference::anakin::DropoutOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using dropout_cpu_int8 = ::paddle::inference::anakin::DropoutOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
```
paddle/fluid/inference/anakin/convert/elementwise.cc (+2, -29)

```diff
@@ -71,32 +71,5 @@ void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using elet_nv_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using elet_nv_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-using eletmul_nv_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using eletmul_nv_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_int8);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_int8);
-#endif
-
-using elet_cpu_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using elet_cpu_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-using eletmul_cpu_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using eletmul_cpu_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_int8);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter);
```
paddle/fluid/inference/anakin/convert/fc.cc (+2, -37)

```diff
@@ -117,40 +117,5 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using mul_nv_fp32 = ::paddle::inference::anakin::MulOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using fc_nv_fp32 = ::paddle::inference::anakin::FcOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using mul_nv_int8 = ::paddle::inference::anakin::MulOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-using fc_nv_int8 = ::paddle::inference::anakin::FcOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, mul_nv_fp32);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, fc_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(mul, mul_nv_int8);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(fc, fc_nv_int8);
-#endif
-
-using mul_cpu_fp32 = ::paddle::inference::anakin::MulOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using fc_cpu_fp32 = ::paddle::inference::anakin::FcOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using mul_cpu_int8 = ::paddle::inference::anakin::MulOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-using fc_cpu_int8 = ::paddle::inference::anakin::FcOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, mul_cpu_fp32);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, fc_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(mul, mul_cpu_int8);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(fc, fc_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
```
paddle/fluid/inference/anakin/convert/flatten.cc (+1, -19)

```diff
@@ -45,22 +45,4 @@ void FlattenOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using flatten_nv_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using flatten_nv_int8 = ::paddle::inference::anakin::FlattenOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten, flatten_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_nv_int8);
-#endif
-
-using flatten_cpu_fp32 = ::paddle::inference::anakin::FlattenOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using flatten_cpu_int8 = ::paddle::inference::anakin::FlattenOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
```
paddle/fluid/inference/anakin/convert/im2sequence.cc (+1, -15)

```diff
@@ -55,18 +55,4 @@ void Im2SequenceConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using im2sequence_nv_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using im2sequence_nv_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_int8);
-#endif
-using im2sequence_cpu_fp32 = ::paddle::inference::anakin::Im2SequenceConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using im2sequence_cpu_int8 = ::paddle::inference::anakin::Im2SequenceConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_int8);
+REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter);
```
paddle/fluid/inference/anakin/convert/op_converter.h (+31, -19)

```diff
@@ -183,25 +183,37 @@ template class AnakinOpConverter<::anakin::saber::X86,
     return 0;                                                                \
   }

-#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__)      \
-  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA,      \
-                                    ::anakin::saber::NV, FP32,         \
-                                    ::anakin::Precision::FP32)
-
-#define REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
-  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA,      \
-                                    ::anakin::saber::NV, INT8,         \
-                                    ::anakin::Precision::INT8)
-
-#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__)       \
-  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU,       \
-                                    ::anakin::saber::X86, FP32,        \
-                                    ::anakin::Precision::FP32)
-
-#define REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__)  \
-  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU,       \
-                                    ::anakin::saber::X86, INT8,        \
-                                    ::anakin::Precision::INT8)
+#define WRAP(...) __VA_ARGS__
+
+#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__,             \
+                                          precision_type__)                   \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(                                          \
+      op_type__,                                                              \
+      ::paddle::inference::anakin::Converter__<WRAP(                          \
+          ::anakin::saber::NV, ::anakin::Precision::precision_type__)>,       \
+      CUDA, ::anakin::saber::NV, precision_type__,                            \
+      ::anakin::Precision::precision_type__)
+
+#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__,              \
+                                         precision_type__)                    \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(                                          \
+      op_type__,                                                              \
+      ::paddle::inference::anakin::Converter__<WRAP(                          \
+          ::anakin::saber::X86, ::anakin::Precision::precision_type__)>,      \
+      CPU, ::anakin::saber::X86, precision_type__,                            \
+      ::anakin::Precision::precision_type__)
+
+#ifdef PADDLE_WITH_CUDA
+#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)       \
+  REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
+  REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \
+  REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32);  \
+  REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
+#else
+#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)      \
+  REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
+  REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
+#endif

 #define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
   extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__();
```
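This macro change is the heart of the commit: instead of each converter file hand-writing a `using` alias plus one registration per (target, precision) pair, a single `REGISTER_ANAKIN_OP_CONVERTER(op, Converter)` now stamps out all four combinations. Roughly, as an illustrative expansion rather than the literal preprocessor output, with `PADDLE_WITH_CUDA` defined:

```cpp
// REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter); expands to
// approximately:
//
//   REGISTER_ANAKIN_OP_CONVERTER_BASE(
//       sigmoid,
//       ::paddle::inference::anakin::SigmoidOpConverter<
//           ::anakin::saber::NV, ::anakin::Precision::FP32>,
//       CUDA, ::anakin::saber::NV, FP32, ::anakin::Precision::FP32);
//
// followed by the NV/INT8, X86/FP32, and X86/INT8 variants. WRAP(...) exists
// so the comma inside the template argument list survives macro expansion.
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
```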
paddle/fluid/inference/anakin/convert/pool2d.cc (+1, -19)

```diff
@@ -71,22 +71,4 @@ void Pool2dOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using pool2d_nv_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using pool2d_nv_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_float32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_int8);
-#endif
-
-using pool2d_cpu_float32 = ::paddle::inference::anakin::Pool2dOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using pool2d_cpu_int8 = ::paddle::inference::anakin::Pool2dOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_float32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter);
```
paddle/fluid/inference/anakin/convert/relu.cc (+2, -33)

```diff
@@ -57,36 +57,5 @@ void LeakyReluOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using relu_nv_fp32 = ::paddle::inference::anakin::ReluOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using leaky_nv_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using relu_nv_int8 = ::paddle::inference::anakin::ReluOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-using leaky_nv_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, relu_nv_fp32);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(relu, relu_nv_int8);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_int8);
-#endif
-
-using relu_cpu_fp32 = ::paddle::inference::anakin::ReluOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using leaky_cpu_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using relu_cpu_int8 = ::paddle::inference::anakin::ReluOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-using leaky_cpu_int8 = ::paddle::inference::anakin::LeakyReluOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, relu_cpu_fp32);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(relu, relu_cpu_int8);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter);
```
paddle/fluid/inference/anakin/convert/reshape.cc (+1, -19)

```diff
@@ -46,22 +46,4 @@ void ReshapeOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using reshape_nv_fp32 = ::paddle::inference::anakin::ReshapeOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using reshape_nv_int8 = ::paddle::inference::anakin::ReshapeOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape, reshape_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_nv_int8);
-#endif
-
-using reshape_cpu_fp32 = ::paddle::inference::anakin::ReshapeOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using reshape_cpu_int8 = ::paddle::inference::anakin::ReshapeOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter);
```
paddle/fluid/inference/anakin/convert/roi_align.cc (+1, -19)

```diff
@@ -51,22 +51,4 @@ void RoiAlignOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using roi_align_nv_fp32 = ::paddle::inference::anakin::RoiAlignOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using roi_align_nv_int8 = ::paddle::inference::anakin::RoiAlignOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_int8);
-#endif
-
-using roi_align_cpu_fp32 = ::paddle::inference::anakin::RoiAlignOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using roi_align_cpu_int8 = ::paddle::inference::anakin::RoiAlignOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter);
```
paddle/fluid/inference/anakin/convert/scale.cc (+1, -19)

```diff
@@ -49,22 +49,4 @@ void ScaleOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using scale_nv_fp32 = ::paddle::inference::anakin::ScaleOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using scale_nv_int8 = ::paddle::inference::anakin::ScaleOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, scale_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(scale, scale_nv_int8);
-#endif
-
-using scale_cpu_fp32 = ::paddle::inference::anakin::ScaleOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using scale_cpu_int8 = ::paddle::inference::anakin::ScaleOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(scale, scale_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(scale, scale_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter);
```
paddle/fluid/inference/anakin/convert/softmax.cc (+1, -20)

```diff
@@ -44,23 +44,4 @@ void SoftMaxOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using sm_nv_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using sm_nv_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, sm_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(softmax, sm_nv_int8);
-#endif
-
-using sm_cpu_fp32 = ::paddle::inference::anakin::SoftMaxOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using sm_cpu_int8 = ::paddle::inference::anakin::SoftMaxOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, sm_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(softmax, sm_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter);
```
paddle/fluid/inference/anakin/convert/split.cc (+1, -19)

```diff
@@ -55,23 +55,5 @@ void SplitOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using split_nv_fp32 = ::paddle::inference::anakin::SplitOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using split_nv_int8 = ::paddle::inference::anakin::SplitOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, split_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(split, split_nv_int8);
-#endif
-
-using split_cpu_fp32 = ::paddle::inference::anakin::SplitOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using split_cpu_int8 = ::paddle::inference::anakin::SplitOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(split, split_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(split, split_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter);
```
paddle/fluid/inference/anakin/convert/sum.cc (+1, -19)

```diff
@@ -47,22 +47,4 @@ void SumOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using sum_nv_fp32 = ::paddle::inference::anakin::SumOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using sum_nv_int8 = ::paddle::inference::anakin::SumOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, sum_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sum, sum_nv_int8);
-#endif
-
-using sum_cpu_fp32 = ::paddle::inference::anakin::SumOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using sum_cpu_int8 = ::paddle::inference::anakin::SumOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, sum_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sum, sum_cpu_int8);
-
+REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter);
```
paddle/fluid/inference/anakin/convert/test_activation_op.cc (+38, -0)

```diff
@@ -36,6 +36,14 @@ static void test_activation_op(const std::string& op_type,
   desc.SetInput("X", {"act-X"});
   desc.SetOutput("Out", {"act-Out"});

+  if (op_type == "swish") {
+    desc.SetAttr("beta", 1.0f);
+  }
+
+  if (op_type == "relu6") {
+    desc.SetAttr("threshold", 6.0f);
+  }
+
   LOG(INFO) << "set OP";
   validator.SetOp(*desc.Proto());
   LOG(INFO) << "execute";
@@ -55,6 +63,18 @@ TEST(tanh_op, gpu) {
   platform::CUDADeviceContext ctx(gpu_place);
   test_activation_op<::anakin::saber::NV>("tanh", ctx, true);
 }
+
+TEST(relu6_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_activation_op<::anakin::saber::NV>("relu6", ctx, true);
+}
+
+TEST(swish_op, gpu) {
+  platform::CUDAPlace gpu_place(0);
+  platform::CUDADeviceContext ctx(gpu_place);
+  test_activation_op<::anakin::saber::NV>("swish", ctx, true);
+}
 #endif

 /*
@@ -69,6 +89,18 @@ TEST(tanh_op, cpu) {
   platform::CPUDeviceContext ctx(cpu_place);
   test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
 }
+
+TEST(relu6_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_activation_op<::anakin::saber::X86>("relu6", ctx, false);
+}
+
+TEST(swish_op, cpu) {
+  platform::CPUPlace cpu_place;
+  platform::CPUDeviceContext ctx(cpu_place);
+  test_activation_op<::anakin::saber::X86>("swish", ctx, false);
+}
 */

 }  // namespace anakin
@@ -77,10 +109,16 @@ TEST(tanh_op, cpu) {
 USE_OP(sigmoid);
 USE_OP(tanh);
+USE_OP(relu6);
+USE_OP(swish);

 USE_CPU_ANAKIN_CONVERTER(sigmoid);
 USE_CPU_ANAKIN_CONVERTER(tanh);
+USE_CPU_ANAKIN_CONVERTER(relu6);
+USE_CPU_ANAKIN_CONVERTER(swish);
 #ifdef PADDLE_WITH_CUDA
 USE_ANAKIN_CONVERTER(sigmoid);
 USE_ANAKIN_CONVERTER(tanh);
+USE_ANAKIN_CONVERTER(relu6);
+USE_ANAKIN_CONVERTER(swish);
 #endif
```
paddle/fluid/inference/anakin/convert/transpose.cc (+1, -15)

```diff
@@ -49,18 +49,4 @@ void TransposeOpConverter<TargetT, PrecisionT>::operator()(
 }  // namespace inference
 }  // namespace paddle

-#ifdef PADDLE_WITH_CUDA
-using transpose_nv_fp32 = ::paddle::inference::anakin::TransposeOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::FP32>;
-using transpose_nv_int8 = ::paddle::inference::anakin::TransposeOpConverter<
-    ::anakin::saber::NV, ::anakin::Precision::INT8>;
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, transpose_nv_fp32);
-REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_nv_int8);
-#endif
-using transpose_cpu_fp32 = ::paddle::inference::anakin::TransposeOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::FP32>;
-using transpose_cpu_int8 = ::paddle::inference::anakin::TransposeOpConverter<
-    ::anakin::saber::X86, ::anakin::Precision::INT8>;
-REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_fp32);
-REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_int8);
+REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter);
```
paddle/fluid/inference/anakin/engine.cc (+3, -2)

```diff
@@ -36,13 +36,14 @@ template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
     bool need_summary, int device, int max_batch_size,
     std::map<std::string, std::vector<int>> max_input_shape,
-    std::vector<std::string> program_inputs)
+    std::vector<std::string> program_inputs, bool auto_config_layout)
     : graph_(new AnakinGraphT<TargetT, PrecisionType>()),
       net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {
   device_ = device;
   max_batch_size_ = max_batch_size;
   max_input_shape_ = max_input_shape;
   program_inputs_ = program_inputs;
+  auto_config_layout_ = auto_config_layout;
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
@@ -57,7 +58,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::InitNet() {
-  net_->init(*graph_);
+  net_->init(*graph_, auto_config_layout_);
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
```
paddle/fluid/inference/anakin/engine.h (+7, -3)

```diff
@@ -58,7 +58,8 @@ class AnakinEngine {
   explicit AnakinEngine(
       bool need_summary = false, int device = 0, int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
-      std::vector<std::string> program_inputs = {});
+      std::vector<std::string> program_inputs = {},
+      bool auto_config_layout = false);
   ~AnakinEngine();
   void InitNet();
   void SetInputShape(const std::string &name, std::vector<int> shape);
@@ -120,6 +121,8 @@ class AnakinEngine {
   std::unique_ptr<NetT> net_;
   std::vector<std::string> program_inputs_;
   std::unordered_map<std::string, float> tensor_scales_;
+  // Always be false in gpu mode but true in most cpu cases.
+  bool auto_config_layout_;
 };

 template <typename TargetT, ::anakin::Precision PrecisionType>
@@ -138,10 +141,11 @@ class AnakinEngineManager {
   AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
                         std::map<std::string, std::vector<int>> max_input_shape,
                         std::vector<std::string> program_inputs,
-                        std::string engine_name) {
+                        bool auto_config_layout, std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
     auto *p = new AnakinEngine<TargetT, PrecisionType>(
-        need_summary, device, max_batch_size, max_input_shape, program_inputs);
+        need_summary, device, max_batch_size, max_input_shape, program_inputs,
+        auto_config_layout);
     engines_[engine_name].reset(p);
     return p;
   }
```
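Call sites that create engines must now pass the flag explicitly. As a rough sketch of the updated `AnakinEngineManager::Create` call, mirroring what `anakin_subgraph_pass.cc` does later in this diff (the input name, shape, and engine key below are made-up examples):

```cpp
// Illustrative only: create a CPU FP32 engine with auto_config_layout on.
auto *engine =
    inference::Singleton<anakin::AnakinEngineManager<
        ::anakin::saber::X86, ::anakin::Precision::FP32>>::Global()
        .Create(/*need_summary=*/true, /*device=*/0, /*max_batch_size=*/1,
                /*max_input_shape=*/{{"input_0", {1, 3, 224, 224}}},
                /*program_inputs=*/{"input_0"},
                /*auto_config_layout=*/true,  // new argument in this commit
                /*engine_name=*/"anakin_engine_0");
```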
paddle/fluid/inference/anakin/op_teller.cc (+2, -0)

```diff
@@ -46,6 +46,8 @@ struct SimpleOpTypeSetTeller : public Teller {
     teller_set.insert("prior_box");
     teller_set.insert("leaky_relu");
     teller_set.insert("affine_channel");
+    teller_set.insert("relu6");
+    teller_set.insert("swish");
   }

   bool operator()(const std::string &op_type,
```
paddle/fluid/inference/analysis/argument.h (+1, -0)

```diff
@@ -171,6 +171,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode,
                       AnalysisConfig::Precision);
+  DECL_ARGUMENT_FIELD(anakin_auto_config_layout, AnakinAutoConfigLayout, bool);
   DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
   DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter,
                       std::vector<std::string>);
```
paddle/fluid/inference/analysis/ir_pass_manager.cc (+2, -0)

```diff
@@ -128,6 +128,8 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("anakin_ops_filter",
                 new std::vector<std::string>(argument->anakin_ops_filter()));
+      pass->Set("auto_config_layout",
+                new bool(argument->anakin_auto_config_layout()));
     }

     pre_pass = pass_name;
```
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc (+4, -2)

```diff
@@ -226,18 +226,20 @@ void AnakinSubgraphPass::CreateAnakinEngine(
   auto max_batch_size = Get<int>("max_batch_size");
   auto max_input_shape =
       Get<std::map<std::string, std::vector<int>>>("max_input_shape");
+  bool auto_config_layout = Get<bool>("auto_config_layout");
   if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
     inference::Singleton<
         anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
-                max_input_shape, program_inputs, engine_key);
+                max_input_shape, program_inputs, false, engine_key);
 #endif
   } else {
     inference::Singleton<
         anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
         .Create(true, Get<int>("gpu_device_id"), max_batch_size,
-                max_input_shape, program_inputs, engine_key);
+                max_input_shape, program_inputs, auto_config_layout,
+                engine_key);
   }

   auto *scope = param_scope();
```
paddle/fluid/inference/api/analysis_config.cc (+3, -1)

```diff
@@ -117,6 +117,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(anakin_max_input_shape_);
   CP_MEMBER(anakin_min_subgraph_size_);
   CP_MEMBER(anakin_precision_mode_);
+  CP_MEMBER(anakin_auto_config_layout_);
   CP_MEMBER(anakin_passes_filter_);
   CP_MEMBER(anakin_ops_filter_);
@@ -398,7 +399,7 @@ void AnalysisConfig::SwitchIrDebug(int x) {
 void AnalysisConfig::EnableAnakinEngine(
     int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
     int min_subgraph_size, AnalysisConfig::Precision precision_mode,
-    std::vector<std::string> passes_filter,
+    bool auto_config_layout, std::vector<std::string> passes_filter,
     std::vector<std::string> ops_filter) {
   anakin_max_batchsize_ = max_batch_size;
   anakin_max_input_shape_ = max_input_shape;
@@ -407,6 +408,7 @@ void AnalysisConfig::EnableAnakinEngine(
   anakin_ops_filter_ = ops_filter;
   use_anakin_ = true;
   anakin_precision_mode_ = precision_mode;
+  anakin_auto_config_layout_ = auto_config_layout;
   Update();
 }
 }  // namespace paddle
```
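From the user's side, the new argument slots in between `precision_mode` and `passes_filter`. A hedged usage sketch against the public `AnalysisConfig` API (the input tensor name and shape are invented for illustration):

```cpp
#include <map>
#include <string>
#include <vector>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void EnableAnakinWithLayoutAutoConfig(paddle::AnalysisConfig *config) {
  std::map<std::string, std::vector<int>> max_input_shape{
      {"image", {1, 3, 224, 224}}};  // hypothetical input tensor
  config->EnableAnakinEngine(
      /*max_batch_size=*/1, max_input_shape,
      /*min_subgraph_size=*/6,
      /*precision_mode=*/paddle::AnalysisConfig::Precision::kFloat32,
      /*auto_config_layout=*/true);  // new flag; defaults to false
}
```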
paddle/fluid/inference/api/analysis_predictor.cc (+3, -0)

```diff
@@ -387,6 +387,7 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
     argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
     argument_.SetAnakinPrecisionMode(config_.anakin_precision_mode_);
+    argument_.SetAnakinAutoConfigLayout(config_.anakin_auto_config_layout_);
     argument_.SetAnakinPassesFilter(config_.anakin_passes_filter_);
     argument_.SetAnakinOpsFilter(config_.anakin_ops_filter_);
     LOG(INFO) << "Anakin subgraph engine is enabled";
@@ -893,4 +894,6 @@ USE_ANAKIN_CONVERTER(sum);
 USE_ANAKIN_CONVERTER(prior_box);
 USE_ANAKIN_CONVERTER(leaky_relu);
 USE_ANAKIN_CONVERTER(affine_channel);
+USE_ANAKIN_CONVERTER(relu6);
+USE_ANAKIN_CONVERTER(swish);
 #endif
```
paddle/fluid/inference/api/paddle_analysis_config.h (+2, -0)

```diff
@@ -153,6 +153,7 @@ struct AnalysisConfig {
       int max_batch_size = 1,
       std::map<std::string, std::vector<int>> max_input_shape = {},
       int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
+      bool auto_config_layout = false,
      std::vector<std::string> passes_filter = {},
       std::vector<std::string> ops_filter = {});
@@ -294,6 +295,7 @@ struct AnalysisConfig {
   int anakin_min_subgraph_size_{6};
   std::map<std::string, std::vector<int>> anakin_max_input_shape_;
   Precision anakin_precision_mode_;
+  bool anakin_auto_config_layout_{false};
   std::vector<std::string> anakin_passes_filter_;
   std::vector<std::string> anakin_ops_filter_;
   std::map<std::string, std::string> engine_opt_info_;
```
paddle/fluid/inference/api/paddle_pass_builder.cc (+3, -5)

```diff
@@ -78,11 +78,9 @@ const std::vector<std::string> kAnakinSubgraphPasses({
     "fillconstant_elementwisemul_fuse",  //
     "fc_fuse_pass",                      //
-    // "conv_bn_fuse_pass",              //
-    // "conv_elementwise_add_fuse_pass", //
-    "fc_gru_fuse_pass",                  //
-    "anakin_subgraph_pass",              //
-    "fc_gru_fuse_pass",                  //
+    "conv_elementwise_add_fuse_pass",    //
+    "fc_gru_fuse_pass",                  //
+    "anakin_subgraph_pass",              //
+    "fc_gru_fuse_pass",                  //
 });

 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
```
paddle/fluid/pybind/inference_api.cc (+1, -0)

```diff
@@ -236,6 +236,7 @@ void BindAnalysisConfig(py::module *m) {
                std::map<std::string, std::vector<int>>(),
            py::arg("min_subgraph_size") = 6,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
+           py::arg("auto_config_layout") = false,
            py::arg("passes_filter") = std::vector<std::string>(),
            py::arg("ops_filter") = std::vector<std::string>())
       .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
```