PaddlePaddle / PaddleDetection
Commit 4b9fa423
Authored Apr 11, 2019 by nhzlx
Parent: e14ab180

Cherry-pick from 16813 : change singleton to graph RegistBlock
test=release/1.4
Showing 12 changed files with 86 additions and 52 deletions (+86 -52)
paddle/fluid/framework/ir/fc_fuse_pass.cc                 +21  -2
paddle/fluid/inference/anakin/convert/affine_channel.cc    +2  -2
paddle/fluid/inference/anakin/convert/batch_norm.cc        +7  -5
paddle/fluid/inference/anakin/convert/conv2d.cc            +5  -3
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc     +6  -4
paddle/fluid/inference/anakin/convert/dropout.cc           +2  -1
paddle/fluid/inference/anakin/convert/fc.cc                +6  -4
paddle/fluid/inference/anakin/convert/helper.h            +28 -21
paddle/fluid/inference/anakin/engine.cc                    +6  -0
paddle/fluid/inference/anakin/engine.h                     +1  -0
paddle/fluid/inference/anakin/test_anakin_engine.cc        +2  -5
paddle/fluid/inference/api/paddle_pass_builder.cc          +0  -5
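The commit replaces weight-block allocation through Anakin's global singleton (`::anakin::graph::GraphGlobalMem<TargetT>::Global()`) with plain `new PBlock<...>` allocations that are registered with the owning graph via a new `AnakinEngine::RegistBlock` method; see the engine.h and engine.cc diffs below. As a rough, self-contained sketch of the pattern being removed (the `GlobalMemPool` class here is a stand-in, not Anakin's actual implementation):

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Stand-in for a process-wide memory pool such as GraphGlobalMem: every
// engine in the process allocates blocks from the same instance, so block
// lifetime is tied to the process rather than to any one engine.
class GlobalMemPool {
 public:
  static GlobalMemPool& Global() {
    static GlobalMemPool pool;  // the singleton
    return pool;
  }
  float* NewBlock(size_t n) {
    blocks_.emplace_back(new float[n]());
    return blocks_.back().get();
  }
  size_t Size() const { return blocks_.size(); }

 private:
  std::vector<std::unique_ptr<float[]>> blocks_;
};

int main() {
  // Two logically independent "engines" still share one pool.
  GlobalMemPool::Global().NewBlock(16);
  GlobalMemPool::Global().NewBlock(16);
  std::cout << GlobalMemPool::Global().Size() << "\n";  // prints 2
  return 0;
}
```

One plausible motivation, not stated in the commit message, is that registering blocks with each engine's graph keeps the weights of coexisting engine instances separate and lets them be released together with their engine.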
paddle/fluid/framework/ir/fc_fuse_pass.cc

```diff
@@ -48,18 +48,37 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
   GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
   GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
 
-  auto base_op_desc = *mul->Op()->Proto();
+  auto base_op_desc = mul->Op();
   // Create an FC Node.
-  OpDesc desc(base_op_desc, nullptr);
+  // OpDesc desc(base_op_desc, nullptr);
+  OpDesc desc;
   std::string fc_x_in = subgraph.at(x)->Name();
   std::string fc_Y_in = w->Name();
   std::string fc_bias_in = fc_bias->Name();
   std::string fc_out_out = fc_out->Name();
   desc.SetInput("Input", std::vector<std::string>({fc_x_in}));
   desc.SetInput("W", std::vector<std::string>({fc_Y_in}));
   desc.SetInput("Bias", std::vector<std::string>({fc_bias_in}));
   desc.SetOutput("Out", std::vector<std::string>({fc_out_out}));
   desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims"));
+
+  // For anakin subgraph int8
+  // When in anakin subgraph int8 mode, the pattern like "fake_quant + mul +
+  // fake_dequant" can be detected by the quant_dequant_fuse_pass. This pass
+  // will add the "input_scale" and "weight_scale" attributes, extracted from
+  // the fake_quant and fake_dequant ops, to the mul op, and then delete the
+  // fake_quant and fake_dequant ops from the graph. If the mul op has the
+  // scale info, we should add those to the fused fc.
+  if (base_op_desc->HasAttr("enable_int8")) {
+    desc.SetAttr("enable_int8", base_op_desc->GetAttr("enable_int8"));
+    desc.SetAttr("input_scale", base_op_desc->GetAttr("input_scale"));
+    desc.SetAttr("weight_scale", base_op_desc->GetAttr("weight_scale"));
+  }
+
   desc.SetType("fc");
   auto fc_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
   GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out});
```
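The comment block added above documents the int8 flow: quant_dequant_fuse_pass folds the fake_quant/fake_dequant ops into the mul op as `input_scale` and `weight_scale` attributes, and the fused fc op must inherit them. A minimal stand-alone sketch of that conditional attribute forwarding, using a plain map in place of Paddle's `OpDesc` (all types and names here are illustrative only):

```cpp
#include <iostream>
#include <map>
#include <string>

// Stand-in for an op description: just a name -> attribute map.
// (Illustrative only; Paddle's real OpDesc has typed attributes.)
using AttrMap = std::map<std::string, float>;

// Forward an attribute to the fused op only if the source op carries it,
// mirroring the HasAttr/GetAttr/SetAttr sequence in the diff above.
void ForwardAttr(const AttrMap& src, AttrMap* dst, const std::string& name) {
  auto it = src.find(name);
  if (it != src.end()) (*dst)[name] = it->second;
}

int main() {
  AttrMap mul_op = {{"enable_int8", 1.f},
                    {"input_scale", 0.5f},
                    {"weight_scale", 0.25f}};
  AttrMap fused_fc;
  if (mul_op.count("enable_int8")) {
    ForwardAttr(mul_op, &fused_fc, "enable_int8");
    ForwardAttr(mul_op, &fused_fc, "input_scale");
    ForwardAttr(mul_op, &fused_fc, "weight_scale");
  }
  for (const auto& kv : fused_fc)
    std::cout << kv.first << " = " << kv.second << "\n";
  return 0;
}
```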
paddle/fluid/inference/anakin/convert/affine_channel.cc

```diff
@@ -38,13 +38,13 @@ void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
   // Copy the Scale to CPUPlace and get the pointer.
   auto* scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // Copy the Bias to CPUPlace and get the pointer.
   auto* bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto weight2 = pblock_from_var<TargetT>(*bias_v);
+  auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
```
paddle/fluid/inference/anakin/convert/batch_norm.cc

```diff
@@ -54,25 +54,27 @@ void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
   auto* mean_v = scope.FindVar(op_desc.Input("Mean").front());
   PADDLE_ENFORCE_NOT_NULL(mean_v);
-  auto weight1 = pblock_from_var<TargetT>(*mean_v);
+  auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
 
   auto* variance_v = scope.FindVar(op_desc.Input("Variance").front());
   PADDLE_ENFORCE_NOT_NULL(variance_v);
-  auto weight2 = pblock_from_var<TargetT>(*variance_v);
+  auto weight2 =
+      pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
 
-  auto* weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
+  auto* weight3 = pblock_from_vector<TargetT, PrecisionT>(
+      std::vector<float>({1}), this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
 
   auto* scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto scale = pblock_from_var<TargetT>(*scale_v);
+  auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
 
   auto* bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto bias = pblock_from_var<TargetT>(*bias_v);
+  auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
```
paddle/fluid/inference/anakin/convert/conv2d.cc

```diff
@@ -71,8 +71,9 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT> *weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     float *weight_data = weight_tensor->data<float>();
     std::vector<char> weight_int8;
     int weight_num = weight_tensor->numel();
@@ -94,7 +95,8 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
                                    {weight_scale / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
-    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
+        *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
 }
```
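In the int8 branch the converter allocates an `AK_INT8` block and, per the surrounding code, repacks the float weights into `std::vector<char> weight_int8`, attaching the per-tensor scale `weight_scale / int8_range` to the block. A self-contained sketch of symmetric int8 quantization under those conventions (the rounding and clamping details are assumptions, not the converter's exact code):

```cpp
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Quantize float weights to int8 with a symmetric scale, following the
// int8_range = 127 and weight_scale / int8_range expressions in the diff.
std::vector<signed char> QuantizeWeights(const std::vector<float>& w,
                                         float weight_scale) {
  const float int8_range = 127.f;
  const float step = weight_scale / int8_range;  // value of one int8 unit
  std::vector<signed char> q(w.size());
  for (size_t i = 0; i < w.size(); ++i) {
    float v = std::round(w[i] / step);
    v = std::max(-int8_range, std::min(int8_range, v));  // clamp to [-127,127]
    q[i] = static_cast<signed char>(v);
  }
  return q;
}

int main() {
  std::vector<float> w = {0.5f, -0.25f, 1.0f};
  auto q = QuantizeWeights(w, /*weight_scale=*/1.0f);  // max |w| as the scale
  for (auto v : q) std::cout << static_cast<int>(v) << " ";  // 64 -32 127
  std::cout << "\n";
  return 0;
}
```

Since `step` is the value of one quantization unit, dequantization is simply `q * step`.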
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc

```diff
@@ -73,8 +73,9 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT> *weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     float *weight_data = weight_tensor->data<float>();
     std::vector<char> weight_int8;
     int weight_num = weight_tensor->numel();
@@ -98,9 +99,10 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
   } else {
     auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
     auto weight_shape = framework::vectorize2int(weight_tensor->dims());
-    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
+        *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
```
paddle/fluid/inference/anakin/convert/dropout.cc

```diff
@@ -39,7 +39,8 @@ void DropoutOpConverter<TargetT, PrecisionT>::operator()(
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
-  auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));
+  auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(
+      std::vector<float>({factor}), this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   this->engine_->AddOpAttr(op_name, "axis", 0);
```
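The converter lowers dropout to a one-element scale weight `{1 - dropout_prob}`: at inference time dropout reduces to multiplying activations by the keep probability. A minimal sketch of that equivalence (the function name is illustrative, not the converter's):

```cpp
#include <iostream>
#include <vector>

// At inference, dropout is a fixed scale by the keep probability, which is
// why the converter builds the one-element weight {1 - dropout_prob}.
std::vector<float> DropoutInference(const std::vector<float>& x,
                                    float dropout_prob) {
  const float factor = 1.f - dropout_prob;  // keep probability
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) y[i] = x[i] * factor;
  return y;
}

int main() {
  auto y = DropoutInference({1.f, 2.f, 4.f}, /*dropout_prob=*/0.5f);
  for (float v : y) std::cout << v << " ";  // 0.5 1 2
  std::cout << "\n";
  return 0;
}
```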
paddle/fluid/inference/anakin/convert/fc.cc

```diff
@@ -77,8 +77,9 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT> *weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     std::vector<char> weight_int8;
     for (int i = 0; i < weight_num; i++) {
       bool is_valid_int8 =
@@ -98,7 +99,8 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
                                    {weight_scale / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
-    auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data);
+    auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(trans_weight_data,
+                                                            this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
@@ -106,7 +108,7 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
   if (with_bias) {
     auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
     PADDLE_ENFORCE_NOT_NULL(b_v);
-    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
```
paddle/fluid/inference/anakin/convert/helper.h

```diff
@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/inference/anakin/engine.h"
 
 #include "framework/core/net/net.h"
 #include "framework/core/types.h"
@@ -29,8 +30,8 @@
 using anakin::saber::Shape;
 using anakin::AK_FLOAT;
+using anakin::AK_INT8;
 using anakin::PBlock;
-using anakin::graph::GraphGlobalMem;
 
 namespace paddle {
 namespace inference {
@@ -38,31 +39,34 @@ namespace anakin {
 std::unique_ptr<framework::LoDTensor> tensor_from_var(
     const framework::Variable& var, const platform::Place& place);
 
-template <typename T>
-PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
-                              std::vector<int> shape) {
-  while (shape.size() < 4) {
-    shape.insert(shape.begin(), 1);
-  }
-  Shape anakin_shape(shape);
-  auto* weight =
-      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_tensor(const framework::LoDTensor& tensor,
+                                    std::vector<int> shape_vec,
+                                    AnakinEngine<TargetT, PrecisionT>* engine) {
+  while (shape_vec.size() < 4) {
+    shape_vec.insert(shape_vec.begin(), 1);
+  }
+  Shape shape(shape_vec);
+  PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
+  engine->RegistBlock(weight);
   float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
   std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
-  weight->d_tensor().set_shape(anakin_shape);
+  weight->d_tensor().set_shape(shape);
   weight->d_tensor().copy_from(weight->h_tensor());
   return weight;
 }
 
-template <typename T>
-PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
-                              std::vector<int> shape_vec) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
+                                    std::vector<int> shape_vec,
+                                    AnakinEngine<TargetT, PrecisionT>* engine) {
   while (shape_vec.size() < 4) {
     shape_vec.insert(shape_vec.begin(), 1);
   }
   Shape shape(shape_vec);
-  auto* weight =
-      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
+  PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
+  engine->RegistBlock(weight);
   auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
   std::copy(std::begin(vec), std::end(vec), weight_data);
   weight->d_tensor().set_shape(shape);
@@ -70,17 +74,20 @@ PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
   return weight;
 }
 
-template <typename T>
-PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
+                                    AnakinEngine<TargetT, PrecisionT>* engine) {
   int size = vec.size();
-  return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
+  return pblock_from_vector<TargetT, PrecisionT>(
+      vec, std::vector<int>({1, 1, 1, size}), engine);
 }
 
-template <typename T>
-PBlock<T>* pblock_from_var(const framework::Variable& var) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_var(const framework::Variable& var,
+                                 AnakinEngine<TargetT, PrecisionT>* engine) {
   auto tensor = tensor_from_var(var, platform::CPUPlace());
   auto shape = framework::vectorize2int(tensor->dims());
-  return pblock_from_tensor<T>(*tensor, shape);
+  return pblock_from_tensor<TargetT, PrecisionT>(*tensor, shape, engine);
 }
 
 }  // namespace anakin
```
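All of the `pblock_from_*` helpers share one normalization step: the shape is left-padded with 1s until it is 4-D before an `anakin::saber::Shape` is constructed. Extracted into a stand-alone function for clarity:

```cpp
#include <iostream>
#include <vector>

// Left-pad a shape to 4 dimensions with leading 1s, exactly as the
// while-loop in the pblock_from_* helpers does.
std::vector<int> PadTo4D(std::vector<int> shape_vec) {
  while (shape_vec.size() < 4) {
    shape_vec.insert(shape_vec.begin(), 1);
  }
  return shape_vec;
}

int main() {
  for (int v : PadTo4D({128})) std::cout << v << " ";  // 1 1 1 128
  std::cout << "\n";
  for (int v : PadTo4D({64, 3, 7})) std::cout << v << " ";  // 1 64 3 7
  std::cout << "\n";
  return 0;
}
```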
paddle/fluid/inference/anakin/engine.cc

```diff
@@ -162,6 +162,12 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
   PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
 }
 
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
+    ::anakin::PBlock<TargetT> *block_p) {
+  PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
+}
+
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
 AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
```
paddle/fluid/inference/anakin/engine.h

```diff
@@ -90,6 +90,7 @@ class AnakinEngine {
   int GetMaxBatchSize() { return max_batch_size_; }
   void Freeze();
   void Optimize();
+  void RegistBlock(::anakin::PBlock<TargetT> *block_p);
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
```
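With this declaration, converters allocate a `PBlock` with `new` and immediately hand it to the engine (`this->engine_->RegistBlock(weight1);` in the conv2d and fc diffs). The diff does not show who eventually frees these blocks; presumably the graph behind `graph_->RegistBlock` takes ownership. The sketch below only illustrates that ownership idea with stand-in `Block` and `Engine` types, not Anakin's actual classes:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Stand-in weight block; anakin::PBlock carries host/device tensors.
struct Block {
  std::string name;
  explicit Block(std::string n) : name(std::move(n)) {}
};

class Engine {
 public:
  // Takes ownership of a heap-allocated block, so its lifetime matches the
  // engine rather than a process-wide singleton pool.
  void RegistBlock(Block* block) { blocks_.emplace_back(block); }
  size_t NumBlocks() const { return blocks_.size(); }

 private:
  std::vector<std::unique_ptr<Block>> blocks_;
};

int main() {
  Engine engine_a, engine_b;
  engine_a.RegistBlock(new Block("conv1_weight"));
  engine_b.RegistBlock(new Block("conv1_weight"));  // no cross-engine sharing
  std::cout << engine_a.NumBlocks() << " " << engine_b.NumBlocks() << "\n";
  return 0;  // both engines release their own blocks here
}
```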
paddle/fluid/inference/anakin/test_anakin_engine.cc

```diff
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/inference/anakin/engine.h"
 
-using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
 using anakin::Precision;
 using anakin::saber::NV;
@@ -52,11 +51,9 @@ TEST_F(TestAnakinEngine, Execute) {
   engine_->AddOpAttr("op1", "axis", 1);
   std::vector<int> shape = {1, 1, 1, 2};
   Shape tmp_shape(shape);
-  // PBlock<NV> weight1(tmp_shape);
-  auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
-  // auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
+  PBlock<NV> *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
+  engine_->RegistBlock(weight1);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   cpu_data[0] = 2.;
   weight1->d_tensor().set_shape(tmp_shape);
```
paddle/fluid/inference/api/paddle_pass_builder.cc

```diff
@@ -73,9 +73,7 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
 // The following passes works for Anakin sub-graph engine.
 const std::vector<std::string> kAnakinSubgraphPasses({
     "infer_clean_graph_pass",                       //
-    "graph_viz_pass",                               //
     "quant_conv2d_dequant_fuse_pass",               //
-    "graph_viz_pass",                               //
     "simplify_anakin_priorbox_detection_out_pass",  //
     "fillconstant_elementwisemul_fuse",             //
     "fc_fuse_pass",                                 //
@@ -83,11 +81,8 @@ const std::vector<std::string> kAnakinSubgraphPasses({
     // "conv_bn_fuse_pass",              //
     // "conv_elementwise_add_fuse_pass", //
     "fc_gru_fuse_pass",                  //
-    "graph_viz_pass",                    //
     "anakin_subgraph_pass",              //
-    "graph_viz_pass",                    //
     "fc_gru_fuse_pass",                  //
-    "graph_viz_pass",                    //
 });
 
 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
```