机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit 4b9fa423, authored April 11, 2019 by nhzlx

    Cherry-pick from 16813 : change singleton to graph RegistBlock

    test=release/1.4

Parent: e14ab180
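In summary: before this change, every Anakin converter allocated weight blocks through the process-wide GraphGlobalMem singleton; after it, blocks are heap-allocated PBlocks registered with the owning engine's graph via the new RegistBlock API, so each engine manages the lifetime of its own weights. A minimal before/after sketch distilled from the diffs below (converter context elided):

    // Before: weights come from a process-wide singleton pool.
    auto* weight = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
                       .template new_block<::anakin::AK_FLOAT>(shape);

    // After: weights are allocated per engine and registered with its graph,
    // tying the block's lifetime to that engine instead of the process.
    PBlock<TargetT>* weight = new PBlock<TargetT>(shape, ::anakin::AK_FLOAT);
    this->engine_->RegistBlock(weight);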
Showing 12 changed files with 86 additions and 52 deletions (+86 −52):
  paddle/fluid/framework/ir/fc_fuse_pass.cc                 +21   -2
  paddle/fluid/inference/anakin/convert/affine_channel.cc    +2   -2
  paddle/fluid/inference/anakin/convert/batch_norm.cc        +7   -5
  paddle/fluid/inference/anakin/convert/conv2d.cc            +5   -3
  paddle/fluid/inference/anakin/convert/conv2d_fusion.cc     +6   -4
  paddle/fluid/inference/anakin/convert/dropout.cc           +2   -1
  paddle/fluid/inference/anakin/convert/fc.cc                +6   -4
  paddle/fluid/inference/anakin/convert/helper.h            +28  -21
  paddle/fluid/inference/anakin/engine.cc                    +6   -0
  paddle/fluid/inference/anakin/engine.h                     +1   -0
  paddle/fluid/inference/anakin/test_anakin_engine.cc        +2   -5
  paddle/fluid/inference/api/paddle_pass_builder.cc          +0   -5
paddle/fluid/framework/ir/fc_fuse_pass.cc

@@ -48,18 +48,37 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
   GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
   GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);

-  auto base_op_desc = *mul->Op()->Proto();
+  auto base_op_desc = mul->Op();
   // Create an FC Node.
-  OpDesc desc(base_op_desc, nullptr);
+  // OpDesc desc(base_op_desc, nullptr);
+  OpDesc desc;
   std::string fc_x_in = subgraph.at(x)->Name();
   std::string fc_Y_in = w->Name();
   std::string fc_bias_in = fc_bias->Name();
   std::string fc_out_out = fc_out->Name();
   desc.SetInput("Input", std::vector<std::string>({fc_x_in}));
   desc.SetInput("W", std::vector<std::string>({fc_Y_in}));
   desc.SetInput("Bias", std::vector<std::string>({fc_bias_in}));
   desc.SetOutput("Out", std::vector<std::string>({fc_out_out}));
   desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims"));
+
+  // For anakin subgraph int8:
+  // In anakin subgraph int8 mode, a pattern like "fake_quant + mul +
+  // fake_dequant" can be detected by the quant_dequant_fuse_pass. That pass
+  // adds "input_scale" and "weight_scale", extracted from the fake_quant op
+  // and fake_dequant op, to the mul op, and then deletes the fake_quant op
+  // and fake_dequant op from the graph. If the mul op has the scale info,
+  // we should add it to the fused fc.
+  if (base_op_desc->HasAttr("enable_int8")) {
+    desc.SetAttr("enable_int8", base_op_desc->GetAttr("enable_int8"));
+    desc.SetAttr("input_scale", base_op_desc->GetAttr("input_scale"));
+    desc.SetAttr("weight_scale", base_op_desc->GetAttr("weight_scale"));
+  }
+
   desc.SetType("fc");
   auto fc_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
   GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out});
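The int8 comment above describes the mechanism: quant_dequant_fuse_pass collapses a "fake_quant + mul + fake_dequant" pattern into a single mul op carrying "input_scale" and "weight_scale", and the FC fuse pass must carry those attributes onto the fused fc op. As a hedged illustration of that forwarding idiom, a hypothetical standalone helper (not part of this commit) could read:

    // Hypothetical helper; OpDesc and its HasAttr/GetAttr/SetAttr methods are
    // the framework APIs used in the diff above, but this function itself is
    // illustrative only.
    void ForwardInt8Attrs(const paddle::framework::OpDesc* from,
                          paddle::framework::OpDesc* to) {
      if (!from->HasAttr("enable_int8")) return;  // graph is not in int8 mode
      for (const char* name : {"enable_int8", "input_scale", "weight_scale"}) {
        to->SetAttr(name, from->GetAttr(name));
      }
    }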
paddle/fluid/inference/anakin/convert/affine_channel.cc

@@ -38,13 +38,13 @@ void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
   // Copy the Scale to CPUPlace and get the pointer.
   auto* scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

   // Copy the Bias to CPUPlace and get the pointer.
   auto* bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto weight2 = pblock_from_var<TargetT>(*bias_v);
+  auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
paddle/fluid/inference/anakin/convert/batch_norm.cc

@@ -54,25 +54,27 @@ void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
   auto* mean_v = scope.FindVar(op_desc.Input("Mean").front());
   PADDLE_ENFORCE_NOT_NULL(mean_v);
-  auto weight1 = pblock_from_var<TargetT>(*mean_v);
+  auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);

   auto* variance_v = scope.FindVar(op_desc.Input("Variance").front());
   PADDLE_ENFORCE_NOT_NULL(variance_v);
-  auto weight2 = pblock_from_var<TargetT>(*variance_v);
+  auto weight2 =
+      pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);

-  auto* weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
+  auto* weight3 = pblock_from_vector<TargetT, PrecisionT>(
+      std::vector<float>({1}), this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);

   auto* scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto scale = pblock_from_var<TargetT>(*scale_v);
+  auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);

   auto* bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto bias = pblock_from_var<TargetT>(*bias_v);
+  auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
paddle/fluid/inference/anakin/convert/conv2d.cc

@@ -71,8 +71,9 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto* weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT>* weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     float* weight_data = weight_tensor->data<float>();
     std::vector<char> weight_int8;
     int weight_num = weight_tensor->numel();
@@ -94,7 +95,8 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
                                    {weight_scale / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
-    auto* weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    auto* weight1 = pblock_from_tensor<TargetT, PrecisionT>(
+        *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
 }
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc

@@ -73,8 +73,9 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto* weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT>* weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     float* weight_data = weight_tensor->data<float>();
     std::vector<char> weight_int8;
     int weight_num = weight_tensor->numel();
@@ -98,9 +99,10 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
   } else {
     auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
     auto weight_shape = framework::vectorize2int(weight_tensor->dims());
-    auto* weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    auto* weight1 = pblock_from_tensor<TargetT, PrecisionT>(
+        *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
paddle/fluid/inference/anakin/convert/dropout.cc

@@ -39,7 +39,8 @@ void DropoutOpConverter<TargetT, PrecisionT>::operator()(
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
-  auto* weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));
+  auto* weight1 = pblock_from_vector<TargetT, PrecisionT>(
+      std::vector<float>({factor}), this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   this->engine_->AddOpAttr(op_name, "axis", 0);
paddle/fluid/inference/anakin/convert/fc.cc

@@ -77,8 +77,9 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto* weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT>* weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     std::vector<char> weight_int8;
     for (int i = 0; i < weight_num; i++) {
       bool is_valid_int8 =
@@ -98,7 +99,8 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
                                    {weight_scale / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
-    auto* weight1 = pblock_from_vector<TargetT>(trans_weight_data);
+    auto* weight1 = pblock_from_vector<TargetT, PrecisionT>(trans_weight_data,
+                                                            this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
@@ -106,7 +108,7 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
   if (with_bias) {
     auto* b_v = scope.FindVar(op_desc.Input("Bias").front());
     PADDLE_ENFORCE_NOT_NULL(b_v);
-    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
paddle/fluid/inference/anakin/convert/helper.h

@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/inference/anakin/engine.h"

 #include "framework/core/net/net.h"
 #include "framework/core/types.h"
@@ -29,8 +30,8 @@
 using anakin::saber::Shape;
 using anakin::AK_FLOAT;
+using anakin::AK_INT8;
 using anakin::PBlock;
-using anakin::graph::GraphGlobalMem;

 namespace paddle {
 namespace inference {
@@ -38,31 +39,34 @@ namespace anakin {
 std::unique_ptr<framework::LoDTensor> tensor_from_var(
     const framework::Variable& var, const platform::Place& place);

-template <typename T>
-PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
-                              std::vector<int> shape) {
-  while (shape.size() < 4) {
-    shape.insert(shape.begin(), 1);
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_tensor(const framework::LoDTensor& tensor,
+                                    std::vector<int> shape_vec,
+                                    AnakinEngine<TargetT, PrecisionT>* engine) {
+  while (shape_vec.size() < 4) {
+    shape_vec.insert(shape_vec.begin(), 1);
   }
-  Shape anakin_shape(shape);
-  auto* weight =
-      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
+  Shape shape(shape_vec);
+  PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
+  engine->RegistBlock(weight);
   float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
   std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
-  weight->d_tensor().set_shape(anakin_shape);
+  weight->d_tensor().set_shape(shape);
   weight->d_tensor().copy_from(weight->h_tensor());
   return weight;
 }

-template <typename T>
-PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
-                              std::vector<int> shape_vec) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
+                                    std::vector<int> shape_vec,
+                                    AnakinEngine<TargetT, PrecisionT>* engine) {
   while (shape_vec.size() < 4) {
     shape_vec.insert(shape_vec.begin(), 1);
   }
   Shape shape(shape_vec);
-  auto* weight =
-      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
+  PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
+  engine->RegistBlock(weight);
   auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
   std::copy(std::begin(vec), std::end(vec), weight_data);
   weight->d_tensor().set_shape(shape);
@@ -70,17 +74,20 @@ PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
   return weight;
 }

-template <typename T>
-PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
+                                    AnakinEngine<TargetT, PrecisionT>* engine) {
   int size = vec.size();
-  return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
+  return pblock_from_vector<TargetT, PrecisionT>(
+      vec, std::vector<int>({1, 1, 1, size}), engine);
 }

-template <typename T>
-PBlock<T>* pblock_from_var(const framework::Variable& var) {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+PBlock<TargetT>* pblock_from_var(const framework::Variable& var,
+                                 AnakinEngine<TargetT, PrecisionT>* engine) {
   auto tensor = tensor_from_var(var, platform::CPUPlace());
   auto shape = framework::vectorize2int(tensor->dims());
-  return pblock_from_tensor<T>(*tensor, shape);
+  return pblock_from_tensor<TargetT, PrecisionT>(*tensor, shape, engine);
 }

 }  // namespace anakin
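All three helpers now take both template parameters and the owning engine; the call shape at a converter site (mirroring the dropout converter above) is:

    // The helper allocates the PBlock, copies in the host data, and registers
    // the block with `engine`, so the engine's graph owns it instead of the
    // removed GraphGlobalMem singleton.
    auto* weight1 = pblock_from_vector<TargetT, PrecisionT>(
        std::vector<float>({factor}), this->engine_);
    this->engine_->AddOpAttr(op_name, "weight_1", *weight1);

Threading the engine through explicitly also means two engines no longer share one global weight pool, which is presumably the point of retiring the singleton.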
paddle/fluid/inference/anakin/engine.cc

@@ -162,6 +162,12 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
   PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
 }

+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
+    ::anakin::PBlock<TargetT> *block_p) {
+  PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
+}
+
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
 AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
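RegistBlock is a thin forwarder: the Anakin graph takes over the raw PBlock pointer. Conceptually the graph-side registry is just an owning container; a minimal sketch of the idea (an assumption for illustration, not Anakin's actual Graph::RegistBlock) is:

    #include <memory>
    #include <vector>

    // Sketch of "register a block with the graph": the registry assumes
    // ownership, so blocks are freed when the graph (and thus the engine)
    // is destroyed, rather than living in a process-wide singleton.
    template <typename Block>
    class BlockRegistry {
     public:
      bool RegistBlock(Block* block) {
        blocks_.emplace_back(block);  // take ownership of the raw pointer
        return true;
      }

     private:
      std::vector<std::unique_ptr<Block>> blocks_;
    };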
paddle/fluid/inference/anakin/engine.h

@@ -90,6 +90,7 @@ class AnakinEngine {
   int GetMaxBatchSize() { return max_batch_size_; }
   void Freeze();
   void Optimize();
+  void RegistBlock(::anakin::PBlock<TargetT> *block_p);
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
paddle/fluid/inference/anakin/test_anakin_engine.cc

@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/inference/anakin/engine.h"

-using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
 using anakin::Precision;
 using anakin::saber::NV;
@@ -52,11 +51,9 @@ TEST_F(TestAnakinEngine, Execute) {
   engine_->AddOpAttr("op1", "axis", 1);
   std::vector<int> shape = {1, 1, 1, 2};
   Shape tmp_shape(shape);
-  // PBlock<NV> weight1(tmp_shape);
-  auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
-  // auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
+  PBlock<NV> *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
+  engine_->RegistBlock(weight1);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   cpu_data[0] = 2.;
   weight1->d_tensor().set_shape(tmp_shape);
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -73,9 +73,7 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
 // The following passes works for Anakin sub-graph engine.
 const std::vector<std::string> kAnakinSubgraphPasses({
     "infer_clean_graph_pass",                       //
-    "graph_viz_pass",                               //
     "quant_conv2d_dequant_fuse_pass",               //
-    "graph_viz_pass",                               //
     "simplify_anakin_priorbox_detection_out_pass",  //
     "fillconstant_elementwisemul_fuse",             //
     "fc_fuse_pass",                                 //
@@ -83,11 +81,8 @@ const std::vector<std::string> kAnakinSubgraphPasses({
     // "conv_bn_fuse_pass",              //
     // "conv_elementwise_add_fuse_pass", //
     "fc_gru_fuse_pass",      //
-    "graph_viz_pass",        //
     "anakin_subgraph_pass",  //
-    "graph_viz_pass",        //
     "fc_gru_fuse_pass",      //
-    "graph_viz_pass",        //
 });

 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {