Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
b21770a2
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b21770a2
编写于
3月 20, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry-pick from feature/anakin-engine: Add subgraph fuse support and anakin engine #16018
上级
084310f5
变更
36
隐藏空白更改
内联
并排
Showing
36 changed file
with
1129 addition
and
80 deletion
+1129
-80
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+12
-8
paddle/fluid/inference/anakin/convert/activation.cc
paddle/fluid/inference/anakin/convert/activation.cc
+0
-4
paddle/fluid/inference/anakin/convert/activation.h
paddle/fluid/inference/anakin/convert/activation.h
+3
-8
paddle/fluid/inference/anakin/convert/concat.cc
paddle/fluid/inference/anakin/convert/concat.cc
+1
-1
paddle/fluid/inference/anakin/convert/conv2d.cc
paddle/fluid/inference/anakin/convert/conv2d.cc
+3
-2
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+113
-0
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+35
-0
paddle/fluid/inference/anakin/convert/elementwise.cc
paddle/fluid/inference/anakin/convert/elementwise.cc
+57
-0
paddle/fluid/inference/anakin/convert/elementwise.h
paddle/fluid/inference/anakin/convert/elementwise.h
+37
-0
paddle/fluid/inference/anakin/convert/fc.cc
paddle/fluid/inference/anakin/convert/fc.cc
+60
-11
paddle/fluid/inference/anakin/convert/fc.h
paddle/fluid/inference/anakin/convert/fc.h
+15
-3
paddle/fluid/inference/anakin/convert/op_converter.h
paddle/fluid/inference/anakin/convert/op_converter.h
+0
-8
paddle/fluid/inference/anakin/convert/relu.cc
paddle/fluid/inference/anakin/convert/relu.cc
+47
-0
paddle/fluid/inference/anakin/convert/relu.h
paddle/fluid/inference/anakin/convert/relu.h
+37
-0
paddle/fluid/inference/anakin/convert/test_activation_op.cc
paddle/fluid/inference/anakin/convert/test_activation_op.cc
+0
-3
paddle/fluid/inference/anakin/convert/test_concat_op.cc
paddle/fluid/inference/anakin/convert/test_concat_op.cc
+4
-4
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
+4
-4
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
+50
-0
paddle/fluid/inference/anakin/convert/test_fc_op.cc
paddle/fluid/inference/anakin/convert/test_fc_op.cc
+5
-5
paddle/fluid/inference/anakin/convert/test_relu_op.cc
paddle/fluid/inference/anakin/convert/test_relu_op.cc
+50
-0
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+1
-6
paddle/fluid/inference/anakin/engine.cc
paddle/fluid/inference/anakin/engine.cc
+11
-6
paddle/fluid/inference/anakin/engine.h
paddle/fluid/inference/anakin/engine.h
+5
-3
paddle/fluid/inference/anakin/op_teller.cc
paddle/fluid/inference/anakin/op_teller.cc
+13
-2
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+11
-1
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+12
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+273
-0
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
...fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
+37
-0
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
...e/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+3
-1
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
...ence/analysis/passes/ir_params_sync_among_devices_pass.cc
+1
-0
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+4
-0
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+13
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+4
-0
paddle/fluid/operators/anakin/CMakeLists.txt
paddle/fluid/operators/anakin/CMakeLists.txt
+2
-0
paddle/fluid/operators/anakin/anakin_engine_op.cc
paddle/fluid/operators/anakin/anakin_engine_op.cc
+54
-0
paddle/fluid/operators/anakin/anakin_engine_op.h
paddle/fluid/operators/anakin/anakin_engine_op.h
+152
-0
未找到文件。
paddle/fluid/inference/anakin/convert/CMakeLists.txt
浏览文件 @
b21770a2
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc activation.cc pool2d.cc concat.cc split.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OPERATOR_DEPS
}
anakin_op_converter mul_op
)
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OPERATOR_DEPS
}
anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op
)
cc_test
(
test_anakin_activation SRCS test_activation_op.cc DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OPERATOR_DEPS
}
activation_op anakin_op_converter
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL
)
SERIAL
)
cc_test
(
test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL
)
cc_test
(
test_anakin_pool2d SRCS test_pool2d_op.cc DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OPERATOR_DEPS
}
anakin_op_converter pool_op pooling
)
cc_test
(
test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling
)
cc_test
(
test_anakin_concat SRCS test_concat_op.cc DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OPERATOR_DEPS
}
anakin_op_converter concat_op concat_and_split
)
cc_test
(
test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split
)
cc_test
(
test_anakin_split SRCS test_split_op.cc DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OPERATOR_DEPS
}
anakin_op_converter split_op concat_and_split
)
cc_test
(
test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split
)
cc_test
(
test_anakin_elementwise SRCS test_elementwise_op.cc DEPS
anakin_op_converter elementwise_add_op
)
cc_test
(
test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL
)
paddle/fluid/inference/anakin/convert/activation.cc
浏览文件 @
b21770a2
...
@@ -45,15 +45,11 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -45,15 +45,11 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Activation"
,
{
input_name
},
{
output_name
});
engine_
->
AddOp
(
op_name
,
"Activation"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"type"
,
anakin_op_type_
);
engine_
->
AddOpAttr
(
op_name
,
"type"
,
anakin_op_type_
);
if
(
op_type_
==
"relu"
)
{
engine_
->
AddOpAttr
(
op_name
,
"alpha"
,
0
);
}
}
}
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
relu
,
ReluOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
sigmoid
,
SigmoidOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
sigmoid
,
SigmoidOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
tanh
,
TanhOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
tanh
,
TanhOpConverter
);
paddle/fluid/inference/anakin/convert/activation.h
浏览文件 @
b21770a2
...
@@ -34,13 +34,8 @@ class ActivationOpConverter : public AnakinOpConverter {
...
@@ -34,13 +34,8 @@ class ActivationOpConverter : public AnakinOpConverter {
private:
private:
std
::
string
op_type_
;
std
::
string
op_type_
;
std
::
string
anakin_op_type_
;
std
::
string
anakin_op_type_
;
std
::
map
<
std
::
string
,
std
::
string
>
anakin_ops_type_
{
std
::
map
<
std
::
string
,
std
::
string
>
anakin_ops_type_
{{
"tanh"
,
"TanH"
},
{
"relu"
,
"Relu"
},
{
"tanh"
,
"TanH"
},
{
"sigmoid"
,
"Sigmoid"
}};
{
"sigmoid"
,
"Sigmoid"
}};
};
class
ReluOpConverter
:
public
ActivationOpConverter
{
public:
ReluOpConverter
()
:
ActivationOpConverter
(
"relu"
)
{}
};
};
class
TanhOpConverter
:
public
ActivationOpConverter
{
class
TanhOpConverter
:
public
ActivationOpConverter
{
...
@@ -50,7 +45,7 @@ class TanhOpConverter : public ActivationOpConverter {
...
@@ -50,7 +45,7 @@ class TanhOpConverter : public ActivationOpConverter {
class
SigmoidOpConverter
:
public
ActivationOpConverter
{
class
SigmoidOpConverter
:
public
ActivationOpConverter
{
public:
public:
SigmoidOpConverter
()
:
ActivationOpConverter
(
"
tanh
"
)
{}
SigmoidOpConverter
()
:
ActivationOpConverter
(
"
sigmoid
"
)
{}
};
};
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/anakin/convert/concat.cc
浏览文件 @
b21770a2
...
@@ -32,8 +32,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -32,8 +32,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
const
framework
::
Scope
&
scope
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
auto
input_names
=
op_desc
.
Input
(
"X"
);
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
auto
input_names
=
op_desc
.
Input
(
"X"
);
PADDLE_ENFORCE
(
axis
>
0
,
PADDLE_ENFORCE
(
axis
>
0
,
"The axis attr of Concat op should be large than 0 for trt"
);
"The axis attr of Concat op should be large than 0 for trt"
);
...
...
paddle/fluid/inference/anakin/convert/conv2d.cc
浏览文件 @
b21770a2
...
@@ -51,10 +51,11 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -51,10 +51,11 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
PADDLE_ENFORCE_EQ
(
weight_tensor
->
dims
().
size
(),
4UL
);
PADDLE_ENFORCE_EQ
(
weight_tensor
->
dims
().
size
(),
4UL
);
// const int n_output = weight_tensor->dims()[0];
// const int n_output = weight_tensor->dims()[0];
const
int
n_input
=
weight_tensor
->
dims
()[
1
];
//
const int n_input = weight_tensor->dims()[1];
const
int
filter_h
=
weight_tensor
->
dims
()[
2
];
const
int
filter_h
=
weight_tensor
->
dims
()[
2
];
const
int
filter_w
=
weight_tensor
->
dims
()[
3
];
const
int
filter_w
=
weight_tensor
->
dims
()[
3
];
auto
filter_num
=
n_input
*
filter_h
*
filter_w
;
// auto filter_num = n_input * filter_h * filter_w ;
auto
filter_num
=
weight_tensor
->
dims
()[
0
];
engine_
->
AddOpAttr
<
int
>
(
op_name
,
"filter_num"
,
filter_num
);
engine_
->
AddOpAttr
<
int
>
(
op_name
,
"filter_num"
,
filter_num
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"kernel_size"
,
{
filter_h
,
filter_w
});
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"kernel_size"
,
{
filter_h
,
filter_w
});
auto
strides
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"strides"
));
auto
strides
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"strides"
));
...
...
paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
#include <algorithm>
#include <memory>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Lowers a fluid op description with "Input"/"Filter"/"Bias" inputs and an
// "Output" output onto the Anakin engine as a "Convolution" op.
//
// Steps, in order:
//   1. Check that each of Input/Filter/Bias/Output names exactly one variable.
//   2. Add the "Convolution" op to engine_, named "<op type>:<output name>".
//   3. Look up the filter and bias variables in `scope`.
//   4. Set filter_num / kernel_size / strides / padding / dilation_rate /
//      group / axis / bias_term attributes from the filter dims and op attrs.
//   5. Copy the filter into a new Anakin block and attach it as "weight_1".
//   6. Copy the bias into a new Anakin block whose shape is the bias dims
//      prefixed with three 1s, and attach it as "weight_2".
//
// `test_mode` is not read by this function.
void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
                                         const framework::Scope &scope,
                                         bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);

  auto src_name = op_desc.Input("Input").front();
  auto dst_name = op_desc.Output("Output").front();
  // The engine-side op is keyed by "<type>:<first output>".
  auto node_name = op_desc.Type() + ":" + dst_name;
  engine_->AddOp(node_name, "Convolution", {src_name}, {dst_name});

  // Resolve the parameter tensors from the scope.
  auto *filter_var = scope.FindVar(op_desc.Input("Filter").front());
  PADDLE_ENFORCE_NOT_NULL(filter_var);
  auto *filter_src = filter_var->GetMutable<framework::LoDTensor>();

  auto *bias_var = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(bias_var);
  auto *bias_src = bias_var->GetMutable<framework::LoDTensor>();

  // Stage a CPU-side copy of the filter so its data can be read directly.
  framework::LoDTensor host_filter;
  host_filter.Resize(filter_src->dims());
  TensorCopySync((*filter_src), platform::CPUPlace(), &host_filter);

  PADDLE_ENFORCE_EQ(host_filter.dims().size(), 4UL);

  // Filter dims are indexed as [0]=filter count, [2]=height, [3]=width.
  const int kernel_h = host_filter.dims()[2];
  const int kernel_w = host_filter.dims()[3];
  auto num_filters = host_filter.dims()[0];
  engine_->AddOpAttr<int>(node_name, "filter_num", num_filters);
  engine_->AddOpAttr<PTuple<int>>(node_name, "kernel_size",
                                  {kernel_h, kernel_w});

  auto stride_attr = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
  engine_->AddOpAttr<PTuple<int>>(node_name, "strides", stride_attr);

  auto padding_attr =
      boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
  engine_->AddOpAttr<PTuple<int>>(node_name, "padding", padding_attr);

  auto dilation_attr =
      boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
  engine_->AddOpAttr<PTuple<int>>(node_name, "dilation_rate", dilation_attr);

  const int group_count = boost::get<int>(op_desc.GetAttr("groups"));
  engine_->AddOpAttr(node_name, "group", group_count);
  engine_->AddOpAttr(node_name, "axis", 1);
  engine_->AddOpAttr(node_name, "bias_term", true);

  // ---- weight_1: the convolution filter ----
  auto filter_dims = framework::vectorize2int(filter_src->dims());
  Shape filter_shape(filter_dims);
  auto *filter_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(filter_shape);
  float *filter_host_ptr =
      static_cast<float *>(filter_block->h_tensor().mutable_data());
  std::copy_n(host_filter.data<float>(), host_filter.numel(), filter_host_ptr);
  filter_block->d_tensor().set_shape(filter_shape);
  filter_block->d_tensor().copy_from(filter_block->h_tensor());
  engine_->AddOpAttr(node_name, "weight_1", *filter_block);

  // ---- weight_2: the bias ----
  auto bias_dims = framework::vectorize2int(bias_src->dims());
  framework::LoDTensor host_bias;
  host_bias.Resize(bias_src->dims());
  TensorCopySync((*bias_src), platform::CPUPlace(), &host_bias);
  auto *bias_values = host_bias.data<float>();
  // Prefix the bias dims with three leading 1s.
  bias_dims.insert(bias_dims.begin(), 3, 1);
  Shape bias_shape(bias_dims);
  auto *bias_block =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
  float *bias_host_ptr =
      static_cast<float *>(bias_block->h_tensor().mutable_data());
  std::copy_n(bias_values, host_bias.numel(), bias_host_ptr);
  bias_block->d_tensor().set_shape(bias_shape);
  bias_block->d_tensor().copy_from(bias_block->h_tensor());
  engine_->AddOpAttr(node_name, "weight_2", *bias_block);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Associates Conv2dFusionOpConverter with the op type name `conv2d_fusion`.
// NOTE(review): presumably this is what lets a Registry lookup by op type find
// the converter; the macro body is not visible here, so confirm.
REGISTER_ANAKIN_OP_CONVERTER
(
conv2d_fusion
,
Conv2dFusionOpConverter
);
paddle/fluid/inference/anakin/convert/conv2d_fusion.h
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Op converter for conv2d_fusion. It adds no state of its own on top of
// AnakinOpConverter; the conversion logic lives in operator(), which is
// defined out of line.
class Conv2dFusionOpConverter : public AnakinOpConverter {
 public:
  Conv2dFusionOpConverter() = default;
  virtual ~Conv2dFusionOpConverter() = default;

  // Conversion entry point inherited from AnakinOpConverter.
  //   op        - serialized description of the op to convert.
  //   scope     - scope in which the op's variables are looked up.
  //   test_mode - forwarded by the caller as part of the base interface.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope, bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/elementwise.cc
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Lowers a fluid op with inputs "X" and "Y" and output "Out" onto the Anakin
// engine as an "Eltwise" op of type "Add" with coefficients {1.0, 1.0}.
//
// Each of X, Y and Out must name exactly one variable. Both X and Y are wired
// in as inputs of the Anakin op; nothing is read from `scope`, and
// `test_mode` is not used.
//
// NOTE(review): the original code carried the comment "Y is a weight", but
// this function passes Y to the engine as a regular op input rather than
// loading it as a parameter. Confirm which is intended.
void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
                                           const framework::Scope &scope,
                                           bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  // size() is unsigned; compare against an unsigned literal (as the other
  // converters do) to avoid a signed/unsigned comparison.
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

  auto x_name = op_desc.Input("X").front();
  auto y_name = op_desc.Input("Y").front();
  auto out_name = op_desc.Output("Out").front();
  // The engine-side op is keyed by "<type>:<first output>".
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});

  std::string elementwise_type = "Add";
  engine_->AddOpAttr<std::string>(op_name, "type", elementwise_type);

  // One coefficient per input.
  std::vector<float> coeff = {1.0, 1.0};
  engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Associates ElementwiseAddOpConverter with the op type name
// `elementwise_add`.
// NOTE(review): presumably this is what lets a Registry lookup by op type find
// the converter; the macro body is not visible here, so confirm.
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
);
paddle/fluid/inference/anakin/convert/elementwise.h
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Op converter for elementwise_add. It holds no members of its own; the
// conversion logic lives in operator(), which is defined out of line.
class ElementwiseAddOpConverter : public AnakinOpConverter {
 public:
  ElementwiseAddOpConverter() = default;
  virtual ~ElementwiseAddOpConverter() = default;

  // Conversion entry point inherited from AnakinOpConverter.
  //   op        - serialized description of the op to convert.
  //   scope     - scope associated with the op's variables.
  //   test_mode - forwarded by the caller as part of the base interface.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope, bool test_mode) override;
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/fc.cc
浏览文件 @
b21770a2
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
AK_FLOAT
;
...
@@ -24,28 +26,39 @@ namespace paddle {
...
@@ -24,28 +26,39 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
void
FcOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
void
FcBaseOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
auto
input_names
=
op_desc
.
InputNames
();
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
bool
with_bias
=
input_names
.
size
()
==
3
;
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
std
::
string
w_name
=
"Y"
;
std
::
string
i_name
=
"X"
;
if
(
with_bias
)
{
w_name
=
"W"
;
i_name
=
"Input"
;
}
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
auto
*
y_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Y"
).
front
());
// get weights
auto
*
y_v
=
scope
.
FindVar
(
op_desc
.
Input
(
w_name
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
y_v
);
PADDLE_ENFORCE_NOT_NULL
(
y_v
);
auto
*
y_t
=
y_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
y_t
=
y_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
input_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
input_name
=
op_desc
.
Input
(
i_name
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
weight_shape
=
framework
::
vectorize2int
(
y_t
->
dims
());
engine_
->
AddOp
(
op_name
,
"Dense"
,
{
input_name
},
{
output_name
});
engine_
->
AddOp
(
op_name
,
"Dense"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
with_bias
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
auto
weight_shape
=
framework
::
vectorize2int
(
y_t
->
dims
());
int
out_dim
=
weight_shape
[
1
];
int
out_dim
=
weight_shape
[
1
];
engine_
->
AddOpAttr
(
op_name
,
"out_dim"
,
out_dim
);
engine_
->
AddOpAttr
(
op_name
,
"out_dim"
,
out_dim
);
const
int
w_m
=
weight_shape
[
0
];
const
int
w_k
=
weight_shape
[
1
];
weight_shape
.
push_back
(
1
);
weight_shape
.
push_back
(
1
);
weight_shape
.
push_back
(
1
);
weight_shape
.
push_back
(
1
);
...
@@ -54,18 +67,54 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -54,18 +67,54 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
framework
::
LoDTensor
weight_tensor
;
framework
::
LoDTensor
weight_tensor
;
weight_tensor
.
Resize
(
y_t
->
dims
());
weight_tensor
.
Resize
(
y_t
->
dims
());
TensorCopySync
((
*
y_t
),
platform
::
CPUPlace
(),
&
weight_tensor
);
TensorCopySync
((
*
y_t
),
platform
::
CPUPlace
(),
&
weight_tensor
);
auto
*
weight_data
=
weight_tensor
.
data
<
float
>
();
PADDLE_ENFORCE
(
w_m
*
w_k
==
weight_tensor
.
numel
());
std
::
vector
<
float
>
trans_weight_data
(
weight_tensor
.
numel
());
for
(
int
i
=
0
;
i
<
w_m
;
i
++
)
{
for
(
int
j
=
0
;
j
<
w_k
;
j
++
)
{
trans_weight_data
[
i
+
j
*
w_m
]
=
weight_data
[
i
*
w_k
+
j
];
}
}
auto
*
weight1
=
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
weight_tensor
.
data
<
float
>
(),
weight_tensor
.
numel
(),
cpu_data
);
std
::
copy_n
(
trans_weight_data
.
data
(),
weight_tensor
.
numel
(),
cpu_data
);
weight1
->
d_tensor
().
set_shape
(
anakin_shape
);
weight1
->
d_tensor
().
set_shape
(
anakin_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
// get bias
if
(
with_bias
)
{
auto
*
b_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Bias"
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
b_v
);
auto
*
b_t
=
b_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
bias_shape
=
framework
::
vectorize2int
(
b_t
->
dims
());
framework
::
LoDTensor
bias_tensor
;
bias_tensor
.
Resize
(
b_t
->
dims
());
TensorCopySync
((
*
b_t
),
platform
::
CPUPlace
(),
&
bias_tensor
);
auto
*
bias_data
=
bias_tensor
.
data
<
float
>
();
bias_shape
.
insert
(
bias_shape
.
begin
(),
1
);
bias_shape
.
insert
(
bias_shape
.
begin
(),
1
);
bias_shape
.
insert
(
bias_shape
.
begin
(),
1
);
// bias_shape.push_back(1);
// bias_shape.push_back(1);
Shape
anakin_bias_shape
(
bias_shape
);
auto
*
weight2
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_bias_shape
);
float
*
cpu_data2
=
static_cast
<
float
*>
(
weight2
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
bias_data
,
bias_tensor
.
numel
(),
cpu_data2
);
weight2
->
d_tensor
().
set_shape
(
anakin_bias_shape
);
weight2
->
d_tensor
().
copy_from
(
weight2
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_2"
,
*
weight2
);
}
}
}
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
mul
,
MulOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
fc
,
FcOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
fc
,
FcOpConverter
);
paddle/fluid/inference/anakin/convert/fc.h
浏览文件 @
b21770a2
...
@@ -20,14 +20,26 @@ namespace paddle {
...
@@ -20,14 +20,26 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
class
FcOpConverter
:
public
AnakinOpConverter
{
class
Fc
Base
OpConverter
:
public
AnakinOpConverter
{
public:
public:
FcOpConverter
()
=
default
;
Fc
Base
OpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
bool
test_mode
)
override
;
virtual
~
FcOpConverter
()
{}
virtual
~
FcBaseOpConverter
()
{}
};
// with bias
class
FcOpConverter
:
public
FcBaseOpConverter
{
public:
FcOpConverter
()
=
default
;
};
// without bias
class
MulOpConverter
:
public
FcBaseOpConverter
{
public:
MulOpConverter
()
=
default
;
};
};
}
// namespace anakin
}
// namespace anakin
...
...
paddle/fluid/inference/anakin/convert/op_converter.h
浏览文件 @
b21770a2
...
@@ -47,14 +47,6 @@ class AnakinOpConverter {
...
@@ -47,14 +47,6 @@ class AnakinOpConverter {
std
::
string
op_type
=
op_desc
.
Type
();
std
::
string
op_type
=
op_desc
.
Type
();
AnakinOpConverter
*
it
=
nullptr
;
AnakinOpConverter
*
it
=
nullptr
;
if
(
op_type
==
"mul"
)
{
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1UL
);
std
::
string
Y
=
op_desc
.
Input
(
"Y"
)[
0
];
if
(
parameters
.
count
(
Y
))
{
it
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
"fc"
);
}
}
if
(
!
it
)
{
if
(
!
it
)
{
it
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
op_type
);
it
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
op_type
);
}
}
...
...
paddle/fluid/inference/anakin/convert/relu.cc
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include <algorithm>
#include <map>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
Shape
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Lowers a fluid op with input "X" and output "Out" onto the Anakin engine as
// a "ReLU" op whose "alpha" attribute is set to 0.
//
// X and Out must each name exactly one variable. Nothing is read from
// `scope`, and `test_mode` is not used.
void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
                                 const framework::Scope &scope,
                                 bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  // size() is unsigned; compare against an unsigned literal (as the other
  // converters do) to avoid a signed/unsigned comparison.
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);

  // The engine-side op is keyed by "<type>:<first output>".
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
  auto input_name = op_desc.Input("X").front();
  auto output_name = op_desc.Output("Out").front();

  engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
  // NOTE(review): the literal 0 makes this an int-typed attribute. Confirm
  // that Anakin's ReLU does not expect a float alpha.
  engine_->AddOpAttr(op_name, "alpha", 0);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
// Associates ReluOpConverter with the op type name `relu`.
// NOTE(review): presumably this is what lets a Registry lookup by op type find
// the converter; the macro body is not visible here, so confirm.
REGISTER_ANAKIN_OP_CONVERTER
(
relu
,
ReluOpConverter
);
paddle/fluid/inference/anakin/convert/relu.h
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Op converter for the fluid `relu` operator; the conversion logic lives in
// the out-of-line definition of operator().
class ReluOpConverter : public AnakinOpConverter {
 public:
  ReluOpConverter() = default;

  // Converts the operator described by `op`. `override` already implies
  // that the function is virtual.
  void operator()(const framework::proto::OpDesc &op,
                  const framework::Scope &scope, bool test_mode) override;

  virtual ~ReluOpConverter() {}
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/test_activation_op.cc
浏览文件 @
b21770a2
...
@@ -41,16 +41,13 @@ static void test_activation_op(const std::string &op_type) {
...
@@ -41,16 +41,13 @@ static void test_activation_op(const std::string &op_type) {
validator
.
Execute
(
5
);
validator
.
Execute
(
5
);
}
}
TEST
(
relu_op
,
test
)
{
test_activation_op
(
"relu"
);
}
TEST
(
sigm_op
,
test
)
{
test_activation_op
(
"sigmoid"
);
}
TEST
(
sigm_op
,
test
)
{
test_activation_op
(
"sigmoid"
);
}
TEST
(
tanh_op
,
test
)
{
test_activation_op
(
"tanh"
);
}
TEST
(
tanh_op
,
test
)
{
test_activation_op
(
"tanh"
);
}
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_OP
(
relu
);
USE_OP
(
sigmoid
);
USE_OP
(
sigmoid
);
USE_OP
(
tanh
);
USE_OP
(
tanh
);
USE_ANAKIN_CONVERTER
(
relu
);
USE_ANAKIN_CONVERTER
(
sigmoid
);
USE_ANAKIN_CONVERTER
(
sigmoid
);
USE_ANAKIN_CONVERTER
(
tanh
);
USE_ANAKIN_CONVERTER
(
tanh
);
paddle/fluid/inference/anakin/convert/test_concat_op.cc
浏览文件 @
b21770a2
...
@@ -25,10 +25,10 @@ TEST(concat_op, test) {
...
@@ -25,10 +25,10 @@ TEST(concat_op, test) {
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
std
::
unordered_set
<
std
::
string
>
parameters
({
""
});
framework
::
Scope
scope
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"concat_x1"
,
{
1
,
10
,
3
,
1
});
validator
.
DeclInputVar
(
"concat_x1"
,
{
1
,
2
,
1
,
1
});
validator
.
DeclInputVar
(
"concat_x2"
,
{
1
,
3
,
3
,
1
});
validator
.
DeclInputVar
(
"concat_x2"
,
{
1
,
3
,
1
,
1
});
validator
.
DeclInputVar
(
"concat_x3"
,
{
1
,
7
,
3
,
1
});
validator
.
DeclInputVar
(
"concat_x3"
,
{
1
,
1
,
1
,
1
});
validator
.
DeclOutputVar
(
"concat_out"
,
{
1
,
20
,
3
,
1
});
validator
.
DeclOutputVar
(
"concat_out"
,
{
1
,
6
,
1
,
1
});
// Prepare Op description
// Prepare Op description
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
...
...
paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
浏览文件 @
b21770a2
...
@@ -28,9 +28,9 @@ TEST(conv2d_op, test) {
...
@@ -28,9 +28,9 @@ TEST(conv2d_op, test) {
std
::
unordered_set
<
std
::
string
>
parameters
({
"conv2d-Y"
});
std
::
unordered_set
<
std
::
string
>
parameters
({
"conv2d-Y"
});
framework
::
Scope
scope
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"conv2d-X"
,
{
1
,
2
,
5
,
5
});
validator
.
DeclInputVar
(
"conv2d-X"
,
{
1
,
3
,
3
,
3
});
validator
.
DeclParamVar
(
"conv2d-Y"
,
{
3
,
2
,
3
,
3
});
validator
.
DeclParamVar
(
"conv2d-Y"
,
{
4
,
3
,
1
,
1
});
validator
.
DeclOutputVar
(
"conv2d-Out"
,
{
1
,
3
,
5
,
5
});
validator
.
DeclOutputVar
(
"conv2d-Out"
,
{
1
,
4
,
3
,
3
});
// Prepare Op description
// Prepare Op description
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
...
@@ -40,7 +40,7 @@ TEST(conv2d_op, test) {
...
@@ -40,7 +40,7 @@ TEST(conv2d_op, test) {
desc
.
SetOutput
(
"Output"
,
{
"conv2d-Out"
});
desc
.
SetOutput
(
"Output"
,
{
"conv2d-Out"
});
const
std
::
vector
<
int
>
strides
({
1
,
1
});
const
std
::
vector
<
int
>
strides
({
1
,
1
});
const
std
::
vector
<
int
>
paddings
({
1
,
1
});
const
std
::
vector
<
int
>
paddings
({
0
,
0
});
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
const
int
groups
=
1
;
const
int
groups
=
1
;
...
...
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
0 → 100644
浏览文件 @
b21770a2
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
// Validates the Anakin converter for `elementwise_add` on two inputs and one
// output that all share the shape {1, 1, 2, 2}.
TEST(elementwise_op, native) {
  std::unordered_set<std::string> param_names;  // this op has no parameters
  framework::Scope scope;
  AnakinConvertValidation checker(param_names, scope);

  // Declare the variables the op reads and writes.
  checker.DeclInputVar("elementwise_add_x", {1, 1, 2, 2});
  checker.DeclInputVar("elementwise_y", {1, 1, 2, 2});
  checker.DeclOutputVar("elementwise_out", {1, 1, 2, 2});

  // Describe the fluid operator under test.
  framework::OpDesc op_desc;
  op_desc.SetType("elementwise_add");
  op_desc.SetInput("X", {"elementwise_add_x"});
  op_desc.SetInput("Y", {"elementwise_y"});
  op_desc.SetOutput("Out", {"elementwise_out"});

  int axis_attr = -1;
  op_desc.SetAttr("axis", axis_attr);

  // Hand the description to the validator and run it with argument 1.
  checker.SetOp(*op_desc.Proto());
  checker.Execute(1);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
paddle/fluid/inference/anakin/convert/test_fc_op.cc
浏览文件 @
b21770a2
...
@@ -27,9 +27,9 @@ TEST(fc_op, test) {
...
@@ -27,9 +27,9 @@ TEST(fc_op, test) {
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul_y"
});
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul_y"
});
framework
::
Scope
scope
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"mul_x"
,
{
1
,
1
,
1
,
1
});
validator
.
DeclInputVar
(
"mul_x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclParamVar
(
"mul_y"
,
{
1
,
2
});
validator
.
DeclParamVar
(
"mul_y"
,
{
4
,
2
});
validator
.
DeclOutputVar
(
"mul_out"
,
{
1
,
1
,
1
,
2
});
validator
.
DeclOutputVar
(
"mul_out"
,
{
1
,
2
});
// Prepare Op description
// Prepare Op description
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
...
@@ -37,8 +37,8 @@ TEST(fc_op, test) {
...
@@ -37,8 +37,8 @@ TEST(fc_op, test) {
desc
.
SetInput
(
"X"
,
{
"mul_x"
});
desc
.
SetInput
(
"X"
,
{
"mul_x"
});
desc
.
SetInput
(
"Y"
,
{
"mul_y"
});
desc
.
SetInput
(
"Y"
,
{
"mul_y"
});
desc
.
SetOutput
(
"Out"
,
{
"mul_out"
});
desc
.
SetOutput
(
"Out"
,
{
"mul_out"
});
int
num_flatten_dims
=
3
;
//
int num_flatten_dims = 3;
desc
.
SetAttr
(
"x_num_col_dims"
,
num_flatten_dims
);
//
desc.SetAttr("x_num_col_dims", num_flatten_dims);
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
10
);
validator
.
Execute
(
10
);
...
...
paddle/fluid/inference/anakin/convert/test_relu_op.cc
0 → 100644
浏览文件 @
b21770a2
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
static
void
test_activation_op
(
const
std
::
string
&
op_type
)
{
auto
*
converter
=
Registry
<
AnakinOpConverter
>::
Global
().
Lookup
(
op_type
);
PADDLE_ENFORCE
(
converter
!=
nullptr
);
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"act-X"
,
{
10
,
6
,
1
,
1
});
validator
.
DeclOutputVar
(
"act-Out"
,
{
10
,
6
,
1
,
1
});
framework
::
OpDesc
desc
;
desc
.
SetType
(
op_type
);
desc
.
SetInput
(
"X"
,
{
"act-X"
});
desc
.
SetOutput
(
"Out"
,
{
"act-Out"
});
LOG
(
INFO
)
<<
"set OP"
;
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
5
);
}
// Runs the shared activation validation against the `relu` converter.
// The suite is named `relu_op` (it was `sigm_op`, a copy-paste leftover) so
// that test reports and --gtest_filter patterns name the op actually tested.
TEST(relu_op, test) { test_activation_op("relu"); }
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
relu
);
USE_ANAKIN_CONVERTER
(
relu
);
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
b21770a2
...
@@ -161,10 +161,6 @@ class AnakinConvertValidation {
...
@@ -161,10 +161,6 @@ class AnakinConvertValidation {
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
fluid_out
);
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
fluid_out
);
fluid_outputs
.
push_back
(
fluid_out
);
fluid_outputs
.
push_back
(
fluid_out
);
// size_t fluid_out_size = fluid_out.size();
/*for (size_t i = 0; i < fluid_out_size; i++) {
std::cout << fluid_out[i] << std::endl;
}*/
outputs
.
insert
({
output
,
tensor
});
outputs
.
insert
({
output
,
tensor
});
}
}
...
@@ -180,8 +176,7 @@ class AnakinConvertValidation {
...
@@ -180,8 +176,7 @@ class AnakinConvertValidation {
size_t
anakin_out_size
=
anakin_out
.
size
();
size_t
anakin_out_size
=
anakin_out
.
size
();
auto
fluid_out
=
fluid_outputs
[
i_output
++
];
auto
fluid_out
=
fluid_outputs
[
i_output
++
];
for
(
size_t
i
=
0
;
i
<
anakin_out_size
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
anakin_out_size
;
i
++
)
{
LOG
(
INFO
)
<<
"Output["
<<
i
<<
"]: anakin["
<<
anakin_out
[
i
]
<<
"], "
EXPECT_LT
(
std
::
abs
(
fluid_out
[
i
]
-
anakin_out
[
i
]),
1e-3
);
<<
"fluid["
<<
fluid_out
[
i
]
<<
"]"
;
}
}
}
}
}
}
...
...
paddle/fluid/inference/anakin/engine.cc
浏览文件 @
b21770a2
...
@@ -68,29 +68,34 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
...
@@ -68,29 +68,34 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
auto
*
tensor
=
input
.
second
;
auto
*
tensor
=
input
.
second
;
auto
*
data
=
tensor
->
data
<
float
>
();
auto
*
data
=
tensor
->
data
<
float
>
();
auto
shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
auto
shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
::
anakin
::
saber
::
Shape
anakin_shape
(
shape
);
auto
*
anakin_input
=
net_
->
get_in
(
input
.
first
);
auto
*
anakin_input
=
net_
->
get_in
(
input
.
first
);
auto
anakin_input_shape
=
anakin_input
->
valid_shape
();
PADDLE_ENFORCE
(
tensor
->
numel
(),
anakin_input_shape
.
count
(),
"the fluid input size should be equal to anakin"
);
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
anakin_shape
);
anakin_
input_
shape
);
anakin_input
->
share
_from
(
tmp_anakin_tensor
);
anakin_input
->
copy
_from
(
tmp_anakin_tensor
);
}
}
for
(
const
auto
&
output
:
outputs
)
{
for
(
const
auto
&
output
:
outputs
)
{
auto
*
tensor
=
output
.
second
;
auto
*
tensor
=
output
.
second
;
auto
*
data
=
tensor
->
data
<
float
>
();
auto
*
data
=
tensor
->
data
<
float
>
();
auto
shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
auto
shape
=
framework
::
vectorize2int
(
tensor
->
dims
());
::
anakin
::
saber
::
Shape
anakin_shape
(
shape
);
auto
*
anakin_output
=
net_
->
get_out
(
output
.
first
);
auto
*
anakin_output
=
net_
->
get_out
(
output
.
first
);
auto
anakin_output_shape
=
anakin_output
->
valid_shape
();
PADDLE_ENFORCE
(
tensor
->
numel
(),
anakin_output_shape
.
count
(),
"the fluid output size should be equal to anakin"
);
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
::
anakin
::
saber
::
Tensor
<
TargetT
>
tmp_anakin_tensor
(
data
,
TargetT
(),
0
,
anakin_shape
);
anakin_
output_
shape
);
anakin_output
->
share_from
(
tmp_anakin_tensor
);
anakin_output
->
share_from
(
tmp_anakin_tensor
);
}
}
net_
->
prediction
();
net_
->
prediction
();
cudaDeviceSynchronize
();
}
}
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
void
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
Freeze
()
{
void
AnakinEngine
<
TargetT
,
PrecisionType
,
RunType
>::
Freeze
()
{
PADDLE_ENFORCE
(
graph_
->
Freeze
(),
"Freeze anakin subgraph."
);
PADDLE_ENFORCE
(
graph_
->
Freeze
_v3
(),
"Freeze anakin subgraph."
);
}
}
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
template
<
typename
TargetT
,
Precision
PrecisionType
,
OpRunType
RunType
>
...
...
paddle/fluid/inference/anakin/engine.h
浏览文件 @
b21770a2
...
@@ -46,6 +46,9 @@ namespace anakin {
...
@@ -46,6 +46,9 @@ namespace anakin {
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionType
,
template
<
typename
TargetT
,
::
anakin
::
Precision
PrecisionType
,
::
anakin
::
OpRunType
RunType
=
::
anakin
::
OpRunType
::
ASYNC
>
::
anakin
::
OpRunType
RunType
=
::
anakin
::
OpRunType
::
ASYNC
>
class
AnakinEngine
{
class
AnakinEngine
{
using
NetT
=
::
anakin
::
Net
<
TargetT
,
PrecisionType
,
RunType
>
;
using
GraphT
=
::
anakin
::
graph
::
Graph
<
TargetT
,
PrecisionType
>
;
public:
public:
explicit
AnakinEngine
(
bool
need_summary
=
false
);
explicit
AnakinEngine
(
bool
need_summary
=
false
);
~
AnakinEngine
();
~
AnakinEngine
();
...
@@ -61,16 +64,15 @@ class AnakinEngine {
...
@@ -61,16 +64,15 @@ class AnakinEngine {
PADDLE_ENFORCE
(
graph_
->
AddOpAttr
(
op_name
,
attr_name
,
attr_value
),
PADDLE_ENFORCE
(
graph_
->
AddOpAttr
(
op_name
,
attr_name
,
attr_value
),
"Add operation's attribution."
);
"Add operation's attribution."
);
}
}
NetT
*
Net
()
{
return
net_
.
get
();
}
std
::
unique_ptr
<
AnakinEngine
>
Clone
();
std
::
unique_ptr
<
AnakinEngine
>
Clone
();
void
Freeze
();
void
Freeze
();
void
Optimize
();
void
Optimize
();
void
Save
(
std
::
string
path
)
{
graph_
->
save
(
path
);
}
void
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
void
Execute
(
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
inputs
,
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
);
const
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
&
outputs
);
private:
private:
using
NetT
=
::
anakin
::
Net
<
TargetT
,
PrecisionType
,
RunType
>
;
using
GraphT
=
::
anakin
::
graph
::
Graph
<
TargetT
,
PrecisionType
>
;
std
::
unique_ptr
<
GraphT
>
graph_
;
std
::
unique_ptr
<
GraphT
>
graph_
;
std
::
unique_ptr
<
NetT
>
net_
;
std
::
unique_ptr
<
NetT
>
net_
;
};
};
...
...
paddle/fluid/inference/anakin/op_teller.cc
浏览文件 @
b21770a2
...
@@ -20,7 +20,18 @@ namespace anakin {
...
@@ -20,7 +20,18 @@ namespace anakin {
// Just tell by the op_types.
// Just tell by the op_types.
struct
SimpleOpTypeSetTeller
:
public
Teller
{
struct
SimpleOpTypeSetTeller
:
public
Teller
{
SimpleOpTypeSetTeller
()
{}
SimpleOpTypeSetTeller
()
{
// teller_set.insert("mul");
teller_set
.
insert
(
"fc"
);
teller_set
.
insert
(
"conv2d_fusion"
);
teller_set
.
insert
(
"split"
);
teller_set
.
insert
(
"relu"
);
teller_set
.
insert
(
"pool2d"
);
teller_set
.
insert
(
"elementwise_add"
);
teller_set
.
insert
(
"concat"
);
teller_set
.
insert
(
"tanh"
);
// teller_set.insert("conv2d");
}
bool
operator
()(
const
std
::
string
&
op_type
,
bool
operator
()(
const
std
::
string
&
op_type
,
const
framework
::
OpDesc
&
desc
)
override
{
const
framework
::
OpDesc
&
desc
)
override
{
...
@@ -28,7 +39,7 @@ struct SimpleOpTypeSetTeller : public Teller {
...
@@ -28,7 +39,7 @@ struct SimpleOpTypeSetTeller : public Teller {
}
}
private:
private:
std
::
unordered_set
<
std
::
string
>
teller_set
{{
"mul"
}}
;
std
::
unordered_set
<
std
::
string
>
teller_set
;
};
};
bool
OpTeller
::
Tell
(
const
std
::
string
&
op_type
,
const
framework
::
OpDesc
&
desc
)
{
bool
OpTeller
::
Tell
(
const
std
::
string
&
op_type
,
const
framework
::
OpDesc
&
desc
)
{
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
b21770a2
...
@@ -13,8 +13,11 @@
...
@@ -13,8 +13,11 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include <memory>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
...
@@ -63,7 +66,14 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -63,7 +66,14 @@ void IRPassManager::CreatePasses(Argument *argument,
}
else
if
(
pass_name
==
"cpu_quantize_pass"
)
{
}
else
if
(
pass_name
==
"cpu_quantize_pass"
)
{
pass
->
Set
(
"quant_var_scales"
,
pass
->
Set
(
"quant_var_scales"
,
new
VarQuantScale
(
argument
->
quant_var_scales
()));
new
VarQuantScale
(
argument
->
quant_var_scales
()));
}
else
if
(
pass_name
==
"tensorrt_subgraph_pass"
)
{
}
if
(
pass_name
==
"anakin_subgraph_pass"
)
{
pass
->
Set
(
"program"
,
new
framework
::
ProgramDesc
*
(
&
argument
->
main_program
()));
}
if
(
pass_name
==
"tensorrt_subgraph_pass"
)
{
pass
->
Set
(
"workspace_size"
,
new
int
(
argument
->
tensorrt_workspace_size
()));
pass
->
Set
(
"workspace_size"
,
new
int
(
argument
->
tensorrt_workspace_size
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
tensorrt_max_batch_size
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
tensorrt_max_batch_size
()));
pass
->
Set
(
"min_subgraph_size"
,
pass
->
Set
(
"min_subgraph_size"
,
...
...
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
浏览文件 @
b21770a2
...
@@ -14,3 +14,15 @@ if (WITH_GPU AND TENSORRT_FOUND)
...
@@ -14,3 +14,15 @@ if (WITH_GPU AND TENSORRT_FOUND)
file
(
APPEND
${
pass_file
}
"USE_PASS(tensorrt_subgraph_pass);
\n
"
)
file
(
APPEND
${
pass_file
}
"USE_PASS(tensorrt_subgraph_pass);
\n
"
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
tensorrt_subgraph_pass CACHE INTERNAL
""
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
tensorrt_subgraph_pass CACHE INTERNAL
""
)
endif
()
endif
()
if
(
WITH_ANAKIN_SUBGRAPH
)
cc_library
(
anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller
)
set
(
analysis_deps
${
analysis_deps
}
subgraph_detector anakin_subgraph_pass
CACHE INTERNAL
""
)
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
APPEND
${
pass_file
}
"USE_PASS(anakin_subgraph_pass);
\n
"
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
anakin_subgraph_pass CACHE INTERNAL
""
)
endif
()
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/inference/anakin/op_teller.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
using
framework
::
ir
::
Node
;
std
::
vector
<
std
::
string
>
ExtractAnakinParameters
(
const
std
::
unordered_set
<
Node
*>
&
nodes
);
// Entry point of the pass.
//
// 1. Fuses every group of ops accepted by the Anakin OpTeller into a
//    sub-graph via SubGraphFuser.
// 2. For each op node that carries a non-empty sub-graph, rewrites it with
//    CreateAnakinOp and removes the sub-graph's member nodes from the graph.
// 3. Removes every op node that is still marked as deleted.
//
// Takes ownership of `graph` and returns it after the rewrite.
std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl(
    std::unique_ptr<framework::ir::Graph> graph) const {
  framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get());

  // A node may join a sub-graph only if it is an op with a description and
  // the global Anakin teller accepts it.
  auto is_anakin_op = [](const framework::ir::Node *candidate) {
    return candidate->IsOp() && candidate->Op() &&
           anakin::OpTeller::Global().Tell(candidate->Op()->Type(),
                                           *candidate->Op());
  };

  SubGraphFuser fuser(graph.get(), is_anakin_op, 0);
  fuser();

  for (auto *op_node : graph->Nodes()) {
    if (!op_node->IsOp()) continue;
    if (Agent(op_node).subgraph()->empty()) continue;

    CreateAnakinOp(op_node, graph.get());

    // The member ops are now described by the engine op; drop them.
    std::unordered_set<const Node *> fused_members(
        Agent(op_node).subgraph()->begin(), Agent(op_node).subgraph()->end());
    framework::ir::GraphSafeRemoveNodes(graph.get(), fused_members);
  }

  // Final sweep: collect and remove the op nodes flagged as deleted.
  std::unordered_set<const Node *> deleted_ops;
  for (auto *op_node : graph->Nodes()) {
    if (!op_node->IsOp()) continue;
    if (!Agent(op_node).deleted()) continue;
    deleted_ops.insert(op_node);
  }
  framework::ir::GraphSafeRemoveNodes(graph.get(), deleted_ops);

  return graph;
}
// Builds the key that identifies an Anakin engine.
//
// engine_inputs  - names of the engine's input variables.
// engine_outputs - names of the engine's output variables.
//
// All input names followed by all output names are concatenated, each group
// in the set's sorted iteration order and without separators; the result is
// the decimal string of std::hash<std::string> applied to that
// concatenation.
std::string GenerateAnakinEngineKey(
    const std::set<std::string> &engine_inputs,
    const std::set<std::string> &engine_outputs) {
  std::string engine_hash_key;
  // Iterate by const reference so no name is copied just to be appended.
  for (const auto &name : engine_inputs) {
    engine_hash_key += name;
  }
  for (const auto &name : engine_outputs) {
    engine_hash_key += name;
  }
  return std::to_string(std::hash<std::string>()(engine_hash_key));
}
// Rewrites the fused op node `node` into an "anakin_engine" op.
//
// node  - op node produced by the sub-graph fuser; its Agent must carry a
//         non-empty sub-graph (enforced below).
// graph - graph that owns `node`; its var nodes are copied into the
//         serialized block and its nodes are scanned for parameters.
//
// On return `node->Op()` has type "anakin_engine", inputs "Xs", outputs "Ys"
// and the attributes "sub_block", "subgraph", "parameters",
// "output_name_mapping" and "engine_key".
void AnakinSubgraphPass::CreateAnakinOp(framework::ir::Node *node,
                                        Graph *graph) const {
  auto *op_desc = node->Op();
  auto &subgraph = *Agent(node).subgraph();
  PADDLE_ENFORCE(!subgraph.empty());

  framework::ProgramDesc *program_desc =
      Get<framework::ProgramDesc *>("program");
  // Append a new block to the program for the anakin engine op; it is
  // attached to the op below as the "sub_block" attribute.
  const framework::BlockDesc &main_block =
      program_desc->Block(framework::kRootBlockIndex);
  // const framework::BlockDesc& main_block = program_desc->Block(0);
  framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);

  // A stand-alone ("fake") block desc that does not belong to the program.
  // It is the one whose names get rewritten and that is serialized into the
  // "subgraph" attribute.
  framework::proto::BlockDesc block_proto;
  framework::BlockDesc block_desc(nullptr, &block_proto);
  block_desc.Proto()->set_parent_idx(-1);
  block_desc.Proto()->set_idx(0);
  string::PrettyLogDetail("--- detect a sub-graph with %d nodes",
                          subgraph.size());

  // Copy the description of every op in the sub-graph into both blocks.
  // NOTE(review): this loop variable shadows the `node` parameter.
  for (auto *node : subgraph) {
    auto *new_block_op = new_block->AppendOp();
    auto *op = block_desc.AppendOp();
    *new_block_op->Proto() = *node->Op()->Proto();
    *op->Proto() = *node->Op()->Proto();
  }

  // input_names_with_id and output_names_with_id are later passed to
  // GenerateAnakinEngineKey to produce the engine key. std::set (rather
  // than std::unordered_set) is used so that the names are visited in a
  // deterministic, sorted order when the key is built.
  std::set<std::string> input_names;
  std::set<std::string> input_names_with_id;
  for (auto *x : node->inputs) {
    input_names.insert(x->Name());
    input_names_with_id.insert(x->Name() + std::to_string(x->id()));
  }
  op_desc->SetInput(
      "Xs", std::vector<std::string>(input_names.begin(), input_names.end()));

  std::set<std::string> output_names;
  std::set<std::string> output_names_with_id;
  for (auto *x : node->outputs) {
    output_names.insert(x->Name());
    output_names_with_id.insert(x->Name() + std::to_string(x->id()));
  }

  op_desc->SetOutput(
      "Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
  op_desc->SetType("anakin_engine");

  // Maps an original sub-graph output name to its renamed "<name><id>" form.
  std::unordered_map<std::string, std::string> output_name_map;

  // The following procedure renames all the intermediate variables and the
  // output variables of the sub-graph.
  // Why do we do this?
  // During the transition from fluid ops to anakin ops, the input and
  // output tensors of a fluid op are mapped to their engine-side
  // counterparts through the tensor name, and a name must not be set up
  // twice.
  // (NOTE(review): the original comment was copied from the TensorRT pass
  // and spoke of "ITensor"; confirm the exact Anakin-side constraint.)
  // If a variable in the fluid graph is both the input of one op and the
  // output of another, that would be a problem. So the variables in the
  // sub-graph are renamed to "<name><node id>" to make each one either an
  // op's input or an op's output.
  auto &subgraph_nodes = *Agent(node).subgraph();
  for (size_t index = 0; index < block_desc.OpSize(); ++index) {
    framework::proto::OpDesc *op = block_desc.Op(index)->Proto();
    // Ops were appended in sub-graph order, so index i of the block
    // corresponds to node i of the sub-graph; the type check guards that.
    auto correspond_node = subgraph_nodes[index];
    PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type());

    // Name -> node id for the input var nodes of this op.
    std::unordered_map<std::string, size_t> var2id;
    for (auto *in_var : correspond_node->inputs) {
      var2id[in_var->Name()] = in_var->id();
    }
    // Rename the input variables of the op inside the sub-graph.
    for (int i = 0; i < op->inputs_size(); i++) {
      // One input slot of the op.
      auto *in_var = op->mutable_inputs(i);
      std::vector<std::string> replaced_names;
      for (int k = 0; k < in_var->arguments_size(); k++) {
        // Every argument of the slot.
        // NOTE: var2id[...] default-inserts 0 for a name that is not among
        // the node's inputs.
        std::string arg_value = in_var->arguments(k);
        std::string arg_value_with_id =
            arg_value + std::to_string(var2id[arg_value]);
        if (input_names_with_id.count(arg_value_with_id)) {
          // An input of the whole engine op keeps its original name.
          replaced_names.push_back(arg_value);
        } else {
          // Anything else gets the id-suffixed name.
          replaced_names.push_back(arg_value_with_id);
        }
      }
      in_var->clear_arguments();
      for (size_t k = 0; k < replaced_names.size(); k++) {
        in_var->add_arguments(replaced_names[k]);
      }
    }
    // Reuse the map for the output var nodes of this op.
    var2id.clear();
    for (auto out_var : correspond_node->outputs) {
      var2id[out_var->Name()] = out_var->id();
    }

    // Rename the output variables of the op inside the sub-graph; unlike
    // inputs, every output argument gets the id-suffixed name.
    for (int i = 0; i < op->outputs_size(); i++) {
      framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i);
      std::vector<std::string> replaced_names;
      for (int k = 0; k < out_var->arguments_size(); k++) {
        std::string arg_value = out_var->arguments(k);
        std::string arg_value_with_id =
            arg_value + std::to_string(var2id[arg_value]);
        if (output_names_with_id.count(arg_value_with_id)) {
          // Remember the renamed form of each output of the engine op.
          output_name_map[arg_value] = arg_value_with_id;
        }
        replaced_names.push_back(arg_value_with_id);
      }
      out_var->clear_arguments();
      for (size_t k = 0; k < replaced_names.size(); k++) {
        out_var->add_arguments(replaced_names[k]);
      }
    }
  }

  // output_mapping lists, in the sorted order of output_names (the same
  // order used for "Ys"), the renamed name of each engine output. It is
  // stored as the "output_name_mapping" attribute; presumably the engine op
  // uses it to copy results back to the fluid tensors (per the original
  // comment; the consumer is not visible here).
  std::vector<std::string> output_mapping;
  for (auto name : output_names) {
    PADDLE_ENFORCE(output_name_map.count(name) != 0);
    output_mapping.push_back(output_name_map[name]);
  }

  // Copy the description of every var node of the whole graph (not only of
  // the sub-graph) into the stand-alone block; var nodes without a
  // description are skipped.
  // NOTE(review): this loop variable also shadows the `node` parameter.
  auto *vars = block_desc.Proto()->mutable_vars();
  for (framework::ir::Node *node : graph->Nodes()) {
    if (node->IsVar() && node->Var()) {
      *vars->Add() = *node->Var()->Proto();
    }
  }

  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
                 "the block has no var-desc");
  PADDLE_ENFORCE(!output_mapping.empty());
  op_desc->SetBlockAttr("sub_block", new_block);
  SetAttr(op_desc->Proto(), "subgraph",
          block_desc.Proto()->SerializeAsString());
  // Set the remaining attributes of the engine op.
  SetAttr(op_desc->Proto(), "parameters",
          ExtractAnakinParameters(graph->Nodes()));
  SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);

  auto engine_key =
      GenerateAnakinEngineKey(input_names_with_id, output_names_with_id);

  SetAttr(op_desc->Proto(), "engine_key", engine_key);
}
// Returns the names of the variables in `nodes` that are treated as model
// parameters.
//
// A variable counts as a parameter when it is persistable and is not an
// output of a "feed" op. Persistability alone is not sufficient because the
// output of a feed op is sometimes marked persistable as well.
//
// Op nodes without an op description and var nodes without a var
// description are skipped instead of being dereferenced; the other
// functions in this file guard Op() and Var() in the same way.
//
// The returned names follow the iteration order of `nodes`.
std::vector<std::string> ExtractAnakinParameters(
    const std::unordered_set<Node *> &nodes) {
  // Names produced by feed ops; a set gives constant-time membership tests.
  std::unordered_set<std::string> feed_outputs;
  for (const auto &node : nodes) {
    if (!node->IsOp() || !node->Op()) continue;
    if (node->Op()->Type() != "feed") continue;
    const auto output_names = node->Op()->OutputArgumentNames();
    feed_outputs.insert(output_names.begin(), output_names.end());
  }

  std::vector<std::string> parameters;
  for (const auto &node : nodes) {
    if (!node->IsVar() || !node->Var()) continue;
    if (node->Var()->Persistable() && feed_outputs.count(node->Name()) == 0) {
      parameters.push_back(node->Name());
    }
  }
  return parameters;
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
REGISTER_PASS
(
anakin_subgraph_pass
,
paddle
::
inference
::
analysis
::
AnakinSubgraphPass
);
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
0 → 100644
浏览文件 @
b21770a2
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <memory>
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// IR pass named "anakin_subgraph_pass" (see its ApplyImpl definition).
// It derives from FusePassBase and overrides ApplyImpl as the pass entry
// point.
class AnakinSubgraphPass : public framework::ir::FusePassBase {
 public:
  // Runs the pass on `graph`, taking ownership of it, and returns the
  // resulting graph.
  std::unique_ptr<framework::ir::Graph> ApplyImpl(
      std::unique_ptr<framework::ir::Graph> graph) const override;

 private:
  // Rewrites the fused op node `x` of `graph` into an anakin engine op.
  void CreateAnakinOp(framework::ir::Node *x,
                      framework::ir::Graph *graph) const;
  // NOTE(review): declared here, but no definition appears in the
  // accompanying anakin_subgraph_pass.cc; confirm whether it is still
  // needed.
  void CleanIntermediateOutputs(framework::ir::Node *node);
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
浏览文件 @
b21770a2
...
@@ -14,6 +14,8 @@ limitations under the License. */
...
@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <utility>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...
@@ -418,7 +420,7 @@ void SubGraphFuser::ReplaceNodesWithSubGraphs() {
...
@@ -418,7 +420,7 @@ void SubGraphFuser::ReplaceNodesWithSubGraphs() {
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
// as deleted. 3. Replace the deleted node with the new Block Node.
// as deleted. 3. Replace the deleted node with the new Block Node.
framework
::
OpDesc
empty_desc
;
framework
::
OpDesc
empty_desc
;
empty_desc
.
SetType
(
"
tensorrt
_engine"
);
empty_desc
.
SetType
(
"
anakin
_engine"
);
auto
*
block_node
=
graph_
->
CreateOpNode
(
&
empty_desc
);
auto
*
block_node
=
graph_
->
CreateOpNode
(
&
empty_desc
);
Agent
(
block_node
).
set_subgraph
({});
Agent
(
block_node
).
set_subgraph
({});
auto
io
=
ExtractInputAndOutputOfSubGraph
(
subgraph
);
auto
io
=
ExtractInputAndOutputOfSubGraph
(
subgraph
);
...
...
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
浏览文件 @
b21770a2
...
@@ -30,6 +30,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
...
@@ -30,6 +30,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
// The parameters are on the cpu, therefore, synchronization is not necessary.
// The parameters are on the cpu, therefore, synchronization is not necessary.
if
(
!
argument
->
use_gpu
())
return
;
if
(
!
argument
->
use_gpu
())
return
;
return
;
auto
&
graph
=
argument
->
main_graph
();
auto
&
graph
=
argument
->
main_graph
();
std
::
vector
<
std
::
string
>
repetitive_params
;
std
::
vector
<
std
::
string
>
repetitive_params
;
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
b21770a2
...
@@ -27,6 +27,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
...
@@ -27,6 +27,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
set
(
inference_deps
${
inference_deps
}
tensorrt_engine tensorrt_converter
)
set
(
inference_deps
${
inference_deps
}
tensorrt_engine tensorrt_converter
)
endif
()
endif
()
if
(
WITH_ANAKIN_SUBGRAPH
)
set
(
inference_deps
${
inference_deps
}
anakin_op_converter anakin_engine
)
endif
()
add_subdirectory
(
details
)
add_subdirectory
(
details
)
cc_library
(
analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder
)
cc_library
(
analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
b21770a2
...
@@ -43,6 +43,8 @@
...
@@ -43,6 +43,8 @@
#endif
#endif
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
DECLARE_bool
(
profile
);
DECLARE_bool
(
profile
);
namespace
paddle
{
namespace
paddle
{
...
@@ -805,3 +807,14 @@ USE_TRT_CONVERTER(prelu);
...
@@ -805,3 +807,14 @@ USE_TRT_CONVERTER(prelu);
USE_TRT_CONVERTER
(
conv2d_transpose
);
USE_TRT_CONVERTER
(
conv2d_transpose
);
USE_TRT_CONVERTER
(
leaky_relu
);
USE_TRT_CONVERTER
(
leaky_relu
);
#endif
#endif
// Anakin op converters referenced by the analysis predictor, one macro
// invocation per op type (fc, conv2d, concat, split, relu, sigmoid, tanh,
// pool2d, conv2d_fusion, elementwise_add).
// NOTE(review): in this hunk the invocations follow the preceding #endif, so
// they look unconditional -- confirm they build when Anakin is disabled.
USE_ANAKIN_CONVERTER
(
fc
);
USE_ANAKIN_CONVERTER
(
conv2d
);
USE_ANAKIN_CONVERTER
(
concat
);
USE_ANAKIN_CONVERTER
(
split
);
USE_ANAKIN_CONVERTER
(
relu
);
USE_ANAKIN_CONVERTER
(
sigmoid
);
USE_ANAKIN_CONVERTER
(
tanh
);
USE_ANAKIN_CONVERTER
(
pool2d
);
USE_ANAKIN_CONVERTER
(
conv2d_fusion
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
b21770a2
...
@@ -34,6 +34,10 @@ if (WITH_GPU AND TENSORRT_FOUND)
...
@@ -34,6 +34,10 @@ if (WITH_GPU AND TENSORRT_FOUND)
add_subdirectory
(
tensorrt
)
add_subdirectory
(
tensorrt
)
endif
()
endif
()
if
(
WITH_ANAKIN_SUBGRAPH
)
add_subdirectory
(
anakin
)
endif
()
SET
(
OP_HEADER_DEPS xxhash
)
SET
(
OP_HEADER_DEPS xxhash
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
SET
(
OP_HEADER_DEPS
${
OP_HEADER_DEPS
}
cub
)
SET
(
OP_HEADER_DEPS
${
OP_HEADER_DEPS
}
cub
)
...
...
paddle/fluid/operators/anakin/CMakeLists.txt
0 → 100644
浏览文件 @
b21770a2
# Declare the anakin_engine_op operator target; its DEPS are the Anakin engine
# wrapper (anakin_engine) and the op converters (anakin_op_converter).
op_library
(
anakin_engine_op DEPS anakin_engine anakin_op_converter
)
# file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(anakin_engine);\n")
paddle/fluid/operators/anakin/anakin_engine_op.cc
0 → 100644
浏览文件 @
b21770a2
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <string>
#include <vector>
#include "paddle/fluid/operators/anakin/anakin_engine_op.h"
namespace
paddle
{
namespace
operators
{
// Proto/checker maker for the anakin_engine operator.
//
// Declares:
//   Xs         - duplicable list of inputs.
//   Ys         - duplicable list of outputs.
//   subgraph   - string attribute holding the sub-graph.
//   engine_key - string attribute identifying the engine.
//   sub_block  - BlockDesc* attribute for the fused block.
//
// The attribute descriptions previously said "TRT" (left over from the
// TensorRT operator this file mirrors); they now name Anakin.
class AnakinEngineOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Xs", "A list of inputs.").AsDuplicable();
    AddOutput("Ys", "A list of outputs").AsDuplicable();
    AddAttr<std::string>("subgraph", "the subgraph.");
    AddAttr<std::string>(
        "engine_key",
        "The engine_key here is used to distinguish different Anakin Engines");
    AddAttr<framework::BlockDesc *>("sub_block", "the anakin block");
    AddComment("Anakin engine operator.");
  }
};
// Variable-type inference hook for the Anakin engine operator.
// The call operator has an empty body, so invoking it changes nothing.
class AnakinEngineInferVarType : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc &op_desc,
                  framework::BlockDesc *block) const override {
    // Nothing to infer.
  }
};
}
// namespace operators
}
// namespace paddle
// Register the "anakin_engine" operator type with its implementation class
// and its proto maker.
// NOTE(review): AnakinEngineOpMaker is passed twice and the
// AnakinEngineInferVarType class defined above is not passed at all -- confirm
// whether the last argument was meant to be ops::AnakinEngineInferVarType.
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
anakin_engine
,
ops
::
AnakinEngineOp
,
ops
::
AnakinEngineOpMaker
,
ops
::
AnakinEngineOpMaker
);
#endif // PADDLE_WITH_CUDA
paddle/fluid/operators/anakin/anakin_engine_op.h
0 → 100644
浏览文件 @
b21770a2
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
operators
{
// Short names for the framework / Anakin types used by the engine operator.
// Several of these (FluidDT, GraphGlobalMem, AK_FLOAT, X86, Shape, PBlock,
// PTuple) are not referenced further down in this header; they are kept
// because files including this header may rely on them.
using FluidDT = framework::proto::VarType_Type;
using inference::Singleton;

using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
using inference::anakin::AnakinEngine;
// Operator that executes a fused sub-graph through an Anakin engine
// instantiated for the NV target with FP32 precision.
//
// Attributes read by this class:
//   engine_key          - std::string, stored in engine_key_ (not otherwise
//                         used inside this class).
//   parameters          - std::vector<std::string>; names in "Xs" that are
//                         parameters and are therefore not fed as run-time
//                         inputs.
//   subgraph            - std::string, a serialized proto::BlockDesc.
//   output_name_mapping - std::vector<std::string>; entry i is the engine-side
//                         name of the i-th variable in "Ys".
//
// The engine is created on the first call to RunAnakin() and reused after
// that.
// NOTE(review): the lazy initialisation mutates a `mutable` member from a
// const method without any locking visible here -- confirm the op is never
// run concurrently.
class AnakinEngineOp : public framework::OperatorBase {
  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;

 private:
  // Snapshot of Inputs("Xs") taken at construction time.
  std::vector<std::string> input_names_;
  // Input names that are parameters (from the "parameters" attribute).
  std::unordered_set<std::string> param_names_;
  // Lazily built engine; null until the first successful Prepare().
  mutable std::unique_ptr<AnakinNvEngineT> anakin_engine_;
  std::string engine_key_;

 public:
  // Forwards everything to OperatorBase, then caches the input names, the
  // engine key and the set of parameter names.
  AnakinEngineOp(const std::string &type,
                 const framework::VariableNameMap &inputs,
                 const framework::VariableNameMap &outputs,
                 const framework::AttributeMap &attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {
    input_names_ = Inputs("Xs");
    engine_key_ = Attr<std::string>("engine_key");
    const auto &params = Attr<std::vector<std::string>>("parameters");
    param_names_.insert(params.begin(), params.end());
  }

 protected:
  // OperatorBase entry point; delegates to RunAnakin().
  void RunImpl(const framework::Scope &scope,
               const platform::Place &dev_place) const override {
    RunAnakin(scope, dev_place);
  }

  // Builds the engine on first use, binds the non-parameter inputs and the
  // outputs found in `scope`, and executes the engine.
  //
  // Output tensors are resized to the shape reported by the engine and
  // allocated as float on the CUDA place extracted from `dev_place`.
  // Raises (via PADDLE_ENFORCE*) when there are no inputs, when
  // "output_name_mapping" has fewer entries than "Ys", or when an output
  // variable is missing from `scope`.
  void RunAnakin(const framework::Scope &scope,
                 const platform::Place &dev_place) const {
    // Cheap validation first, so a malformed op fails before the (slow)
    // engine build.
    PADDLE_ENFORCE(!input_names_.empty(), "should pass at least one input");

    // Bind by reference: avoids copying the name vectors on every run.
    const auto &output_maps =
        Attr<std::vector<std::string>>("output_name_mapping");
    const auto &output_names = Outputs("Ys");
    // output_maps is indexed by output position below; guard against an
    // out-of-range read.
    PADDLE_ENFORCE(output_names.size() <= output_maps.size(),
                   "output_name_mapping should have an entry for every "
                   "output in Ys");

    if (anakin_engine_.get() == nullptr) {
      // Build into a local owner and publish it only after Prepare()
      // succeeded. Otherwise an exception in Prepare() would leave a
      // half-initialised engine behind that later runs would use.
      std::unique_ptr<AnakinNvEngineT> fresh_engine(new AnakinNvEngineT(true));
      Prepare(scope, dev_place, fresh_engine.get());
      anakin_engine_.swap(fresh_engine);
    }
    auto *engine = anakin_engine_.get();

    // Collect the run-time input tensors (parameters are skipped).
    std::map<std::string, framework::LoDTensor *> inputs;
    for (const auto &x : Inputs("Xs")) {
      if (param_names_.count(x)) continue;
      auto &t =
          inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
      inputs.insert({x, &t});
    }

    // Size and allocate every output tensor from the engine's output shape.
    std::map<std::string, framework::LoDTensor *> outputs;
    int output_index = 0;
    for (const auto &y : output_names) {
      const auto &engine_output_name = output_maps[output_index];
      std::vector<int> ddim =
          engine->Net()->get_out(engine_output_name)->valid_shape();

      auto *fluid_v = scope.FindVar(y);
      PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
      auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
      fluid_t->Resize(framework::make_ddim(ddim));
      fluid_t->mutable_data<float>(
          boost::get<platform::CUDAPlace>(dev_place));

      outputs.insert({engine_output_name, fluid_t});
      output_index += 1;
    }

    engine->Execute(inputs, outputs);
  }

  // One-time engine construction: parses the "subgraph" attribute into a
  // block description, converts it into `engine`, freezes and optimizes it,
  // registers the shape of every non-parameter input and initialises the
  // graph.
  //
  // `dev_place` is currently unused.
  void Prepare(const framework::Scope &scope,
               const platform::Place &dev_place,
               AnakinNvEngineT *engine) const {
    LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP "
                 "kernel etc). This process may cost a lot of time.";

    framework::proto::BlockDesc block_desc;
    // NOTE(review): the result of ParseFromString is ignored, so a malformed
    // "subgraph" attribute is not reported here.
    block_desc.ParseFromString(Attr<std::string>("subgraph"));

    inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
        .ConvertBlock(block_desc, param_names_, scope, engine);
    engine->Freeze();
    engine->Optimize();

    for (const auto &x : Inputs("Xs")) {
      if (param_names_.count(x)) continue;
      auto &t =
          inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
      auto t_shape = framework::vectorize2int(t.dims());
      // all input shape should be 4 dims: a 2-D shape is padded with two
      // trailing 1s; other ranks are passed through unchanged.
      if (t_shape.size() == 2) {
        t_shape.push_back(1);
        t_shape.push_back(1);
      }
      engine->SetInputShape(x, t_shape);
    }

    engine->InitGraph();
  }
};
}
// namespace operators
}
// namespace paddle
#endif // PADDLE_WITH_CUDA
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录