Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
71b2ed61
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
71b2ed61
编写于
9月 18, 2019
作者:
石
石晓伟
提交者:
GitHub
9月 18, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support MLU nums, test=develop (#19372)
上级
e2c6bada
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
31 addition
and
24 deletion
+31
-24
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+2
-2
paddle/fluid/inference/api/api_anakin_engine.cc
paddle/fluid/inference/api/api_anakin_engine.cc
+25
-18
paddle/fluid/inference/api/api_anakin_engine.h
paddle/fluid/inference/api/api_anakin_engine.h
+4
-4
未找到文件。
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
71b2ed61
...
...
@@ -70,9 +70,9 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_
if
(
ANAKIN_FOUND
)
# Do not turn warnings into errors.
set_source_files_properties
(
api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS
"-Wno-error"
)
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash
)
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash
framework_proto eigen3
)
target_link_libraries
(
inference_anakin_api anakin anakin_saber_common
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash
framework_proto eigen3
)
target_link_libraries
(
inference_anakin_api_shared anakin anakin_saber_common
)
function
(
anakin_target target_name
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
...
...
paddle/fluid/inference/api/api_anakin_engine.cc
浏览文件 @
71b2ed61
...
...
@@ -42,6 +42,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
template
<
typename
T
,
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinPredictor
<
T
,
P
,
R
>::
InitNet
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
this
->
mutex_
);
delete
this
->
executor_p_
;
this
->
executor_p_
=
new
anakin
::
Net
<
T
,
P
,
R
>
(
*
this
->
graph_p_
,
true
);
}
template
<
typename
T
,
Precision
P
,
OpRunType
R
>
...
...
@@ -89,7 +90,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
this
->
InitNet
();
}
template
<
typename
T
,
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinPredictor
<
T
,
P
,
R
>::
Predict
()
{
void
PaddleInferenceAnakinPredictor
<
T
,
P
,
R
>::
Predict
(
int
batch_size
)
{
anakin
::
TargetWrapper
<
T
>::
device_sync
();
this
->
executor_p_
->
prediction
();
anakin
::
TargetWrapper
<
T
>::
device_sync
();
...
...
@@ -99,7 +100,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
if
(
this
->
config_
.
re_allocable
)
{
return
this
->
RunImpl
(
inputs
,
output_data
);
return
this
->
RunImpl
(
inputs
,
output_data
,
batch_size
);
}
else
{
// Run inputs data that exceeds batch size in batches.
// 1. Reassign the batch size.
...
...
@@ -194,7 +195,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
template
<
typename
T
,
Precision
P
,
OpRunType
R
>
bool
PaddleInferenceAnakinPredictor
<
T
,
P
,
R
>::
RunImpl
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
)
{
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
anakin
::
TargetWrapper
<
T
>::
set_device
(
this
->
config_
.
device_id
);
for
(
const
auto
&
input
:
inputs
)
{
if
(
input
.
dtype
!=
PaddleDType
::
FLOAT32
)
{
...
...
@@ -207,12 +208,12 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
LOG
(
FATAL
)
<<
" input "
<<
input
.
name
<<
"'s shape size should be equal to that of net"
;
}
#ifndef ANAKIN_MLU_PLACE
int
sum
=
1
;
for_each
(
input
.
shape
.
begin
(),
input
.
shape
.
end
(),
[
&
](
int
n
)
{
sum
*=
n
;
});
if
(
sum
>
net_shape
.
count
())
{
if
(
this
->
config_
.
re_allocable
)
{
this
->
graph_p_
->
Reshape
(
input
.
name
,
input
.
shape
);
delete
this
->
executor_p_
;
this
->
InitNet
();
d_tensor_p
=
this
->
executor_p_
->
get_in
(
input
.
name
);
}
else
{
...
...
@@ -221,6 +222,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
"memory."
;
}
}
#endif
std
::
vector
<
int
>
tmp_shape
;
for
(
auto
s
:
input
.
shape
)
{
tmp_shape
.
push_back
(
s
);
...
...
@@ -229,8 +231,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
anakin
::
saber
::
Tensor
<
typename
anakin
::
DefaultHostType
<
T
>::
Host_type
>
h_tensor
(
data
,
typename
anakin
::
DefaultHostType
<
T
>::
Host_type
(),
0
,
tmp_shape
);
#ifndef ANAKIN_MLU_PLACE
d_tensor_p
->
reshape
(
tmp_shape
);
#endif
if
(
input
.
lod
.
size
()
>
0
)
{
if
(
input
.
lod
.
size
()
>
1
)
{
LOG
(
FATAL
)
<<
" input lod first dim should <=1, but you set "
...
...
@@ -246,9 +249,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
}
d_tensor_p
->
copy_from
(
h_tensor
);
}
this
->
Predict
();
this
->
Predict
(
batch_size
);
if
(
output_data
->
empty
())
{
LOG
(
FATAL
)
<<
"
At least one output should be set with tensors' names
."
;
LOG
(
FATAL
)
<<
"
The output param in the Run function is incorrect
."
;
}
for
(
auto
&
output
:
*
output_data
)
{
if
(
std
::
find
(
this
->
output_names_
.
begin
(),
this
->
output_names_
.
end
(),
...
...
@@ -256,14 +259,18 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
LOG
(
FATAL
)
<<
output
.
name
<<
" is not in the outputs of the graph."
;
}
auto
*
d_tensor_p
=
this
->
executor_p_
->
get_out
(
output
.
name
);
output
.
shape
=
d_tensor_p
->
valid_shape
();
if
(
output
.
data
.
length
()
<
d_tensor_p
->
valid_size
()
*
sizeof
(
float
))
{
output
.
data
.
Resize
(
d_tensor_p
->
valid_size
()
*
sizeof
(
float
));
auto
tmp_shape
=
d_tensor_p
->
valid_shape
();
#ifdef ANAKIN_MLU_PLACE
tmp_shape
.
set_num
(
batch_size
);
#endif
output
.
shape
=
tmp_shape
;
if
(
output
.
data
.
length
()
<
tmp_shape
.
count
()
*
sizeof
(
float
))
{
output
.
data
.
Resize
(
tmp_shape
.
count
()
*
sizeof
(
float
));
}
auto
*
data
=
static_cast
<
float
*>
(
output
.
data
.
data
());
anakin
::
saber
::
Tensor
<
typename
anakin
::
DefaultHostType
<
T
>::
Host_type
>
h_tensor
(
data
,
typename
anakin
::
DefaultHostType
<
T
>::
Host_type
(),
0
,
d_tensor_p
->
valid_shape
()
);
tmp_shape
);
h_tensor
.
copy_from
(
*
d_tensor_p
);
}
return
true
;
...
...
@@ -317,6 +324,8 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
this
->
config_
.
compute_stream_id
);
this
->
ctx_p_
->
set_model_parallel
(
this
->
config_
.
model_parallel
);
this
->
ctx_p_
->
set_fusion
(
this
->
config_
.
op_fuse
);
this
->
ctx_p_
->
enable_batch_changable
();
this
->
ctx_p_
->
enable_channel_duplicate
();
}
template
<
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinMLUPredictor
<
P
,
R
>::
OptimizeGraph
()
{
...
...
@@ -327,14 +336,13 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
template
<
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinMLUPredictor
<
P
,
R
>::
InitNet
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
this
->
mutex_
);
delete
this
->
executor_p_
;
this
->
executor_p_
=
new
anakin
::
Net
<
anakin
::
MLU
,
P
,
R
>
();
this
->
executor_p_
->
fusion_init
(
*
this
->
graph_p_
,
this
->
ctx_p_
,
true
);
}
template
<
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinMLUPredictor
<
P
,
R
>::
Predict
()
{
anakin
::
TargetWrapper
<
anakin
::
MLU
>::
device_sync
();
this
->
executor_p_
->
fusion_prediction
();
anakin
::
TargetWrapper
<
anakin
::
MLU
>::
device_sync
();
void
PaddleInferenceAnakinMLUPredictor
<
P
,
R
>::
Predict
(
int
batch_size
)
{
this
->
executor_p_
->
fusion_prediction
(
batch_size
);
}
#endif
...
...
@@ -353,14 +361,13 @@ void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
template
<
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinBMPredictor
<
P
,
R
>::
InitNet
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
this
->
mutex_
);
delete
this
->
executor_p_
;
this
->
executor_p_
=
new
anakin
::
Net
<
anakin
::
BM
,
P
,
R
>
();
this
->
executor_p_
->
fusion_init
(
*
this
->
graph_p_
,
this
->
ctx_p_
,
true
);
}
template
<
Precision
P
,
OpRunType
R
>
void
PaddleInferenceAnakinBMPredictor
<
P
,
R
>::
Predict
()
{
anakin
::
TargetWrapper
<
anakin
::
BM
>::
device_sync
();
void
PaddleInferenceAnakinBMPredictor
<
P
,
R
>::
Predict
(
int
batch_size
)
{
this
->
executor_p_
->
fusion_prediction
();
anakin
::
TargetWrapper
<
anakin
::
BM
>::
device_sync
();
}
#endif
...
...
paddle/fluid/inference/api/api_anakin_engine.h
浏览文件 @
71b2ed61
...
...
@@ -73,7 +73,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
virtual
void
OptimizeGraph
();
virtual
void
InitNet
();
virtual
void
SetContext
();
virtual
void
Predict
();
virtual
void
Predict
(
int
batch_size
);
virtual
std
::
unique_ptr
<
PaddlePredictor
>
New
();
static
std
::
mutex
mutex_
;
AnakinConfig
config_
;
...
...
@@ -85,7 +85,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
private:
bool
RunImpl
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
);
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
);
static
std
::
once_flag
init_anakin_
;
};
...
...
@@ -103,7 +103,7 @@ class PaddleInferenceAnakinMLUPredictor final
void
SetContext
()
override
;
void
OptimizeGraph
()
override
;
void
InitNet
()
override
;
void
Predict
()
override
;
void
Predict
(
int
batch_size
)
override
;
};
#endif
...
...
@@ -120,7 +120,7 @@ class PaddleInferenceAnakinBMPredictor final
std
::
unique_ptr
<
PaddlePredictor
>
New
()
override
;
void
OptimizeGraph
()
override
;
void
InitNet
()
override
;
void
Predict
()
override
;
void
Predict
(
int
batch_size
)
override
;
};
#endif
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录