Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
490e7462
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
490e7462
编写于
4月 23, 2019
作者:
L
luotao1
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix runtime_context_cache bug when a gpu model has an op that runs only on cpu
test=develop
上级
ea3504c7
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
34 additions
and
24 deletions
+34
-24
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+11
-0
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+23
-19
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
.../fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
+0
-5
未找到文件。
paddle/fluid/framework/operator.cc
浏览文件 @
490e7462
...
@@ -1095,6 +1095,17 @@ Scope* OperatorWithKernel::PrepareData(
...
@@ -1095,6 +1095,17 @@ Scope* OperatorWithKernel::PrepareData(
if
(
!
new_scope
)
{
if
(
!
new_scope
)
{
new_scope
=
&
scope
.
NewScope
();
new_scope
=
&
scope
.
NewScope
();
}
}
// For inference, if a GPU model contains an op that can only run on the CPU,
// every result will be identical to the first one, regardless of the input.
// The reason is that when a GPU tensor is the input of a CPU kernel,
// we create a new CPU tensor in a new scope.
// However, if enable_cache_runtime_context is set, we get the CPU tensor each
// time, not the GPU tensor.
// Thus, we set pre_scope_ = nullptr to trigger `new RuntimeContext()` in
// RunImpl().
if
(
enable_cache_runtime_context
)
{
pre_scope_
=
nullptr
;
}
auto
*
trans_var
=
new_scope
->
Var
(
var_name
);
auto
*
trans_var
=
new_scope
->
Var
(
var_name
);
input_vars
[
i
]
=
trans_var
;
input_vars
[
i
]
=
trans_var
;
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
490e7462
...
@@ -98,7 +98,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
...
@@ -98,7 +98,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
#endif //
#endif //
"transpose_flatten_concat_fuse_pass"
,
"transpose_flatten_concat_fuse_pass"
,
// The following two passes should be placed last, since they
// work on all fused ops.
"expected_kernel_cache_pass"
,
//
"expected_kernel_cache_pass"
,
//
"runtime_context_cache_pass"
});
});
use_gpu_
=
true
;
use_gpu_
=
true
;
...
@@ -115,25 +118,26 @@ void GpuPassStrategy::EnableMkldnnQuantizer() {
...
@@ -115,25 +118,26 @@ void GpuPassStrategy::EnableMkldnnQuantizer() {
CpuPassStrategy
::
CpuPassStrategy
()
:
PassStrategy
({})
{
CpuPassStrategy
::
CpuPassStrategy
()
:
PassStrategy
({})
{
// NOTE the large fusions should be located in the front, so that they will
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
// not be damaged by smaller ones.
passes_
.
assign
({
passes_
.
assign
({
"infer_clean_graph_pass"
,
//
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
// "seqpool_concat_fuse_pass", //
// "seqpool_concat_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
"repeated_fc_relu_fuse_pass"
,
//
"repeated_fc_relu_fuse_pass"
,
//
"squared_mat_sub_fuse_pass"
,
//
"squared_mat_sub_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
"is_test_pass"
,
//
"is_test_pass"
,
//
// The following two passes should be placed last, since
"expected_kernel_cache_pass"
,
//
// they will work on all fused ops.
});
"expected_kernel_cache_pass"
,
//
"runtime_context_cache_pass"
});
use_gpu_
=
false
;
use_gpu_
=
false
;
}
}
...
...
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
浏览文件 @
490e7462
...
@@ -110,11 +110,6 @@ void SetConfig(AnalysisConfig *cfg) {
...
@@ -110,11 +110,6 @@ void SetConfig(AnalysisConfig *cfg) {
if
(
FLAGS_zero_copy
)
{
if
(
FLAGS_zero_copy
)
{
cfg
->
SwitchUseFeedFetchOps
(
false
);
cfg
->
SwitchUseFeedFetchOps
(
false
);
}
}
// Enable runtime_context_cache_pass, which is disabled by default because it
// doesn't cover all cases.
// See detail: https://github.com/PaddlePaddle/Paddle/issues/16609
// https://github.com/PaddlePaddle/Paddle/issues/16841
cfg
->
pass_builder
()
->
AppendPass
(
"runtime_context_cache_pass"
);
}
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录