Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a7188d5b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a7188d5b
编写于
11月 25, 2018
作者:
Y
Yan Chunwei
提交者:
GitHub
11月 25, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix executor transfer cache bug (#14518)
上级
c1bf9664
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
143 addition
and
33 deletion
+143
-33
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+7
-1
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+1
-0
paddle/fluid/framework/naive_executor.cc
paddle/fluid/framework/naive_executor.cc
+1
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+17
-32
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+4
-0
paddle/fluid/framework/transfer_scope_cache.cc
paddle/fluid/framework/transfer_scope_cache.cc
+72
-0
paddle/fluid/framework/transfer_scope_cache.h
paddle/fluid/framework/transfer_scope_cache.h
+41
-0
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
a7188d5b
...
...
@@ -116,8 +116,14 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library
(
op_info SRCS op_info.cc DEPS attribute framework_proto
)
cc_library
(
shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context
)
if
(
NOT WIN32
)
cc_library
(
transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto
)
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler
)
shape_inference data_transform lod_tensor profiler transfer_scope_cache
)
else
()
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor
)
endif
(
NOT WIN32
)
cc_test
(
operator_test SRCS operator_test.cc DEPS operator op_registry device_context
)
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
a7188d5b
...
...
@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ngraph_operator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/operators/detail/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
...
...
paddle/fluid/framework/naive_executor.cc
浏览文件 @
a7188d5b
...
...
@@ -83,6 +83,7 @@ void NaiveExecutor::Run() {
for
(
auto
&
op
:
ops_
)
{
VLOG
(
3
)
<<
std
::
this_thread
::
get_id
()
<<
" run "
<<
op
->
Type
()
<<
" on scope "
<<
scope_
;
op
->
SetIsCalledByExecutor
(
false
);
op
->
Run
(
*
scope_
,
place_
);
}
}
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
a7188d5b
...
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -33,11 +34,6 @@ DEFINE_bool(check_nan_inf, false,
namespace
paddle
{
namespace
framework
{
// Mixes the hash value `a` into the running hash `seed` and returns the
// combined value. All arithmetic is unsigned, so overflow wraps around.
inline size_t CombineHash(size_t seed, size_t a) {
  const size_t xored = seed ^ a;
  const size_t seed_spread = (seed << 6) + (seed >> 2);
  return xored + 0x9e3779b9 + seed_spread;
}
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
=
{
std
::
make_tuple
(
platform
::
CUDAPlace
(
0
),
LibraryType
::
kCUDNN
),
std
::
make_tuple
(
platform
::
CUDAPlace
(
0
),
LibraryType
::
kPlain
),
...
...
@@ -797,17 +793,6 @@ void OperatorWithKernel::TransferInplaceVarsBack(
Scope
*
OperatorWithKernel
::
TryTransferData
(
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
)
const
{
// In the inference scenario, the scopes will be reused across the batches, so
// the `new_scope` here will result in GPU memory explosion over the running of
// operators.
// We use a thread_local cache to fix that issue; the key in the cache is the
// combination of the `scope` argument, from_kernel_type and target_kernel_type.
// Have a discussion with @Superjomn or the inference developers before making
// changes to this logic for this macro, since they might not be tested in the
// other scenarios.
#ifdef PADDLE_ON_INFERENCE
thread_local
std
::
unordered_map
<
size_t
,
Scope
*>
infer_transfer_scope_cache
;
#endif
Scope
*
new_scope
=
nullptr
;
for
(
auto
&
var_name_item
:
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
...
...
@@ -838,23 +823,23 @@ Scope* OperatorWithKernel::TryTransferData(
VLOG
(
30
)
<<
"Transform Variable "
<<
var_name
<<
" from "
<<
kernel_type_for_var
<<
" to "
<<
expected_kernel_key
;
#ifdef PADDLE_ON_INFERENCE
size_t
infer_cache_key
=
CombineHash
(
OpKernelType
::
Hash
()(
kernel_type_for_var
),
OpKernelType
::
Hash
()(
expected_kernel_key
));
infer_cache_key
=
CombineHash
(
infer_cache_key
,
std
::
hash
<
const
Scope
*>
()(
&
scope
));
auto
it
=
infer_transfer_scope_cache
.
find
(
infer_cache_key
);
if
(
it
!=
infer_transfer_scope_cache
.
end
())
{
new_scope
=
infer_transfer_scope_cache
[
infer_cache_key
];
}
else
{
new_scope
=
&
scope
.
NewScope
();
infer_transfer_scope_cache
[
infer_cache_key
]
=
new_scope
;
}
#endif
if
(
new_scope
==
nullptr
)
{
// In the inference scenario, the scopes will be reused across the
// batches, so the `new_scope` here will result in GPU memory explosion
// over the running of operators.
// We use a thread_local cache to fix that issue; the key in the cache is
// the combination of the `scope` argument, from_kernel_type and
// target_kernel_type.
// Have a discussion with @Superjomn or the inference developers before
// making changes to this logic for this macro, since they might not be
// tested in the other scenarios.
// If this op is not called by an Executor or ParallelExecutor, it should
// be called by a NaiveExecutor; the NaiveExecutor caches the scopes and
// variables, so its behavior is quite different.
if
(
!
run_by_executor_
)
{
new_scope
=
TryCreateTransferScope
(
kernel_type_for_var
,
expected_kernel_key
,
&
scope
);
}
if
(
!
new_scope
)
{
new_scope
=
&
scope
.
NewScope
();
}
...
...
paddle/fluid/framework/operator.h
浏览文件 @
a7188d5b
...
...
@@ -127,6 +127,8 @@ class OperatorBase {
//! Get all outputs variable names
virtual
std
::
vector
<
std
::
string
>
OutputVars
(
bool
has_intermediate
)
const
;
void
SetIsCalledByExecutor
(
bool
x
)
{
run_by_executor_
=
x
;
}
protected:
std
::
string
type_
;
// NOTE: in case of OpGrad, inputs_ contains:
...
...
@@ -139,6 +141,8 @@ class OperatorBase {
// IG (Inputs Gradients)
VariableNameMap
outputs_
;
AttributeMap
attrs_
;
// Whether this operator executes in an Executor.
bool
run_by_executor_
{
true
};
private:
void
GenerateTemporaryNames
();
...
...
paddle/fluid/framework/transfer_scope_cache.cc
0 → 100644
浏览文件 @
a7188d5b
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/transfer_scope_cache.h"
namespace
paddle
{
namespace
framework
{
std
::
unordered_map
<
size_t
,
Scope
*>&
global_transfer_data_cache
()
{
thread_local
auto
*
x
=
new
std
::
unordered_map
<
size_t
,
Scope
*>
;
return
*
x
;
}
std
::
unordered_set
<
Scope
*>&
global_transfer_scope_cache
()
{
thread_local
auto
*
x
=
new
std
::
unordered_set
<
Scope
*>
;
return
*
x
;
}
// Looks up, or creates and caches, the transfer scope for the combination of
// (type0, type1, scope) in the calling thread's caches.
//
// The cache key is the combined hash of both kernel types and the `scope`
// pointer. On a miss, a new child scope of `scope` is created via NewScope()
// and stored under that key. The returned scope is also inserted into the
// thread's transfer scope set.
Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
                              const Scope* scope) {
  size_t cache_key =
      CombineHash(OpKernelType::Hash()(type0), OpKernelType::Hash()(type1));
  cache_key = CombineHash(cache_key, std::hash<const Scope*>()(scope));

  auto& data_cache = global_transfer_data_cache();
  Scope* transfer_scope = nullptr;

  const auto cached = data_cache.find(cache_key);
  if (cached == data_cache.end()) {
    // Cache miss: create a child scope and remember it under this key.
    transfer_scope = &scope->NewScope();
    data_cache[cache_key] = transfer_scope;
  } else {
    transfer_scope = cached->second;
  }

  global_transfer_scope_cache().insert(transfer_scope);
  return transfer_scope;
}
// Removes `scope` and each scope in `scope->kids()` from the calling thread's
// transfer scope set, then removes every entry of the thread's transfer data
// cache whose cached value is `scope`.
//
// NOTE(review): only data-cache entries pointing at `scope` itself are
// dropped; entries pointing at its kids are left in place -- confirm this is
// intended.
void RemoveKidsFromTransferScopeCache(Scope* scope) {
  auto& scope_cache = global_transfer_scope_cache();

  // erase-by-key is a no-op when the pointer is not in the set.
  scope_cache.erase(scope);
  for (auto* kid : scope->kids()) {
    scope_cache.erase(kid);
  }

  // remove global transfer data cache
  auto& data_cache = global_transfer_data_cache();
  auto entry = data_cache.begin();
  while (entry != data_cache.end()) {
    if (entry->second == scope) {
      entry = data_cache.erase(entry);
    } else {
      ++entry;
    }
  }
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/transfer_scope_cache.h
0 → 100644
浏览文件 @
a7188d5b
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <thread> // NOLINT
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
framework
{
// Accessor for the transfer data cache: a map from a size_t key to a Scope
// pointer, returned by mutable reference.
std::unordered_map<size_t, Scope*>& global_transfer_data_cache();
// Accessor for the transfer scope cache: a set of Scope pointers, returned by
// mutable reference.
std::unordered_set<Scope*>& global_transfer_scope_cache();
// Combine two hash values to a single hash.
//
// `seed` is the running hash and `a` is the value mixed into it; the result
// is (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2), computed with
// unsigned wrap-around arithmetic.
//
// Declared `inline` rather than `static`: this definition lives in a header,
// and `static` would give every including translation unit its own private
// copy (and an unused-function warning wherever it is not called).
inline size_t CombineHash(size_t seed, size_t a) {
  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
// Returns a Scope for the given pair of kernel types and parent `scope`.
// Takes both OpKernelType arguments by value and the scope by const pointer.
Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
                              const Scope* scope);
// Removes cache entries associated with `scope` from the transfer caches.
void RemoveKidsFromTransferScopeCache(Scope* scope);
}
// namespace framework
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录