Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a93a9eef
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a93a9eef
编写于
3月 22, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add op registry type
refine gc code test=develop
上级
5670530c
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
776 addition
and
300 deletion
+776
-300
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+5
-2
paddle/fluid/framework/details/early_delete_op_handle.h
paddle/fluid/framework/details/early_delete_op_handle.h
+0
-140
paddle/fluid/framework/details/op_registry.h
paddle/fluid/framework/details/op_registry.h
+88
-16
paddle/fluid/framework/details/reference_count_pass.cc
paddle/fluid/framework/details/reference_count_pass.cc
+89
-5
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+20
-96
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+7
-9
paddle/fluid/framework/executor_gc_helper.cc
paddle/fluid/framework/executor_gc_helper.cc
+185
-0
paddle/fluid/framework/executor_gc_helper.h
paddle/fluid/framework/executor_gc_helper.h
+40
-0
paddle/fluid/framework/no_need_buffer_vars_inference.cc
paddle/fluid/framework/no_need_buffer_vars_inference.cc
+23
-0
paddle/fluid/framework/no_need_buffer_vars_inference.h
paddle/fluid/framework/no_need_buffer_vars_inference.h
+60
-0
paddle/fluid/framework/op_info.h
paddle/fluid/framework/op_info.h
+6
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+29
-7
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+10
-1
paddle/fluid/framework/type_defs.h
paddle/fluid/framework/type_defs.h
+5
-0
paddle/fluid/operators/clip_op.cc
paddle/fluid/operators/clip_op.cc
+18
-1
paddle/fluid/operators/elementwise/elementwise_add_op.cc
paddle/fluid/operators/elementwise/elementwise_add_op.cc
+1
-2
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+20
-18
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+1
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
.../fluid/tests/unittests/test_eager_deletion_delete_vars.py
+165
-0
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
a93a9eef
...
...
@@ -126,7 +126,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_con
cc_library
(
version SRCS version.cc
)
cc_test
(
version_test SRCS version_test.cc DEPS version
)
cc_library
(
proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version
)
cc_library
(
proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc
no_need_buffer_vars_inference.cc
DEPS shape_inference op_info operator glog version
)
cc_library
(
op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc memory_optimize_helper
)
nv_test
(
op_registry_test SRCS op_registry_test.cc DEPS op_registry
)
...
...
@@ -164,6 +164,8 @@ else()
set
(
NGRAPH_EXE_DEPS
)
endif
()
cc_library
(
executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector
)
if
(
WITH_DISTRIBUTE
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
lod_rank_table feed_fetch_method sendrecvop_rpc
${
GLOB_DISTRIBUTE_DEPS
}
graph_to_program_pass variable_helper
${
NGRAPH_EXE_DEPS
}
)
...
...
@@ -174,7 +176,7 @@ else()
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op
)
endif
()
target_link_libraries
(
executor
garbage_collector while_op
_helper
)
target_link_libraries
(
executor
while_op_helper executor_gc
_helper
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor parallel_ssa_graph_executor
...
...
@@ -194,6 +196,7 @@ cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_con
cc_test
(
var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
proto_desc
)
cc_test
(
inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS op_registry proto_desc op_info memory_optimize_helper
)
cc_library
(
selected_rows SRCS selected_rows.cc DEPS tensor
)
cc_test
(
selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows
)
...
...
paddle/fluid/framework/details/early_delete_op_handle.h
已删除
100644 → 0
浏览文件 @
5670530c
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// Op handle that hands the memory of a fixed list of variables over to a
// GarbageCollector when it runs.
//
// The variables are looked up by name in the local execution scope stored in
// `scope` under kLocalExecScopeName. LoDTensor, SelectedRows and
// LoDTensorArray variables are supported; variables of any other type are
// silently left untouched.
class EarlyDeleteOpHandle : public OpHandleBase {
 public:
  // node:  graph node forwarded to OpHandleBase.
  // scope: scope holding the kLocalExecScopeName variable.
  // place: device the variables live on.
  // names: names of the variables whose memory is released by RunImpl().
  // gc:    collector that receives the released allocations (not owned).
  EarlyDeleteOpHandle(ir::Node* node, const Scope* scope,
                      const platform::Place& place,
                      const std::vector<std::string>& names,
                      GarbageCollector* gc)
      : OpHandleBase(node),
        scope_(scope),
        place_(place),
        names_(names),
        gc_(gc) {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      auto gpu_place = boost::get<platform::CUDAPlace>(place);
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(
          cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
    }
#endif
  }

  ~EarlyDeleteOpHandle() {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      // Use place_ (the place the event was created on) instead of
      // dev_ctx_->GetPlace(): dev_ctx_ is never assigned by this class, so
      // dereferencing it here was undefined behaviour.
      auto gpu_place = boost::get<platform::CUDAPlace>(place_);
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(cudaEventDestroy(event_));
    }
#endif
  }

  std::string Name() const override { return "early_delete"; }

 protected:
  // Moves the memory holders out of every variable in names_ and passes them
  // to the garbage collector. Fails if a name is missing from the local
  // scope.
  void RunImpl() override {
    std::vector<std::shared_ptr<memory::Allocation>> tensors;
    auto* local_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope*>();
    for (auto& var_name : names_) {
      auto* var = local_scope->FindVar(var_name);
      PADDLE_ENFORCE(var != nullptr,
                     string::Sprintf("Local Scope not has var %s", var_name));
      if (var->IsType<LoDTensor>()) {
        tensors.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
      } else if (var->IsType<SelectedRows>()) {
        tensors.emplace_back(var->GetMutable<SelectedRows>()
                                 ->mutable_value()
                                 ->MoveMemoryHolder());
      } else if (var->IsType<LoDTensorArray>()) {
        LoDTensorArray* tensor_array = var->GetMutable<LoDTensorArray>();
        for (auto& tensor : *tensor_array) {
          tensors.emplace_back(tensor.MoveMemoryHolder());
        }
      }
    }
    if (!tensors.empty()) {
      ClearTensors(tensors);
    }
  }

 private:
  // Dispatches to the CPU or GPU path depending on place_.
  void ClearTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    if (platform::is_cpu_place(place_)) {
      ClearCPUTensors(tensors);
    } else {
      ClearGPUTensors(tensors);
    }
  }

  // Adds the allocations to gc_ when it is a CPUGarbageCollector; otherwise
  // does nothing.
  void ClearCPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    auto* gc = dynamic_cast<CPUGarbageCollector*>(gc_);
    if (gc != nullptr) {
      gc->Add(tensors);
    }
  }

  // Adds the allocations to gc_. For a StreamGarbageCollector the callback
  // records event_ on the compute stream and makes the collector's stream
  // wait on it. Does nothing when built without CUDA.
  void ClearGPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
#ifdef PADDLE_WITH_CUDA
    auto* gc = dynamic_cast<StreamGarbageCollector*>(gc_);
    if (gc != nullptr) {
      // NOTE(review): nothing in this class assigns dev_ctx_, so fail loudly
      // instead of dereferencing an indeterminate pointer.
      PADDLE_ENFORCE(dev_ctx_ != nullptr,
                     "CUDA device context of early_delete op is not set");
      auto compute_stream = dev_ctx_->stream();
      auto callback_stream = gc->stream();
      // Capture the event by value so the callback does not depend on `this`.
      auto event = event_;
      auto callback_func = [event, compute_stream, callback_stream]() {
        PADDLE_ENFORCE(cudaEventRecord(event, compute_stream));
        PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event, 0));
      };
      gc_->Add(tensors, callback_func);
    } else {
      gc_->Add(tensors);
    }
#endif
  }

  // True when gc_ is a StreamGarbageCollector; always false without CUDA.
  bool IsStreamGarabageCollector() const {
#ifdef PADDLE_WITH_CUDA
    return dynamic_cast<const StreamGarbageCollector*>(gc_) != nullptr;
#else
    return false;
#endif
  }

  const Scope* scope_;
  const platform::Place place_;
  std::vector<std::string> names_;
  GarbageCollector* gc_;
#ifdef PADDLE_WITH_CUDA
  platform::CUDADeviceContext* dev_ctx_{nullptr};
  cudaEvent_t event_{nullptr};
#endif
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/op_registry.h
浏览文件 @
a93a9eef
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/inplace_op_inference.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h"
...
...
@@ -36,27 +37,86 @@ enum OpInfoFillType {
kGradOpDescMaker
=
2
,
kVarTypeInference
=
3
,
kShapeInference
=
4
,
kInplaceOpInference
=
5
kInplaceOpInference
=
5
,
kNoNeedBufferVarsInference
=
6
,
kUnknown
=
-
1
};
namespace
internal
{
template
<
typename
T
,
OpInfoFillType
kType
>
struct
TypePair
{
using
Type
=
T
;
static
constexpr
OpInfoFillType
kFillType
=
kType
;
};
using
OpRegistryClasses
=
std
::
tuple
<
// NOLINT
TypePair
<
OperatorBase
,
kOperator
>
,
// NOLINT
TypePair
<
OpProtoAndCheckerMaker
,
kOpProtoAndCheckerMaker
>
,
// NOLINT
TypePair
<
GradOpDescMakerBase
,
kGradOpDescMaker
>
,
// NOLINT
TypePair
<
VarTypeInference
,
kVarTypeInference
>
,
// NOLINT
TypePair
<
InferShapeBase
,
kShapeInference
>
,
// NOLINT
TypePair
<
InplaceOpInference
,
kInplaceOpInference
>
,
// NOLINT
TypePair
<
NoNeedBufferVarsInference
,
kNoNeedBufferVarsInference
>
// NOLINT
>
;
static
constexpr
int
kOpRegistryClassNumber
=
std
::
tuple_size
<
OpRegistryClasses
>::
value
;
template
<
typename
T
,
int
kPos
,
bool
kIsBounded
/* = true*/
>
struct
IsMatchedBaseTypeImpl
{
using
PairType
=
typename
std
::
tuple_element
<
kPos
,
OpRegistryClasses
>::
type
;
static
constexpr
bool
kValue
=
std
::
is_base_of
<
typename
PairType
::
Type
,
T
>::
value
;
};
template
<
typename
T
,
int
kPos
>
struct
IsMatchedBaseTypeImpl
<
T
,
kPos
,
false
>
{
static
constexpr
bool
kValue
=
false
;
};
template
<
typename
T
,
int
kPos
>
static
inline
constexpr
bool
IsMatchedBaseType
()
{
return
IsMatchedBaseTypeImpl
<
T
,
kPos
,
(
kPos
>=
0
&&
kPos
<
kOpRegistryClassNumber
)
>::
kValue
;
}
template
<
typename
T
,
int
kStart
,
int
kEnd
,
bool
kIsEnd
,
bool
kIsMatched
>
struct
OpInfoFillTypeGetterImpl
{};
// This case should not happen
template
<
typename
T
,
int
kStart
,
int
kEnd
>
struct
OpInfoFillTypeGetterImpl
<
T
,
kStart
,
kEnd
,
true
,
true
>
{};
template
<
typename
T
,
int
kStart
,
int
kEnd
>
struct
OpInfoFillTypeGetterImpl
<
T
,
kStart
,
kEnd
,
true
,
false
>
{
static
constexpr
OpInfoFillType
kType
=
kUnknown
;
};
template
<
typename
T
,
int
kStart
,
int
kEnd
>
struct
OpInfoFillTypeGetterImpl
<
T
,
kStart
,
kEnd
,
false
,
false
>
{
static
constexpr
OpInfoFillType
kType
=
OpInfoFillTypeGetterImpl
<
T
,
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
,
IsMatchedBaseType
<
T
,
kStart
+
1
>
()
>::
kType
;
};
template
<
typename
T
,
int
kStart
,
int
kEnd
>
struct
OpInfoFillTypeGetterImpl
<
T
,
kStart
,
kEnd
,
false
,
true
>
{
using
PairType
=
typename
std
::
tuple_element
<
kStart
,
OpRegistryClasses
>::
type
;
static
constexpr
OpInfoFillType
kType
=
PairType
::
kFillType
;
};
template
<
typename
T
>
using
OpInfoFillTypeGetter
=
OpInfoFillTypeGetterImpl
<
T
,
0
,
kOpRegistryClassNumber
,
kOpRegistryClassNumber
==
0
,
IsMatchedBaseType
<
T
,
0
>
()
>
;
}
// namespace internal
// Maps a registrable class T to its OpInfoFillType tag.
//
// The tag comes from internal::OpInfoFillTypeGetter, which searches
// internal::OpRegistryClasses for the first base class T derives from and
// yields kUnknown when there is none. The former hard-coded ternary chain is
// dropped: it returned first, never produced kNoNeedBufferVarsInference, and
// left the registry-driven lookup unreachable.
template <typename T>
struct OpInfoFillTypeID {
  static constexpr OpInfoFillType ID() {
    return internal::OpInfoFillTypeGetter<T>::kType;
  }
};
...
...
@@ -156,6 +216,18 @@ struct OpInfoFiller<T, kInplaceOpInference> {
}
};
// Filler for classes tagged kNoNeedBufferVarsInference: installs a callable
// on OpInfo that constructs a T from the op's inputs, outputs and attributes
// and returns the result of invoking it.
template <typename T>
struct OpInfoFiller<T, kNoNeedBufferVarsInference> {
  // op_type is unused by this filler.
  void operator()(const char* op_type, OpInfo* info) const {
    auto run_inferer = [](const VariableNameMap& ins,
                          const VariableNameMap& outs,
                          const AttributeMap& attr_map) {
      T inferer(ins, outs, attr_map);
      return inferer();
    };
    info->infer_no_need_buffer_vars_ = run_inferer;
  }
};
}
// namespace details
}
// namespace framework
...
...
paddle/fluid/framework/details/reference_count_pass.cc
浏览文件 @
a93a9eef
...
...
@@ -193,6 +193,65 @@ ExtractComputationOpFromLastLivedVar(VarHandle *var, size_t scope_idx,
return
shrink_func
(
computation_op
);
}
static
bool
CanPrecede
(
const
std
::
string
&
var_name
,
std
::
unordered_set
<
ComputationOpHandle
*>
*
op_handles
)
{
std
::
vector
<
ComputationOpHandle
*>
skip_ops
;
for
(
auto
*
op_handle
:
*
op_handles
)
{
auto
*
op_base
=
op_handle
->
GetOp
();
auto
&
inferer
=
op_base
->
Info
().
NoNeedBufferVarsInferer
();
if
(
!
inferer
)
{
continue
;
}
std
::
unordered_set
<
std
::
string
>
no_need_buffer_vars
=
inferer
(
op_base
->
Inputs
(),
op_base
->
Outputs
(),
op_base
->
Attrs
());
// Check whether var_name occurs in other inputs or outputs of the op
// If it occurs, we cannot precede reference count to previous op
bool
occurred_in_other_vars
=
false
;
for
(
auto
&
in_pair
:
op_base
->
Inputs
())
{
if
(
no_need_buffer_vars
.
count
(
in_pair
.
first
)
>
0
)
{
continue
;
}
auto
&
args
=
in_pair
.
second
;
auto
iter
=
std
::
find
(
args
.
begin
(),
args
.
end
(),
var_name
);
if
(
iter
!=
args
.
end
())
{
occurred_in_other_vars
=
true
;
break
;
}
}
if
(
occurred_in_other_vars
)
{
continue
;
}
for
(
auto
&
out_pair
:
op_base
->
Outputs
())
{
auto
&
args
=
out_pair
.
second
;
auto
iter
=
std
::
find
(
args
.
begin
(),
args
.
end
(),
var_name
);
if
(
iter
!=
args
.
end
())
{
occurred_in_other_vars
=
true
;
break
;
}
}
if
(
!
occurred_in_other_vars
)
{
VLOG
(
2
)
<<
"Shrink var "
<<
var_name
<<
" in op "
<<
op_handle
->
Name
();
skip_ops
.
emplace_back
(
op_handle
);
}
}
if
(
skip_ops
.
size
()
==
op_handles
->
size
())
{
op_handles
->
clear
();
return
true
;
}
else
{
for
(
auto
*
skip_op
:
skip_ops
)
{
op_handles
->
erase
(
skip_op
);
}
return
false
;
}
}
std
::
unique_ptr
<
ir
::
Graph
>
ReferenceCountPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
&
ref_cnts
=
Get
<
std
::
vector
<
ReferenceCountMap
>>
(
kGlobalReferenceCount
);
...
...
@@ -229,17 +288,42 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
continue
;
}
bool
ok
;
auto
result
=
ExtractComputationOpFromLastLivedVar
(
name_var_pair
.
second
.
back
(),
i
,
shrink_func
,
&
ok
);
auto
&
var_name
=
name_var_pair
.
first
;
auto
&
var_handles
=
name_var_pair
.
second
;
for
(
auto
iter
=
var_handles
.
rbegin
();
iter
!=
var_handles
.
rend
();
++
iter
)
{
bool
ok
;
auto
result
=
ExtractComputationOpFromLastLivedVar
(
*
iter
,
i
,
shrink_func
,
&
ok
);
// Seldomly, some vars may have no pending or preceding computation ops
// Just break;
if
(
!
ok
)
break
;
VLOG
(
10
)
<<
"Extract "
<<
result
.
size
()
<<
" ops of var "
<<
var_name
;
size_t
original_op_deps
=
result
.
size
();
// If reference count can be calculated precedingly, just precede
if
(
CanPrecede
(
var_name
,
&
result
))
{
VLOG
(
10
)
<<
"Try to precede reference count computing at var "
<<
var_name
;
continue
;
}
size_t
final_op_deps
=
result
.
size
();
if
(
final_op_deps
<
original_op_deps
)
{
VLOG
(
5
)
<<
"Shrink op deps from "
<<
original_op_deps
<<
" to "
<<
final_op_deps
;
}
if
(
ok
)
{
auto
&
var_name
=
name_var_pair
.
first
;
PADDLE_ENFORCE
(
!
result
.
empty
(),
"Last living ops of %s cannot be empty"
,
var_name
);
ref_cnts
[
i
].
emplace
(
var_name
,
result
.
size
());
last_live_ops_of_vars
[
i
].
emplace
(
var_name
,
std
::
move
(
result
));
}
// Seldomly, all preceding trying failed.
// Just skip this corner case
}
}
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
a93a9eef
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
...
...
@@ -48,97 +49,23 @@ namespace {
int
kProgramId
=
-
1
;
}
// namespace
static
std
::
unordered_map
<
std
::
string
,
size_t
>
GetNonPersistableReferenceCounts
(
const
BlockDesc
&
block
,
const
std
::
vector
<
std
::
string
>&
skip_var_list
)
{
std
::
unordered_map
<
std
::
string
,
size_t
>
ref_cnts
;
std
::
unordered_set
<
std
::
string
>
skip_vars
(
skip_var_list
.
begin
(),
skip_var_list
.
end
());
auto
update_ref_cnts
=
[
&
](
OpDesc
*
op_desc
,
const
VariableNameMap
&
name_map
)
{
for
(
auto
&
name_pair
:
name_map
)
{
for
(
auto
&
name
:
name_pair
.
second
)
{
if
(
skip_vars
.
count
(
name
))
continue
;
auto
*
var_desc
=
block
.
FindVar
(
name
);
if
(
var_desc
==
nullptr
||
var_desc
->
Persistable
())
continue
;
auto
type
=
var_desc
->
Proto
()
->
type
().
type
();
if
(
type
!=
proto
::
VarType
::
LOD_TENSOR
&&
type
!=
proto
::
VarType
::
SELECTED_ROWS
&&
type
!=
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
continue
;
}
++
ref_cnts
[
name
];
}
}
};
for
(
auto
op_desc
:
block
.
AllOps
())
{
update_ref_cnts
(
op_desc
,
op_desc
->
Inputs
());
update_ref_cnts
(
op_desc
,
op_desc
->
Outputs
());
}
return
ref_cnts
;
}
ExecutorPrepareContext
::
ExecutorPrepareContext
(
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
,
const
std
::
vector
<
std
::
string
>&
keep_vars
,
bool
force_disable_gc
)
:
prog_
(
prog
),
block_id_
(
block_id
),
force_disable_gc_
(
force_disable_gc
)
{
if
(
GetEagerDeletionThreshold
()
>=
0
&&
!
force_disable_gc_
)
{
global_ref_cnts_
=
GetNonPersistableReferenceCounts
(
prog
.
Block
(
block_id
),
keep_vars
);
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
)
:
prog_
(
prog
),
block_id_
(
block_id
)
{}
// Records force_disable_gc and, when eager deletion is active (threshold
// >= 0 and gc not force-disabled), fills unused_vars_ from the ops already
// stored in ops_. keep_vars lists variables that must never be collected.
void ExecutorPrepareContext::PrepareUnusedVars(
    const std::vector<std::string>& keep_vars, bool force_disable_gc) {
  force_disable_gc_ = force_disable_gc;

  const bool gc_enabled =
      GetEagerDeletionThreshold() >= 0 && !force_disable_gc_;
  if (gc_enabled) {
    unused_vars_ = GetUnusedVars(prog_.Block(block_id_), ops_, keep_vars);
  }
}
// Destructor: only emits a verbose (level 5) log line.
ExecutorPrepareContext::~ExecutorPrepareContext() {
  VLOG(5) << "destroy ExecutorPrepareContext";
}
// Reference-count driven cleanup that runs after `op`.
//
// Every input and output name of `op` that is tracked in *ref_cnts has its
// counter decremented. When a counter reaches zero and the variable exists
// in `scope`, its memory holders are moved out and handed to `gc` in one
// batch. Throws for variable types other than LoDTensor, SelectedRows and
// LoDTensorArray.
static void DeleteUnusedTensors(
    const Scope& scope, const OperatorBase* op, GarbageCollector* gc,
    std::unordered_map<std::string, size_t>* ref_cnts) {
  std::deque<std::shared_ptr<memory::Allocation>> garbages;

  auto release_dead_vars = [&](const VariableNameMap& slots) {
    for (auto& slot : slots) {
      for (auto& name : slot.second) {
        auto counter = ref_cnts->find(name);
        if (counter == ref_cnts->end()) {
          continue;  // untracked variable
        }
        if (--(counter->second) != 0) {
          continue;  // still referenced by a later op
        }

        auto* var = scope.FindVar(name);
        if (var == nullptr) {
          continue;
        }

        VLOG(2) << "Erase variable " << name;
        if (var->IsType<LoDTensor>()) {
          garbages.emplace_back(
              var->GetMutable<LoDTensor>()->MoveMemoryHolder());
        } else if (var->IsType<SelectedRows>()) {
          garbages.emplace_back(var->GetMutable<SelectedRows>()
                                    ->mutable_value()
                                    ->MoveMemoryHolder());
        } else if (var->IsType<LoDTensorArray>()) {
          auto* tensor_array = var->GetMutable<LoDTensorArray>();
          for (auto& element : *tensor_array) {
            garbages.emplace_back(element.MoveMemoryHolder());
          }
        } else {
          PADDLE_THROW("Type %s of %s is not supported eager deletion",
                       framework::ToTypeName(var->Type()), name);
        }
      }
    }
  };

  release_dead_vars(op->Inputs());
  release_dead_vars(op->Outputs());

  if (!garbages.empty()) {
    gc->Add(std::move(garbages));
  }
}
// Constructs an executor that stores `place` in place_.
Executor::Executor(const platform::Place& place) : place_(place) {}
void
Executor
::
Close
()
{
...
...
@@ -362,8 +289,8 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
std
::
unique_ptr
<
ExecutorPrepareContext
>
Executor
::
Prepare
(
const
ProgramDesc
&
program
,
int
block_id
,
const
std
::
vector
<
std
::
string
>&
skip_ref_cnt_vars
,
bool
force_disable_gc
)
{
std
::
unique_ptr
<
ExecutorPrepareContext
>
ctx
(
new
ExecutorPrepareContext
(
program
,
block_id
,
skip_ref_cnt_vars
,
force_disable_gc
));
std
::
unique_ptr
<
ExecutorPrepareContext
>
ctx
(
new
ExecutorPrepareContext
(
program
,
block_id
));
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
block_id
),
program
.
Size
());
auto
&
block
=
program
.
Block
(
block_id
);
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
...
...
@@ -375,6 +302,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
ctx
->
prog_
.
Block
(
ctx
->
block_id_
),
&
ctx
->
ops_
);
}
#endif
ctx
->
PrepareUnusedVars
(
skip_ref_cnt_vars
,
force_disable_gc
);
return
ctx
;
}
...
...
@@ -389,19 +317,17 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
result
;
size_t
idx
=
0
;
for
(
auto
&
bid
:
block_ids
)
{
ExecutorPrepareContext
*
ctx
;
if
(
skip_ref_cnt_vars
.
empty
())
{
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
,
std
::
vector
<
std
::
string
>
(),
force_disable_gc
);
}
else
{
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
,
skip_ref_cnt_vars
[
idx
],
force_disable_gc
);
}
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
bid
),
program
.
Size
());
auto
*
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
);
auto
&
block
=
program
.
Block
(
bid
);
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
}
if
(
skip_ref_cnt_vars
.
empty
())
{
ctx
->
PrepareUnusedVars
(
std
::
vector
<
std
::
string
>
(),
force_disable_gc
);
}
else
{
ctx
->
PrepareUnusedVars
(
skip_ref_cnt_vars
[
idx
],
force_disable_gc
);
}
result
.
push_back
(
std
::
shared_ptr
<
ExecutorPrepareContext
>
(
ctx
));
++
idx
;
}
...
...
@@ -425,7 +351,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
// FIXME(zjl): recurrent_op is rather complex, we would
// disable gc forcely in recurrent_op
if
(
!
ctx
->
force_disable_gc_
&&
max_memory_size
>=
0
)
{
ctx
->
ResetReferenceCount
();
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
place_
))
{
if
(
IsFastEagerDeletionModeEnabled
())
{
...
...
@@ -453,8 +378,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
op
->
Run
(
*
local_scope
,
place_
);
if
(
gc
)
{
DeleteUnusedTensors
(
*
local_scope
,
op
.
get
(),
gc
.
get
(),
&
(
ctx
->
runtime_ref_cnts_
));
DeleteUnusedTensors
(
*
local_scope
,
op
.
get
(),
ctx
->
unused_vars_
,
gc
.
get
());
}
}
...
...
paddle/fluid/framework/executor.h
浏览文件 @
a93a9eef
...
...
@@ -30,22 +30,20 @@ namespace paddle {
namespace
framework
{
struct
ExecutorPrepareContext
{
ExecutorPrepareContext
(
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
,
const
std
::
vector
<
std
::
string
>&
skip_ref_cnt_vars
=
std
::
vector
<
std
::
string
>
(),
bool
force_disable_gc
=
false
);
ExecutorPrepareContext
(
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
);
~
ExecutorPrepareContext
();
void
ResetReferenceCount
()
{
runtime_ref_cnts_
=
global_ref_cnts_
;
}
void
PrepareUnusedVars
(
const
std
::
vector
<
std
::
string
>&
keep_vars
,
bool
force_disable_gc
=
false
);
const
framework
::
ProgramDesc
&
prog_
;
size_t
block_id_
;
bool
force_disable_gc_
;
const
size_t
block_id_
;
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
ops_
;
std
::
unordered_map
<
std
::
string
,
size_t
>
global_ref_cnt
s_
;
std
::
unordered_map
<
std
::
string
,
size_t
>
runtime_ref_cnts_
;
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
unused_var
s_
;
bool
force_disable_gc_
{
false
}
;
};
class
Executor
{
...
...
paddle/fluid/framework/executor_gc_helper.cc
0 → 100644
浏览文件 @
a93a9eef
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/executor_gc_helper.h"
#include <deque>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
// Per-operator cache answering "does this op really need the buffer of a
// given input argument?", based on the op's NoNeedBufferVarsInferer.
struct OpInOutInfo {
 public:
  // Queries the inferer of `op` (if any) and records every argument name that
  // appears in an output slot or in an input slot the inferer did not report.
  void Build(const OperatorBase *op) {
    is_built_ = true;

    auto &inferer = op->Info().NoNeedBufferVarsInferer();
    if (!inferer) {
      return;
    }

    no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs());
    if (no_need_buffer_ins_.empty()) {
      return;
    }

    for (auto &input_slot : op->Inputs()) {
      if (no_need_buffer_ins_.count(input_slot.first) == 0) {
        for (auto &arg : input_slot.second) {
          other_args_set_.insert(arg);
        }
      }
    }

    for (auto &output_slot : op->Outputs()) {
      for (auto &arg : output_slot.second) {
        other_args_set_.insert(arg);
      }
    }
  }

  // Whether Build() has been called.
  bool IsBuilt() const { return is_built_; }

  // True when the inferer reported no slots at all (or there is no inferer),
  // or when in_arg_name was recorded by Build() as used elsewhere in the op.
  bool IsInArgBufferNeeded(const std::string &in_arg_name) const {
    if (no_need_buffer_ins_.empty()) {
      return true;
    }
    return other_args_set_.count(in_arg_name) != 0;
  }

 private:
  std::unordered_set<std::string> no_need_buffer_ins_;
  std::unordered_set<std::string> other_args_set_;
  bool is_built_{false};
};
// Returns true when `name` may be garbage-collected: it is not listed in
// skip_vars, is declared in `block`, is not persistable, and has type
// LOD_TENSOR, SELECTED_ROWS or LOD_TENSOR_ARRAY.
static bool VarCanBeDeleted(const std::string &name, const BlockDesc &block,
                            const std::unordered_set<std::string> &skip_vars) {
  const bool is_skipped = skip_vars.count(name) != 0;
  if (is_skipped) {
    return false;
  }

  auto *desc = block.FindVar(name);
  if (desc == nullptr || desc->Persistable()) {
    return false;
  }

  switch (desc->Proto()->type().type()) {
    case proto::VarType::LOD_TENSOR:
    case proto::VarType::SELECTED_ROWS:
    case proto::VarType::LOD_TENSOR_ARRAY:
      return true;
    default:
      return false;
  }
}
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
GetUnusedVars
(
const
BlockDesc
&
block
,
const
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
&
ops
,
const
std
::
vector
<
std
::
string
>
&
skip_var_list
)
{
std
::
unordered_set
<
std
::
string
>
skip_vars
(
skip_var_list
.
begin
(),
skip_var_list
.
end
());
std
::
unordered_map
<
std
::
string
,
size_t
>
var_op_idx_map
;
for
(
size_t
i
=
0
;
i
<
ops
.
size
();
++
i
)
{
auto
*
op
=
ops
[
i
].
get
();
OpInOutInfo
info
;
for
(
auto
&
name_pair
:
op
->
Inputs
())
{
for
(
auto
&
name
:
name_pair
.
second
)
{
if
(
!
VarCanBeDeleted
(
name
,
block
,
skip_vars
))
{
continue
;
}
// var can be gc-ed
if
(
!
info
.
IsBuilt
())
{
info
.
Build
(
op
);
}
if
(
info
.
IsInArgBufferNeeded
(
name
))
{
var_op_idx_map
[
name
]
=
i
;
}
else
{
VLOG
(
10
)
<<
"Skip reference count computing of variable "
<<
name_pair
.
first
<<
"("
<<
name
<<
") in Operator "
<<
op
->
Type
();
}
}
}
for
(
auto
&
name_pair
:
op
->
Outputs
())
{
for
(
auto
&
name
:
name_pair
.
second
)
{
if
(
VarCanBeDeleted
(
name
,
block
,
skip_vars
))
{
var_op_idx_map
[
name
]
=
i
;
}
}
}
}
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
result
;
for
(
auto
&
name_op_idx_pair
:
var_op_idx_map
)
{
auto
&
name
=
name_op_idx_pair
.
first
;
size_t
op_idx
=
name_op_idx_pair
.
second
;
result
[
ops
[
op_idx
].
get
()].
emplace_back
(
name
);
}
return
result
;
}
// Releases the variables that delete_vars_map assigns to `op`.
//
// Each listed variable found in `scope` has its memory holders moved out and
// collected; the whole batch is then handed to `gc`. Variables missing from
// the scope are ignored. Throws for variable types other than LoDTensor,
// SelectedRows and LoDTensorArray. Does nothing when `op` has no entry.
void DeleteUnusedTensors(
    const Scope &scope, OperatorBase *op,
    const std::unordered_map<OperatorBase *, std::vector<std::string>>
        &delete_vars_map,
    GarbageCollector *gc) {
  auto entry = delete_vars_map.find(op);
  if (entry == delete_vars_map.end()) {
    return;
  }

  std::deque<std::shared_ptr<memory::Allocation>> garbages;

  for (auto &var_name : entry->second) {
    auto *var = scope.FindVar(var_name);
    if (var != nullptr) {
      VLOG(2) << "Erase variable " << var_name;

      if (var->IsType<LoDTensor>()) {
        garbages.emplace_back(
            var->GetMutable<LoDTensor>()->MoveMemoryHolder());
      } else if (var->IsType<SelectedRows>()) {
        garbages.emplace_back(var->GetMutable<SelectedRows>()
                                  ->mutable_value()
                                  ->MoveMemoryHolder());
      } else if (var->IsType<LoDTensorArray>()) {
        auto *tensor_array = var->GetMutable<LoDTensorArray>();
        for (auto &element : *tensor_array) {
          garbages.emplace_back(element.MoveMemoryHolder());
        }
      } else {
        PADDLE_THROW("Type %s of %s is not supported eager deletion",
                     framework::ToTypeName(var->Type()), var_name);
      }
    }
  }

  if (!garbages.empty()) {
    gc->Add(std::move(garbages));
  }
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/executor_gc_helper.h
0 → 100644
浏览文件 @
a93a9eef
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
framework
{
// Returns, for each operator in `ops`, the names of the variables of `block`
// that can be released once that operator has run. Names listed in
// `skip_vars` are never returned.
std::unordered_map<OperatorBase *, std::vector<std::string>> GetUnusedVars(
    const BlockDesc &block,
    const std::vector<std::unique_ptr<OperatorBase>> &ops,
    const std::vector<std::string> &skip_vars);
// Hands the memory of the variables that `delete_vars_map` associates with
// `op` (looked up in `scope`) over to `gc`. Does nothing when `op` has no
// entry in the map.
void DeleteUnusedTensors(
    const Scope &scope, OperatorBase *op,
    const std::unordered_map<OperatorBase *, std::vector<std::string>>
        &delete_vars_map,
    GarbageCollector *gc);
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/no_need_buffer_vars_inference.cc
0 → 100644
浏览文件 @
a93a9eef
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include <string>
#include <vector>
namespace
paddle
{
namespace
framework
{
// Reserve empty source file
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/no_need_buffer_vars_inference.h
0 → 100644
浏览文件 @
a93a9eef
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
namespace
paddle
{
namespace
framework
{
// Abstract functor that yields a set of names computed from an operator's
// inputs, outputs and attributes. Concrete subclasses implement operator().
//
// Lifetime: the three maps are held by const reference, not copied, so they
// must outlive this object.
class NoNeedBufferVarsInference {
 public:
  // Binds (without copying) the operator's input map, output map and
  // attribute map.
  NoNeedBufferVarsInference(const VariableNameMap &inputs,
                            const VariableNameMap &outputs,
                            const AttributeMap &attrs)
      : inputs_(inputs), outputs_(outputs), attrs_(attrs) {}

  // Virtual so that subclasses can be destroyed through a base pointer.
  virtual ~NoNeedBufferVarsInference() = default;

  // Read-only access to the bound input map.
  const VariableNameMap &Inputs() const { return inputs_; }

  // Read-only access to the bound output map.
  const VariableNameMap &Outputs() const { return outputs_; }

  // Read-only access to the bound attribute map.
  const AttributeMap &Attrs() const { return attrs_; }

  // Returns the inferred set of names; must be provided by subclasses.
  virtual std::unordered_set<std::string> operator()() const = 0;

 private:
  const VariableNameMap &inputs_;
  const VariableNameMap &outputs_;
  const AttributeMap &attrs_;
};
// Declares `class_type` as a subclass of
// ::paddle::framework::NoNeedBufferVarsInference that inherits the base
// constructors and whose operator() returns the fixed set built from the
// remaining macro arguments (`{__VA_ARGS__}`).
// The expansion ends with the closing brace of the class, so the caller
// supplies the trailing semicolon.
#define DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(class_type, ...) \
class class_type : public ::paddle::framework::NoNeedBufferVarsInference { \
public: \
using ::paddle::framework::NoNeedBufferVarsInference:: \
NoNeedBufferVarsInference; \
\
std::unordered_set<std::string> operator()() const override { \
return {__VA_ARGS__}; \
} \
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/op_info.h
浏览文件 @
a93a9eef
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_map>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/platform/macros.h"
...
...
@@ -39,6 +40,7 @@ struct OpInfo {
InferVarTypeFN
infer_var_type_
;
InferShapeFN
infer_shape_
;
InferInplaceOpFN
infer_inplace_
;
InferNoNeedBufferVarsFN
infer_no_need_buffer_vars_
;
bool
HasOpProtoAndChecker
()
const
{
return
proto_
!=
nullptr
&&
checker_
!=
nullptr
;
...
...
@@ -64,6 +66,10 @@ struct OpInfo {
}
const
OpAttrChecker
*
Checker
()
const
{
return
checker_
;
}
const
InferNoNeedBufferVarsFN
&
NoNeedBufferVarsInferer
()
const
{
return
infer_no_need_buffer_vars_
;
}
};
class
OpInfoMap
{
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
a93a9eef
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <algorithm>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
...
...
@@ -326,7 +327,12 @@ OperatorBase::OperatorBase(const std::string& type,
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
)
:
type_
(
type
),
inputs_
(
inputs
),
outputs_
(
outputs
),
attrs_
(
attrs
)
{
:
type_
(
type
),
inputs_
(
inputs
),
outputs_
(
outputs
),
attrs_
(
attrs
),
// NOTE(zjl): why op_info may be nullptr?
info_
(
OpInfoMap
::
Instance
().
GetNullable
(
type
))
{
GenerateTemporaryNames
();
CheckAllInputOutputSet
();
}
...
...
@@ -350,7 +356,7 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
}
return
ret_val
;
}
auto
&
info
=
OpInfoMap
::
Instance
().
Get
(
Type
()
);
auto
&
info
=
Info
(
);
// get all OpProto::Var for outputs
for
(
auto
&
o
:
info
.
Proto
().
outputs
())
{
...
...
@@ -366,18 +372,16 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
}
void
OperatorBase
::
CheckAllInputOutputSet
()
const
{
auto
&
info_map
=
OpInfoMap
::
Instance
();
auto
*
op_info
=
info_map
.
GetNullable
(
Type
());
if
(
op_info
==
nullptr
||
op_info
->
proto_
==
nullptr
)
return
;
if
(
info_
==
nullptr
||
info_
->
proto_
==
nullptr
)
return
;
for
(
auto
&
in
:
op_info
->
Proto
().
inputs
())
{
for
(
auto
&
in
:
info_
->
Proto
().
inputs
())
{
if
(
!
in
.
dispensable
())
{
PADDLE_ENFORCE
(
inputs_
.
find
(
in
.
name
())
!=
inputs_
.
end
(),
"Operator %s's input, %s, is not set"
,
Type
(),
in
.
name
());
}
}
for
(
auto
&
out
:
op_info
->
Proto
().
outputs
())
{
for
(
auto
&
out
:
info_
->
Proto
().
outputs
())
{
if
(
!
out
.
dispensable
())
{
PADDLE_ENFORCE
(
outputs_
.
find
(
out
.
name
())
!=
outputs_
.
end
(),
"Operator %s's output, %s, is not set"
,
Type
(),
...
...
@@ -997,7 +1001,25 @@ Scope* OperatorWithKernel::PrepareData(
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
,
RuntimeContext
*
ctx
)
const
{
Scope
*
new_scope
=
nullptr
;
std
::
unordered_set
<
std
::
string
>
no_buffer_ins
;
if
(
info_
)
{
auto
&
no_buffer_inferer
=
info_
->
NoNeedBufferVarsInferer
();
// Some op may not register NoNeedBufferVarsInferer
if
(
no_buffer_inferer
)
{
no_buffer_ins
=
no_buffer_inferer
(
Inputs
(),
Outputs
(),
Attrs
());
}
}
for
(
auto
&
var_name_item
:
Inputs
())
{
// NOTE(zjl): STL does not guarantee fast std::unordered_set::count when set
// is empty. At least STL implemented on my mac does calculate hash code
// of search key even though the set is empty.
if
(
!
no_buffer_ins
.
empty
()
&&
no_buffer_ins
.
count
(
var_name_item
.
first
)
>
0
)
{
continue
;
}
std
::
vector
<
Variable
*>&
input_vars
=
ctx
->
inputs
[
var_name_item
.
first
];
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
...
...
paddle/fluid/framework/operator.h
浏览文件 @
a93a9eef
...
...
@@ -160,6 +160,11 @@ class OperatorBase {
const
VariableNameMap
&
Inputs
()
const
{
return
inputs_
;
}
const
VariableNameMap
&
Outputs
()
const
{
return
outputs_
;
}
// Returns the OpInfo referenced by `info_`.
// PADDLE_ENFORCE_NOT_NULL checks `info_` first and reports
// "OpInfo of %s is not found" (formatted with `type_`) when it is null, so
// the dereference below is only reached with a non-null pointer.
const
OpInfo
&
Info
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
info_
,
"OpInfo of %s is not found"
,
type_
);
return
*
info_
;
}
bool
HasInputs
(
const
std
::
string
&
name
)
const
;
//! Get a input with argument's name described in `op_proto`
std
::
string
Input
(
const
std
::
string
&
name
)
const
;
...
...
@@ -194,6 +199,10 @@ class OperatorBase {
// IG (Inputs Gradients)
VariableNameMap
outputs_
;
AttributeMap
attrs_
;
// OpInfo
const
OpInfo
*
info_
;
// Whether this operator executes in an Executor.
bool
run_by_executor_
{
true
};
...
...
@@ -444,7 +453,7 @@ class OperatorWithKernel : public OperatorBase {
}
virtual
void
InferShape
(
InferShapeContext
*
ctx
)
const
{
OpInfoMap
::
Instance
().
Get
(
Type
()
).
infer_shape_
(
ctx
);
Info
(
).
infer_shape_
(
ctx
);
}
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
...
...
paddle/fluid/framework/type_defs.h
浏览文件 @
a93a9eef
...
...
@@ -30,6 +30,7 @@ class InferShapeContext;
class
InferVarTypeContext
;
class
BlockDesc
;
class
Variable
;
class
NoNeedBufferVarsInference
;
using
VariableNameMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
;
// TODO(panyx0718): Replace vector with something like gtl::Vector.
...
...
@@ -61,5 +62,9 @@ using InferShapeFN = std::function<void(InferShapeContext*)>;
using
InplacePair
=
std
::
unordered_map
<
std
::
string
,
std
::
string
>
;
using
InferInplaceOpFN
=
std
::
function
<
InplacePair
(
const
OpDesc
&
,
BlockDesc
*
)
>
;
using
InferNoNeedBufferVarsFN
=
std
::
function
<
std
::
unordered_set
<
std
::
string
>
(
const
VariableNameMap
&
/*inputs*/
,
const
VariableNameMap
&
/*outputs*/
,
const
AttributeMap
&
/*attrs*/
)
>
;
}
// namespace framework
}
// namespace paddle
paddle/fluid/operators/clip_op.cc
浏览文件 @
a93a9eef
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/clip_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -76,12 +77,28 @@ class ClipOpGrad : public framework::OperatorWithKernel {
}
};
// Grad-op description maker that emits a single "clip_grad" OpDesc.
class ClipGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  // Reuse the base-class constructors unchanged.
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Builds the "clip_grad" description:
  //   inputs  : "X" and the gradient of "Out"
  //   outputs : the gradient of "X"
  //   attrs   : copied from Attrs()
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());

    grad_desc->SetType("clip_grad");

    grad_desc->SetInput("X", Input("X"));
    grad_desc->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));

    grad_desc->SetOutput(framework::GradVarName("X"), InputGrad("X"));

    grad_desc->SetAttrMap(Attrs());
    return grad_desc;
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
clip
,
ops
::
ClipOp
,
ops
::
ClipOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ClipGradOpDescMaker
);
REGISTER_OPERATOR
(
clip_grad
,
ops
::
ClipOpGrad
);
REGISTER_OP_CPU_KERNEL
(
clip
,
ops
::
ClipKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.cc
浏览文件 @
a93a9eef
...
...
@@ -16,8 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_GRAD_MAKER
(
elementwise_add
,
Add
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_add
,
"Add"
,
"Out = X + Y"
,
"Out"
,
"X"
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_add
,
"Add"
,
"Out = X + Y"
);
REGISTER_OP_CPU_KERNEL
(
elementwise_add
,
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
a93a9eef
...
...
@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
...
...
@@ -281,12 +283,11 @@ class ElementwiseGradOpInplace : public framework::InplaceInToOut {
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
ElementwiseGradNoBufVarsInference
,
"Y"
);
}
// namespace operators
}
// namespace paddle
/*
*/
#define REGISTER_ELEMWISE_GRAD_MAKER(kernel_type, op_name) \
class kernel_type##GradMaker \
: public paddle::framework::SingleGradOpDescMaker { \
...
...
@@ -320,18 +321,19 @@ class ElementwiseGradOpInplace : public framework::InplaceInToOut {
::paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(op_type##_grad, ::paddle::operators::ElementwiseOpGrad)
#define REGISTER_ELEMWISE_EXPLICIT_OP(op_type, op_name, equation, ...) \
class __ElemwiseOp##op_type##Maker__ \
: public ::paddle::operators::ElementwiseOpMaker { \
protected: \
virtual std::string GetName() const { return op_name; } \
virtual std::string GetEquation() const { return equation; } \
}; \
REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp, \
__ElemwiseOp##op_type##Maker__, \
::paddle::operators::ElementwiseOpInferVarType, \
op_type##GradMaker, \
::paddle::operators::ElementwiseOpInplace); \
REGISTER_OPERATOR(op_type##_grad, \
::paddle::operators::ElementwiseOpExplicitGrad, \
::paddle::operators::ElementwiseGradOpInplace)
#define REGISTER_ELEMWISE_EXPLICIT_OP(op_type, op_name, equation) \
class __ElemwiseOp##op_type##Maker__ \
: public ::paddle::operators::ElementwiseOpMaker { \
protected: \
virtual std::string GetName() const { return op_name; } \
virtual std::string GetEquation() const { return equation; } \
}; \
REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp, \
__ElemwiseOp##op_type##Maker__, \
::paddle::operators::ElementwiseOpInferVarType, \
op_type##GradMaker, \
::paddle::operators::ElementwiseOpInplace); \
REGISTER_OPERATOR(op_type##_grad, \
::paddle::operators::ElementwiseOpExplicitGrad, \
::paddle::operators::ElementwiseGradOpInplace, \
::paddle::operators::ElementwiseGradNoBufVarsInference)
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
浏览文件 @
a93a9eef
...
...
@@ -16,8 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_GRAD_MAKER
(
elementwise_sub
,
Sub
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_sub
,
"Sub"
,
"Out = X - Y"
,
"Out"
,
"X"
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_sub
,
"Sub"
,
"Out = X - Y"
);
REGISTER_OP_CPU_KERNEL
(
elementwise_sub
,
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
a93a9eef
...
...
@@ -274,6 +274,8 @@ PYBIND11_MODULE(core, m) {
py
::
class_
<
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
.
def_buffer
(
[](
Tensor
&
self
)
->
py
::
buffer_info
{
return
CastToPyBuffer
(
self
);
})
.
def
(
"_is_initialized"
,
[](
const
Tensor
&
self
)
{
return
self
.
IsInitialized
();
})
.
def
(
"_get_dims"
,
[](
const
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"_set_dims"
,
...
...
@@ -666,7 +668,8 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
,
R"DOC(
Delete all sub-scopes of the current scope.
)DOC"
);
)DOC"
)
.
def
(
"_kids"
,
&
Scope
::
kids
);
m
.
def
(
"Scope"
,
[]()
->
Scope
*
{
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
0 → 100644
浏览文件 @
a93a9eef
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
numpy
as
np
# These environment variables are set before `paddle.fluid` is imported
# further down in this file.
# NOTE(review): presumably they must be in place before the import so that
# Paddle picks them up at initialization time -- confirm.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
# CPU_NUM is read back in TestExecutor.pe_main as the CPU device count.
os.environ['CPU_NUM'] = '4'
import
paddle.fluid
as
fluid
import
six
import
unittest
import
multiprocessing
def simple_fc_net():
    """Build a small fully-connected classifier in the current program.

    The network is: 784-wide float32 `image` input -> four tanh fc layers of
    size 200 (bias initialised to the constant 1.0) -> softmax fc layer of
    size 10 -> cross entropy against the int64 `label` -> mean. An Adam
    optimizer (learning rate 1e-3) is attached to the mean loss.

    Returns:
        tuple: (image, label, loss) variables.
    """
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    features = image
    for _ in range(4):
        # A fresh ParamAttr is created for every layer, as in the original.
        bias_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Constant(value=1.0))
        features = fluid.layers.fc(
            features, size=200, act='tanh', bias_attr=bias_attr)

    prediction = fluid.layers.fc(features, size=10, act='softmax')
    per_sample_loss = fluid.layers.cross_entropy(
        input=prediction, label=label)
    loss = fluid.layers.mean(per_sample_loss)

    adam = fluid.optimizer.Adam(learning_rate=1e-3)
    adam.minimize(loss)
    return image, label, loss
def get_persistables_and_non_persistables(prog, fetch_list):
    """Split the variable names of every block of `prog` into two sets.

    A variable name goes into the first set when the variable is marked
    persistable or its name appears in `fetch_list`; otherwise it goes into
    the second set.

    Args:
        prog: object exposing `num_blocks` and `block(index)`; each block
            exposes a `vars` mapping whose values have `name` and
            `persistable` attributes.
        fetch_list: container of variable names treated like persistables.

    Returns:
        tuple: (persistable_names, non_persistable_names), both sets of str.
    """
    kept_names = set()
    dropped_names = set()

    for block_idx in range(prog.num_blocks):
        current_block = prog.block(block_idx)
        for _, variable in current_block.vars.items():
            keep = variable.persistable or variable.name in fetch_list
            bucket = kept_names if keep else dropped_names
            bucket.add(variable.name)

    return kept_names, dropped_names
class
TestExecutor
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
place
=
fluid
.
CPUPlace
()
def
test_executor_main
(
self
):
with
fluid
.
program_guard
(
fluid
.
Program
(),
fluid
.
Program
()):
with
fluid
.
scope_guard
(
fluid
.
Scope
()):
self
.
executor_main
()
def
test_parallel_executor_main
(
self
):
with
fluid
.
program_guard
(
fluid
.
Program
(),
fluid
.
Program
()):
with
fluid
.
scope_guard
(
fluid
.
Scope
()):
self
.
pe_main
()
def
prepare_feed
(
self
,
image
,
label
,
dev_cnt
=
1
):
batch_size
=
32
*
dev_cnt
image_shape
=
(
batch_size
,
)
+
tuple
(
image
.
shape
[
1
:])
label_shape
=
(
batch_size
,
)
+
tuple
(
label
.
shape
[
1
:])
image_np
=
np
.
random
.
random
(
size
=
image_shape
).
astype
(
'float32'
)
label_np
=
np
.
random
.
random_integers
(
low
=
0
,
high
=
9
,
size
=
label_shape
).
astype
(
'int64'
)
return
image_np
,
label_np
def
assertScopeVar
(
self
,
scope
,
persitables
,
non_persistables
):
for
name
in
persitables
:
var
=
scope
.
find_var
(
name
)
self
.
assertTrue
(
var
is
not
None
)
t
=
var
.
get_tensor
()
self
.
assertTrue
(
t
.
_is_initialized
())
for
name
in
non_persistables
:
var
=
scope
.
find_var
(
name
)
self
.
assertTrue
(
var
is
not
None
)
t
=
var
.
get_tensor
()
if
t
.
_is_initialized
():
print
(
'WARNING: Variable {} is alive'
.
format
(
name
))
self
.
assertTrue
(
not
t
.
_is_initialized
())
def
executor_main
(
self
):
image
,
label
,
loss
=
simple_fc_net
()
loss
.
persistable
=
False
persistables
,
non_persistables
=
get_persistables_and_non_persistables
(
fluid
.
default_main_program
(),
[
loss
.
name
])
exe
=
fluid
.
Executor
(
self
.
place
)
exe
.
run
(
fluid
.
default_startup_program
())
p
=
fluid
.
core
.
Place
()
p
.
set_place
(
self
.
place
)
exe
=
fluid
.
core
.
Executor
(
p
)
for
_
in
six
.
moves
.
range
(
10
):
image_np
,
label_np
=
self
.
prepare_feed
(
image
,
label
)
fluid
.
global_scope
().
var
(
image
.
name
).
get_tensor
().
set
(
image_np
,
self
.
place
)
fluid
.
global_scope
().
var
(
label
.
name
).
get_tensor
().
set
(
label_np
,
self
.
place
)
# exe.run would not create local scope
# so that we can detect whether gc clears temporary variables
exe
.
run
(
fluid
.
default_main_program
().
desc
,
fluid
.
global_scope
(),
0
,
False
,
True
,
[
loss
.
name
])
self
.
assertScopeVar
(
fluid
.
global_scope
(),
persistables
,
non_persistables
)
def
pe_main
(
self
):
image
,
label
,
loss
=
simple_fc_net
()
loss
.
persistable
=
False
persitables
,
non_persistables
=
get_persistables_and_non_persistables
(
fluid
.
default_main_program
(),
[
loss
.
name
])
exe
=
fluid
.
Executor
(
self
.
place
)
exe
.
run
(
fluid
.
default_startup_program
())
exec_strategy
=
fluid
.
ExecutionStrategy
()
exec_strategy
.
num_iteration_per_drop_scope
=
100
prog
=
fluid
.
CompiledProgram
(
fluid
.
default_main_program
(
)).
with_data_parallel
(
loss_name
=
loss
.
name
,
exec_strategy
=
exec_strategy
)
dev_cnt
=
fluid
.
core
.
get_cuda_device_count
()
if
isinstance
(
self
.
place
,
fluid
.
CUDAPlace
)
\
else
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
for
idx
in
six
.
moves
.
range
(
10
):
image_np
,
label_np
=
self
.
prepare_feed
(
image
,
label
,
dev_cnt
)
feed
=
{
image
.
name
:
image_np
,
label
.
name
:
label_np
}
exe
.
run
(
program
=
prog
,
feed
=
feed
,
fetch_list
=
[
loss
])
local_scopes
=
prog
.
_local_scopes
for
scope
in
local_scopes
:
kids
=
scope
.
_kids
()
self
.
assertTrue
(
len
(
kids
)
==
1
)
self
.
assertScopeVar
(
kids
[
0
],
persistables
,
non_persistables
)
class TestExecutor2(TestExecutor):
    """Same checks as TestExecutor, on CUDA device 0 when available."""

    def setUp(self):
        # Use the GPU only when Paddle was compiled with CUDA support.
        if fluid.core.is_compiled_with_cuda():
            self.place = fluid.CUDAPlace(0)
        else:
            self.place = fluid.CPUPlace()
# Run the unittest test runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录