Commit c7c6eeb4 (unverified)
Authored Mar 27, 2019 by Zeng Jinle; committed by GitHub, Mar 27, 2019

Merge pull request #16409 from sneaxiy/feature/advance_gc

Enhance gc to support deleting tensor buffer in advance

Parents: 54a73578, a0f4fefb
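In outline, the change works like this: an operator can now register a NoNeedBufferVarsInference that names the inputs it reads only for shape/LoD inference; the reference-count pass and the executor's GC helper then treat those inputs as dead one step earlier and release their tensor buffers before the op runs. A minimal sketch of the registration side, assuming a hypothetical `my_op_grad` whose input `Y` is shape-only (the macro and the registration pattern both come from the diff below):

```cpp
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"

namespace paddle {
namespace operators {

// Y feeds only shape/LoD inference of Y@GRAD, so its data buffer can be
// garbage-collected before my_op_grad actually runs.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(MyOpGradNoNeedBufferVarsInference, "Y");

}  // namespace operators
}  // namespace paddle

// Passed to REGISTER_OPERATOR alongside the grad op class, e.g.:
//   REGISTER_OPERATOR(my_op_grad, ops::MyOpGrad,
//                     ops::MyOpGradNoNeedBufferVarsInference);
```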
Showing 42 changed files with 1083 additions and 381 deletions.
paddle/fluid/framework/CMakeLists.txt                                           +5    -2
paddle/fluid/framework/details/eager_deletion_pass.cc                           +5    -10
paddle/fluid/framework/details/early_delete_op_handle.h                         +0    -140
paddle/fluid/framework/details/op_registry.h                                    +88   -16
paddle/fluid/framework/details/reference_count_pass.cc                          +104  -5
paddle/fluid/framework/executor.cc                                              +20   -96
paddle/fluid/framework/executor.h                                               +7    -9
paddle/fluid/framework/executor_gc_helper.cc                                    +189  -0
paddle/fluid/framework/executor_gc_helper.h                                     +42   -0
paddle/fluid/framework/garbage_collector.cc                                     +42   -0
paddle/fluid/framework/garbage_collector.h                                      +9    -0
paddle/fluid/framework/no_need_buffer_vars_inference.h                          +60   -0
paddle/fluid/framework/op_info.h                                                +6    -0
paddle/fluid/framework/operator.cc                                              +31   -7
paddle/fluid/framework/operator.h                                               +10   -1
paddle/fluid/framework/scope.cc                                                 +0    -18
paddle/fluid/framework/scope.h                                                  +0    -3
paddle/fluid/framework/type_defs.h                                              +5    -0
paddle/fluid/operators/add_position_encoding_op.cc                              +19   -7
paddle/fluid/operators/clip_op.cc                                               +18   -1
paddle/fluid/operators/concat_op.cc                                             +32   -8
paddle/fluid/operators/conv_op.cc                                               +27   -9
paddle/fluid/operators/crop_op.cc                                               +23   -2
paddle/fluid/operators/cross_entropy_op.cc                                      +18   -1
paddle/fluid/operators/cudnn_lstm_op.cc                                         +31   -6
paddle/fluid/operators/distributed/parameter_prefetch.cc                        +2    -1
paddle/fluid/operators/elementwise/elementwise_add_op.cc                        +1    -2
paddle/fluid/operators/elementwise/elementwise_op.h                             +18   -18
paddle/fluid/operators/elementwise/elementwise_sub_op.cc                        +1    -2
paddle/fluid/operators/gather_op.cc                                             +29   -4
paddle/fluid/operators/lod_reset_op.cc                                          +26   -4
paddle/fluid/operators/reader/ctr_reader.h                                      +2    -1
paddle/fluid/pybind/pybind.cc                                                   +11   -1
python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py          +183  -0
python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py     +0    -1
python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py              +2    -0
python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py             +2    -0
python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py                +3    -1
python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py          +3    -1
python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py             +2    -2
python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py  +3    -2
python/paddle/fluid/tests/unittests/test_roi_align_op.py                        +4    -0
paddle/fluid/framework/CMakeLists.txt:

```diff
@@ -63,7 +63,7 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto
 cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
 nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
-cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory)
+cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags glog)
 cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
 cc_test(reader_test SRCS reader_test.cc DEPS reader)
@@ -164,6 +164,8 @@ else()
   set(NGRAPH_EXE_DEPS)
 endif()
+cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector)
+
 if(WITH_DISTRIBUTE)
   cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
     lod_rank_table feed_fetch_method sendrecvop_rpc ${GLOB_DISTRIBUTE_DEPS} graph_to_program_pass variable_helper ${NGRAPH_EXE_DEPS})
@@ -174,7 +176,7 @@ else()
   cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
 endif()
-target_link_libraries(executor garbage_collector while_op_helper)
+target_link_libraries(executor while_op_helper executor_gc_helper)
 cc_library(parallel_executor SRCS parallel_executor.cc DEPS
   threaded_ssa_graph_executor scope_buffered_ssa_graph_executor parallel_ssa_graph_executor
@@ -194,6 +196,7 @@ cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_con
 cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
         proto_desc)
 cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS op_registry proto_desc op_info memory_optimize_helper)
 cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
 cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
```
paddle/fluid/framework/details/eager_deletion_pass.cc:

```diff
@@ -22,14 +22,9 @@
 #include "paddle/fluid/framework/details/computation_op_handle.h"
 #include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
+#include "paddle/fluid/framework/garbage_collector.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"

-DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
-              "Fraction of eager deletion. If less than 1.0, all variables in "
-              "the program would be sorted according to its memory size, and "
-              "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
-              "variables would be deleted.");
-
 namespace paddle {
 namespace framework {
 namespace details {
@@ -206,8 +201,9 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
     }
   }

-  op_vars_map = ShrinkGCVars(op_vars_map, vars, places,
-                             FLAGS_memory_fraction_of_eager_deletion);
+  double memory_fraction = framework::GetEagerDeletionMemoryFraction();
+  op_vars_map = ShrinkGCVars(op_vars_map, vars, places, memory_fraction);

   for (auto &pair : op_vars_map) {
     auto *op = pair.first;
@@ -239,8 +235,7 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
       eager_deletion_op->AddOutput(dummy_leaf);
     }

-  VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = "
-           << FLAGS_memory_fraction_of_eager_deletion;
+  VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = " << memory_fraction;
   VLOG(10) << "Create " << op_vars_map.size() << " EagerDeletionOpHandle(s)";

   auto while_op_eager_deletion_pass =
```
paddle/fluid/framework/details/early_delete_op_handle.h (deleted, mode 100644 → 0):

```cpp
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace framework {
namespace details {

class EarlyDeleteOpHandle : public OpHandleBase {
 public:
  EarlyDeleteOpHandle(ir::Node* node, const Scope* scope,
                      const platform::Place& place,
                      const std::vector<std::string>& names,
                      GarbageCollector* gc)
      : OpHandleBase(node),
        scope_(scope),
        place_(place),
        names_(names),
        gc_(gc) {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      auto gpu_place = boost::get<platform::CUDAPlace>(place);
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
    }
#endif
  }
  ~EarlyDeleteOpHandle() {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx_->GetPlace());
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(cudaEventDestroy(event_));
    }
#endif
  }

  std::string Name() const override { return "early_delete"; }

 protected:
  void RunImpl() override {
    std::vector<std::shared_ptr<memory::Allocation>> tensors;
    auto* local_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope*>();
    for (auto& var_name : names_) {
      auto* var = local_scope->FindVar(var_name);
      PADDLE_ENFORCE(var != nullptr,
                     string::Sprintf("Local Scope not has var %s", var_name));
      if (var->IsType<LoDTensor>()) {
        tensors.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
      } else if (var->IsType<SelectedRows>()) {
        tensors.emplace_back(var->GetMutable<SelectedRows>()
                                 ->mutable_value()
                                 ->MoveMemoryHolder());
      } else if (var->IsType<LoDTensorArray>()) {
        LoDTensorArray* tensor_array = var->GetMutable<LoDTensorArray>();
        for (auto& tensor : *tensor_array) {
          tensors.emplace_back(tensor.MoveMemoryHolder());
        }
      }
    }
    if (!tensors.empty()) {
      ClearTensors(tensors);
    }
  }

 private:
  void ClearTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    if (platform::is_cpu_place(place_)) {
      ClearCPUTensors(tensors);
    } else {
      ClearGPUTensors(tensors);
    }
  }

  void ClearCPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    auto* gc = dynamic_cast<CPUGarbageCollector*>(gc_);
    if (gc != nullptr) {
      gc->Add(tensors);
    }
  }

  void ClearGPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
#ifdef PADDLE_WITH_CUDA
    auto* gc = dynamic_cast<StreamGarbageCollector*>(gc_);
    if (gc != nullptr) {
      auto compute_stream = dev_ctx_->stream();
      auto callback_stream = gc->stream();
      auto callback_func = [=]() {
        PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream));
        PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0));
      };
      gc_->Add(tensors, callback_func);
    } else {
      gc_->Add(tensors);
    }
  }

  bool IsStreamGarabageCollector() const {
    return dynamic_cast<const StreamGarbageCollector*>(gc_) != nullptr;
#endif
  }

  const Scope* scope_;
  const platform::Place place_;
  std::vector<std::string> names_;
  GarbageCollector* gc_;
#ifdef PADDLE_WITH_CUDA
  platform::CUDADeviceContext* dev_ctx_;
  cudaEvent_t event_;
#endif
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
```
paddle/fluid/framework/details/op_registry.h:

```diff
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/grad_op_desc_maker.h"
 #include "paddle/fluid/framework/inplace_op_inference.h"
+#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
@@ -36,27 +37,86 @@ enum OpInfoFillType {
   kGradOpDescMaker = 2,
   kVarTypeInference = 3,
   kShapeInference = 4,
-  kInplaceOpInference = 5
+  kInplaceOpInference = 5,
+  kNoNeedBufferVarsInference = 6,
+  kUnknown = -1
 };

+namespace internal {
+template <typename T, OpInfoFillType kType>
+struct TypePair {
+  using Type = T;
+  static constexpr OpInfoFillType kFillType = kType;
+};
+
+using OpRegistryClasses = std::tuple<                                // NOLINT
+    TypePair<OperatorBase, kOperator>,                               // NOLINT
+    TypePair<OpProtoAndCheckerMaker, kOpProtoAndCheckerMaker>,       // NOLINT
+    TypePair<GradOpDescMakerBase, kGradOpDescMaker>,                 // NOLINT
+    TypePair<VarTypeInference, kVarTypeInference>,                   // NOLINT
+    TypePair<InferShapeBase, kShapeInference>,                       // NOLINT
+    TypePair<InplaceOpInference, kInplaceOpInference>,               // NOLINT
+    TypePair<NoNeedBufferVarsInference, kNoNeedBufferVarsInference>  // NOLINT
+    >;
+
+static constexpr int kOpRegistryClassNumber =
+    std::tuple_size<OpRegistryClasses>::value;
+
+template <typename T, int kPos, bool kIsBounded /* = true*/>
+struct IsMatchedBaseTypeImpl {
+  using PairType = typename std::tuple_element<kPos, OpRegistryClasses>::type;
+  static constexpr bool kValue =
+      std::is_base_of<typename PairType::Type, T>::value;
+};
+
+template <typename T, int kPos>
+struct IsMatchedBaseTypeImpl<T, kPos, false> {
+  static constexpr bool kValue = false;
+};
+
+template <typename T, int kPos>
+static inline constexpr bool IsMatchedBaseType() {
+  return IsMatchedBaseTypeImpl<
+      T, kPos, (kPos >= 0 && kPos < kOpRegistryClassNumber)>::kValue;
+}
+
+template <typename T, int kStart, int kEnd, bool kIsEnd, bool kIsMatched>
+struct OpInfoFillTypeGetterImpl {};
+
+// This case should not happen
+template <typename T, int kStart, int kEnd>
+struct OpInfoFillTypeGetterImpl<T, kStart, kEnd, true, true> {};
+
+template <typename T, int kStart, int kEnd>
+struct OpInfoFillTypeGetterImpl<T, kStart, kEnd, true, false> {
+  static constexpr OpInfoFillType kType = kUnknown;
+};
+
+template <typename T, int kStart, int kEnd>
+struct OpInfoFillTypeGetterImpl<T, kStart, kEnd, false, false> {
+  static constexpr OpInfoFillType kType =
+      OpInfoFillTypeGetterImpl<T, kStart + 1, kEnd, kStart + 1 == kEnd,
+                               IsMatchedBaseType<T, kStart + 1>()>::kType;
+};
+
+template <typename T, int kStart, int kEnd>
+struct OpInfoFillTypeGetterImpl<T, kStart, kEnd, false, true> {
+  using PairType = typename std::tuple_element<kStart, OpRegistryClasses>::type;
+  static constexpr OpInfoFillType kType = PairType::kFillType;
+};
+
+template <typename T>
+using OpInfoFillTypeGetter =
+    OpInfoFillTypeGetterImpl<T, 0, kOpRegistryClassNumber,
+                             kOpRegistryClassNumber == 0,
+                             IsMatchedBaseType<T, 0>()>;
+
+}  // namespace internal
+
 template <typename T>
 struct OpInfoFillTypeID {
   static constexpr OpInfoFillType ID() {
-    return std::is_base_of<OperatorBase, T>::value
-               ? kOperator
-               : (std::is_base_of<OpProtoAndCheckerMaker, T>::value
-                      ? kOpProtoAndCheckerMaker
-                      : (std::is_base_of<GradOpDescMakerBase, T>::value
-                             ? kGradOpDescMaker
-                             : (std::is_base_of<VarTypeInference, T>::value
-                                    ? kVarTypeInference
-                                    : (std::is_base_of<InferShapeBase, T>::value
-                                           ? kShapeInference
-                                           : (std::is_base_of<InplaceOpInference,
-                                                              T>::value
-                                                  ? kInplaceOpInference
-                                                  : static_cast<OpInfoFillType>(
-                                                        -1))))));
+    return internal::OpInfoFillTypeGetter<T>::kType;
   }
 };
@@ -156,6 +216,18 @@ struct OpInfoFiller<T, kInplaceOpInference> {
   }
 };

+template <typename T>
+struct OpInfoFiller<T, kNoNeedBufferVarsInference> {
+  void operator()(const char* op_type, OpInfo* info) const {
+    info->infer_no_need_buffer_vars_ = [](const VariableNameMap& inputs,
+                                          const VariableNameMap& outputs,
+                                          const AttributeMap& attrs) {
+      T infer(inputs, outputs, attrs);
+      return infer();
+    };
+  }
+};
+
 }  // namespace details
 }  // namespace framework
```
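The nested-ternary `OpInfoFillTypeID::ID()` is replaced by a compile-time walk over `OpRegistryClasses`, so adding a new registrar kind (such as `kNoNeedBufferVarsInference`) is a one-line tuple entry. A hedged sketch of how the getter resolves a type, using a demo inferer class (hypothetical; in-tree registrars are matched the same way):

```cpp
#include "paddle/fluid/framework/details/op_registry.h"

namespace paddle {
namespace framework {
namespace details {

// Demo class deriving from NoNeedBufferVarsInference via the macro added in
// this PR; it matches the seventh OpRegistryClasses entry.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(DemoNoNeedBufferVarsInferer, "X");

// ID() is constexpr, so the match can be checked at compile time.
static_assert(OpInfoFillTypeID<DemoNoNeedBufferVarsInferer>::ID() ==
                  kNoNeedBufferVarsInference,
              "resolved through the tuple-based OpInfoFillTypeGetter");

}  // namespace details
}  // namespace framework
}  // namespace paddle
```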
paddle/fluid/framework/details/reference_count_pass.cc:

```diff
@@ -193,6 +193,79 @@ ExtractComputationOpFromLastLivedVar(VarHandle *var, size_t scope_idx,
   return shrink_func(computation_op);
 }

+/**
+ * Shrink op dependencies according to no need buffer vars.
+ *
+ * If some ops do not need Tensor buffer of any input,
+ * just remove the dependency of this op, i.e, decrease reference count.
+ *
+ * For example, input Y of elementwise_add_grad op is only used to infer shape
+ * and lod of Y@GRAD, we do not need the buffer of input Y. Data buffer of
+ * input Y can be collected before elementwise_add_grad op runs.
+ *
+ * This method returns whether the dependency count decreases to 0, and
+ * shrinks op dependency if possible.
+ */
+static bool ShrinkNoNeedBufferVarOpDependency(
+    const std::string &var_name,
+    std::unordered_set<ComputationOpHandle *> *op_handles) {
+  std::vector<ComputationOpHandle *> skip_ops;
+  for (auto *op_handle : *op_handles) {
+    auto *op_base = op_handle->GetOp();
+    auto &inferer = op_base->Info().NoNeedBufferVarsInferer();
+    if (!inferer) {
+      continue;
+    }
+
+    std::unordered_set<std::string> no_need_buffer_vars =
+        inferer(op_base->Inputs(), op_base->Outputs(), op_base->Attrs());
+
+    // Check whether var_name occurs in other inputs or outputs of the op
+    // If it occurs, we cannot decrease the dependency number.
+    bool occurred_in_other_vars = false;
+    for (auto &in_pair : op_base->Inputs()) {
+      if (no_need_buffer_vars.count(in_pair.first) > 0) {
+        continue;
+      }
+
+      auto &args = in_pair.second;
+      auto iter = std::find(args.begin(), args.end(), var_name);
+      if (iter != args.end()) {
+        occurred_in_other_vars = true;
+        break;
+      }
+    }
+
+    if (occurred_in_other_vars) {
+      continue;
+    }
+
+    for (auto &out_pair : op_base->Outputs()) {
+      auto &args = out_pair.second;
+      auto iter = std::find(args.begin(), args.end(), var_name);
+      if (iter != args.end()) {
+        occurred_in_other_vars = true;
+        break;
+      }
+    }
+
+    if (!occurred_in_other_vars) {
+      VLOG(2) << "Shrink var " << var_name << " in op " << op_handle->Name();
+      skip_ops.emplace_back(op_handle);
+    }
+  }
+
+  if (skip_ops.size() == op_handles->size()) {
+    op_handles->clear();
+    return true;
+  } else {
+    for (auto *skip_op : skip_ops) {
+      op_handles->erase(skip_op);
+    }
+    return false;
+  }
+}
+
 std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   auto &ref_cnts = Get<std::vector<ReferenceCountMap>>(kGlobalReferenceCount);
@@ -229,17 +302,43 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
       continue;
     }

-    bool ok;
-    auto result = ExtractComputationOpFromLastLivedVar(
-        name_var_pair.second.back(), i, shrink_func, &ok);
+    auto &var_name = name_var_pair.first;
+    auto &var_handles = name_var_pair.second;
+
+    for (auto iter = var_handles.rbegin(); iter != var_handles.rend();
+         ++iter) {
+      bool ok;
+      auto result =
+          ExtractComputationOpFromLastLivedVar(*iter, i, shrink_func, &ok);
+
+      // Seldomly, some vars may have no pending or preceding computation ops
+      // Just break;
+      if (!ok) break;
+
+      VLOG(10) << "Extract " << result.size() << " ops of var " << var_name;
+
+      size_t original_op_deps = result.size();
+      // If all ops do not need buffer of var_name, calculate reference count
+      // of the previous version of var_name.
+      if (ShrinkNoNeedBufferVarOpDependency(var_name, &result)) {
+        VLOG(10) << "Try to precede reference count computing at var "
+                 << var_name;
+        continue;
+      }
+
+      size_t final_op_deps = result.size();
+      if (final_op_deps < original_op_deps) {
+        VLOG(5) << "Shrink op deps from " << original_op_deps << " to "
+                << final_op_deps;
+      }

-    if (ok) {
-      auto &var_name = name_var_pair.first;
       PADDLE_ENFORCE(!result.empty(), "Last living ops of %s cannot be empty",
                      var_name);
       ref_cnts[i].emplace(var_name, result.size());
       last_live_ops_of_vars[i].emplace(var_name, std::move(result));
+      break;
     }
+
+    // Seldomly, all preceding trying failed.
+    // Just skip this corner case
   }
```
paddle/fluid/framework/executor.cc:

```diff
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <unordered_set>
 #include <utility>

+#include "paddle/fluid/framework/executor_gc_helper.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
@@ -48,97 +49,23 @@ namespace {
 int kProgramId = -1;
 }  // namespace

-static std::unordered_map<std::string, size_t> GetNonPersistableReferenceCounts(
-    const BlockDesc& block, const std::vector<std::string>& skip_var_list) {
-  std::unordered_map<std::string, size_t> ref_cnts;
-  std::unordered_set<std::string> skip_vars(skip_var_list.begin(),
-                                            skip_var_list.end());
-
-  auto update_ref_cnts = [&](OpDesc* op_desc, const VariableNameMap& name_map) {
-    for (auto& name_pair : name_map) {
-      for (auto& name : name_pair.second) {
-        if (skip_vars.count(name)) continue;
-        auto* var_desc = block.FindVar(name);
-        if (var_desc == nullptr || var_desc->Persistable()) continue;
-        auto type = var_desc->Proto()->type().type();
-        if (type != proto::VarType::LOD_TENSOR &&
-            type != proto::VarType::SELECTED_ROWS &&
-            type != proto::VarType::LOD_TENSOR_ARRAY) {
-          continue;
-        }
-        ++ref_cnts[name];
-      }
-    }
-  };
-
-  for (auto op_desc : block.AllOps()) {
-    update_ref_cnts(op_desc, op_desc->Inputs());
-    update_ref_cnts(op_desc, op_desc->Outputs());
-  }
-  return ref_cnts;
-}
-
 ExecutorPrepareContext::ExecutorPrepareContext(
-    const framework::ProgramDesc& prog, size_t block_id,
-    const std::vector<std::string>& keep_vars, bool force_disable_gc)
-    : prog_(prog), block_id_(block_id), force_disable_gc_(force_disable_gc) {
-  if (GetEagerDeletionThreshold() >= 0 && !force_disable_gc_) {
-    global_ref_cnts_ =
-        GetNonPersistableReferenceCounts(prog.Block(block_id), keep_vars);
+    const framework::ProgramDesc& prog, size_t block_id)
+    : prog_(prog), block_id_(block_id) {}
+
+void ExecutorPrepareContext::PrepareUnusedVars(
+    const std::vector<std::string>& keep_vars, bool force_disable_gc) {
+  force_disable_gc_ = force_disable_gc;
+  if (GetEagerDeletionThreshold() < 0 || force_disable_gc_) {
+    return;
   }
+  unused_vars_ = GetUnusedVars(prog_.Block(block_id_), ops_, keep_vars);
 }

 ExecutorPrepareContext::~ExecutorPrepareContext() {
   VLOG(5) << "destroy ExecutorPrepareContext";
 }

-static void DeleteUnusedTensors(
-    const Scope& scope, const OperatorBase* op, GarbageCollector* gc,
-    std::unordered_map<std::string, size_t>* ref_cnts) {
-  std::deque<std::shared_ptr<memory::Allocation>> garbages;
-
-  auto handler = [&](const VariableNameMap& name_map) {
-    for (auto& name_pair : name_map) {
-      for (auto& name : name_pair.second) {
-        auto it = ref_cnts->find(name);
-        if (it == ref_cnts->end()) continue;
-        if (--(it->second) != 0) {
-          continue;
-        }
-        auto* var = scope.FindVar(name);
-        if (var == nullptr) {
-          continue;
-        }
-
-        VLOG(2) << "Erase variable " << name;
-        if (var->IsType<LoDTensor>()) {
-          garbages.emplace_back(
-              var->GetMutable<LoDTensor>()->MoveMemoryHolder());
-        } else if (var->IsType<SelectedRows>()) {
-          garbages.emplace_back(var->GetMutable<SelectedRows>()
-                                    ->mutable_value()
-                                    ->MoveMemoryHolder());
-        } else if (var->IsType<LoDTensorArray>()) {
-          auto* lod_tensor_arr = var->GetMutable<LoDTensorArray>();
-          for (auto& t : *lod_tensor_arr) {
-            garbages.emplace_back(t.MoveMemoryHolder());
-          }
-        } else {
-          PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                       framework::ToTypeName(var->Type()), name);
-        }
-      }
-    }
-  };
-
-  handler(op->Inputs());
-  handler(op->Outputs());
-
-  if (!garbages.empty()) {
-    gc->Add(std::move(garbages));
-  }
-}
-
 Executor::Executor(const platform::Place& place) : place_(place) {}

 void Executor::Close() {
@@ -362,8 +289,8 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
 std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
     const ProgramDesc& program, int block_id,
     const std::vector<std::string>& skip_ref_cnt_vars, bool force_disable_gc) {
-  std::unique_ptr<ExecutorPrepareContext> ctx(new ExecutorPrepareContext(
-      program, block_id, skip_ref_cnt_vars, force_disable_gc));
+  std::unique_ptr<ExecutorPrepareContext> ctx(
+      new ExecutorPrepareContext(program, block_id));
   PADDLE_ENFORCE_LT(static_cast<size_t>(block_id), program.Size());
   auto& block = program.Block(block_id);
   for (auto& op_desc : block.AllOps()) {
@@ -375,6 +302,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
         ctx->prog_.Block(ctx->block_id_), &ctx->ops_);
   }
 #endif
+  ctx->PrepareUnusedVars(skip_ref_cnt_vars, force_disable_gc);
   return ctx;
 }
@@ -389,19 +317,17 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
   std::vector<std::shared_ptr<ExecutorPrepareContext>> result;
   size_t idx = 0;
   for (auto& bid : block_ids) {
-    ExecutorPrepareContext* ctx;
-    if (skip_ref_cnt_vars.empty()) {
-      ctx = new ExecutorPrepareContext(program, bid, std::vector<std::string>(),
-                                       force_disable_gc);
-    } else {
-      ctx = new ExecutorPrepareContext(program, bid, skip_ref_cnt_vars[idx],
-                                       force_disable_gc);
-    }
     PADDLE_ENFORCE_LT(static_cast<size_t>(bid), program.Size());
+    auto* ctx = new ExecutorPrepareContext(program, bid);
     auto& block = program.Block(bid);
     for (auto& op_desc : block.AllOps()) {
       ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
     }
+    if (skip_ref_cnt_vars.empty()) {
+      ctx->PrepareUnusedVars(std::vector<std::string>(), force_disable_gc);
+    } else {
+      ctx->PrepareUnusedVars(skip_ref_cnt_vars[idx], force_disable_gc);
+    }
     result.push_back(std::shared_ptr<ExecutorPrepareContext>(ctx));
     ++idx;
   }
@@ -425,7 +351,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
   // FIXME(zjl): recurrent_op is rather complex, we would
   // disable gc forcely in recurrent_op
   if (!ctx->force_disable_gc_ && max_memory_size >= 0) {
-    ctx->ResetReferenceCount();
 #ifdef PADDLE_WITH_CUDA
     if (platform::is_gpu_place(place_)) {
       if (IsFastEagerDeletionModeEnabled()) {
@@ -453,8 +378,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     op->Run(*local_scope, place_);

     if (gc) {
-      DeleteUnusedTensors(*local_scope, op.get(), gc.get(),
-                          &(ctx->runtime_ref_cnts_));
+      DeleteUnusedTensors(*local_scope, op.get(), ctx->unused_vars_, gc.get());
     }
   }
```
paddle/fluid/framework/executor.h:

```diff
@@ -30,22 +30,20 @@ namespace paddle {
 namespace framework {

 struct ExecutorPrepareContext {
-  ExecutorPrepareContext(const framework::ProgramDesc& prog, size_t block_id,
-                         const std::vector<std::string>& skip_ref_cnt_vars =
-                             std::vector<std::string>(),
-                         bool force_disable_gc = false);
+  ExecutorPrepareContext(const framework::ProgramDesc& prog, size_t block_id);

   ~ExecutorPrepareContext();

-  void ResetReferenceCount() { runtime_ref_cnts_ = global_ref_cnts_; }
+  void PrepareUnusedVars(const std::vector<std::string>& keep_vars,
+                         bool force_disable_gc = false);

   const framework::ProgramDesc& prog_;
-  size_t block_id_;
-  bool force_disable_gc_;
+  const size_t block_id_;

   std::vector<std::unique_ptr<OperatorBase>> ops_;

-  std::unordered_map<std::string, size_t> global_ref_cnts_;
-  std::unordered_map<std::string, size_t> runtime_ref_cnts_;
+  std::unordered_map<OperatorBase*, std::vector<std::string>> unused_vars_;
+  bool force_disable_gc_{false};
 };

 class Executor {
```
paddle/fluid/framework/executor_gc_helper.cc (new file, mode 100644):

```cpp
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/executor_gc_helper.h"
#include <deque>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace framework {

struct OpInOutInfo {
 public:
  void Build(const OperatorBase *op) {
    is_built_ = true;
    auto &inferer = op->Info().NoNeedBufferVarsInferer();
    if (inferer) {
      no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs());
      if (no_need_buffer_ins_.empty()) return;

      for (auto &in_name_pair : op->Inputs()) {
        if (no_need_buffer_ins_.count(in_name_pair.first) != 0) {
          continue;
        }
        for (auto &in_arg_name : in_name_pair.second) {
          other_args_set_.insert(in_arg_name);
        }
      }

      for (auto &out_name_pair : op->Outputs()) {
        for (auto &out_arg_name : out_name_pair.second) {
          other_args_set_.insert(out_arg_name);
        }
      }
    }
  }

  bool IsBuilt() const { return is_built_; }

  bool IsInArgBufferNeeded(const std::string &in_arg_name) const {
    return no_need_buffer_ins_.empty() ||
           other_args_set_.count(in_arg_name) != 0;
  }

 private:
  // A set to record unused buffer input vars of op
  std::unordered_set<std::string> no_need_buffer_ins_;
  // A set to record other args of op (including in, out)
  std::unordered_set<std::string> other_args_set_;
  bool is_built_{false};
};

static bool VarCanBeDeleted(const std::string &name, const BlockDesc &block,
                            const std::unordered_set<std::string> &skip_vars) {
  if (skip_vars.count(name) != 0) {
    return false;
  }

  auto *var_desc = block.FindVar(name);
  if (var_desc == nullptr || var_desc->Persistable()) {
    return false;
  }

  auto type = var_desc->Proto()->type().type();

  return type == proto::VarType::LOD_TENSOR ||
         type == proto::VarType::SELECTED_ROWS ||
         type == proto::VarType::LOD_TENSOR_ARRAY;
}

std::unordered_map<OperatorBase *, std::vector<std::string>> GetUnusedVars(
    const BlockDesc &block,
    const std::vector<std::unique_ptr<OperatorBase>> &ops,
    const std::vector<std::string> &skip_var_list) {
  std::unordered_set<std::string> skip_vars(skip_var_list.begin(),
                                            skip_var_list.end());

  std::unordered_map<std::string, size_t> var_op_idx_map;

  for (size_t i = 0; i < ops.size(); ++i) {
    auto *op = ops[i].get();

    OpInOutInfo info;
    for (auto &name_pair : op->Inputs()) {
      for (auto &name : name_pair.second) {
        if (!VarCanBeDeleted(name, block, skip_vars)) {
          continue;
        }

        // var can be gc-ed
        if (!info.IsBuilt()) {
          info.Build(op);
        }

        if (info.IsInArgBufferNeeded(name)) {
          // Update the last living op of variable to current op
          var_op_idx_map[name] = i;
        } else {
          VLOG(10) << "Skip reference count computing of variable "
                   << name_pair.first << "(" << name << ") in Operator "
                   << op->Type();
        }
      }
    }

    for (auto &name_pair : op->Outputs()) {
      for (auto &name : name_pair.second) {
        if (VarCanBeDeleted(name, block, skip_vars)) {
          // Update the last living op of variable to current op
          var_op_idx_map[name] = i;
        }
      }
    }
  }

  std::unordered_map<OperatorBase *, std::vector<std::string>> result;
  for (auto &name_op_idx_pair : var_op_idx_map) {
    auto &name = name_op_idx_pair.first;
    size_t op_idx = name_op_idx_pair.second;
    result[ops[op_idx].get()].emplace_back(name);
  }
  return result;
}

void DeleteUnusedTensors(
    const Scope &scope, OperatorBase *op,
    const std::unordered_map<OperatorBase *, std::vector<std::string>>
        &delete_vars_map,
    GarbageCollector *gc) {
  auto iter = delete_vars_map.find(op);
  if (iter == delete_vars_map.end()) {
    return;
  }

  auto &delete_vars = iter->second;

  std::deque<std::shared_ptr<memory::Allocation>> garbages;

  for (auto &var_name : delete_vars) {
    auto *var = scope.FindVar(var_name);
    if (var == nullptr) {
      continue;
    }

    VLOG(2) << "Erase variable " << var_name;
    if (var->IsType<LoDTensor>()) {
      garbages.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
    } else if (var->IsType<SelectedRows>()) {
      garbages.emplace_back(var->GetMutable<SelectedRows>()
                                ->mutable_value()
                                ->MoveMemoryHolder());
    } else if (var->IsType<LoDTensorArray>()) {
      auto *lod_tensor_arr = var->GetMutable<LoDTensorArray>();
      for (auto &t : *lod_tensor_arr) {
        garbages.emplace_back(t.MoveMemoryHolder());
      }
    } else {
      PADDLE_THROW("Type %s of %s is not supported eager deletion",
                   framework::ToTypeName(var->Type()), var_name);
    }
  }

  if (!garbages.empty()) {
    gc->Add(std::move(garbages));
  }
}

}  // namespace framework
}  // namespace paddle
```
paddle/fluid/framework/executor_gc_helper.h (new file, mode 100644):

```cpp
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace framework {

// Result map: op -> variable names that can be deleted after op runs
std::unordered_map<OperatorBase *, std::vector<std::string>> GetUnusedVars(
    const BlockDesc &block,
    const std::vector<std::unique_ptr<OperatorBase>> &ops,
    const std::vector<std::string> &skip_vars);

// Collect unused tensors after op runs
void DeleteUnusedTensors(
    const Scope &scope, OperatorBase *op,
    const std::unordered_map<OperatorBase *, std::vector<std::string>>
        &delete_vars_map,
    GarbageCollector *gc);

}  // namespace framework
}  // namespace paddle
```
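Taken together, the two helpers replace the per-op reference counting that executor.cc previously did inline. A condensed sketch of the executor-side flow (`RunBlockWithEagerGC` is a hypothetical wrapper; the real call sites are in `Executor::Prepare` and `Executor::RunPreparedContext` in the executor.cc diff above):

```cpp
#include "paddle/fluid/framework/executor_gc_helper.h"

namespace paddle {
namespace framework {

void RunBlockWithEagerGC(const BlockDesc &block, const Scope &scope,
                         const std::vector<std::unique_ptr<OperatorBase>> &ops,
                         GarbageCollector *gc, const platform::Place &place) {
  // Computed once per prepared program: op -> variables whose buffers are
  // dead once that op has run (respecting no-need-buffer inputs).
  auto unused_vars = GetUnusedVars(block, ops, /*skip_vars=*/{});
  for (auto &op : ops) {
    op->Run(scope, place);
    // Hands each dead variable's allocation to the garbage collector
    // immediately, instead of waiting for the scope to be destroyed.
    DeleteUnusedTensors(scope, op.get(), unused_vars, gc);
  }
}

}  // namespace framework
}  // namespace paddle
```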
paddle/fluid/framework/garbage_collector.cc:

```diff
@@ -13,14 +13,36 @@
 // limitations under the License.

 #include <algorithm>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <mutex>  // NOLINT
+#include <utility>
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #endif
+#include "gflags/gflags.h"
+#include "glog/logging.h"
 #include "paddle/fluid/framework/garbage_collector.h"

 namespace paddle {
 namespace framework {

+DEFINE_double(
+    eager_delete_tensor_gb, -1.0,
+    "Memory size threshold (GB) when the garbage collector clear tensors."
+    "Disabled when this value is less than 0");
+
+DEFINE_bool(fast_eager_deletion_mode, true,
+            "Fast eager deletion mode. If enabled, memory would release "
+            "immediately without waiting GPU kernel ends.");
+
+DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
+              "Fraction of eager deletion. If less than 1.0, all variables in "
+              "the program would be sorted according to its memory size, and "
+              "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
+              "variables would be deleted.");
+
 GarbageCollector::GarbageCollector(const platform::Place &place,
                                    size_t max_memory_size)
     : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
@@ -85,5 +107,25 @@ void StreamGarbageCollector::ClearCallback(
   callback_manager_->AddCallback(callback);
 }
 #endif
+
+int64_t GetEagerDeletionThreshold() {
+  return FLAGS_eager_delete_tensor_gb < 0
+             ? -1
+             : static_cast<int64_t>(FLAGS_eager_delete_tensor_gb *
+                                    (static_cast<int64_t>(1) << 30));
+}
+
+bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
+
+void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode) {
+  FLAGS_eager_delete_tensor_gb = threshold;
+  FLAGS_memory_fraction_of_eager_deletion = fraction;
+  FLAGS_fast_eager_deletion_mode = fast_mode;
+}
+
+double GetEagerDeletionMemoryFraction() {
+  return FLAGS_memory_fraction_of_eager_deletion;
+}
+
 }  // namespace framework
 }  // namespace paddle
```
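With the flags and accessors now co-located here, the conversion semantics are easy to state: the threshold flag is given in GB and turned into a byte count by multiplying by 2^30, with any negative value meaning "disabled". A small hedged usage sketch of the setters/getters defined above (the surrounding `main()` is illustrative only):

```cpp
#include <cassert>
#include <cstdint>
#include "paddle/fluid/framework/garbage_collector.h"

int main() {
  namespace fw = paddle::framework;

  // 0.5 GB threshold, consider all variables for eager deletion, and release
  // memory without waiting for GPU kernels to finish.
  fw::SetEagerDeletionMode(/*threshold=*/0.5, /*fraction=*/1.0,
                           /*fast_mode=*/true);

  // The GB flag becomes a byte count: 0.5 * 2^30 = 2^29 = 536,870,912 bytes.
  assert(fw::GetEagerDeletionThreshold() == (static_cast<int64_t>(1) << 29));
  assert(fw::IsFastEagerDeletionModeEnabled());
  assert(fw::GetEagerDeletionMemoryFraction() == 1.0);
  return 0;
}
```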
paddle/fluid/framework/garbage_collector.h:

```diff
@@ -18,6 +18,8 @@
 #include <functional>
 #include <memory>
 #include <mutex>  // NOLINT
+#include <utility>
+#include "gflags/gflags.h"
 #include "paddle/fluid/platform/device_context.h"

 namespace paddle {
@@ -126,5 +128,12 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) {
   }
 }

+int64_t GetEagerDeletionThreshold();
+bool IsFastEagerDeletionModeEnabled();
+
+void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode);
+
+double GetEagerDeletionMemoryFraction();
+
 }  // namespace framework
 }  // namespace paddle
```
paddle/fluid/framework/no_need_buffer_vars_inference.h (new file, mode 100644):

```cpp
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"

namespace paddle {
namespace framework {

class NoNeedBufferVarsInference {
 public:
  NoNeedBufferVarsInference(const VariableNameMap &inputs,
                            const VariableNameMap &outputs,
                            const AttributeMap &attrs)
      : inputs_(inputs), outputs_(outputs), attrs_(attrs) {}

  virtual ~NoNeedBufferVarsInference() = default;

  const VariableNameMap &Inputs() const { return inputs_; }

  const VariableNameMap &Outputs() const { return outputs_; }

  const AttributeMap &Attrs() const { return attrs_; }

  virtual std::unordered_set<std::string> operator()() const = 0;

 private:
  const VariableNameMap &inputs_;
  const VariableNameMap &outputs_;
  const AttributeMap &attrs_;
};

#define DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(class_type, ...)                \
  class class_type : public ::paddle::framework::NoNeedBufferVarsInference {  \
   public:                                                                    \
    using ::paddle::framework::NoNeedBufferVarsInference::                    \
        NoNeedBufferVarsInference;                                            \
                                                                              \
    std::unordered_set<std::string> operator()() const override {             \
      return {__VA_ARGS__};                                                   \
    }                                                                         \
  }

}  // namespace framework
}  // namespace paddle
```
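Usage as it appears later in this same diff (concat_op.cc): the macro expands to a small subclass whose `operator()` returns the inputs that are needed only for shape/LoD inference, and the class is passed to `REGISTER_OPERATOR` so `OpInfoFiller` can store the inferer in `OpInfo`:

```cpp
// From paddle/fluid/operators/concat_op.cc in this commit: the gradient of
// concat never reads X's data, only its shapes.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference,
                                      "X");

// REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad,
//                   ops::ConcatOpGradNoNeedBufferVarInference);
```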
paddle/fluid/framework/op_info.h:

```diff
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <unordered_map>

 #include "paddle/fluid/framework/attribute.h"
+#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
 #include "paddle/fluid/framework/type_defs.h"
 #include "paddle/fluid/platform/macros.h"
@@ -39,6 +40,7 @@ struct OpInfo {
   InferVarTypeFN infer_var_type_;
   InferShapeFN infer_shape_;
   InferInplaceOpFN infer_inplace_;
+  InferNoNeedBufferVarsFN infer_no_need_buffer_vars_;

   bool HasOpProtoAndChecker() const {
     return proto_ != nullptr && checker_ != nullptr;
@@ -64,6 +66,10 @@ struct OpInfo {
   }

   const OpAttrChecker* Checker() const { return checker_; }
+
+  const InferNoNeedBufferVarsFN& NoNeedBufferVarsInferer() const {
+    return infer_no_need_buffer_vars_;
+  }
 };

 class OpInfoMap {
```
paddle/fluid/framework/operator.cc:

```diff
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <algorithm>
 #include <sstream>
 #include <string>
+#include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
@@ -326,7 +327,12 @@ OperatorBase::OperatorBase(const std::string& type,
                            const VariableNameMap& inputs,
                            const VariableNameMap& outputs,
                            const AttributeMap& attrs)
-    : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) {
+    : type_(type),
+      inputs_(inputs),
+      outputs_(outputs),
+      attrs_(attrs),
+      // NOTE(zjl): why op_info may be nullptr?
+      info_(OpInfoMap::Instance().GetNullable(type)) {
   GenerateTemporaryNames();
   CheckAllInputOutputSet();
 }
@@ -350,7 +356,7 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
     }
     return ret_val;
   }
-  auto& info = OpInfoMap::Instance().Get(Type());
+  auto& info = Info();

   // get all OpProto::Var for outputs
   for (auto& o : info.Proto().outputs()) {
@@ -366,18 +372,16 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
 }

 void OperatorBase::CheckAllInputOutputSet() const {
-  auto& info_map = OpInfoMap::Instance();
-  auto* op_info = info_map.GetNullable(Type());
-  if (op_info == nullptr || op_info->proto_ == nullptr) return;
+  if (info_ == nullptr || info_->proto_ == nullptr) return;

-  for (auto& in : op_info->Proto().inputs()) {
+  for (auto& in : info_->Proto().inputs()) {
     if (!in.dispensable()) {
       PADDLE_ENFORCE(inputs_.find(in.name()) != inputs_.end(),
                      "Operator %s's input, %s, is not set", Type(), in.name());
     }
   }

-  for (auto& out : op_info->Proto().outputs()) {
+  for (auto& out : info_->Proto().outputs()) {
     if (!out.dispensable()) {
       PADDLE_ENFORCE(outputs_.find(out.name()) != outputs_.end(),
                      "Operator %s's output, %s, is not set", Type(),
@@ -997,7 +1001,27 @@ Scope* OperatorWithKernel::PrepareData(
     std::vector<std::string>* transfered_inplace_vars,
     RuntimeContext* ctx) const {
   Scope* new_scope = nullptr;
+
+  std::unordered_set<std::string> no_buffer_ins;
+  if (info_) {
+    auto& no_buffer_inferer = info_->NoNeedBufferVarsInferer();
+    // Some op may not register NoNeedBufferVarsInferer
+    if (no_buffer_inferer) {
+      no_buffer_ins = no_buffer_inferer(Inputs(), Outputs(), Attrs());
+    }
+  }
+
   for (auto& var_name_item : Inputs()) {
+    // NOTE(zjl): STL does not guarantee fast std::unordered_set::count when set
+    // is empty. At least STL implemented on my mac does calculate hash code
+    // of search key even though the set is empty.
+    if (!no_buffer_ins.empty() &&
+        no_buffer_ins.count(var_name_item.first) > 0) {
+      VLOG(1) << "Skip scanning input " << var_name_item.first
+              << " in Operator " << type_;
+      continue;
+    }
+
     std::vector<Variable*>& input_vars = ctx->inputs[var_name_item.first];

     for (size_t i = 0; i < var_name_item.second.size(); ++i) {
```
paddle/fluid/framework/operator.h:

```diff
@@ -160,6 +160,11 @@ class OperatorBase {
   const VariableNameMap& Inputs() const { return inputs_; }
   const VariableNameMap& Outputs() const { return outputs_; }

+  const OpInfo& Info() const {
+    PADDLE_ENFORCE_NOT_NULL(info_, "OpInfo of %s is not found", type_);
+    return *info_;
+  }
+
   bool HasInputs(const std::string& name) const;
   //! Get a input with argument's name described in `op_proto`
   std::string Input(const std::string& name) const;
@@ -194,6 +199,10 @@ class OperatorBase {
   // IG (Inputs Gradients)
   VariableNameMap outputs_;
   AttributeMap attrs_;
+
+  // OpInfo
+  const OpInfo* info_;
+
   // Whether this operator executes in an Executor.
   bool run_by_executor_{true};
@@ -444,7 +453,7 @@ class OperatorWithKernel : public OperatorBase {
   }

   virtual void InferShape(InferShapeContext* ctx) const {
-    OpInfoMap::Instance().Get(Type()).infer_shape_(ctx);
+    Info().infer_shape_(ctx);
   }

   void RuntimeInferShape(const Scope& scope, const platform::Place& place,
```
paddle/fluid/framework/scope.cc:

```diff
@@ -29,15 +29,6 @@ DEFINE_bool(
     "Delete local scope eagerly. It will reduce GPU memory usage but "
    "slow down the destruction of variables.(around 1% performance harm)");

-DEFINE_double(
-    eager_delete_tensor_gb, -1.0,
-    "Memory size threshold (GB) when the garbage collector clear tensors."
-    "Disabled when this value is less than 0");
-
-DEFINE_bool(fast_eager_deletion_mode, true,
-            "Fast eager deletion mode. If enabled, memory would release "
-            "immediately without waiting GPU kernel ends.");
-
 // When in inference scenario, the scopes will not be written by two threads in
 // a mean time, but a scope may be read by multiple threads concurrently, and
 // the mutex will cause serious performance issue.
@@ -57,15 +48,6 @@ DEFINE_bool(fast_eager_deletion_mode, true,
 namespace paddle {
 namespace framework {

-int64_t GetEagerDeletionThreshold() {
-  return FLAGS_eager_delete_tensor_gb < 0
-             ? -1
-             : static_cast<int64_t>(FLAGS_eager_delete_tensor_gb *
-                                    (static_cast<int64_t>(1) << 30));
-}
-
-bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
-
 Scope::~Scope() { DropKids(); }

 Scope& Scope::NewScope() const {
```
paddle/fluid/framework/scope.h:

```diff
@@ -32,9 +32,6 @@ extern "C" {
 namespace paddle {
 namespace framework {

-int64_t GetEagerDeletionThreshold();
-bool IsFastEagerDeletionModeEnabled();
-
 class Scope;

 /**
```
paddle/fluid/framework/type_defs.h:

```diff
@@ -30,6 +30,7 @@ class InferShapeContext;
 class InferVarTypeContext;
 class BlockDesc;
 class Variable;
+class NoNeedBufferVarsInference;

 using VariableNameMap = std::map<std::string, std::vector<std::string>>;
 // TODO(panyx0718): Replace vector with something like gtl::Vector.
@@ -61,5 +62,9 @@ using InferShapeFN = std::function<void(InferShapeContext*)>;
 using InplacePair = std::unordered_map<std::string, std::string>;
 using InferInplaceOpFN = std::function<InplacePair(const OpDesc&)>;

+using InferNoNeedBufferVarsFN = std::function<std::unordered_set<std::string>(
+    const VariableNameMap& /*inputs*/, const VariableNameMap& /*outputs*/,
+    const AttributeMap& /*attrs*/)>;
+
 }  // namespace framework
 }  // namespace paddle
```
paddle/fluid/operators/add_position_encoding_op.cc:

```diff
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/add_position_encoding_op.h"
+#include <memory>

 namespace paddle {
 namespace operators {
@@ -39,13 +40,8 @@ class AddPositionEncodingOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;

   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) must not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Out"), "Out must not be null.");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-                   "Out@GRAD must not be null.");
-
-    auto out_dims = ctx->GetInputDim("Out");
     if (ctx->HasOutput(framework::GradVarName("X"))) {
+      auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
       ctx->SetOutputDim(framework::GradVarName("X"), out_dims);
     }
   }
@@ -75,6 +71,22 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };

+class AddPositionEncodingGradOpDescMaker
+    : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("add_position_encoding_grad");
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
@@ -83,7 +95,7 @@ namespace plt = paddle::platform;

 REGISTER_OPERATOR(add_position_encoding, ops::AddPositionEncodingOp,
                   ops::AddPositionEncodingOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::AddPositionEncodingGradOpDescMaker);
 REGISTER_OPERATOR(add_position_encoding_grad, ops::AddPositionEncodingOpGrad);

 REGISTER_OP_CPU_KERNEL(
```
paddle/fluid/operators/clip_op.cc:

```diff
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/clip_op.h"
+#include <memory>

 namespace paddle {
 namespace operators {
@@ -76,12 +77,28 @@ class ClipOpGrad : public framework::OperatorWithKernel {
   }
 };

+class ClipGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("clip_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle

 namespace ops = paddle::operators;
 REGISTER_OPERATOR(clip, ops::ClipOp, ops::ClipOpMaker<float>,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::ClipGradOpDescMaker);
 REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad);
 REGISTER_OP_CPU_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CPUDeviceContext, float>);
```
paddle/fluid/operators/concat_op.cc:

```diff
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/concat_op.h"
+#include <memory>
 #include <string>
 #include <vector>
@@ -120,11 +121,7 @@ Examples:
 class ConcatOpGrad : public framework::OperatorWithKernel {
  public:
-  ConcatOpGrad(const std::string &type,
-               const framework::VariableNameMap &inputs,
-               const framework::VariableNameMap &outputs,
-               const framework::AttributeMap &attrs)
-      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+  using framework::OperatorWithKernel::OperatorWithKernel;

   void InferShape(framework::InferShapeContext *ctx) const override {
     auto in_x = "X";
@@ -142,6 +139,33 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
       }
     }
   }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<Tensor>(framework::GradVarName("Out"))->type(),
+        ctx.GetPlace());
+  }
 };

+DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ConcatOpGradNoNeedBufferVarInference,
+                                      "X");
+
+class ConcatGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("concat_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X", false));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
@@ -149,9 +173,9 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<
-                      false> /* set false to disable empty grad */);
-REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad);
+                  ops::ConcatGradOpDescMaker);
+REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad,
+                  ops::ConcatOpGradNoNeedBufferVarInference);
 REGISTER_OP_CPU_KERNEL(
     concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, double>,
     ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>,
```
paddle/fluid/operators/conv_op.cc:

```diff
@@ -455,13 +455,13 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
   return type;
 }

-class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
+class Conv2DGradMaker : public framework::SingleGradOpDescMaker {
  public:
   using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

   std::unique_ptr<framework::OpDesc> Apply() const override {
     auto* op = new framework::OpDesc();
-    op->SetType(GradOpType());
+    op->SetType(this->ForwardOpType() + "_grad");
     op->SetInput("Input", Input("Input"));
     op->SetInput("Filter", Input("Filter"));
     op->SetInput("Bias", Input("Bias"));
@@ -470,14 +470,33 @@ class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
     op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
     op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
     op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias"));
     op->SetAttrMap(Attrs());

     return std::unique_ptr<framework::OpDesc>(op);
   }
+};

-  virtual std::string GradOpType() const {
-    return this->ForwardOpType() + "_grad";
+class Conv3DGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op = new framework::OpDesc();
+    op->SetType(this->ForwardOpType() + "_grad");
+    op->SetInput("Input", Input("Input"));
+    op->SetInput("Filter", Input("Filter"));
+    op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
+
+    op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
+    op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
+
+    if (ForwardOp().Inputs().count("ResidualData") != 0) {
+      op->SetInput("ResidualData", Input("ResidualData"));
+    }
+
+    op->SetAttrMap(Attrs());
+
+    return std::unique_ptr<framework::OpDesc>(op);
   }
 };
@@ -486,17 +505,16 @@ class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
-                  ops::ConvOpInferVarType, ops::Conv2dGradMaker);
+                  ops::ConvOpInferVarType, ops::Conv2DGradMaker);
 REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad);

 // depthwise convolution op
 REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
-                  ops::ConvOpInferVarType, ops::Conv2dGradMaker);
+                  ops::ConvOpInferVarType, ops::Conv2DGradMaker);
 REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);

 REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
-                  ops::ConvOpInferVarType,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::ConvOpInferVarType, ops::Conv3DGradMaker);
 REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad);

 // depthwise conv kernel
```
paddle/fluid/operators/crop_op.cc
...
...
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/crop_op.h"
#include <boost/lexical_cast.hpp>
#include <memory>
#include <string>
#include <vector>

namespace paddle {
namespace operators {
...
...
@@ -178,12 +180,31 @@ class CropOpGrad : public framework::OperatorWithKernel {
  }
};

class CropGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("crop_grad");
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op->SetInput("X", Input("X"));
    if (ForwardOp().Inputs().count("Offsets") > 0) {
      op->SetInput("Offsets", Input("Offsets"));
    }
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetAttrMap(Attrs());
    return op;
  }
};

}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(crop, ops::CropOp, ops::CropOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::CropGradOpDescMaker);
REGISTER_OPERATOR(crop_grad, ops::CropOpGrad);
REGISTER_OP_CPU_KERNEL(
    crop, ops::CropKernel<paddle::platform::CPUDeviceContext, float>);
...
...
paddle/fluid/operators/cross_entropy_op.cc
...
...
@@ -238,6 +238,23 @@ class CrossEntropyGradientOp : public CrossEntropyGradientOpBase {
  }
};

class CrossEntropyGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("cross_entropy_grad");
    op->SetInput("X", Input("X"));
    op->SetInput("Label", Input("Label"));
    op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetAttrMap(Attrs());
    return op;
  }
};

class CrossEntropyOp2 : public CrossEntropyOpBase {
 public:
  using CrossEntropyOpBase::CrossEntropyOpBase;
...
...
@@ -354,7 +371,7 @@ using CPUCtx = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase,
                  ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::CrossEntropyGradOpDescMaker);
REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp);
REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
                       ops::CrossEntropyOpKernel<CPUCtx, double>);
...
...
paddle/fluid/operators/cudnn_lstm_op.cc
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -170,11 +171,6 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(Input) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) of LSTM should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("last_h"),
-                   "Input(last_h) of LSTM should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("last_c"),
-                   "Input(last_c) of LSTM should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Cache"),
-                   "Input(last_c) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("InitH"),
...
...
@@ -197,6 +193,35 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel {
  }
};

class CudnnLSTMGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("cudnn_lstm_grad");
    op->SetInput("Input", Input("Input"));
    op->SetInput("InitH", Input("InitH"));
    op->SetInput("InitC", Input("InitC"));
    op->SetInput("W", Input("W"));
    if (ForwardOp().Inputs().count("Cache") > 0) {
      op->SetInput("Cache", Input("Cache"));
    }
    op->SetInput("Out", Output("Out"));
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op->SetInput(framework::GradVarName("last_c"), OutputGrad("last_c"));
    op->SetInput(framework::GradVarName("last_h"), OutputGrad("last_h"));

    op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
    op->SetOutput(framework::GradVarName("W"), InputGrad("W"));
    op->SetOutput(framework::GradVarName("InitH"), InputGrad("InitH"));
    op->SetOutput(framework::GradVarName("InitC"), InputGrad("InitC"));
    op->SetAttrMap(Attrs());
    return op;
  }
};

template <typename T>
class NotImpleKernel : public framework::OpKernel<T> {
 public:
...
...
@@ -211,7 +236,7 @@ class NotImpleKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OPERATOR(cudnn_lstm, ops::CudnnLSTMOp, ops::CudnnLSTMOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::CudnnLSTMGradOpDescMaker);
REGISTER_OPERATOR(cudnn_lstm_grad, ops::CudnnLSTMGradOp);

REGISTER_OP_CPU_KERNEL(cudnn_lstm, ops::NotImpleKernel<float>);
...
...
paddle/fluid/operators/distributed/parameter_prefetch.cc
...
...
@@ -14,6 +14,7 @@
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/operators/distributed/parameter_prefetch.h"
...
...
@@ -218,7 +219,7 @@ void prefetch(const std::string& id_name, const std::string& out_name,
        boost::get<platform::CUDAPlace>(id_tensor.place()),
        id_tensor.data<int64_t>(), sizeof(int64_t) * id_tensor.numel(),
        stream);

-    for (size_t i = 0; i < cpu_tensor.numel(); ++i) {
+    for (int64_t i = 0; i < cpu_tensor.numel(); ++i) {
      ids_vector.push_back(cpu_tensor_data[i]);
    }
#endif
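The index-type change above (and the mirrored int -> size_t change in ctr_reader.h below) keeps the loop index the same type as its bound: Tensor::numel() returns int64_t, so a size_t index makes `i < cpu_tensor.numel()` a mixed signed/unsigned comparison, which -Wsign-compare flags and which would wrap a negative bound into a huge unsigned value. A standalone illustration (a sketch, not Paddle code):

#include <cstdint>
#include <cstdio>

// Stand-in for Tensor::numel(), which returns a signed 64-bit element count.
int64_t numel() { return 5; }

int main() {
  // With `size_t i`, the comparison `i < numel()` would convert the signed
  // count to unsigned. Keeping index and bound both int64_t, as the fix
  // does, avoids the conversion entirely:
  for (int64_t i = 0; i < numel(); ++i) {
    std::printf("%lld\n", static_cast<long long>(i));
  }
  return 0;
}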
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.cc
...
...
@@ -16,8 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"

namespace ops = paddle::operators;
REGISTER_ELEMWISE_GRAD_MAKER(elementwise_add, Add);
-REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_add, "Add", "Out = X + Y", "Out",
-                              "X");
+REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_add, "Add", "Out = X + Y");

REGISTER_OP_CPU_KERNEL(
    elementwise_add,
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
...
...
@@ -272,12 +272,11 @@ class ElementwiseGradOpInplace : public framework::InplaceOpInference {
  }
};

DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ElementwiseGradNoBufVarsInference, "Y");

}  // namespace operators
}  // namespace paddle
#define REGISTER_ELEMWISE_GRAD_MAKER(kernel_type, op_name) \
class kernel_type##GradMaker \
: public paddle::framework::SingleGradOpDescMaker { \
...
...
@@ -311,18 +310,19 @@ class ElementwiseGradOpInplace : public framework::InplaceOpInference {
::paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(op_type##_grad, ::paddle::operators::ElementwiseOpGrad)
-#define REGISTER_ELEMWISE_EXPLICIT_OP(op_type, op_name, equation, ...)    \
-  class __ElemwiseOp##op_type##Maker__                                    \
-      : public ::paddle::operators::ElementwiseOpMaker {                  \
-   protected:                                                             \
-    virtual std::string GetName() const { return op_name; }               \
-    virtual std::string GetEquation() const { return equation; }          \
-  };                                                                      \
-  REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp,          \
-                    __ElemwiseOp##op_type##Maker__,                       \
-                    ::paddle::operators::ElementwiseOpInferVarType,       \
-                    op_type##GradMaker,                                   \
-                    ::paddle::operators::ElementwiseOpInplace);           \
-  REGISTER_OPERATOR(op_type##_grad,                                       \
-                    ::paddle::operators::ElementwiseOpExplicitGrad,       \
-                    ::paddle::operators::ElementwiseGradOpInplace)
+#define REGISTER_ELEMWISE_EXPLICIT_OP(op_type, op_name, equation)         \
+  class __ElemwiseOp##op_type##Maker__                                    \
+      : public ::paddle::operators::ElementwiseOpMaker {                  \
+   protected:                                                             \
+    virtual std::string GetName() const { return op_name; }               \
+    virtual std::string GetEquation() const { return equation; }          \
+  };                                                                      \
+  REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp,          \
+                    __ElemwiseOp##op_type##Maker__,                       \
+                    ::paddle::operators::ElementwiseOpInferVarType,       \
+                    op_type##GradMaker,                                   \
+                    ::paddle::operators::ElementwiseOpInplace);           \
+  REGISTER_OPERATOR(op_type##_grad,                                       \
+                    ::paddle::operators::ElementwiseOpExplicitGrad,       \
+                    ::paddle::operators::ElementwiseGradOpInplace,        \
+                    ::paddle::operators::ElementwiseGradNoBufVarsInference)
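For concreteness, here is the new macro manually expanded for REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_add, "Add", "Out = X + Y") — a sketch of the preprocessor output (it only compiles in a file that includes elementwise_op.h, since REGISTER_OPERATOR is itself a registrar macro):

// elementwise_addGradMaker below is produced by the companion
// REGISTER_ELEMWISE_GRAD_MAKER(elementwise_add, Add) invocation.
class __ElemwiseOpelementwise_addMaker__
    : public ::paddle::operators::ElementwiseOpMaker {
 protected:
  virtual std::string GetName() const { return "Add"; }
  virtual std::string GetEquation() const { return "Out = X + Y"; }
};
REGISTER_OPERATOR(elementwise_add, ::paddle::operators::ElementwiseOp,
                  __ElemwiseOpelementwise_addMaker__,
                  ::paddle::operators::ElementwiseOpInferVarType,
                  elementwise_addGradMaker,
                  ::paddle::operators::ElementwiseOpInplace);
REGISTER_OPERATOR(elementwise_add_grad,
                  ::paddle::operators::ElementwiseOpExplicitGrad,
                  ::paddle::operators::ElementwiseGradOpInplace,
                  ::paddle::operators::ElementwiseGradNoBufVarsInference);

The net effect of the new trailing argument is that every explicit elementwise grad op now carries the ElementwiseGradNoBufVarsInference declared above, marking Y as a no-need-buffer input.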
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
...
...
@@ -16,8 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"

namespace ops = paddle::operators;
REGISTER_ELEMWISE_GRAD_MAKER(elementwise_sub, Sub);
-REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_sub, "Sub", "Out = X - Y", "Out",
-                              "X");
+REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_sub, "Sub", "Out = X - Y");

REGISTER_OP_CPU_KERNEL(
    elementwise_sub,
...
...
paddle/fluid/operators/gather_op.cc
...
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/gather_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ddim.h"

namespace paddle {
...
...
@@ -59,8 +62,9 @@ class GatherGradOp : public framework::OperatorWithKernel {
 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
-                                   ctx.device_context());
+    return framework::OpKernelType(
+        ctx.Input<Tensor>(framework::GradVarName("Out"))->type(),
+        ctx.device_context());
  }
};
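This kernel-key change is tied to the GatherGradNoNeedBufferVarInference declaration added below: once X is declared a no-need-buffer input of gather_grad, the executor is allowed to free X's data buffer before the grad op runs, keeping only its dims/LoD. A comments-only sketch of the reasoning (inferred from the no-need-buffer machinery this PR introduces):

// Before: kernel dtype was taken from ctx.Input<Tensor>("X")->type().
// After : taken from ctx.Input<Tensor>(framework::GradVarName("Out")).
// Rationale (presumed): with the no-need-buffer declaration in effect, X may
// arrive holding shape metadata but no initialized allocation, so querying
// its dtype is no longer reliable; Out@GRAD is always a live, initialized
// tensor at this point. The identical change is made for lod_reset below.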
...
...
@@ -94,13 +98,34 @@ Out = [[3, 4],
)DOC");
  }
};

class GatherGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("gather_grad");
    op->SetInput("Index", Input("Index"));
    op->SetInput("X", Input("X"));
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetAttrMap(Attrs());
    return op;
  }
};

DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(GatherGradNoNeedBufferVarInference, "X");

}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(gather, ops::GatherOp, ops::GatherOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OPERATOR(gather_grad, ops::GatherGradOp);
+                  ops::GatherGradOpDescMaker);
+REGISTER_OPERATOR(gather_grad, ops::GatherGradOp,
+                  ops::GatherGradNoNeedBufferVarInference);
REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel<float>,
                       ops::GatherOpKernel<double>, ops::GatherOpKernel<int>,
                       ops::GatherOpKernel<uint8_t>,
...
...
paddle/fluid/operators/lod_reset_op.cc
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/lod_reset_op.h"
#include <memory>

namespace paddle {
namespace operators {
...
...
@@ -146,18 +147,39 @@ class LoDResetGradOp : public framework::OperatorWithKernel {
 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(
-        ctx.Input<framework::LoDTensor>("X")->type(), ctx.device_context());
+    return framework::OpKernelType(
+        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))->type(),
+        ctx.device_context());
  }
};
class LoDResetGradDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("lod_reset_grad");
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op->SetInput("X", Input("X"));
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetAttrMap(Attrs());
    return op;
  }
};

DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference,
                                      "X");

}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp);
+                  ops::LoDResetGradDescMaker);
+REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp,
+                  ops::LoDResetGradNoNeedBufferVarInference);
REGISTER_OP_CPU_KERNEL(
    lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
    ops::LoDResetKernel<paddle::platform::CPUPlace, double>,
...
...
paddle/fluid/operators/reader/ctr_reader.h
...
...
@@ -21,6 +21,7 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
...
...
@@ -152,7 +153,7 @@ class CTRReader : public framework::FileReader {
    queue_->ReOpen();
    VLOG(3) << "reopen success";
    VLOG(3) << "thread_num " << thread_num_;
-    for (int thread_id = 0; thread_id < thread_num_; thread_id++) {
+    for (size_t thread_id = 0; thread_id < thread_num_; thread_id++) {
      read_threads_.emplace_back(new std::thread(std::bind(
          &ReadThread, file_groups_[thread_id], data_desc_,
          static_cast<int>(thread_id), &read_thread_status_, queue_)));
...
paddle/fluid/pybind/pybind.cc
...
...
@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -139,6 +140,7 @@ PYBIND11_MODULE(core, m) {
  paddle::platform::CpuTotalPhysicalMemory();
  paddle::memory::allocation::UseAllocatorStrategyGFlag();
  m.doc() = "C++ core of PaddlePaddle";

// using framework in this function. Since it is inside a function, it will
...
...
@@ -153,6 +155,11 @@ PYBIND11_MODULE(core, m) {
    return paddle::operators::AppendPythonCallableObjectAndReturnId(py_obj);
  });

+  // NOTE(zjl): ctest would load environment variables at the beginning even
+  // though we have not `import paddle.fluid as fluid`. So we add this API
+  // to enable eager deletion mode in unittest.
+  m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode);
+
  m.add_object("_cleanup",
               py::capsule([]() { ScopePool::Instance().Clear(); }));
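Judging from the call sites in the new unit tests below, `_set_eager_deletion_mode(0.0, 1.0, True)` programmatically sets what the FLAGS_eager_delete_tensor_gb, FLAGS_memory_fraction_of_eager_deletion and FLAGS_fast_eager_deletion_mode environment variables used to control. A hedged sketch of the presumed C++ declaration (the definition lives in garbage_collector.cc, outside this excerpt):

// Presumed signature, inferred from the Python call sites; parameter names
// are illustrative:
//   threshold -> FLAGS_eager_delete_tensor_gb (GB of garbage batched up
//                before freeing; 0.0 frees immediately)
//   fraction  -> FLAGS_memory_fraction_of_eager_deletion
//   fast_mode -> FLAGS_fast_eager_deletion_mode
namespace paddle {
namespace framework {
void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode);
}  // namespace framework
}  // namespace paddle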
...
...
@@ -281,6 +288,8 @@ PYBIND11_MODULE(core, m) {
  py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
      .def_buffer(
          [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
+      .def("_is_initialized",
+           [](const Tensor &self) { return self.IsInitialized(); })
      .def("_get_dims",
           [](const Tensor &self) { return vectorize(self.dims()); })
      .def("_set_dims",
...
...
@@ -681,7 +690,8 @@ All parameter, weight, gradient are variables in Paddle.
      .def("drop_kids", &Scope::DropKids,
           R"DOC(
           Delete all sub-scopes of the current scope.
-           )DOC");
+           )DOC")
+      .def("_kids", &Scope::kids);

  m.def("Scope",
        []() -> Scope * {
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
0 → 100644
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

os.environ['FLAGS_use_ngraph'] = '0'
os.environ['FLAGS_use_mkldnn'] = '0'
os.environ['CPU_NUM'] = '4'

import paddle.fluid as fluid
import six
import unittest
import multiprocessing

fluid.core._set_eager_deletion_mode(0.0, 1.0, True)


def simple_fc_net():
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = image
    for _ in range(4):
        hidden = fluid.layers.fc(
            hidden,
            size=200,
            act='tanh',
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=1.0)))
    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    loss = fluid.layers.mean(loss)
    optimizer = fluid.optimizer.Adam(learning_rate=1e-3)
    optimizer.minimize(loss)
    return image, label, loss


def get_persistables_and_non_persistables(prog, fetch_list):
    num_block = prog.num_blocks
    persistables = set()
    non_persistables = set()
    for bid in six.moves.range(num_block):
        block = prog.block(bid)
        for _, var in block.vars.items():
            if var.persistable or var.name in fetch_list:
                persistables.add(var.name)
            else:
                non_persistables.add(var.name)
    return persistables, non_persistables


class TestExecutor(unittest.TestCase):
    def test_executor_main(self):
        places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for p in places:
            self.place = p
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                with fluid.scope_guard(fluid.Scope()):
                    with fluid.unique_name.guard():
                        self.executor_main()

        for p in places:
            self.place = p
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                with fluid.scope_guard(fluid.Scope()):
                    with fluid.unique_name.guard():
                        self.pe_main()

    def prepare_feed(self, image, label, dev_cnt=1):
        batch_size = 32 * dev_cnt
        image_shape = (batch_size, ) + tuple(image.shape[1:])
        label_shape = (batch_size, ) + tuple(label.shape[1:])
        image_np = np.random.random(size=image_shape).astype('float32')
        label_np = np.random.random_integers(
            low=0, high=9, size=label_shape).astype('int64')
        return image_np, label_np

    def assertScopeVar(self, scope, persistables, non_persistables):
        outline_p_vars = []
        for name in persistables:
            var = scope.find_var(name)
            self.assertTrue(var is not None)
            t = var.get_tensor()
            if not t._is_initialized():
                outline_p_vars.append(name)

        outline_np_vars = []
        for name in non_persistables:
            var = scope.find_var(name)
            self.assertTrue(var is not None)
            t = var.get_tensor()
            if t._is_initialized():
                outline_np_vars.append(name)

        print('Non-alive persistable vars {} in {}'.format(outline_p_vars,
                                                           persistables))
        print('Alive non-persistable vars {} in {}'.format(
            outline_np_vars, non_persistables))
        self.assertEqual(len(outline_p_vars), 0)
        self.assertEqual(len(outline_np_vars), 0)

    def executor_main(self):
        image, label, loss = simple_fc_net()
        loss.persistable = False
        persistables, non_persistables = get_persistables_and_non_persistables(
            fluid.default_main_program(), [loss.name])
        print('Non-persistable var number {}'.format(len(non_persistables)))
        print(non_persistables)

        exe = fluid.Executor(self.place)
        exe.run(fluid.default_startup_program())

        p = fluid.core.Place()
        p.set_place(self.place)
        exe = fluid.core.Executor(p)

        for _ in six.moves.range(10):
            image_np, label_np = self.prepare_feed(image, label)
            fluid.global_scope().var(image.name).get_tensor().set(image_np,
                                                                  self.place)
            fluid.global_scope().var(label.name).get_tensor().set(label_np,
                                                                  self.place)
            # exe.run would not create local scope
            # so that we can detect whether gc clears temporary variables
            exe.run(fluid.default_main_program().desc,
                    fluid.global_scope(), 0, False, True, [loss.name])
            self.assertScopeVar(fluid.global_scope(), persistables,
                                non_persistables)

    def pe_main(self):
        image, label, loss = simple_fc_net()
        loss.persistable = False
        persistables, non_persistables = get_persistables_and_non_persistables(
            fluid.default_main_program(), [loss.name])

        exe = fluid.Executor(self.place)
        exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 100

        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False

        prog = fluid.CompiledProgram(fluid.default_main_program(
        )).with_data_parallel(
            loss_name=loss.name, exec_strategy=exec_strategy)

        dev_cnt = fluid.core.get_cuda_device_count() if isinstance(
            self.place, fluid.CUDAPlace) else int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

        for idx in six.moves.range(10):
            image_np, label_np = self.prepare_feed(image, label, dev_cnt)
            feed = {image.name: image_np, label.name: label_np}

            exe.run(program=prog, feed=feed, fetch_list=[loss])

            local_scopes = prog._local_scopes
            for scope in local_scopes:
                kids = scope._kids()
                self.assertTrue(len(kids) == 1)
                self.assertScopeVar(kids[0], persistables, non_persistables)


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
...
...
@@ -13,7 +13,6 @@
# limitations under the License.

import os

-os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['CPU_NUM'] = '2'

import six
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py
...
...
@@ -16,6 +16,8 @@ import unittest
from test_eager_deletion_dynamic_rnn_base import TestBase
import paddle.fluid as fluid

+fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
+

def gru_net(data,
            label,
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py
...
...
@@ -16,6 +16,8 @@ from test_eager_deletion_dynamic_rnn_base import TestBase
import paddle.fluid as fluid
import unittest

+fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
+

def lstm_net(data,
             label,
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py
...
...
@@ -14,7 +14,9 @@
import os
import unittest

os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
import paddle.fluid as fluid

+fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
+
# FIXME(zjl): It seems that this unittest fails randomly
# when comparing all reduce last loss and reduce last loss
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
...
...
@@ -14,7 +14,9 @@
import os
import unittest

os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
import paddle.fluid as fluid

+fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
+
os.environ['RECORDIO_FILENAME'] = './eager_deletion_transformer.wmt16.recordio'
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
...
...
@@ -16,8 +16,6 @@ from __future__ import print_function
import os

os.environ['CPU_NUM'] = '2'
-os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
-os.environ['FLAGS_fast_eager_deletion_mode'] = '1'

import unittest
import paddle.fluid as fluid
...
...
@@ -29,6 +27,8 @@ import paddle.fluid.compiler as compiler
import numpy
import multiprocessing

+fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
+

class TestEagerDeletionWhileOpBase(unittest.TestCase):
    def test_main(self):
...
...
python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
...
...
@@ -14,11 +14,12 @@
import os
import unittest

-os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
-os.environ['FLAGS_memory_fraction_of_eager_deletion'] = "0.55"
import paddle.fluid as fluid

os.environ['RECORDIO_FILENAME'] = './p_gc_transformer.wmt16.recordio'

+fluid.core._set_eager_deletion_mode(0.0, 0.55, True)
+
from test_parallel_executor_transformer import TestTransformer

if __name__ == '__main__':
...
...
python/paddle/fluid/tests/unittests/test_roi_align_op.py
...
...
@@ -168,3 +168,7 @@ class TestROIAlignOp(OpTest):
    def test_check_grad(self):
        self.check_grad(['X'], 'Out')

+
+if __name__ == '__main__':
+    unittest.main()