Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c7c6eeb4
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c7c6eeb4
编写于
3月 27, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
3月 27, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16409 from sneaxiy/feature/advance_gc
Enhance gc to support deleting tensor buffer in advance
上级
54a73578
a0f4fefb
变更
42
隐藏空白更改
内联
并排
Showing
42 changed file
with
1083 addition
and
381 deletion
+1083
-381
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+5
-2
paddle/fluid/framework/details/eager_deletion_pass.cc
paddle/fluid/framework/details/eager_deletion_pass.cc
+5
-10
paddle/fluid/framework/details/early_delete_op_handle.h
paddle/fluid/framework/details/early_delete_op_handle.h
+0
-140
paddle/fluid/framework/details/op_registry.h
paddle/fluid/framework/details/op_registry.h
+88
-16
paddle/fluid/framework/details/reference_count_pass.cc
paddle/fluid/framework/details/reference_count_pass.cc
+104
-5
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+20
-96
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+7
-9
paddle/fluid/framework/executor_gc_helper.cc
paddle/fluid/framework/executor_gc_helper.cc
+189
-0
paddle/fluid/framework/executor_gc_helper.h
paddle/fluid/framework/executor_gc_helper.h
+42
-0
paddle/fluid/framework/garbage_collector.cc
paddle/fluid/framework/garbage_collector.cc
+42
-0
paddle/fluid/framework/garbage_collector.h
paddle/fluid/framework/garbage_collector.h
+9
-0
paddle/fluid/framework/no_need_buffer_vars_inference.h
paddle/fluid/framework/no_need_buffer_vars_inference.h
+60
-0
paddle/fluid/framework/op_info.h
paddle/fluid/framework/op_info.h
+6
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+31
-7
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+10
-1
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+0
-18
paddle/fluid/framework/scope.h
paddle/fluid/framework/scope.h
+0
-3
paddle/fluid/framework/type_defs.h
paddle/fluid/framework/type_defs.h
+5
-0
paddle/fluid/operators/add_position_encoding_op.cc
paddle/fluid/operators/add_position_encoding_op.cc
+19
-7
paddle/fluid/operators/clip_op.cc
paddle/fluid/operators/clip_op.cc
+18
-1
paddle/fluid/operators/concat_op.cc
paddle/fluid/operators/concat_op.cc
+32
-8
paddle/fluid/operators/conv_op.cc
paddle/fluid/operators/conv_op.cc
+27
-9
paddle/fluid/operators/crop_op.cc
paddle/fluid/operators/crop_op.cc
+23
-2
paddle/fluid/operators/cross_entropy_op.cc
paddle/fluid/operators/cross_entropy_op.cc
+18
-1
paddle/fluid/operators/cudnn_lstm_op.cc
paddle/fluid/operators/cudnn_lstm_op.cc
+31
-6
paddle/fluid/operators/distributed/parameter_prefetch.cc
paddle/fluid/operators/distributed/parameter_prefetch.cc
+2
-1
paddle/fluid/operators/elementwise/elementwise_add_op.cc
paddle/fluid/operators/elementwise/elementwise_add_op.cc
+1
-2
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+18
-18
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+1
-2
paddle/fluid/operators/gather_op.cc
paddle/fluid/operators/gather_op.cc
+29
-4
paddle/fluid/operators/lod_reset_op.cc
paddle/fluid/operators/lod_reset_op.cc
+26
-4
paddle/fluid/operators/reader/ctr_reader.h
paddle/fluid/operators/reader/ctr_reader.h
+2
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+11
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
.../fluid/tests/unittests/test_eager_deletion_delete_vars.py
+183
-0
python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
...d/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
+0
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py
...ddle/fluid/tests/unittests/test_eager_deletion_gru_net.py
+2
-0
python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py
...dle/fluid/tests/unittests/test_eager_deletion_lstm_net.py
+2
-0
python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py
...paddle/fluid/tests/unittests/test_eager_deletion_mnist.py
+3
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
.../fluid/tests/unittests/test_eager_deletion_transformer.py
+3
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
...dle/fluid/tests/unittests/test_eager_deletion_while_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
...ests/unittests/test_partial_eager_deletion_transformer.py
+3
-2
python/paddle/fluid/tests/unittests/test_roi_align_op.py
python/paddle/fluid/tests/unittests/test_roi_align_op.py
+4
-0
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
c7c6eeb4
...
...
@@ -63,7 +63,7 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
nv_test
(
lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor
)
cc_library
(
garbage_collector SRCS garbage_collector.cc DEPS device_context memory
)
cc_library
(
garbage_collector SRCS garbage_collector.cc DEPS device_context memory
gflags glog
)
cc_library
(
reader SRCS reader.cc DEPS lod_tensor ddim
)
cc_test
(
reader_test SRCS reader_test.cc DEPS reader
)
...
...
@@ -164,6 +164,8 @@ else()
set
(
NGRAPH_EXE_DEPS
)
endif
()
cc_library
(
executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector
)
if
(
WITH_DISTRIBUTE
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
lod_rank_table feed_fetch_method sendrecvop_rpc
${
GLOB_DISTRIBUTE_DEPS
}
graph_to_program_pass variable_helper
${
NGRAPH_EXE_DEPS
}
)
...
...
@@ -174,7 +176,7 @@ else()
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op
)
endif
()
target_link_libraries
(
executor
garbage_collector while_op
_helper
)
target_link_libraries
(
executor
while_op_helper executor_gc
_helper
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor parallel_ssa_graph_executor
...
...
@@ -194,6 +196,7 @@ cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_con
cc_test
(
var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
proto_desc
)
cc_test
(
inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS op_registry proto_desc op_info memory_optimize_helper
)
cc_library
(
selected_rows SRCS selected_rows.cc DEPS tensor
)
cc_test
(
selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows
)
...
...
paddle/fluid/framework/details/eager_deletion_pass.cc
浏览文件 @
c7c6eeb4
...
...
@@ -22,14 +22,9 @@
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
DEFINE_double
(
memory_fraction_of_eager_deletion
,
1.0
,
"Fraction of eager deletion. If less than 1.0, all variables in "
"the program would be sorted according to its memory size, and "
"only the FLAGS_memory_fraction_of_eager_deletion of the largest "
"variables would be deleted."
);
namespace
paddle
{
namespace
framework
{
namespace
details
{
...
...
@@ -206,8 +201,9 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
}
}
op_vars_map
=
ShrinkGCVars
(
op_vars_map
,
vars
,
places
,
FLAGS_memory_fraction_of_eager_deletion
);
double
memory_fraction
=
framework
::
GetEagerDeletionMemoryFraction
();
op_vars_map
=
ShrinkGCVars
(
op_vars_map
,
vars
,
places
,
memory_fraction
);
for
(
auto
&
pair
:
op_vars_map
)
{
auto
*
op
=
pair
.
first
;
...
...
@@ -239,8 +235,7 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
eager_deletion_op
->
AddOutput
(
dummy_leaf
);
}
VLOG
(
10
)
<<
"FLAGS_memory_fraction_of_eager_deletion = "
<<
FLAGS_memory_fraction_of_eager_deletion
;
VLOG
(
10
)
<<
"FLAGS_memory_fraction_of_eager_deletion = "
<<
memory_fraction
;
VLOG
(
10
)
<<
"Create "
<<
op_vars_map
.
size
()
<<
" EagerDeletionOpHandle(s)"
;
auto
while_op_eager_deletion_pass
=
...
...
paddle/fluid/framework/details/early_delete_op_handle.h
已删除
100644 → 0
浏览文件 @
54a73578
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// Op handle that releases the memory held by a fixed list of variables.
//
// When run, it looks up every name in `names_` inside the local execution
// scope, moves the memory holder out of each LoDTensor / SelectedRows /
// LoDTensorArray it finds, and hands the collected allocations to the garbage
// collector `gc_`.
class EarlyDeleteOpHandle : public OpHandleBase {
 public:
  // node:  graph node passed to OpHandleBase.
  // scope: scope holding the kLocalExecScopeName variable.
  // place: place used to choose between the CPU and GPU release paths.
  // names: names of the variables whose buffers are released by RunImpl().
  // gc:    garbage collector receiving the released allocations (not owned).
  EarlyDeleteOpHandle(ir::Node* node, const Scope* scope,
                      const platform::Place& place,
                      const std::vector<std::string>& names,
                      GarbageCollector* gc)
      : OpHandleBase(node),
        scope_(scope),
        place_(place),
        names_(names),
        gc_(gc) {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      auto gpu_place = boost::get<platform::CUDAPlace>(place);
      // dev_ctx_ was previously never assigned although the destructor and
      // ClearGPUTensors() dereference it; bind it to the device context of
      // this place before anything can use it.
      dev_ctx_ = static_cast<platform::CUDADeviceContext*>(
          platform::DeviceContextPool::Instance().Get(place));
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(
          cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
    }
#endif
  }

  ~EarlyDeleteOpHandle() {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      // Use the stored place (the same one the constructor used) rather than
      // going through dev_ctx_, so destruction never depends on that pointer.
      auto gpu_place = boost::get<platform::CUDAPlace>(place_);
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(cudaEventDestroy(event_));
    }
#endif
  }

  std::string Name() const override { return "early_delete"; }

 protected:
  // Collects the memory holders of all variables in names_ and releases them.
  // Enforces that every name exists in the local scope. Variables of any
  // other type than the three handled below are left untouched.
  void RunImpl() override {
    std::vector<std::shared_ptr<memory::Allocation>> tensors;
    auto* local_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope*>();
    for (auto& var_name : names_) {
      auto* var = local_scope->FindVar(var_name);
      PADDLE_ENFORCE(var != nullptr,
                     string::Sprintf("Local Scope not has var %s", var_name));
      if (var->IsType<LoDTensor>()) {
        tensors.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
      } else if (var->IsType<SelectedRows>()) {
        tensors.emplace_back(var->GetMutable<SelectedRows>()
                                 ->mutable_value()
                                 ->MoveMemoryHolder());
      } else if (var->IsType<LoDTensorArray>()) {
        LoDTensorArray* tensor_array = var->GetMutable<LoDTensorArray>();
        for (auto& tensor : *tensor_array) {
          tensors.emplace_back(tensor.MoveMemoryHolder());
        }
      }
    }
    if (!tensors.empty()) {
      ClearTensors(tensors);
    }
  }

 private:
  // Dispatches to the CPU or GPU release path according to place_.
  void ClearTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    if (platform::is_cpu_place(place_)) {
      ClearCPUTensors(tensors);
    } else {
      ClearGPUTensors(tensors);
    }
  }

  // Hands the allocations to gc_ if it is a CPUGarbageCollector; otherwise
  // does nothing.
  void ClearCPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    auto* gc = dynamic_cast<CPUGarbageCollector*>(gc_);
    if (gc != nullptr) {
      gc->Add(tensors);
    }
  }

  // Hands the allocations to gc_. With a StreamGarbageCollector, a callback
  // is attached that records event_ on the compute stream and makes the
  // collector's stream wait on it. Without CUDA support this is a no-op.
  void ClearGPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
#ifdef PADDLE_WITH_CUDA
    auto* gc = dynamic_cast<StreamGarbageCollector*>(gc_);
    if (gc != nullptr) {
      auto compute_stream = dev_ctx_->stream();
      auto callback_stream = gc->stream();
      auto callback_func = [=]() {
        PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream));
        PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0));
      };
      gc_->Add(tensors, callback_func);
    } else {
      gc_->Add(tensors);
    }
#endif
  }

#ifdef PADDLE_WITH_CUDA
  // Returns true when gc_ is a StreamGarbageCollector. Only available in
  // CUDA builds, exactly as before; the preprocessor guard now encloses the
  // whole function instead of straddling two function bodies.
  bool IsStreamGarabageCollector() const {
    return dynamic_cast<const StreamGarbageCollector*>(gc_) != nullptr;
  }
#endif

  const Scope* scope_;
  const platform::Place place_;
  std::vector<std::string> names_;
  GarbageCollector* gc_;
#ifdef PADDLE_WITH_CUDA
  // Both are only assigned when gc_ is a StreamGarbageCollector.
  platform::CUDADeviceContext* dev_ctx_{nullptr};
  cudaEvent_t event_{nullptr};
#endif
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/op_registry.h
浏览文件 @
c7c6eeb4
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/inplace_op_inference.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h"
...
...
@@ -36,27 +37,86 @@ enum OpInfoFillType {
kGradOpDescMaker
=
2
,
kVarTypeInference
=
3
,
kShapeInference
=
4
,
kInplaceOpInference
=
5
kInplaceOpInference
=
5
,
kNoNeedBufferVarsInference
=
6
,
kUnknown
=
-
1
};
namespace internal {

// Associates a registrable base class with the OpInfoFillType tag that
// identifies it.
template <typename T, OpInfoFillType kType>
struct TypePair {
  using Type = T;
  static constexpr OpInfoFillType kFillType = kType;
};

// Ordered list of (base class, fill type) pairs. The lookup below walks this
// list front to back and stops at the first base class that matches.
using OpRegistryClasses = std::tuple<                                // NOLINT
    TypePair<OperatorBase, kOperator>,                               // NOLINT
    TypePair<OpProtoAndCheckerMaker, kOpProtoAndCheckerMaker>,       // NOLINT
    TypePair<GradOpDescMakerBase, kGradOpDescMaker>,                 // NOLINT
    TypePair<VarTypeInference, kVarTypeInference>,                   // NOLINT
    TypePair<InferShapeBase, kShapeInference>,                       // NOLINT
    TypePair<InplaceOpInference, kInplaceOpInference>,               // NOLINT
    TypePair<NoNeedBufferVarsInference, kNoNeedBufferVarsInference>  // NOLINT
    >;

// Number of entries in OpRegistryClasses.
static constexpr int kOpRegistryClassNumber =
    std::tuple_size<OpRegistryClasses>::value;

// kValue is true when T derives from the base class stored at position
// kIndex of OpRegistryClasses. This primary template is the in-range case.
template <typename T, int kIndex, bool kInRange /* = true*/>
struct IsMatchedBaseTypeImpl {
  using PairType =
      typename std::tuple_element<kIndex, OpRegistryClasses>::type;
  static constexpr bool kValue =
      std::is_base_of<typename PairType::Type, T>::value;
};

// Out-of-range position: never matches, and never touches the tuple.
template <typename T, int kIndex>
struct IsMatchedBaseTypeImpl<T, kIndex, false> {
  static constexpr bool kValue = false;
};

// Bounds-checked front end for IsMatchedBaseTypeImpl.
template <typename T, int kIndex>
static inline constexpr bool IsMatchedBaseType() {
  return IsMatchedBaseTypeImpl<
      T, kIndex, (kIndex >= 0 && kIndex < kOpRegistryClassNumber)>::kValue;
}

// Compile-time linear search over OpRegistryClasses in [kCur, kStop).
// kAtEnd tells whether kCur has reached kStop; kMatched tells whether T
// matches the entry at kCur.
template <typename T, int kCur, int kStop, bool kAtEnd, bool kMatched>
struct OpInfoFillTypeGetterImpl {};

// This case should not happen
template <typename T, int kCur, int kStop>
struct OpInfoFillTypeGetterImpl<T, kCur, kStop, true, true> {};

// Reached the end without a match: the fill type is unknown.
template <typename T, int kCur, int kStop>
struct OpInfoFillTypeGetterImpl<T, kCur, kStop, true, false> {
  static constexpr OpInfoFillType kType = kUnknown;
};

// No match at kCur and not at the end yet: continue with the next entry.
template <typename T, int kCur, int kStop>
struct OpInfoFillTypeGetterImpl<T, kCur, kStop, false, false> {
  static constexpr OpInfoFillType kType =
      OpInfoFillTypeGetterImpl<T, kCur + 1, kStop, kCur + 1 == kStop,
                               IsMatchedBaseType<T, kCur + 1>()>::kType;
};

// Match at kCur: report the fill type recorded for that entry.
template <typename T, int kCur, int kStop>
struct OpInfoFillTypeGetterImpl<T, kCur, kStop, false, true> {
  using PairType = typename std::tuple_element<kCur, OpRegistryClasses>::type;
  static constexpr OpInfoFillType kType = PairType::kFillType;
};

// Entry point: OpInfoFillTypeGetter<T>::kType is the fill type of the first
// entry of OpRegistryClasses whose base class T derives from, or kUnknown.
template <typename T>
using OpInfoFillTypeGetter =
    OpInfoFillTypeGetterImpl<T, 0, kOpRegistryClassNumber,
                             kOpRegistryClassNumber == 0,
                             IsMatchedBaseType<T, 0>()>;

}  // namespace internal
template
<
typename
T
>
struct
OpInfoFillTypeID
{
static
constexpr
OpInfoFillType
ID
()
{
return
std
::
is_base_of
<
OperatorBase
,
T
>::
value
?
kOperator
:
(
std
::
is_base_of
<
OpProtoAndCheckerMaker
,
T
>::
value
?
kOpProtoAndCheckerMaker
:
(
std
::
is_base_of
<
GradOpDescMakerBase
,
T
>::
value
?
kGradOpDescMaker
:
(
std
::
is_base_of
<
VarTypeInference
,
T
>::
value
?
kVarTypeInference
:
(
std
::
is_base_of
<
InferShapeBase
,
T
>::
value
?
kShapeInference
:
(
std
::
is_base_of
<
InplaceOpInference
,
T
>::
value
?
kInplaceOpInference
:
static_cast
<
OpInfoFillType
>
(
-
1
))))));
return
internal
::
OpInfoFillTypeGetter
<
T
>::
kType
;
}
};
...
...
@@ -156,6 +216,18 @@ struct OpInfoFiller<T, kInplaceOpInference> {
}
};
// Filler for classes tagged kNoNeedBufferVarsInference: stores in
// info->infer_no_need_buffer_vars_ a callable that builds a T from the
// operator's inputs, outputs and attributes and returns the result of
// invoking it. op_type is not used.
template <typename T>
struct OpInfoFiller<T, kNoNeedBufferVarsInference> {
  void operator()(const char* op_type, OpInfo* info) const {
    auto run_inference = [](const VariableNameMap& ins,
                            const VariableNameMap& outs,
                            const AttributeMap& attr_map) {
      T inference(ins, outs, attr_map);
      return inference();
    };
    info->infer_no_need_buffer_vars_ = run_inference;
  }
};
}
// namespace details
}
// namespace framework
...
...
paddle/fluid/framework/details/reference_count_pass.cc
浏览文件 @
c7c6eeb4
...
...
@@ -193,6 +193,79 @@ ExtractComputationOpFromLastLivedVar(VarHandle *var, size_t scope_idx,
return
shrink_func
(
computation_op
);
}
/**
* Shrink op dependencies according to no need buffer vars.
*
* If some ops do not need Tensor buffer of any input,
* just remove the dependency of this op, i.e, decrease reference count.
*
* For example, input Y of elementwise_add_grad op is only used to infer shape
* and lod of Y@GRAD, we do not need the buffer of input Y. Data buffer of
* input Y can be collected before elementwise_add_grad op runs.
*
* This method returns whether the dependency count decreases to 0, and
* shrinks op dependency if possible.
*/
static
bool
ShrinkNoNeedBufferVarOpDependency
(
const
std
::
string
&
var_name
,
std
::
unordered_set
<
ComputationOpHandle
*>
*
op_handles
)
{
std
::
vector
<
ComputationOpHandle
*>
skip_ops
;
for
(
auto
*
op_handle
:
*
op_handles
)
{
auto
*
op_base
=
op_handle
->
GetOp
();
auto
&
inferer
=
op_base
->
Info
().
NoNeedBufferVarsInferer
();
if
(
!
inferer
)
{
continue
;
}
std
::
unordered_set
<
std
::
string
>
no_need_buffer_vars
=
inferer
(
op_base
->
Inputs
(),
op_base
->
Outputs
(),
op_base
->
Attrs
());
// Check whether var_name occurs in other inputs or outputs of the op
// If it occurs, we cannot decrease the dependency number.
bool
occurred_in_other_vars
=
false
;
for
(
auto
&
in_pair
:
op_base
->
Inputs
())
{
if
(
no_need_buffer_vars
.
count
(
in_pair
.
first
)
>
0
)
{
continue
;
}
auto
&
args
=
in_pair
.
second
;
auto
iter
=
std
::
find
(
args
.
begin
(),
args
.
end
(),
var_name
);
if
(
iter
!=
args
.
end
())
{
occurred_in_other_vars
=
true
;
break
;
}
}
if
(
occurred_in_other_vars
)
{
continue
;
}
for
(
auto
&
out_pair
:
op_base
->
Outputs
())
{
auto
&
args
=
out_pair
.
second
;
auto
iter
=
std
::
find
(
args
.
begin
(),
args
.
end
(),
var_name
);
if
(
iter
!=
args
.
end
())
{
occurred_in_other_vars
=
true
;
break
;
}
}
if
(
!
occurred_in_other_vars
)
{
VLOG
(
2
)
<<
"Shrink var "
<<
var_name
<<
" in op "
<<
op_handle
->
Name
();
skip_ops
.
emplace_back
(
op_handle
);
}
}
if
(
skip_ops
.
size
()
==
op_handles
->
size
())
{
op_handles
->
clear
();
return
true
;
}
else
{
for
(
auto
*
skip_op
:
skip_ops
)
{
op_handles
->
erase
(
skip_op
);
}
return
false
;
}
}
std
::
unique_ptr
<
ir
::
Graph
>
ReferenceCountPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
&
ref_cnts
=
Get
<
std
::
vector
<
ReferenceCountMap
>>
(
kGlobalReferenceCount
);
...
...
@@ -229,17 +302,43 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
continue
;
}
bool
ok
;
auto
result
=
ExtractComputationOpFromLastLivedVar
(
name_var_pair
.
second
.
back
(),
i
,
shrink_func
,
&
ok
);
auto
&
var_name
=
name_var_pair
.
first
;
auto
&
var_handles
=
name_var_pair
.
second
;
for
(
auto
iter
=
var_handles
.
rbegin
();
iter
!=
var_handles
.
rend
();
++
iter
)
{
bool
ok
;
auto
result
=
ExtractComputationOpFromLastLivedVar
(
*
iter
,
i
,
shrink_func
,
&
ok
);
// Rarely, a var may have no pending or preceding computation ops.
// In that case, just break.
if
(
!
ok
)
break
;
VLOG
(
10
)
<<
"Extract "
<<
result
.
size
()
<<
" ops of var "
<<
var_name
;
size_t
original_op_deps
=
result
.
size
();
// If all ops do not need buffer of var_name, calculate reference count
// of the previous version of var_name.
if
(
ShrinkNoNeedBufferVarOpDependency
(
var_name
,
&
result
))
{
VLOG
(
10
)
<<
"Try to precede reference count computing at var "
<<
var_name
;
continue
;
}
size_t
final_op_deps
=
result
.
size
();
if
(
final_op_deps
<
original_op_deps
)
{
VLOG
(
5
)
<<
"Shrink op deps from "
<<
original_op_deps
<<
" to "
<<
final_op_deps
;
}
if
(
ok
)
{
auto
&
var_name
=
name_var_pair
.
first
;
PADDLE_ENFORCE
(
!
result
.
empty
(),
"Last living ops of %s cannot be empty"
,
var_name
);
ref_cnts
[
i
].
emplace
(
var_name
,
result
.
size
());
last_live_ops_of_vars
[
i
].
emplace
(
var_name
,
std
::
move
(
result
));
}
// Rarely, every attempt to fall back to a preceding version fails.
// Just skip this corner case.
}
}
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
c7c6eeb4
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
...
...
@@ -48,97 +49,23 @@ namespace {
int
kProgramId
=
-
1
;
}
// namespace
// Counts, for every variable referenced by the ops of `block`, how many times
// it appears among op inputs and outputs. A variable is counted only if it is
// not listed in skip_var_list, is declared in the block, is not persistable,
// and has type LOD_TENSOR, SELECTED_ROWS or LOD_TENSOR_ARRAY.
static std::unordered_map<std::string, size_t> GetNonPersistableReferenceCounts(
    const BlockDesc& block, const std::vector<std::string>& skip_var_list) {
  const std::unordered_set<std::string> excluded(skip_var_list.begin(),
                                                 skip_var_list.end());
  std::unordered_map<std::string, size_t> counts;

  // Adds one reference for each eligible variable name in the given slots.
  auto count_references = [&](const VariableNameMap& slots) {
    for (auto& slot : slots) {
      for (auto& var_name : slot.second) {
        if (excluded.count(var_name) != 0) continue;

        auto* desc = block.FindVar(var_name);
        if (desc == nullptr) continue;
        if (desc->Persistable()) continue;

        auto var_type = desc->Proto()->type().type();
        const bool collectable =
            var_type == proto::VarType::LOD_TENSOR ||
            var_type == proto::VarType::SELECTED_ROWS ||
            var_type == proto::VarType::LOD_TENSOR_ARRAY;
        if (collectable) {
          ++counts[var_name];
        }
      }
    }
  };

  for (auto* op : block.AllOps()) {
    count_references(op->Inputs());
    count_references(op->Outputs());
  }
  return counts;
}
ExecutorPrepareContext
::
ExecutorPrepareContext
(
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
,
const
std
::
vector
<
std
::
string
>&
keep_vars
,
bool
force_disable_gc
)
:
prog_
(
prog
),
block_id_
(
block_id
),
force_disable_gc_
(
force_disable_gc
)
{
if
(
GetEagerDeletionThreshold
()
>=
0
&&
!
force_disable_gc_
)
{
global_ref_cnts_
=
GetNonPersistableReferenceCounts
(
prog
.
Block
(
block_id
),
keep_vars
);
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
)
:
prog_
(
prog
),
block_id_
(
block_id
)
{}
// Records force_disable_gc in force_disable_gc_. Then, if the eager deletion
// threshold is non-negative and GC is not force-disabled, fills unused_vars_
// with the result of GetUnusedVars for this context's block and ops, with
// keep_vars passed as the skip list. Otherwise unused_vars_ is left as is.
void ExecutorPrepareContext::PrepareUnusedVars(
    const std::vector<std::string>& keep_vars, bool force_disable_gc) {
  force_disable_gc_ = force_disable_gc;

  const bool gc_enabled =
      GetEagerDeletionThreshold() >= 0 && !force_disable_gc_;
  if (gc_enabled) {
    unused_vars_ = GetUnusedVars(prog_.Block(block_id_), ops_, keep_vars);
  }
}
// Destructor: its only explicit action is a verbose (level 5) log line.
ExecutorPrepareContext
::~
ExecutorPrepareContext
()
{
VLOG
(
5
)
<<
"destroy ExecutorPrepareContext"
;
}
static
void
DeleteUnusedTensors
(
const
Scope
&
scope
,
const
OperatorBase
*
op
,
GarbageCollector
*
gc
,
std
::
unordered_map
<
std
::
string
,
size_t
>*
ref_cnts
)
{
std
::
deque
<
std
::
shared_ptr
<
memory
::
Allocation
>>
garbages
;
auto
handler
=
[
&
](
const
VariableNameMap
&
name_map
)
{
for
(
auto
&
name_pair
:
name_map
)
{
for
(
auto
&
name
:
name_pair
.
second
)
{
auto
it
=
ref_cnts
->
find
(
name
);
if
(
it
==
ref_cnts
->
end
())
continue
;
if
(
--
(
it
->
second
)
!=
0
)
{
continue
;
}
auto
*
var
=
scope
.
FindVar
(
name
);
if
(
var
==
nullptr
)
{
continue
;
}
VLOG
(
2
)
<<
"Erase variable "
<<
name
;
if
(
var
->
IsType
<
LoDTensor
>
())
{
garbages
.
emplace_back
(
var
->
GetMutable
<
LoDTensor
>
()
->
MoveMemoryHolder
());
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
garbages
.
emplace_back
(
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
()
->
MoveMemoryHolder
());
}
else
if
(
var
->
IsType
<
LoDTensorArray
>
())
{
auto
*
lod_tensor_arr
=
var
->
GetMutable
<
LoDTensorArray
>
();
for
(
auto
&
t
:
*
lod_tensor_arr
)
{
garbages
.
emplace_back
(
t
.
MoveMemoryHolder
());
}
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
framework
::
ToTypeName
(
var
->
Type
()),
name
);
}
}
}
};
handler
(
op
->
Inputs
());
handler
(
op
->
Outputs
());
if
(
!
garbages
.
empty
())
{
gc
->
Add
(
std
::
move
(
garbages
));
}
}
// Constructs an Executor for `place`; the place is stored in place_.
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
void
Executor
::
Close
()
{
...
...
@@ -362,8 +289,8 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
std
::
unique_ptr
<
ExecutorPrepareContext
>
Executor
::
Prepare
(
const
ProgramDesc
&
program
,
int
block_id
,
const
std
::
vector
<
std
::
string
>&
skip_ref_cnt_vars
,
bool
force_disable_gc
)
{
std
::
unique_ptr
<
ExecutorPrepareContext
>
ctx
(
new
ExecutorPrepareContext
(
program
,
block_id
,
skip_ref_cnt_vars
,
force_disable_gc
));
std
::
unique_ptr
<
ExecutorPrepareContext
>
ctx
(
new
ExecutorPrepareContext
(
program
,
block_id
));
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
block_id
),
program
.
Size
());
auto
&
block
=
program
.
Block
(
block_id
);
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
...
...
@@ -375,6 +302,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
ctx
->
prog_
.
Block
(
ctx
->
block_id_
),
&
ctx
->
ops_
);
}
#endif
ctx
->
PrepareUnusedVars
(
skip_ref_cnt_vars
,
force_disable_gc
);
return
ctx
;
}
...
...
@@ -389,19 +317,17 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
result
;
size_t
idx
=
0
;
for
(
auto
&
bid
:
block_ids
)
{
ExecutorPrepareContext
*
ctx
;
if
(
skip_ref_cnt_vars
.
empty
())
{
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
,
std
::
vector
<
std
::
string
>
(),
force_disable_gc
);
}
else
{
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
,
skip_ref_cnt_vars
[
idx
],
force_disable_gc
);
}
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
bid
),
program
.
Size
());
auto
*
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
);
auto
&
block
=
program
.
Block
(
bid
);
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
}
if
(
skip_ref_cnt_vars
.
empty
())
{
ctx
->
PrepareUnusedVars
(
std
::
vector
<
std
::
string
>
(),
force_disable_gc
);
}
else
{
ctx
->
PrepareUnusedVars
(
skip_ref_cnt_vars
[
idx
],
force_disable_gc
);
}
result
.
push_back
(
std
::
shared_ptr
<
ExecutorPrepareContext
>
(
ctx
));
++
idx
;
}
...
...
@@ -425,7 +351,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
// FIXME(zjl): recurrent_op is rather complex, so we
// forcibly disable gc in recurrent_op
if
(
!
ctx
->
force_disable_gc_
&&
max_memory_size
>=
0
)
{
ctx
->
ResetReferenceCount
();
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
place_
))
{
if
(
IsFastEagerDeletionModeEnabled
())
{
...
...
@@ -453,8 +378,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
op
->
Run
(
*
local_scope
,
place_
);
if
(
gc
)
{
DeleteUnusedTensors
(
*
local_scope
,
op
.
get
(),
gc
.
get
(),
&
(
ctx
->
runtime_ref_cnts_
));
DeleteUnusedTensors
(
*
local_scope
,
op
.
get
(),
ctx
->
unused_vars_
,
gc
.
get
());
}
}
...
...
paddle/fluid/framework/executor.h
浏览文件 @
c7c6eeb4
...
...
@@ -30,22 +30,20 @@ namespace paddle {
namespace
framework
{
struct
ExecutorPrepareContext
{
ExecutorPrepareContext
(
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
,
const
std
::
vector
<
std
::
string
>&
skip_ref_cnt_vars
=
std
::
vector
<
std
::
string
>
(),
bool
force_disable_gc
=
false
);
ExecutorPrepareContext
(
const
framework
::
ProgramDesc
&
prog
,
size_t
block_id
);
~
ExecutorPrepareContext
();
void
ResetReferenceCount
()
{
runtime_ref_cnts_
=
global_ref_cnts_
;
}
void
PrepareUnusedVars
(
const
std
::
vector
<
std
::
string
>&
keep_vars
,
bool
force_disable_gc
=
false
);
const
framework
::
ProgramDesc
&
prog_
;
size_t
block_id_
;
bool
force_disable_gc_
;
const
size_t
block_id_
;
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
ops_
;
std
::
unordered_map
<
std
::
string
,
size_t
>
global_ref_cnt
s_
;
std
::
unordered_map
<
std
::
string
,
size_t
>
runtime_ref_cnts_
;
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
unused_var
s_
;
bool
force_disable_gc_
{
false
}
;
};
class
Executor
{
...
...
paddle/fluid/framework/executor_gc_helper.cc
0 → 100644
浏览文件 @
c7c6eeb4
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/executor_gc_helper.h"
#include <deque>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
// Per-operator helper that answers whether the buffer of a given input
// argument is actually needed by the operator, based on the operator's
// no-need-buffer inferer (if it has one).
struct OpInOutInfo {
 public:
  // Queries the inferer of `op` and caches its result. When the inferer
  // reports at least one buffer-free input slot, also records every argument
  // name that appears in any other input slot or in any output slot.
  void Build(const OperatorBase *op) {
    is_built_ = true;

    auto &inferer = op->Info().NoNeedBufferVarsInferer();
    if (!inferer) return;

    no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs());
    if (no_need_buffer_ins_.empty()) return;

    for (auto &in_slot : op->Inputs()) {
      // Arguments of buffer-free slots are deliberately left out.
      if (no_need_buffer_ins_.count(in_slot.first) != 0) continue;
      other_args_set_.insert(in_slot.second.begin(), in_slot.second.end());
    }

    for (auto &out_slot : op->Outputs()) {
      other_args_set_.insert(out_slot.second.begin(), out_slot.second.end());
    }
  }

  // True once Build() has been called.
  bool IsBuilt() const { return is_built_; }

  // True when the buffer of in_arg_name is needed: either no buffer-free
  // slot is known, or the name is also used through another slot.
  bool IsInArgBufferNeeded(const std::string &in_arg_name) const {
    if (no_need_buffer_ins_.empty()) {
      return true;
    }
    return other_args_set_.find(in_arg_name) != other_args_set_.end();
  }

 private:
  // Input slots of the op whose buffers are reported as not needed.
  std::unordered_set<std::string> no_need_buffer_ins_;
  // Argument names used through all remaining slots (inputs and outputs).
  std::unordered_set<std::string> other_args_set_;
  bool is_built_{false};
};
// Decides whether the variable `name` is a candidate for eager deletion.
//
// A variable is rejected when it is listed in `skip_vars`, when `block` has
// no description for it, or when its description is persistable. Of the
// remaining variables only LOD_TENSOR, SELECTED_ROWS and LOD_TENSOR_ARRAY
// types are accepted.
static bool VarCanBeDeleted(const std::string &name, const BlockDesc &block,
                            const std::unordered_set<std::string> &skip_vars) {
  if (skip_vars.count(name) != 0) {
    return false;
  }

  auto *desc = block.FindVar(name);
  if (desc == nullptr) {
    return false;
  }
  if (desc->Persistable()) {
    return false;
  }

  switch (desc->Proto()->type().type()) {
    case proto::VarType::LOD_TENSOR:
    case proto::VarType::SELECTED_ROWS:
    case proto::VarType::LOD_TENSOR_ARRAY:
      return true;
    default:
      return false;
  }
}
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
GetUnusedVars
(
const
BlockDesc
&
block
,
const
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
&
ops
,
const
std
::
vector
<
std
::
string
>
&
skip_var_list
)
{
std
::
unordered_set
<
std
::
string
>
skip_vars
(
skip_var_list
.
begin
(),
skip_var_list
.
end
());
std
::
unordered_map
<
std
::
string
,
size_t
>
var_op_idx_map
;
for
(
size_t
i
=
0
;
i
<
ops
.
size
();
++
i
)
{
auto
*
op
=
ops
[
i
].
get
();
OpInOutInfo
info
;
for
(
auto
&
name_pair
:
op
->
Inputs
())
{
for
(
auto
&
name
:
name_pair
.
second
)
{
if
(
!
VarCanBeDeleted
(
name
,
block
,
skip_vars
))
{
continue
;
}
// var can be gc-ed
if
(
!
info
.
IsBuilt
())
{
info
.
Build
(
op
);
}
if
(
info
.
IsInArgBufferNeeded
(
name
))
{
// Update the last living op of variable to current op
var_op_idx_map
[
name
]
=
i
;
}
else
{
VLOG
(
10
)
<<
"Skip reference count computing of variable "
<<
name_pair
.
first
<<
"("
<<
name
<<
") in Operator "
<<
op
->
Type
();
}
}
}
for
(
auto
&
name_pair
:
op
->
Outputs
())
{
for
(
auto
&
name
:
name_pair
.
second
)
{
if
(
VarCanBeDeleted
(
name
,
block
,
skip_vars
))
{
// Update the last living op of variable to current op
var_op_idx_map
[
name
]
=
i
;
}
}
}
}
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
result
;
for
(
auto
&
name_op_idx_pair
:
var_op_idx_map
)
{
auto
&
name
=
name_op_idx_pair
.
first
;
size_t
op_idx
=
name_op_idx_pair
.
second
;
result
[
ops
[
op_idx
].
get
()].
emplace_back
(
name
);
}
return
result
;
}
// Hands the memory holders of the variables that become unused after `op`
// to the garbage collector `gc`.
//
// `delete_vars_map` maps an operator to the variable names to release; if
// `op` has no entry nothing happens. Variables that are not found in
// `scope` are skipped. LoDTensor, SelectedRows and LoDTensorArray variables
// are supported; any other variable type raises via PADDLE_THROW.
void DeleteUnusedTensors(
    const Scope &scope, OperatorBase *op,
    const std::unordered_map<OperatorBase *, std::vector<std::string>>
        &delete_vars_map,
    GarbageCollector *gc) {
  auto found = delete_vars_map.find(op);
  if (found == delete_vars_map.end()) {
    return;
  }

  // Holders collected from all variables; passed to the collector in one go.
  std::deque<std::shared_ptr<memory::Allocation>> garbages;

  for (auto &var_name : found->second) {
    auto *var = scope.FindVar(var_name);
    if (var == nullptr) {
      continue;
    }

    VLOG(2) << "Erase variable " << var_name;

    if (var->IsType<LoDTensor>()) {
      auto *tensor = var->GetMutable<LoDTensor>();
      garbages.emplace_back(tensor->MoveMemoryHolder());
    } else if (var->IsType<SelectedRows>()) {
      auto *rows_value = var->GetMutable<SelectedRows>()->mutable_value();
      garbages.emplace_back(rows_value->MoveMemoryHolder());
    } else if (var->IsType<LoDTensorArray>()) {
      // Every tensor in the array contributes its own holder.
      for (auto &tensor : *var->GetMutable<LoDTensorArray>()) {
        garbages.emplace_back(tensor.MoveMemoryHolder());
      }
    } else {
      PADDLE_THROW("Type %s of %s is not supported eager deletion",
                   framework::ToTypeName(var->Type()), var_name);
    }
  }

  if (garbages.empty()) {
    return;
  }
  gc->Add(std::move(garbages));
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/executor_gc_helper.h
0 → 100644
浏览文件 @
c7c6eeb4
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
framework
{
// Computes which variables can be deleted after each operator runs.
//
// Result map: op -> names of the variables that can be deleted after op runs.
//   block:     block description used to look up the variables.
//   ops:       the operators, in the order they are scanned.
//   skip_vars: names of variables that must never be deleted.
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
GetUnusedVars
(
const
BlockDesc
&
block
,
const
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
&
ops
,
const
std
::
vector
<
std
::
string
>
&
skip_vars
);
// Collects the tensors that are unused after `op` runs and passes them to
// the garbage collector.
//   scope:           scope in which the variables are looked up.
//   op:              the operator that has just run; key into delete_vars_map.
//   delete_vars_map: op -> variable names to release (presumably the result
//                    of GetUnusedVars -- confirm against the caller).
//   gc:              garbage collector that receives the released memory.
void
DeleteUnusedTensors
(
const
Scope
&
scope
,
OperatorBase
*
op
,
const
std
::
unordered_map
<
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
&
delete_vars_map
,
GarbageCollector
*
gc
);
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/garbage_collector.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,14 +13,36 @@
// limitations under the License.
#include <algorithm>
#include <deque>
#include <functional>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/garbage_collector.h"
namespace
paddle
{
namespace
framework
{
// Command-line flags that configure eager deletion (garbage collection).

// Threshold in GB; a negative value disables eager deletion.
// Adjacent string literals are concatenated by the compiler, so the first
// literal must end with a space to keep the two sentences apart.
DEFINE_double(
    eager_delete_tensor_gb, -1.0,
    "Memory size threshold (GB) when the garbage collector clear tensors. "
    "Disabled when this value is less than 0");

// Whether memory is released without waiting for GPU kernels to finish.
DEFINE_bool(fast_eager_deletion_mode, true,
            "Fast eager deletion mode. If enabled, memory would release "
            "immediately without waiting GPU kernel ends.");

// Fraction of the (largest) variables that are subject to eager deletion.
DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
              "Fraction of eager deletion. If less than 1.0, all variables in "
              "the program would be sorted according to its memory size, and "
              "only the FLAGS_memory_fraction_of_eager_deletion of the largest "
              "variables would be deleted.");
GarbageCollector
::
GarbageCollector
(
const
platform
::
Place
&
place
,
size_t
max_memory_size
)
:
max_memory_size_
((
std
::
max
)(
max_memory_size
,
static_cast
<
size_t
>
(
1
)))
{
...
...
@@ -85,5 +107,25 @@ void StreamGarbageCollector::ClearCallback(
callback_manager_
->
AddCallback
(
callback
);
}
#endif
// Returns the eager-deletion threshold in bytes, derived from
// FLAGS_eager_delete_tensor_gb (which is expressed in GB).
// A negative flag value yields -1.
int64_t GetEagerDeletionThreshold() {
  if (FLAGS_eager_delete_tensor_gb < 0) {
    return -1;
  }
  // 1 << 30 bytes per GB; the shift is done in 64 bits.
  const int64_t bytes_per_gb = static_cast<int64_t>(1) << 30;
  return static_cast<int64_t>(FLAGS_eager_delete_tensor_gb * bytes_per_gb);
}
// Reports the current value of FLAGS_fast_eager_deletion_mode.
bool IsFastEagerDeletionModeEnabled() {
  const bool fast_mode_enabled = FLAGS_fast_eager_deletion_mode;
  return fast_mode_enabled;
}
// Overwrites the three eager-deletion flags in one call.
//   threshold: new value of FLAGS_eager_delete_tensor_gb (GB).
//   fraction:  new value of FLAGS_memory_fraction_of_eager_deletion.
//   fast_mode: new value of FLAGS_fast_eager_deletion_mode.
void SetEagerDeletionMode(double threshold, double fraction, bool fast_mode) {
  FLAGS_eager_delete_tensor_gb = threshold;
  FLAGS_memory_fraction_of_eager_deletion = fraction;
  FLAGS_fast_eager_deletion_mode = fast_mode;
}
// Reports the current value of FLAGS_memory_fraction_of_eager_deletion.
double GetEagerDeletionMemoryFraction() {
  const double fraction = FLAGS_memory_fraction_of_eager_deletion;
  return fraction;
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/garbage_collector.h
浏览文件 @
c7c6eeb4
...
...
@@ -18,6 +18,8 @@
#include <functional>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "gflags/gflags.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
...
...
@@ -126,5 +128,12 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) {
}
}
// Accessors for the eager-deletion configuration.

// Returns the eager-deletion threshold.
int64_t
GetEagerDeletionThreshold
();
// Returns whether fast eager deletion mode is enabled.
bool
IsFastEagerDeletionModeEnabled
();
// Sets the threshold, the memory fraction and the fast-mode switch together.
void
SetEagerDeletionMode
(
double
threshold
,
double
fraction
,
bool
fast_mode
);
// Returns the memory fraction used for eager deletion.
double
GetEagerDeletionMemoryFraction
();
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/no_need_buffer_vars_inference.h
0 → 100644
浏览文件 @
c7c6eeb4
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
namespace
paddle
{
namespace
framework
{
// Abstract base for functors that name the input slots of an operator whose
// buffers are not needed.
//
// The object keeps REFERENCES to the maps passed to the constructor, so the
// maps must outlive it.
class NoNeedBufferVarsInference {
 public:
  NoNeedBufferVarsInference(const VariableNameMap &inputs,
                            const VariableNameMap &outputs,
                            const AttributeMap &attrs)
      : inputs_(inputs), outputs_(outputs), attrs_(attrs) {}

  virtual ~NoNeedBufferVarsInference() = default;

  // Implemented by subclasses: returns the set of names whose buffers are
  // not needed.
  virtual std::unordered_set<std::string> operator()() const = 0;

  // Read-only access to the maps captured at construction.
  const VariableNameMap &Inputs() const { return inputs_; }

  const VariableNameMap &Outputs() const { return outputs_; }

  const AttributeMap &Attrs() const { return attrs_; }

 private:
  const VariableNameMap &inputs_;
  const VariableNameMap &outputs_;
  const AttributeMap &attrs_;
};
// Declares a class `class_type` derived from NoNeedBufferVarsInference that
// inherits the base constructors and whose operator() returns a set built
// from the remaining macro arguments (string names).
// Example: DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(MyInference, "X", "Y");
// The expansion has no trailing semicolon; the caller supplies it.
#define DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(class_type, ...) \
class class_type : public ::paddle::framework::NoNeedBufferVarsInference { \
public: \
using ::paddle::framework::NoNeedBufferVarsInference:: \
NoNeedBufferVarsInference; \
\
std::unordered_set<std::string> operator()() const override { \
return {__VA_ARGS__}; \
} \
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/op_info.h
浏览文件 @
c7c6eeb4
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <unordered_map>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/platform/macros.h"
...
...
@@ -39,6 +40,7 @@ struct OpInfo {
InferVarTypeFN
infer_var_type_
;
InferShapeFN
infer_shape_
;
InferInplaceOpFN
infer_inplace_
;
InferNoNeedBufferVarsFN
infer_no_need_buffer_vars_
;
bool
HasOpProtoAndChecker
()
const
{
return
proto_
!=
nullptr
&&
checker_
!=
nullptr
;
...
...
@@ -64,6 +66,10 @@ struct OpInfo {
}
const
OpAttrChecker
*
Checker
()
const
{
return
checker_
;
}
// Returns the stored no-need-buffer inferer by const reference. The
// std::function may be empty, so callers should test it before invoking.
const InferNoNeedBufferVarsFN &NoNeedBufferVarsInferer() const {
  return infer_no_need_buffer_vars_;
}
};
class
OpInfoMap
{
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
c7c6eeb4
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <algorithm>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
...
...
@@ -326,7 +327,12 @@ OperatorBase::OperatorBase(const std::string& type,
const
VariableNameMap
&
inputs
,
const
VariableNameMap
&
outputs
,
const
AttributeMap
&
attrs
)
:
type_
(
type
),
inputs_
(
inputs
),
outputs_
(
outputs
),
attrs_
(
attrs
)
{
:
type_
(
type
),
inputs_
(
inputs
),
outputs_
(
outputs
),
attrs_
(
attrs
),
// NOTE(zjl): why op_info may be nullptr?
info_
(
OpInfoMap
::
Instance
().
GetNullable
(
type
))
{
GenerateTemporaryNames
();
CheckAllInputOutputSet
();
}
...
...
@@ -350,7 +356,7 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
}
return
ret_val
;
}
auto
&
info
=
OpInfoMap
::
Instance
().
Get
(
Type
()
);
auto
&
info
=
Info
(
);
// get all OpProto::Var for outputs
for
(
auto
&
o
:
info
.
Proto
().
outputs
())
{
...
...
@@ -366,18 +372,16 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
}
void
OperatorBase
::
CheckAllInputOutputSet
()
const
{
auto
&
info_map
=
OpInfoMap
::
Instance
();
auto
*
op_info
=
info_map
.
GetNullable
(
Type
());
if
(
op_info
==
nullptr
||
op_info
->
proto_
==
nullptr
)
return
;
if
(
info_
==
nullptr
||
info_
->
proto_
==
nullptr
)
return
;
for
(
auto
&
in
:
op_info
->
Proto
().
inputs
())
{
for
(
auto
&
in
:
info_
->
Proto
().
inputs
())
{
if
(
!
in
.
dispensable
())
{
PADDLE_ENFORCE
(
inputs_
.
find
(
in
.
name
())
!=
inputs_
.
end
(),
"Operator %s's input, %s, is not set"
,
Type
(),
in
.
name
());
}
}
for
(
auto
&
out
:
op_info
->
Proto
().
outputs
())
{
for
(
auto
&
out
:
info_
->
Proto
().
outputs
())
{
if
(
!
out
.
dispensable
())
{
PADDLE_ENFORCE
(
outputs_
.
find
(
out
.
name
())
!=
outputs_
.
end
(),
"Operator %s's output, %s, is not set"
,
Type
(),
...
...
@@ -997,7 +1001,27 @@ Scope* OperatorWithKernel::PrepareData(
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
,
RuntimeContext
*
ctx
)
const
{
Scope
*
new_scope
=
nullptr
;
std
::
unordered_set
<
std
::
string
>
no_buffer_ins
;
if
(
info_
)
{
auto
&
no_buffer_inferer
=
info_
->
NoNeedBufferVarsInferer
();
// Some op may not register NoNeedBufferVarsInferer
if
(
no_buffer_inferer
)
{
no_buffer_ins
=
no_buffer_inferer
(
Inputs
(),
Outputs
(),
Attrs
());
}
}
for
(
auto
&
var_name_item
:
Inputs
())
{
// NOTE(zjl): STL does not guarantee fast std::unordered_set::count when set
// is empty. At least STL implemented on my mac does calculate hash code
// of search key even though the set is empty.
if
(
!
no_buffer_ins
.
empty
()
&&
no_buffer_ins
.
count
(
var_name_item
.
first
)
>
0
)
{
VLOG
(
1
)
<<
"Skip scanning input "
<<
var_name_item
.
first
<<
" in Operator "
<<
type_
;
continue
;
}
std
::
vector
<
Variable
*>&
input_vars
=
ctx
->
inputs
[
var_name_item
.
first
];
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
...
...
paddle/fluid/framework/operator.h
浏览文件 @
c7c6eeb4
...
...
@@ -160,6 +160,11 @@ class OperatorBase {
const
VariableNameMap
&
Inputs
()
const
{
return
inputs_
;
}
const
VariableNameMap
&
Outputs
()
const
{
return
outputs_
;
}
// Returns the OpInfo cached in info_.
// Fails through PADDLE_ENFORCE_NOT_NULL when info_ is null.
const OpInfo &Info() const {
  PADDLE_ENFORCE_NOT_NULL(info_, "OpInfo of %s is not found", type_);
  return *info_;
}
bool
HasInputs
(
const
std
::
string
&
name
)
const
;
//! Get a input with argument's name described in `op_proto`
std
::
string
Input
(
const
std
::
string
&
name
)
const
;
...
...
@@ -194,6 +199,10 @@ class OperatorBase {
// IG (Inputs Gradients)
VariableNameMap
outputs_
;
AttributeMap
attrs_
;
// OpInfo
const
OpInfo
*
info_
;
// Whether this operator executes in an Executor.
bool
run_by_executor_
{
true
};
...
...
@@ -444,7 +453,7 @@ class OperatorWithKernel : public OperatorBase {
}
virtual
void
InferShape
(
InferShapeContext
*
ctx
)
const
{
OpInfoMap
::
Instance
().
Get
(
Type
()
).
infer_shape_
(
ctx
);
Info
(
).
infer_shape_
(
ctx
);
}
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
c7c6eeb4
...
...
@@ -29,15 +29,6 @@ DEFINE_bool(
"Delete local scope eagerly. It will reduce GPU memory usage but "
"slow down the destruction of variables.(around 1% performance harm)"
);
DEFINE_double
(
eager_delete_tensor_gb
,
-
1.0
,
"Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0"
);
DEFINE_bool
(
fast_eager_deletion_mode
,
true
,
"Fast eager deletion mode. If enabled, memory would release "
"immediately without waiting GPU kernel ends."
);
// When in inference scenario, the scopes will not be written by two threads in
// a mean time, but a scope may be read by multiple threads concurrently, and
// the mutex will cause serious performance issue.
...
...
@@ -57,15 +48,6 @@ DEFINE_bool(fast_eager_deletion_mode, true,
namespace
paddle
{
namespace
framework
{
int64_t
GetEagerDeletionThreshold
()
{
return
FLAGS_eager_delete_tensor_gb
<
0
?
-
1
:
static_cast
<
int64_t
>
(
FLAGS_eager_delete_tensor_gb
*
(
static_cast
<
int64_t
>
(
1
)
<<
30
));
}
bool
IsFastEagerDeletionModeEnabled
()
{
return
FLAGS_fast_eager_deletion_mode
;
}
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
...
...
paddle/fluid/framework/scope.h
浏览文件 @
c7c6eeb4
...
...
@@ -32,9 +32,6 @@ extern "C" {
namespace
paddle
{
namespace
framework
{
int64_t
GetEagerDeletionThreshold
();
bool
IsFastEagerDeletionModeEnabled
();
class
Scope
;
/**
...
...
paddle/fluid/framework/type_defs.h
浏览文件 @
c7c6eeb4
...
...
@@ -30,6 +30,7 @@ class InferShapeContext;
class
InferVarTypeContext
;
class
BlockDesc
;
class
Variable
;
class
NoNeedBufferVarsInference
;
using
VariableNameMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
;
// TODO(panyx0718): Replace vector with something like gtl::Vector.
...
...
@@ -61,5 +62,9 @@ using InferShapeFN = std::function<void(InferShapeContext*)>;
using
InplacePair
=
std
::
unordered_map
<
std
::
string
,
std
::
string
>
;
using
InferInplaceOpFN
=
std
::
function
<
InplacePair
(
const
OpDesc
&
)
>
;
using
InferNoNeedBufferVarsFN
=
std
::
function
<
std
::
unordered_set
<
std
::
string
>
(
const
VariableNameMap
&
/*inputs*/
,
const
VariableNameMap
&
/*outputs*/
,
const
AttributeMap
&
/*attrs*/
)
>
;
}
// namespace framework
}
// namespace paddle
paddle/fluid/operators/add_position_encoding_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/add_position_encoding_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -39,13 +40,8 @@ class AddPositionEncodingOpGrad : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"X(Input) must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Out"
),
"Out must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Out@GRAD must not be null."
);
auto
out_dims
=
ctx
->
GetInputDim
(
"Out"
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)))
{
auto
out_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
out_dims
);
}
}
...
...
@@ -75,6 +71,22 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker {
}
};
// Builds the description of the "add_position_encoding_grad" operator.
// The gradient op takes only Out@GRAD as input and produces X@GRAD.
class AddPositionEncodingGradOpDescMaker
    : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto grad_op =
        std::unique_ptr<framework::OpDesc>(new framework::OpDesc());
    grad_op->SetType("add_position_encoding_grad");
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -83,7 +95,7 @@ namespace plt = paddle::platform;
REGISTER_OPERATOR
(
add_position_encoding
,
ops
::
AddPositionEncodingOp
,
ops
::
AddPositionEncodingOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
AddPositionEncodingGradOpDescMaker
);
REGISTER_OPERATOR
(
add_position_encoding_grad
,
ops
::
AddPositionEncodingOpGrad
);
REGISTER_OP_CPU_KERNEL
(
...
...
paddle/fluid/operators/clip_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/clip_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -76,12 +77,28 @@ class ClipOpGrad : public framework::OperatorWithKernel {
}
};
// Builds the description of the "clip_grad" operator.
// Inputs: X and Out@GRAD. Output: X@GRAD. Attributes are copied from the
// forward op.
class ClipGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto grad_op =
        std::unique_ptr<framework::OpDesc>(new framework::OpDesc());
    grad_op->SetType("clip_grad");
    grad_op->SetInput("X", Input("X"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
clip
,
ops
::
ClipOp
,
ops
::
ClipOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ClipGradOpDescMaker
);
REGISTER_OPERATOR
(
clip_grad
,
ops
::
ClipOpGrad
);
REGISTER_OP_CPU_KERNEL
(
clip
,
ops
::
ClipKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/concat_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
#include <memory>
#include <string>
#include <vector>
...
...
@@ -120,11 +121,7 @@ Examples:
class
ConcatOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
ConcatOpGrad
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorWithKernel
(
type
,
inputs
,
outputs
,
attrs
)
{}
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
auto
in_x
=
"X"
;
...
...
@@ -142,6 +139,33 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
}
}
}
protected:
// Chooses the kernel from the data type of the Out@GRAD input tensor and
// the place of the execution context.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext &ctx) const override {
  const auto *out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
  return framework::OpKernelType(out_grad->type(), ctx.GetPlace());
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
ConcatOpGradNoNeedBufferVarInference
,
"X"
);
// Builds the description of the "concat_grad" operator.
// Inputs: X and Out@GRAD. Output: X@GRAD, requested with InputGrad("X",
// false). Attributes are copied from the forward op.
class ConcatGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto grad_op =
        std::unique_ptr<framework::OpDesc>(new framework::OpDesc());
    grad_op->SetType("concat_grad");
    grad_op->SetInput("X", Input("X"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X", false));
    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
}
// namespace operators
...
...
@@ -149,9 +173,9 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
concat
,
ops
::
ConcatOp
,
ops
::
ConcatOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
false
>
/* set false to disable empty grad */
);
REGISTER_OPERATOR
(
concat_grad
,
ops
::
ConcatOpGrad
);
ops
::
ConcatGradOpDescMaker
);
REGISTER_OPERATOR
(
concat_grad
,
ops
::
ConcatOpGrad
,
ops
::
ConcatOpGradNoNeedBufferVarInference
);
REGISTER_OP_CPU_KERNEL
(
concat
,
ops
::
ConcatKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
ConcatKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/conv_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -455,13 +455,13 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
return
type
;
}
class
Conv2
d
GradMaker
:
public
framework
::
SingleGradOpDescMaker
{
class
Conv2
D
GradMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
op
=
new
framework
::
OpDesc
();
op
->
SetType
(
GradOpType
()
);
op
->
SetType
(
this
->
ForwardOpType
()
+
"_grad"
);
op
->
SetInput
(
"Input"
,
Input
(
"Input"
));
op
->
SetInput
(
"Filter"
,
Input
(
"Filter"
));
op
->
SetInput
(
"Bias"
,
Input
(
"Bias"
));
...
...
@@ -470,14 +470,33 @@ class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
op
->
SetOutput
(
framework
::
GradVarName
(
"Input"
),
InputGrad
(
"Input"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Filter"
),
InputGrad
(
"Filter"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Bias"
),
InputGrad
(
"Bias"
));
op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
op
);
}
};
class
Conv3DGradMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
virtual
std
::
string
GradOpType
()
const
{
return
this
->
ForwardOpType
()
+
"_grad"
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
op
=
new
framework
::
OpDesc
();
op
->
SetType
(
this
->
ForwardOpType
()
+
"_grad"
);
op
->
SetInput
(
"Input"
,
Input
(
"Input"
));
op
->
SetInput
(
"Filter"
,
Input
(
"Filter"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Output"
),
OutputGrad
(
"Output"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Input"
),
InputGrad
(
"Input"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Filter"
),
InputGrad
(
"Filter"
));
if
(
ForwardOp
().
Inputs
().
count
(
"ResidualData"
)
!=
0
)
{
op
->
SetInput
(
"ResidualData"
,
Input
(
"ResidualData"
));
}
op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
op
);
}
};
...
...
@@ -486,17 +505,16 @@ class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
conv2d
,
ops
::
ConvOp
,
ops
::
Conv2DOpMaker
,
ops
::
ConvOpInferVarType
,
ops
::
Conv2
d
GradMaker
);
ops
::
ConvOpInferVarType
,
ops
::
Conv2
D
GradMaker
);
REGISTER_OPERATOR
(
conv2d_grad
,
ops
::
ConvOpGrad
);
// depthwise convolution op
REGISTER_OPERATOR
(
depthwise_conv2d
,
ops
::
ConvOp
,
ops
::
Conv2DOpMaker
,
ops
::
ConvOpInferVarType
,
ops
::
Conv2
d
GradMaker
);
ops
::
ConvOpInferVarType
,
ops
::
Conv2
D
GradMaker
);
REGISTER_OPERATOR
(
depthwise_conv2d_grad
,
ops
::
ConvOpGrad
);
REGISTER_OPERATOR
(
conv3d
,
ops
::
ConvOp
,
ops
::
Conv3DOpMaker
,
ops
::
ConvOpInferVarType
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ConvOpInferVarType
,
ops
::
Conv3DGradMaker
);
REGISTER_OPERATOR
(
conv3d_grad
,
ops
::
ConvOpGrad
);
// depthwise conv kernel
...
...
paddle/fluid/operators/crop_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/crop_op.h"
#include <boost/lexical_cast.hpp>
#include <memory>
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
...
...
@@ -178,12 +180,31 @@ class CropOpGrad : public framework::OperatorWithKernel {
}
};
// Builds the description of the "crop_grad" operator.
// Inputs: Out@GRAD, X and, only when the forward op has it, Offsets.
// Output: X@GRAD. Attributes are copied from the forward op.
class CropGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto grad_op =
        std::unique_ptr<framework::OpDesc>(new framework::OpDesc());
    grad_op->SetType("crop_grad");
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetInput("X", Input("X"));

    // Offsets is forwarded only if the forward op declares that input.
    const bool has_offsets = ForwardOp().Inputs().count("Offsets") > 0;
    if (has_offsets) {
      grad_op->SetInput("Offsets", Input("Offsets"));
    }

    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
crop
,
ops
::
CropOp
,
ops
::
CropOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
CropGradOpDescMaker
);
REGISTER_OPERATOR
(
crop_grad
,
ops
::
CropOpGrad
);
REGISTER_OP_CPU_KERNEL
(
crop
,
ops
::
CropKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/cross_entropy_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -238,6 +238,23 @@ class CrossEntropyGradientOp : public CrossEntropyGradientOpBase {
}
};
// Builds the description of the "cross_entropy_grad" operator.
// Inputs: X, Label and Y@GRAD. Output: X@GRAD. Attributes are copied from
// the forward op.
class CrossEntropyGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto grad_op =
        std::unique_ptr<framework::OpDesc>(new framework::OpDesc());
    grad_op->SetType("cross_entropy_grad");
    grad_op->SetInput("X", Input("X"));
    grad_op->SetInput("Label", Input("Label"));
    grad_op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
class
CrossEntropyOp2
:
public
CrossEntropyOpBase
{
public:
using
CrossEntropyOpBase
::
CrossEntropyOpBase
;
...
...
@@ -354,7 +371,7 @@ using CPUCtx = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR
(
cross_entropy
,
ops
::
CrossEntropyOpBase
,
ops
::
CrossEntropyOpMaker
,
ops
::
CrossEntropyOpInferVarType
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
CrossEntropyGradOpDescMaker
);
REGISTER_OPERATOR
(
cross_entropy_grad
,
ops
::
CrossEntropyGradientOp
);
REGISTER_OP_CPU_KERNEL
(
cross_entropy
,
ops
::
CrossEntropyOpKernel
<
CPUCtx
,
float
>
,
ops
::
CrossEntropyOpKernel
<
CPUCtx
,
double
>
);
...
...
paddle/fluid/operators/cudnn_lstm_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -170,11 +171,6 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
),
"Input(Input) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"W"
),
"Input(W) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"last_h"
),
"Input(last_h) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"last_c"
),
"Input(last_c) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Cache"
),
"Input(last_c) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"InitH"
),
...
...
@@ -197,6 +193,35 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel {
}
};
// Builds the description of the "cudnn_lstm_grad" operator.
// Inputs: Input, InitH, InitC, W, Out, the gradients of Out, last_c and
// last_h, and Cache only when the forward op has it.
// Outputs: gradients of Input, W, InitH and InitC.
// Attributes are copied from the forward op.
class CudnnLSTMGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto grad_op =
        std::unique_ptr<framework::OpDesc>(new framework::OpDesc());
    grad_op->SetType("cudnn_lstm_grad");

    // Forward inputs.
    grad_op->SetInput("Input", Input("Input"));
    grad_op->SetInput("InitH", Input("InitH"));
    grad_op->SetInput("InitC", Input("InitC"));
    grad_op->SetInput("W", Input("W"));
    // Cache is forwarded only if the forward op declares that input.
    const bool has_cache = ForwardOp().Inputs().count("Cache") > 0;
    if (has_cache) {
      grad_op->SetInput("Cache", Input("Cache"));
    }

    // Forward output and the incoming gradients.
    grad_op->SetInput("Out", Output("Out"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetInput(framework::GradVarName("last_c"), OutputGrad("last_c"));
    grad_op->SetInput(framework::GradVarName("last_h"), OutputGrad("last_h"));

    // Gradients produced by this op.
    grad_op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
    grad_op->SetOutput(framework::GradVarName("W"), InputGrad("W"));
    grad_op->SetOutput(framework::GradVarName("InitH"), InputGrad("InitH"));
    grad_op->SetOutput(framework::GradVarName("InitC"), InputGrad("InitC"));

    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
template
<
typename
T
>
class
NotImpleKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -211,7 +236,7 @@ class NotImpleKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
cudnn_lstm
,
ops
::
CudnnLSTMOp
,
ops
::
CudnnLSTMOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
CudnnLSTMGradOpDescMaker
);
REGISTER_OPERATOR
(
cudnn_lstm_grad
,
ops
::
CudnnLSTMGradOp
);
REGISTER_OP_CPU_KERNEL
(
cudnn_lstm
,
ops
::
NotImpleKernel
<
float
>
);
...
...
paddle/fluid/operators/distributed/parameter_prefetch.cc
浏览文件 @
c7c6eeb4
...
...
@@ -14,6 +14,7 @@
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/operators/distributed/parameter_prefetch.h"
...
...
@@ -218,7 +219,7 @@ void prefetch(const std::string& id_name, const std::string& out_name,
boost
::
get
<
platform
::
CUDAPlace
>
(
id_tensor
.
place
()),
id_tensor
.
data
<
int64_t
>
(),
sizeof
(
int64_t
)
*
id_tensor
.
numel
(),
stream
);
for
(
size
_t
i
=
0
;
i
<
cpu_tensor
.
numel
();
++
i
)
{
for
(
int64
_t
i
=
0
;
i
<
cpu_tensor
.
numel
();
++
i
)
{
ids_vector
.
push_back
(
cpu_tensor_data
[
i
]);
}
#endif
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -16,8 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_GRAD_MAKER
(
elementwise_add
,
Add
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_add
,
"Add"
,
"Out = X + Y"
,
"Out"
,
"X"
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_add
,
"Add"
,
"Out = X + Y"
);
REGISTER_OP_CPU_KERNEL
(
elementwise_add
,
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
c7c6eeb4
...
...
@@ -272,12 +272,11 @@ class ElementwiseGradOpInplace : public framework::InplaceOpInference {
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
ElementwiseGradNoBufVarsInference
,
"Y"
);
}
// namespace operators
}
// namespace paddle
/*
*/
#define REGISTER_ELEMWISE_GRAD_MAKER(kernel_type, op_name) \
class kernel_type##GradMaker \
: public paddle::framework::SingleGradOpDescMaker { \
...
...
@@ -311,18 +310,19 @@ class ElementwiseGradOpInplace : public framework::InplaceOpInference {
::paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(op_type##_grad, ::paddle::operators::ElementwiseOpGrad)
#define REGISTER_ELEMWISE_EXPLICIT_OP(op_type, op_name, equation, ...) \
class __ElemwiseOp##op_type##Maker__ \
: public ::paddle::operators::ElementwiseOpMaker { \
protected: \
virtual std::string GetName() const { return op_name; } \
virtual std::string GetEquation() const { return equation; } \
}; \
REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp, \
__ElemwiseOp##op_type##Maker__, \
::paddle::operators::ElementwiseOpInferVarType, \
op_type##GradMaker, \
::paddle::operators::ElementwiseOpInplace); \
REGISTER_OPERATOR(op_type##_grad, \
::paddle::operators::ElementwiseOpExplicitGrad, \
::paddle::operators::ElementwiseGradOpInplace)
#define REGISTER_ELEMWISE_EXPLICIT_OP(op_type, op_name, equation) \
class __ElemwiseOp##op_type##Maker__ \
: public ::paddle::operators::ElementwiseOpMaker { \
protected: \
virtual std::string GetName() const { return op_name; } \
virtual std::string GetEquation() const { return equation; } \
}; \
REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp, \
__ElemwiseOp##op_type##Maker__, \
::paddle::operators::ElementwiseOpInferVarType, \
op_type##GradMaker, \
::paddle::operators::ElementwiseOpInplace); \
REGISTER_OPERATOR(op_type##_grad, \
::paddle::operators::ElementwiseOpExplicitGrad, \
::paddle::operators::ElementwiseGradOpInplace, \
::paddle::operators::ElementwiseGradNoBufVarsInference)
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -16,8 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_GRAD_MAKER
(
elementwise_sub
,
Sub
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_sub
,
"Sub"
,
"Out = X - Y"
,
"Out"
,
"X"
);
REGISTER_ELEMWISE_EXPLICIT_OP
(
elementwise_sub
,
"Sub"
,
"Out = X - Y"
);
REGISTER_OP_CPU_KERNEL
(
elementwise_sub
,
...
...
paddle/fluid/operators/gather_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/gather_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ddim.h"
namespace
paddle
{
...
...
@@ -59,8 +62,9 @@ class GatherGradOp : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
(),
ctx
.
device_context
());
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
(),
ctx
.
device_context
());
}
};
...
...
@@ -94,13 +98,34 @@ Out = [[3, 4],
)DOC"
);
}
};
// Grad-op description maker for `gather`.
//
// Apply() produces a `gather_grad` OpDesc that takes the forward inputs
// "Index" and "X" plus the gradient of "Out", and emits the gradient of "X".
// The forward op's attributes are copied unchanged.
class GatherGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    framework::OpDesc& grad = *grad_desc;

    grad.SetType("gather_grad");

    // Inputs: forward operands first, then the upstream gradient.
    grad.SetInput("Index", Input("Index"));
    grad.SetInput("X", Input("X"));
    grad.SetInput(framework::GradVarName("Out"), OutputGrad("Out"));

    // Output: gradient with respect to "X".
    grad.SetOutput(framework::GradVarName("X"), InputGrad("X"));

    grad.SetAttrMap(Attrs());
    return grad_desc;
  }
};
// Declares GatherGradNoNeedBufferVarInference, naming the input slot "X".
// NOTE(review): judging by the macro name, this presumably marks "X" as an
// input whose tensor buffer gather_grad does not read, so it can be freed
// early -- confirm against no_need_buffer_vars_inference.h.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(GatherGradNoNeedBufferVarInference, "X");
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
gather
,
ops
::
GatherOp
,
ops
::
GatherOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
gather_grad
,
ops
::
GatherGradOp
);
ops
::
GatherGradOpDescMaker
);
REGISTER_OPERATOR
(
gather_grad
,
ops
::
GatherGradOp
,
ops
::
GatherGradNoNeedBufferVarInference
);
REGISTER_OP_CPU_KERNEL
(
gather
,
ops
::
GatherOpKernel
<
float
>
,
ops
::
GatherOpKernel
<
double
>
,
ops
::
GatherOpKernel
<
int
>
,
ops
::
GatherOpKernel
<
uint8_t
>
,
...
...
paddle/fluid/operators/lod_reset_op.cc
浏览文件 @
c7c6eeb4
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lod_reset_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -146,18 +147,39 @@ class LoDResetGradOp : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
(),
ctx
.
device_context
());
return
framework
::
OpKernelType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
(),
ctx
.
device_context
());
}
};
// Grad-op description maker for `lod_reset`.
//
// Apply() produces a `lod_reset_grad` OpDesc that takes the gradient of
// "Out" and the forward input "X", and emits the gradient of "X". The
// forward op's attributes are copied unchanged.
class LoDResetGradDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    framework::OpDesc& grad = *grad_desc;

    grad.SetType("lod_reset_grad");

    // Inputs: upstream gradient first, then the forward operand.
    grad.SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad.SetInput("X", Input("X"));

    // Output: gradient with respect to "X".
    grad.SetOutput(framework::GradVarName("X"), InputGrad("X"));

    grad.SetAttrMap(Attrs());
    return grad_desc;
  }
};
// Declares LoDResetGradNoNeedBufferVarInference, naming the input slot "X".
// NOTE(review): judging by the macro name, this presumably marks "X" as an
// input whose tensor buffer lod_reset_grad does not read, so it can be freed
// early -- confirm against no_need_buffer_vars_inference.h.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference, "X");
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
lod_reset
,
ops
::
LoDResetOp
,
ops
::
LoDResetOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
lod_reset_grad
,
ops
::
LoDResetGradOp
);
ops
::
LoDResetGradDescMaker
);
REGISTER_OPERATOR
(
lod_reset_grad
,
ops
::
LoDResetGradOp
,
ops
::
LoDResetGradNoNeedBufferVarInference
);
REGISTER_OP_CPU_KERNEL
(
lod_reset
,
ops
::
LoDResetKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
LoDResetKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
,
...
...
paddle/fluid/operators/reader/ctr_reader.h
浏览文件 @
c7c6eeb4
...
...
@@ -21,6 +21,7 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
...
...
@@ -152,7 +153,7 @@ class CTRReader : public framework::FileReader {
queue_
->
ReOpen
();
VLOG
(
3
)
<<
"reopen success"
;
VLOG
(
3
)
<<
"thread_num "
<<
thread_num_
;
for
(
in
t
thread_id
=
0
;
thread_id
<
thread_num_
;
thread_id
++
)
{
for
(
size_
t
thread_id
=
0
;
thread_id
<
thread_num_
;
thread_id
++
)
{
read_threads_
.
emplace_back
(
new
std
::
thread
(
std
::
bind
(
&
ReadThread
,
file_groups_
[
thread_id
],
data_desc_
,
static_cast
<
int
>
(
thread_id
),
&
read_thread_status_
,
queue_
)));
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
c7c6eeb4
...
...
@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -139,6 +140,7 @@ PYBIND11_MODULE(core, m) {
paddle
::
platform
::
CpuTotalPhysicalMemory
();
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
m
.
doc
()
=
"C++ core of PaddlePaddle"
;
// using framework in this function. Since it is inside a function, it will
...
...
@@ -153,6 +155,11 @@ PYBIND11_MODULE(core, m) {
return
paddle
::
operators
::
AppendPythonCallableObjectAndReturnId
(
py_obj
);
});
// NOTE(zjl): ctest would load environment variables at the beginning even
// though we have not `import paddle.fluid as fluid`. So we add this API
// to enable eager deletion mode in unittest.
m
.
def
(
"_set_eager_deletion_mode"
,
&
paddle
::
framework
::
SetEagerDeletionMode
);
m
.
add_object
(
"_cleanup"
,
py
::
capsule
([]()
{
ScopePool
::
Instance
().
Clear
();
}));
...
...
@@ -281,6 +288,8 @@ PYBIND11_MODULE(core, m) {
py
::
class_
<
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
.
def_buffer
(
[](
Tensor
&
self
)
->
py
::
buffer_info
{
return
CastToPyBuffer
(
self
);
})
.
def
(
"_is_initialized"
,
[](
const
Tensor
&
self
)
{
return
self
.
IsInitialized
();
})
.
def
(
"_get_dims"
,
[](
const
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"_set_dims"
,
...
...
@@ -681,7 +690,8 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
,
R"DOC(
Delete all sub-scopes of the current scope.
)DOC"
);
)DOC"
)
.
def
(
"_kids"
,
&
Scope
::
kids
);
m
.
def
(
"Scope"
,
[]()
->
Scope
*
{
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py
0 → 100644
浏览文件 @
c7c6eeb4
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
numpy
as
np
os
.
environ
[
'FLAGS_use_ngraph'
]
=
'0'
os
.
environ
[
'FLAGS_use_mkldnn'
]
=
'0'
os
.
environ
[
'CPU_NUM'
]
=
'4'
import
paddle.fluid
as
fluid
import
six
import
unittest
import
multiprocessing
# Turn on eager deletion through the pybind API instead of FLAGS_* environment
# variables, so the setting takes effect when this test module is loaded.
# NOTE(review): the three positional arguments presumably are the
# eager-delete threshold in GB, the memory fraction and the fast-mode switch
# -- confirm against paddle::framework::SetEagerDeletionMode.
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
def simple_fc_net():
    """Build a small fully-connected classifier in the default program.

    The network is four tanh FC layers of width 200 (bias initialised to the
    constant 1.0), a 10-way softmax FC layer, mean cross-entropy loss and an
    Adam optimizer with learning rate 1e-3.

    Returns:
        tuple: ``(image, label, loss)`` variables of the built network.
    """
    layers = fluid.layers
    image = layers.data(name='image', shape=[784], dtype='float32')
    label = layers.data(name='label', shape=[1], dtype='int64')

    features = image
    for _ in range(4):
        # A fresh ParamAttr per layer, exactly as many as there are layers.
        bias_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Constant(value=1.0))
        features = layers.fc(
            features, size=200, act='tanh', bias_attr=bias_attr)

    prediction = layers.fc(features, size=10, act='softmax')
    per_sample_loss = layers.cross_entropy(input=prediction, label=label)
    loss = layers.mean(per_sample_loss)

    fluid.optimizer.Adam(learning_rate=1e-3).minimize(loss)
    return image, label, loss
def get_persistables_and_non_persistables(prog, fetch_list):
    """Split the variable names of ``prog`` into two disjoint-by-rule sets.

    A variable is classified as "persistable" when its ``persistable`` flag
    is truthy or its name appears in ``fetch_list``; every other variable is
    classified as non-persistable. All blocks of the program are scanned.

    Args:
        prog: Program-like object exposing ``num_blocks`` and ``block(idx)``;
            each block exposes a ``vars`` mapping whose values carry ``name``
            and ``persistable`` attributes.
        fetch_list: Iterable of variable names that must be treated as
            persistable regardless of their flag.

    Returns:
        tuple: ``(persistables, non_persistables)``, two sets of names.
    """
    # Build the lookup set once instead of scanning the list per variable.
    fetch_names = set(fetch_list)

    persistables = set()
    non_persistables = set()
    for bid in range(prog.num_blocks):
        # Only the variable objects are needed; the mapping keys are unused.
        for var in prog.block(bid).vars.values():
            if var.persistable or var.name in fetch_names:
                persistables.add(var.name)
            else:
                non_persistables.add(var.name)
    return persistables, non_persistables
class TestExecutor(unittest.TestCase):
    """Check which tensors are still initialized after running with eager deletion.

    After every iteration, persistable (and fetched) variables must still
    hold an initialized tensor, while all other variables must not. The check
    is run with both the plain executor and the data-parallel compiled
    program, on CPU and (when available) CUDA.
    """

    def test_executor_main(self):
        """Run both scenarios on every available place, each in a fresh context."""
        places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        # executor_main runs on all places first, then pe_main on all places.
        for scenario in (self.executor_main, self.pe_main):
            for p in places:
                self.place = p
                with fluid.program_guard(fluid.Program(), fluid.Program()):
                    with fluid.scope_guard(fluid.Scope()):
                        with fluid.unique_name.guard():
                            scenario()

    def prepare_feed(self, image, label, dev_cnt=1):
        """Create one random batch of 32 samples per device.

        Args:
            image: Variable whose ``shape[1:]`` gives the per-sample image shape.
            label: Variable whose ``shape[1:]`` gives the per-sample label shape.
            dev_cnt: Number of devices the batch is sized for.

        Returns:
            tuple: ``(image_np, label_np)``; float32 values in [0, 1) and
            int64 labels in [0, 9].
        """
        batch_size = 32 * dev_cnt
        image_shape = (batch_size, ) + tuple(image.shape[1:])
        label_shape = (batch_size, ) + tuple(label.shape[1:])

        image_np = np.random.random(size=image_shape).astype('float32')
        # randint's upper bound is exclusive, so high=10 yields labels 0..9;
        # this replaces the deprecated inclusive-bound random_integers.
        label_np = np.random.randint(
            low=0, high=10, size=label_shape).astype('int64')
        return image_np, label_np

    def assertScopeVar(self, scope, persitables, non_persistables):
        """Assert the tensor state in ``scope`` matches the expected split.

        Args:
            scope: Scope providing ``find_var(name)``.
            persitables: Names whose tensors must still be initialized.
            non_persistables: Names whose tensors must not be initialized.
        """
        outline_p_vars = []
        for name in persitables:
            var = scope.find_var(name)
            self.assertIsNotNone(var)
            if not var.get_tensor()._is_initialized():
                outline_p_vars.append(name)

        outline_np_vars = []
        for name in non_persistables:
            var = scope.find_var(name)
            self.assertIsNotNone(var)
            if var.get_tensor()._is_initialized():
                outline_np_vars.append(name)

        # Print the offenders before asserting so a failure is diagnosable.
        print('Non-alive persistable vars {} in {}'.format(outline_p_vars,
                                                           persitables))
        print('Alive non-persistable vars {} in {}'.format(outline_np_vars,
                                                           non_persistables))

        self.assertEqual(len(outline_p_vars), 0)
        self.assertEqual(len(outline_np_vars), 0)

    def executor_main(self):
        """Run the net with ``fluid.core.Executor`` and check the global scope."""
        image, label, loss = simple_fc_net()
        loss.persistable = False
        persistables, non_persistables = get_persistables_and_non_persistables(
            fluid.default_main_program(), [loss.name])
        print('Non-persistable var number {}'.format(len(non_persistables)))
        print(non_persistables)

        exe = fluid.Executor(self.place)
        exe.run(fluid.default_startup_program())

        # Switch to the low-level core executor for the training iterations.
        p = fluid.core.Place()
        p.set_place(self.place)
        exe = fluid.core.Executor(p)

        for _ in range(10):
            image_np, label_np = self.prepare_feed(image, label)
            fluid.global_scope().var(image.name).get_tensor().set(image_np,
                                                                  self.place)
            fluid.global_scope().var(label.name).get_tensor().set(label_np,
                                                                  self.place)
            # exe.run would not create local scope
            # so that we can detect whether gc clears temporary variables
            # NOTE(review): positional args after the scope are block id 0 and
            # two flags (presumably create_local_scope=False,
            # create_vars=True) -- confirm against the core.Executor binding.
            exe.run(fluid.default_main_program().desc,
                    fluid.global_scope(), 0, False, True, [loss.name])
            self.assertScopeVar(fluid.global_scope(), persistables,
                                non_persistables)

    def pe_main(self):
        """Run the net as a data-parallel compiled program and check kid scopes."""
        image, label, loss = simple_fc_net()
        loss.persistable = False
        # This name must match the one passed to assertScopeVar below; the
        # earlier misspelling made the final check raise NameError.
        persistables, non_persistables = get_persistables_and_non_persistables(
            fluid.default_main_program(), [loss.name])

        exe = fluid.Executor(self.place)
        exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 100

        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False

        # build_strategy is passed explicitly; it was configured above but
        # previously never handed to the compiled program.
        prog = fluid.CompiledProgram(fluid.default_main_program(
        )).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

        if isinstance(self.place, fluid.CUDAPlace):
            dev_cnt = fluid.core.get_cuda_device_count()
        else:
            dev_cnt = int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

        for _ in range(10):
            image_np, label_np = self.prepare_feed(image, label, dev_cnt)
            feed = {image.name: image_np, label.name: label_np}

            exe.run(program=prog, feed=feed, fetch_list=[loss])

            for scope in prog._local_scopes:
                kids = scope._kids()
                self.assertEqual(len(kids), 1)
                self.assertScopeVar(kids[0], persistables, non_persistables)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
浏览文件 @
c7c6eeb4
...
...
@@ -13,7 +13,6 @@
# limitations under the License.
import
os
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
'0.0'
os
.
environ
[
'CPU_NUM'
]
=
'2'
import
six
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py
浏览文件 @
c7c6eeb4
...
...
@@ -16,6 +16,8 @@ import unittest
from
test_eager_deletion_dynamic_rnn_base
import
TestBase
import
paddle.fluid
as
fluid
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
def
gru_net
(
data
,
label
,
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py
浏览文件 @
c7c6eeb4
...
...
@@ -16,6 +16,8 @@ from test_eager_deletion_dynamic_rnn_base import TestBase
import
paddle.fluid
as
fluid
import
unittest
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
def
lstm_net
(
data
,
label
,
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py
浏览文件 @
c7c6eeb4
...
...
@@ -14,7 +14,9 @@
import
os
import
unittest
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
"0.0"
import
paddle.fluid
as
fluid
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
# FIXME(zjl): It seems that this unittest fails randomly
# when comparing all reduce last loss and reduce last loss
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
浏览文件 @
c7c6eeb4
...
...
@@ -14,7 +14,9 @@
import
os
import
unittest
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
"0.0"
import
paddle.fluid
as
fluid
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'./eager_deletion_transformer.wmt16.recordio'
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py
浏览文件 @
c7c6eeb4
...
...
@@ -16,8 +16,6 @@ from __future__ import print_function
import
os
os
.
environ
[
'CPU_NUM'
]
=
'2'
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
'0.0'
os
.
environ
[
'FLAGS_fast_eager_deletion_mode'
]
=
'1'
import
unittest
import
paddle.fluid
as
fluid
...
...
@@ -29,6 +27,8 @@ import paddle.fluid.compiler as compiler
import
numpy
import
multiprocessing
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
class
TestEagerDeletionWhileOpBase
(
unittest
.
TestCase
):
def
test_main
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
浏览文件 @
c7c6eeb4
...
...
@@ -14,11 +14,12 @@
import
os
import
unittest
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
"0.0"
os
.
environ
[
'FLAGS_memory_fraction_of_eager_deletion'
]
=
"0.55"
import
paddle.fluid
as
fluid
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'./p_gc_transformer.wmt16.recordio'
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
0.55
,
True
)
from
test_parallel_executor_transformer
import
TestTransformer
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_roi_align_op.py
浏览文件 @
c7c6eeb4
...
...
@@ -168,3 +168,7 @@ class TestROIAlignOp(OpTest):
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录