Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
4b269baa
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4b269baa
编写于
3月 17, 2022
作者:
W
Weilong Wu
提交者:
GitHub
3月 17, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "[Eager Grad] Support eager grad interface (#40170)"
This reverts commit
4db8cf24
.
上级
06fee998
变更
32
隐藏空白更改
内联
并排
Showing
32 changed file
with
163 addition
and
1217 deletion
+163
-1217
paddle/fluid/eager/accumulation/accumulation_node.cc
paddle/fluid/eager/accumulation/accumulation_node.cc
+4
-4
paddle/fluid/eager/accumulation/accumulation_node.h
paddle/fluid/eager/accumulation/accumulation_node.h
+2
-9
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
...ger/api/generated/eager_generated/backwards/scale_node.cc
+2
-2
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
...ager/api/generated/eager_generated/backwards/scale_node.h
+2
-9
paddle/fluid/eager/auto_code_generator/eager_generator.cc
paddle/fluid/eager/auto_code_generator/eager_generator.cc
+5
-28
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
...er/auto_code_generator/final_state_generator/eager_gen.py
+6
-35
paddle/fluid/eager/backward.cc
paddle/fluid/eager/backward.cc
+18
-372
paddle/fluid/eager/backward.h
paddle/fluid/eager/backward.h
+4
-12
paddle/fluid/eager/custom_operator/custom_operator_node.cc
paddle/fluid/eager/custom_operator/custom_operator_node.cc
+2
-2
paddle/fluid/eager/custom_operator/custom_operator_node.h
paddle/fluid/eager/custom_operator/custom_operator_node.h
+2
-8
paddle/fluid/eager/grad_node_info.h
paddle/fluid/eager/grad_node_info.h
+1
-5
paddle/fluid/eager/grad_tensor_holder.cc
paddle/fluid/eager/grad_tensor_holder.cc
+0
-5
paddle/fluid/eager/grad_tensor_holder.h
paddle/fluid/eager/grad_tensor_holder.h
+0
-2
paddle/fluid/eager/tensor_wrapper.h
paddle/fluid/eager/tensor_wrapper.h
+0
-2
paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h
...e/fluid/eager/tests/data_structure_tests/grad_node_test.h
+2
-7
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
...le/fluid/eager/tests/performance_tests/benchmark_utils.cc
+4
-4
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
+0
-1
paddle/fluid/eager/tests/task_tests/backward_test.cc
paddle/fluid/eager/tests/task_tests/backward_test.cc
+4
-5
paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
...d/eager/tests/task_tests/cross_batch_accumulation_test.cc
+2
-2
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
+8
-8
paddle/fluid/eager/tests/task_tests/generated_test.cc
paddle/fluid/eager/tests/task_tests/generated_test.cc
+3
-3
paddle/fluid/eager/tests/task_tests/grad_test.cc
paddle/fluid/eager/tests/task_tests/grad_test.cc
+0
-339
paddle/fluid/eager/tests/task_tests/hook_test.cc
paddle/fluid/eager/tests/task_tests/hook_test.cc
+2
-2
paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
...le/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
+3
-3
paddle/fluid/eager/to_static/run_program_op_node.h
paddle/fluid/eager/to_static/run_program_op_node.h
+2
-8
paddle/fluid/pybind/eager_functions.cc
paddle/fluid/pybind/eager_functions.cc
+2
-25
paddle/fluid/pybind/eager_utils.cc
paddle/fluid/pybind/eager_utils.cc
+9
-15
paddle/fluid/pybind/eager_utils.h
paddle/fluid/pybind/eager_utils.h
+1
-2
python/paddle/fluid/dygraph/base.py
python/paddle/fluid/dygraph/base.py
+15
-47
python/paddle/fluid/tests/unittests/test_egr_python_api.py
python/paddle/fluid/tests/unittests/test_egr_python_api.py
+1
-1
python/paddle/fluid/tests/unittests/test_imperative_double_grad.py
...ddle/fluid/tests/unittests/test_imperative_double_grad.py
+31
-183
python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
...uid/tests/unittests/test_paddle_imperative_double_grad.py
+26
-67
未找到文件。
paddle/fluid/eager/accumulation/accumulation_node.cc
浏览文件 @
4b269baa
...
@@ -24,7 +24,7 @@
...
@@ -24,7 +24,7 @@
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/errors.h"
#include "glog/logging.h"
#include "glog/logging.h"
DECLARE_bool
(
retain_grad_for_all_tensor
);
namespace
egr
{
namespace
egr
{
static
void
CopyOrAddTensor
(
paddle
::
experimental
::
Tensor
*
tensor
,
static
void
CopyOrAddTensor
(
paddle
::
experimental
::
Tensor
*
tensor
,
...
@@ -39,8 +39,8 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
...
@@ -39,8 +39,8 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
}
}
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
GradNodeAccumulation
::
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
GradNodeAccumulation
::
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
operator
()(
bool
create_graph
)
{
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
{
VLOG
(
3
)
<<
"Running Eager Backward Node: GradNodeAccumulation"
;
VLOG
(
3
)
<<
"Running Eager Backward Node: GradNodeAccumulation"
;
PADDLE_ENFORCE
(
grads
.
size
()
==
1
,
PADDLE_ENFORCE
(
grads
.
size
()
==
1
,
paddle
::
platform
::
errors
::
Fatal
(
paddle
::
platform
::
errors
::
Fatal
(
...
@@ -62,7 +62,7 @@ operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
...
@@ -62,7 +62,7 @@ operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
grad_out
=
grads
[
0
][
0
];
grad_out
=
grads
[
0
][
0
];
}
}
if
(
!
weak_grad_
.
expired
()
&&
FLAGS_retain_grad_for_all_tensor
)
{
if
(
!
weak_grad_
.
expired
())
{
auto
grad
=
weak_grad_
.
lock
();
auto
grad
=
weak_grad_
.
lock
();
CopyOrAddTensor
(
grad
.
get
(),
grad_out
);
CopyOrAddTensor
(
grad
.
get
(),
grad_out
);
}
}
...
...
paddle/fluid/eager/accumulation/accumulation_node.h
浏览文件 @
4b269baa
...
@@ -35,15 +35,8 @@ class GradNodeAccumulation : public GradNodeBase {
...
@@ -35,15 +35,8 @@ class GradNodeAccumulation : public GradNodeBase {
// Functor: perform backward computations
// Functor: perform backward computations
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
bool
create_graph
=
false
)
override
;
override
;
void
ClearTensorWrappers
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
}
bool
IsTensorWrappersCleared
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
return
false
;
}
std
::
string
name
()
{
return
"GradNodeAccumulation"
;
}
std
::
string
name
()
{
return
"GradNodeAccumulation"
;
}
...
...
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
浏览文件 @
4b269baa
...
@@ -145,8 +145,8 @@ void GradNodeScale::SetTensorWrappers_X(
...
@@ -145,8 +145,8 @@ void GradNodeScale::SetTensorWrappers_X(
void
GradNodeScale
::
SetAttributes_scale
(
float
scale
)
{
scale_
=
scale
;
}
void
GradNodeScale
::
SetAttributes_scale
(
float
scale
)
{
scale_
=
scale
;
}
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
GradNodeScale
::
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
GradNodeScale
::
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
operator
()(
bool
create_graph
)
{
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
{
// 1. Check Output Size
// 1. Check Output Size
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
((
grads
.
size
()
==
1
)
&&
(
grads
[
0
].
size
()
==
1
)),
((
grads
.
size
()
==
1
)
&&
(
grads
[
0
].
size
()
==
1
)),
...
...
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
浏览文件 @
4b269baa
...
@@ -39,15 +39,8 @@ class GradNodeScale : public GradNodeBase {
...
@@ -39,15 +39,8 @@ class GradNodeScale : public GradNodeBase {
// Functor: perform backward computations
// Functor: perform backward computations
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
bool
create_graph
=
false
)
override
;
override
;
void
ClearTensorWrappers
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
}
bool
IsTensorWrappersCleared
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
return
false
;
}
void
SetTensorWrappers_X
(
void
SetTensorWrappers_X
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
);
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
);
...
...
paddle/fluid/eager/auto_code_generator/eager_generator.cc
浏览文件 @
4b269baa
...
@@ -2074,8 +2074,7 @@ static std::string GenerateGradNodeCCContents(
...
@@ -2074,8 +2074,7 @@ static std::string GenerateGradNodeCCContents(
const
char
*
GRAD_FUNCTION_TEMPLATE
=
const
char
*
GRAD_FUNCTION_TEMPLATE
=
"std::vector<std::vector<paddle::experimental::Tensor>> "
"std::vector<std::vector<paddle::experimental::Tensor>> "
"GradNode%s::operator()(const "
"GradNode%s::operator()(const "
"std::vector<std::vector<paddle::experimental::Tensor>>& grads, "
"std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
\n
%s
\n
}"
;
"bool create_graph) {
\n
%s
\n
}"
;
std
::
string
grad_function_str
=
paddle
::
string
::
Sprintf
(
std
::
string
grad_function_str
=
paddle
::
string
::
Sprintf
(
GRAD_FUNCTION_TEMPLATE
,
fwd_op_type
,
generated_grad_function_body
);
GRAD_FUNCTION_TEMPLATE
,
fwd_op_type
,
generated_grad_function_body
);
...
@@ -2110,28 +2109,18 @@ static std::string GenerateGradNodeHeaderContents(
...
@@ -2110,28 +2109,18 @@ static std::string GenerateGradNodeHeaderContents(
"
\n
"
"
\n
"
" virtual std::vector<std::vector<paddle::experimental::Tensor>> "
" virtual std::vector<std::vector<paddle::experimental::Tensor>> "
"operator()(const "
"operator()(const "
"std::vector<std::vector<paddle::experimental::Tensor>>& grads, const "
"std::vector<std::vector<paddle::experimental::Tensor>>& grads) "
"bool create_graph = false) "
"override;
\n
"
"override;
\n
"
"
\n
"
"
\n
"
" void ClearTensorWrappers() override {
\n
"
"%s
\n
"
" is_tensor_wrappers_cleared = true;
\n
"
" }
\n
"
" std::string name() override { return
\"
GradNode%s
\"
; }
\n
"
" std::string name() override { return
\"
GradNode%s
\"
; }
\n
"
"
\n
"
"
\n
"
" // SetX, SetY, ...
\n
"
" // SetX, SetY, ...
\n
"
"%s
\n
"
"%s
\n
"
" // SetAttrMap
\n
"
" // SetAttrMap
\n
"
"%s
\n
"
"%s
\n
"
" bool IsTensorWrappersCleared() override {
\n
"
" return is_tensor_wrappers_cleared;
\n
"
" }
\n
"
" private:
\n
"
" private:
\n
"
" // TensorWrappers
\n
"
" // TensorWrappers
\n
"
"%s
\n
"
"%s
\n
"
" bool is_tensor_wrappers_cleared = false;
\n
"
"
\n
"
" // Attribute Map
\n
"
" // Attribute Map
\n
"
"%s
\n
"
"%s
\n
"
"};"
;
"};"
;
...
@@ -2165,7 +2154,6 @@ static std::string GenerateGradNodeHeaderContents(
...
@@ -2165,7 +2154,6 @@ static std::string GenerateGradNodeHeaderContents(
std
::
string
set_tensor_wrappers_str
=
""
;
std
::
string
set_tensor_wrappers_str
=
""
;
std
::
string
tensor_wrapper_members_str
=
""
;
std
::
string
tensor_wrapper_members_str
=
""
;
std
::
string
clear_tensor_wrappers_str
=
""
;
for
(
const
auto
&
iter
:
op_base_infos
)
{
for
(
const
auto
&
iter
:
op_base_infos
)
{
const
std
::
map
<
std
::
string
,
std
::
string
>&
grad_ins_fwd_slotname_map
=
const
std
::
map
<
std
::
string
,
std
::
string
>&
grad_ins_fwd_slotname_map
=
iter
.
GetGradInsFwdSlotnameMap
();
iter
.
GetGradInsFwdSlotnameMap
();
...
@@ -2197,13 +2185,6 @@ static std::string GenerateGradNodeHeaderContents(
...
@@ -2197,13 +2185,6 @@ static std::string GenerateGradNodeHeaderContents(
SET_TENSOR_WRAPPER_BODY_TEMPLATE
,
tensor_wrapper_name
,
SET_TENSOR_WRAPPER_BODY_TEMPLATE
,
tensor_wrapper_name
,
struct_tensor_wrapper_name
);
struct_tensor_wrapper_name
);
const
char
*
CLEAR_TENSOR_WRAPPER_TEMPLATE
=
"for (auto tw: %s) {
\n
"
" tw.clear();
\n
"
" }
\n
"
;
clear_tensor_wrappers_str
+=
paddle
::
string
::
Sprintf
(
CLEAR_TENSOR_WRAPPER_TEMPLATE
,
struct_tensor_wrapper_name
);
}
else
{
}
else
{
const
char
*
ATTR_TENSOR_WRAPPER_ARG_TEMPLATE
=
const
char
*
ATTR_TENSOR_WRAPPER_ARG_TEMPLATE
=
"const paddle::experimental::Tensor& %s"
;
"const paddle::experimental::Tensor& %s"
;
...
@@ -2216,14 +2197,10 @@ static std::string GenerateGradNodeHeaderContents(
...
@@ -2216,14 +2197,10 @@ static std::string GenerateGradNodeHeaderContents(
TENSOR_WRAPPER_MEMBER_TEMPLATE
,
struct_tensor_wrapper_name
);
TENSOR_WRAPPER_MEMBER_TEMPLATE
,
struct_tensor_wrapper_name
);
const
char
*
SET_TENSOR_WRAPPER_BODY_TEMPLATE
=
const
char
*
SET_TENSOR_WRAPPER_BODY_TEMPLATE
=
"%s = egr::TensorWrapper(%s, %s /*full_reserved*/);
\n
"
;
"%s = egr::TensorWrapper(%s, %s /*full_reserved*/);"
;
tensor_wrapper_body_str
=
paddle
::
string
::
Sprintf
(
tensor_wrapper_body_str
=
paddle
::
string
::
Sprintf
(
SET_TENSOR_WRAPPER_BODY_TEMPLATE
,
struct_tensor_wrapper_name
,
SET_TENSOR_WRAPPER_BODY_TEMPLATE
,
struct_tensor_wrapper_name
,
tensor_wrapper_name
,
full_reserved_str
);
tensor_wrapper_name
,
full_reserved_str
);
const
char
*
CLEAR_TENSOR_WRAPPER_TEMPLATE
=
" %s.clear();
\n
"
;
clear_tensor_wrappers_str
+=
paddle
::
string
::
Sprintf
(
CLEAR_TENSOR_WRAPPER_TEMPLATE
,
struct_tensor_wrapper_name
);
}
}
std
::
string
full_reserved_signature_str
=
"bool full_reserved"
;
std
::
string
full_reserved_signature_str
=
"bool full_reserved"
;
const
char
*
SET_TENSOR_WRAPPER_TEMPLATE
=
const
char
*
SET_TENSOR_WRAPPER_TEMPLATE
=
...
@@ -2238,8 +2215,8 @@ static std::string GenerateGradNodeHeaderContents(
...
@@ -2238,8 +2215,8 @@ static std::string GenerateGradNodeHeaderContents(
std
::
string
grad_node_str
=
paddle
::
string
::
Sprintf
(
std
::
string
grad_node_str
=
paddle
::
string
::
Sprintf
(
GRAD_NODE_TEMPLATE
,
op_type
,
op_type
,
op_type
,
op_type
,
op_type
,
op_type
,
GRAD_NODE_TEMPLATE
,
op_type
,
op_type
,
op_type
,
op_type
,
op_type
,
op_type
,
op_type
,
clear_tensor_wrappers_str
,
op_type
,
set_tensor_wrappers
_str
,
op_type
,
op_type
,
set_tensor_wrappers_str
,
set_attr_map
_str
,
set_attr_map_str
,
tensor_wrapper_members_str
,
attr_members_str
);
tensor_wrapper_members_str
,
attr_members_str
);
return
grad_node_str
;
return
grad_node_str
;
}
}
...
...
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
浏览文件 @
4b269baa
...
@@ -478,7 +478,6 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
...
@@ -478,7 +478,6 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
# SetTensorWrapper Methods & TensorWrapper Members
# SetTensorWrapper Methods & TensorWrapper Members
set_tensor_wrapper_methods_str
=
""
set_tensor_wrapper_methods_str
=
""
tensor_wrapper_members_str
=
""
tensor_wrapper_members_str
=
""
clear_tensor_wrapper_str
=
""
for
tname
,
(
ttype
,
is_fwd_input
,
_
)
in
backward_fwd_input_map
.
items
():
for
tname
,
(
ttype
,
is_fwd_input
,
_
)
in
backward_fwd_input_map
.
items
():
if
tname
in
no_need_buffer_set
:
if
tname
in
no_need_buffer_set
:
no_need_buffer
=
"true"
no_need_buffer
=
"true"
...
@@ -500,13 +499,6 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
...
@@ -500,13 +499,6 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
"""
"""
tensor_wrapper_members_str
+=
PLAIN_TENSOR_MEMBER_TEMPLATE
.
format
(
tensor_wrapper_members_str
+=
PLAIN_TENSOR_MEMBER_TEMPLATE
.
format
(
tensor_wrapper_name
)
tensor_wrapper_name
)
CLEAR_TENSOR_WRAPPERS_TEMPLATE
=
"""
{}.clear();
"""
clear_tensor_wrapper_str
+=
CLEAR_TENSOR_WRAPPERS_TEMPLATE
.
format
(
tensor_wrapper_name
)
else
:
else
:
assert
IsVectorTensorType
(
ttype
)
assert
IsVectorTensorType
(
ttype
)
SET_VECTOR_TENSOR_WRAPPER_TEMPLATE
=
"""
SET_VECTOR_TENSOR_WRAPPER_TEMPLATE
=
"""
...
@@ -524,15 +516,6 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
...
@@ -524,15 +516,6 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
"""
"""
tensor_wrapper_members_str
+=
VECTOR_TENSOR_MEMBER_TEMPLATE
.
format
(
tensor_wrapper_members_str
+=
VECTOR_TENSOR_MEMBER_TEMPLATE
.
format
(
tensor_wrapper_name
)
tensor_wrapper_name
)
CLEAR_TENSOR_WRAPPERS_TEMPLATE
=
"""
for (auto tw: {}) {
tw.clear();
};
"""
clear_tensor_wrapper_str
+=
CLEAR_TENSOR_WRAPPERS_TEMPLATE
.
format
(
tensor_wrapper_name
)
# End: SetTensorWrapper Methods & TensorWrapper Members
# End: SetTensorWrapper Methods & TensorWrapper Members
# SetAttributes & Attribute Members
# SetAttributes & Attribute Members
...
@@ -541,7 +524,7 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
...
@@ -541,7 +524,7 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
for
aname
,
atype
,
default_val
,
_
in
backward_attrs_list
:
for
aname
,
atype
,
default_val
,
_
in
backward_attrs_list
:
saved_attr_name
=
GetSavedName
(
aname
)
saved_attr_name
=
GetSavedName
(
aname
)
SET_ATTR_METHOD_TEMPLATE
=
"""
SET_ATTR_METHOD_TEMPLATE
=
"""
void SetAttribute{}({} {}) {{
void SetAttribute{}({} {}) {{
{} = {};
{} = {};
}}
}}
"""
"""
...
@@ -572,37 +555,25 @@ class {} : public egr::GradNodeBase {{
...
@@ -572,37 +555,25 @@ class {} : public egr::GradNodeBase {{
~{}() override = default;
~{}() override = default;
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
const std::vector<std::vector<paddle::experimental::Tensor>>& grads
, bool create_graph = false
) override;
const std::vector<std::vector<paddle::experimental::Tensor>>& grads) override;
std::string name() override {{ return
\"
{}
\"
; }}
std::string name() override {{ return
\"
{}
\"
; }}
void ClearTensorWrappers() override {{
{}
is_tensor_wrappers_cleared = true;
}}
// SetTensorWrapperX, SetTensorWrapperY, ...
// SetTensorWrapperX, SetTensorWrapperY, ...
{}
{}
// SetAttributes
// SetAttributes
{}
{}
bool IsTensorWrappersCleared() override {{
return is_tensor_wrappers_cleared;
}}
private:
private:
// TensorWrappers
// TensorWrappers
{}
{}
bool is_tensor_wrappers_cleared = false;
// Attributes
// Attributes
{}
{}
}};
}};
"""
"""
node_declaration_str
=
NODE_DECLARATION_TEMPLATE
.
format
(
node_declaration_str
=
NODE_DECLARATION_TEMPLATE
.
format
(
grad_node_name
,
grad_node_name
,
grad_node_name
,
grad_node_name
,
grad_node_name
,
grad_node_name
,
grad_node_name
,
grad_node_name
,
grad_node_name
,
clear_tensor_wrapper
_str
,
grad_node_name
,
set_tensor_wrapper_methods
_str
,
set_
tensor_wrapper_methods_str
,
set_attribute_method
s_str
,
set_
attribute_methods_str
,
tensor_wrapper_member
s_str
,
tensor_wrapper_members_str
,
attribute_members_str
)
attribute_members_str
)
return
node_declaration_str
return
node_declaration_str
...
@@ -666,7 +637,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
...
@@ -666,7 +637,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
grad_api_namespace
=
f
"paddle::experimental"
grad_api_namespace
=
f
"paddle::experimental"
FUNCTION_TEMPLATE
=
"""
FUNCTION_TEMPLATE
=
"""
std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads
, bool create_graph
) {{
std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {{
// Call grad_api function
// Call grad_api function
auto grad_api_returns = {}::{}({});
auto grad_api_returns = {}::{}({});
{}
{}
...
...
paddle/fluid/eager/backward.cc
浏览文件 @
4b269baa
...
@@ -39,21 +39,12 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
...
@@ -39,21 +39,12 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
// Copy nodes
// Copy nodes
std
::
queue
<
GradNodeBase
*>
queue
=
init_queue
;
std
::
queue
<
GradNodeBase
*>
queue
=
init_queue
;
std
::
unordered_set
<
GradNodeBase
*>
visited
;
std
::
unordered_set
<
GradNodeBase
*>
visited
;
size_t
potential_startup_ops_cnt
=
queue
.
size
();
size_t
cnt
=
0
;
// Visit each node exactly once in any order
// Visit each node exactly once in any order
while
(
!
queue
.
empty
())
{
while
(
!
queue
.
empty
())
{
GradNodeBase
*
node
=
queue
.
front
();
GradNodeBase
*
node
=
queue
.
front
();
queue
.
pop
();
queue
.
pop
();
if
(
cnt
<
potential_startup_ops_cnt
)
{
if
(
!
node_in_degree_map
.
count
(
node
))
{
node_in_degree_map
[
node
]
=
0
;
}
cnt
+=
1
;
}
if
(
visited
.
count
(
node
))
{
if
(
visited
.
count
(
node
))
{
continue
;
continue
;
}
}
...
@@ -85,248 +76,23 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
...
@@ -85,248 +76,23 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
return
node_in_degree_map
;
return
node_in_degree_map
;
}
}
// Remove some nodes those doesn't need to be
void
RunBackward
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
,
// stored in potential_stop_nodes、potential_startup_nodes
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
grad_tensors
,
void
UpdateGraphInfo
(
bool
retain_graph
)
{
std
::
unordered_map
<
GradNodeBase
*
,
AutogradMeta
*>*
paddle
::
platform
::
RecordEvent
backward_record_event
(
target_nodes_inputmeta_map
,
"backward"
,
paddle
::
platform
::
TracerEventType
::
Operator
,
1
);
std
::
unordered_map
<
GradNodeBase
*
,
std
::
unordered_set
<
GradNodeBase
*>>*
depending_nodes
,
std
::
unordered_set
<
GradNodeBase
*>*
potential_stop_nodes
,
std
::
unordered_set
<
GradNodeBase
*>*
potential_startup_nodes
)
{
// Updated potential_sotp_nodes by depending_nodes,
// make sure the path from root to target_node is ok
std
::
unordered_set
<
GradNodeBase
*>
_startup_ops
;
VLOG
(
6
)
<<
"Running in UpdateGraphInfo"
;
std
::
queue
<
GradNodeBase
*>
queue
;
for
(
auto
&
target_nodes_inputmeta_pair
:
*
target_nodes_inputmeta_map
)
{
queue
.
emplace
(
target_nodes_inputmeta_pair
.
first
);
}
while
(
!
queue
.
empty
())
{
auto
*
target_node
=
queue
.
front
();
queue
.
pop
();
if
(
!
(
*
depending_nodes
)[
target_node
].
empty
())
{
auto
precedding_nodes
=
(
*
depending_nodes
)[
target_node
];
for
(
auto
pre_nodes
:
precedding_nodes
)
{
queue
.
emplace
(
pre_nodes
);
if
(
potential_stop_nodes
->
find
(
pre_nodes
)
!=
potential_stop_nodes
->
end
())
{
potential_stop_nodes
->
erase
(
pre_nodes
);
}
}
}
else
{
// startup_ops have no precedding nodes
VLOG
(
6
)
<<
"Emplace _startup_ops"
;
_startup_ops
.
emplace
(
target_node
);
}
}
// Purify potential_startup_nodes again, remove some
// potential startup_nodes that unreach to input target nodes
if
(
!
_startup_ops
.
empty
())
{
std
::
unordered_set
<
GradNodeBase
*>
potential_startup_nodes_to_be_erased
;
for
(
auto
node
:
*
potential_startup_nodes
)
{
if
(
_startup_ops
.
count
(
node
)
==
0
)
{
VLOG
(
6
)
<<
"Set up potential_startup_nodes_to_be_erased"
;
potential_startup_nodes_to_be_erased
.
emplace
(
node
);
}
}
if
(
!
potential_startup_nodes_to_be_erased
.
empty
())
{
for
(
auto
node
:
potential_startup_nodes_to_be_erased
)
{
VLOG
(
6
)
<<
"Erase nodes in potential_startup_nodes_to_be_erased"
;
potential_startup_nodes
->
erase
(
node
);
}
}
}
}
// Get Graph Info Betweent input target gradnode and outputs,
// record depending_nodes、 potential_stop_nodes、potential_startup_nodes
void
GetGraphInfoBetweenTargets
(
const
std
::
queue
<
GradNodeBase
*>&
init_queue
,
std
::
unordered_map
<
GradNodeBase
*
,
AutogradMeta
*>*
input_target_nodes_inputmeta_map
,
std
::
unordered_map
<
/*child node*/
GradNodeBase
*
,
/*father nodes*/
std
::
unordered_set
<
GradNodeBase
*>>*
depending_nodes
,
std
::
unordered_set
<
GradNodeBase
*>*
potential_stop_nodes
,
std
::
unordered_set
<
GradNodeBase
*>*
potential_startup_nodes
)
{
if
(
input_target_nodes_inputmeta_map
->
empty
())
return
;
VLOG
(
6
)
<<
"Runing In GetGraphInfoBetweenTargets"
;
// Calculate in_degree for each node
std
::
unordered_map
<
GradNodeBase
*
,
int
>
node_in_degree_map
;
// Copy nodes
std
::
queue
<
GradNodeBase
*>
queue
=
init_queue
;
std
::
unordered_set
<
GradNodeBase
*>
visited
;
// Visit each node exactly once in any order
while
(
!
queue
.
empty
())
{
GradNodeBase
*
node
=
queue
.
front
();
queue
.
pop
();
if
(
visited
.
count
(
node
))
{
continue
;
}
visited
.
insert
(
node
);
// Check node is target_nodes or not, if node is not target_node,
// all the next_node will be marked in potential_stop_nodes
bool
is_potential_stop_nodes
=
input_target_nodes_inputmeta_map
->
count
(
node
);
// Find and append next nodes
const
std
::
vector
<
std
::
vector
<
Edge
>>&
edges
=
node
->
GetEdges
();
for
(
const
auto
&
edge_list
:
edges
)
{
for
(
const
Edge
&
edge
:
edge_list
)
{
GradNodeBase
*
next_node
=
edge
.
GetMutableGradNode
().
get
();
// Next node could be nullptr if it is leaf tensor with no
// AccumulationNode attached
// Or it could also originated from dispensable inputs
if
(
!
next_node
)
continue
;
// if node not in input_target_nodes,
// all the next_nodes of current node will be inserted to
// potential_stop_node
if
(
is_potential_stop_nodes
)
{
potential_stop_nodes
->
emplace
(
next_node
);
}
// Update in_degree
if
(
!
node_in_degree_map
.
count
(
next_node
))
node_in_degree_map
[
next_node
]
=
0
;
node_in_degree_map
[
next_node
]
++
;
// Record depending relationship
(
*
depending_nodes
)[
next_node
].
emplace
(
node
);
queue
.
push
(
next_node
);
}
}
}
// Update Graph Info, remove some stop_node in potential_stop_nodes
UpdateGraphInfo
(
input_target_nodes_inputmeta_map
,
depending_nodes
,
potential_stop_nodes
,
potential_startup_nodes
);
}
void
GetTargetNodesInfo
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
inputs
,
std
::
unordered_map
<
GradNodeBase
*
,
AutogradMeta
*>*
target_nodes_inputmeta_map
)
{
VLOG
(
6
)
<<
"Running in GetTargetNodesInfo"
;
if
(
!
inputs
.
empty
())
{
VLOG
(
6
)
<<
"Inputs are not empty"
;
size_t
num_inputs
=
inputs
.
size
();
for
(
size_t
i
=
0
;
i
<
num_inputs
;
i
++
)
{
AutogradMeta
*
auto_grad_meta
=
EagerUtils
::
unsafe_autograd_meta
(
inputs
[
i
]);
auto
target_node
=
auto_grad_meta
->
GetMutableGradNode
().
get
();
PADDLE_ENFORCE_NOT_NULL
(
target_node
,
paddle
::
platform
::
errors
::
Fatal
(
"There is no grad op for input:%d or it's"
"stop_gradient=True"
,
i
));
(
*
target_nodes_inputmeta_map
)[
target_node
]
=
auto_grad_meta
;
}
}
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
GetResults
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
inputs
,
std
::
unordered_map
<
GradNodeBase
*
,
paddle
::
experimental
::
Tensor
>*
results_map
,
bool
allow_unused
,
bool
create_graph
)
{
VLOG
(
6
)
<<
"Running in GetResults"
;
if
(
inputs
.
empty
())
return
{};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
results
;
results
.
reserve
(
inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
auto
&
input
=
inputs
[
i
];
AutogradMeta
*
auto_grad_meta
=
EagerUtils
::
unsafe_autograd_meta
(
input
);
auto
target_node
=
auto_grad_meta
->
GetMutableGradNode
().
get
();
auto
iter
=
results_map
->
find
(
target_node
);
if
(
iter
!=
results_map
->
end
())
{
// set StopGradient = !create_graph
AutogradMeta
*
tensor_auto_grad_meta
=
EagerUtils
::
autograd_meta
(
&
(
iter
->
second
));
tensor_auto_grad_meta
->
SetStopGradient
(
!
create_graph
);
results
.
emplace_back
(
iter
->
second
);
}
else
{
PADDLE_ENFORCE_EQ
(
allow_unused
,
true
,
paddle
::
platform
::
errors
::
InvalidArgument
(
"The %d-th input does not appear in the backward "
"graph. Please check the input variable or set "
"allow_unused=True to get None result."
,
i
));
results
.
emplace_back
();
}
}
return
results
;
}
// Enforce GradNode has TensorWrappers as Input
void
EnforceGradNodeHasInput
(
GradNodeBase
*
node
)
{
VLOG
(
6
)
<<
"Running in EnforceGradNodeHasInput"
;
PADDLE_ENFORCE_NE
(
node
->
IsTensorWrappersCleared
(),
true
,
paddle
::
platform
::
errors
::
Fatal
(
"The TensorWrappers of %s do not exist. This may be because:
\n
"
"You calculate backward twice for the same subgraph without "
"setting retain_graph=True. Please set retain_graph=True in the "
"first backward/grad call.
\n
"
,
node
->
name
()));
}
// Purify potential_startup_nodes, remove nodes those are the same as
// input_target_nodes
void
PurifyPotentialStartUpNodes
(
std
::
unordered_set
<
GradNodeBase
*>*
potential_startup_nodes
,
std
::
unordered_map
<
GradNodeBase
*
,
AutogradMeta
*
/* InputMeta */
>*
input_target_nodes_inputmeta_map
)
{
VLOG
(
6
)
<<
"Running in PurifyPotentialStartUpNodes"
;
if
(
input_target_nodes_inputmeta_map
->
empty
())
return
;
std
::
unordered_set
<
GradNodeBase
*>
potential_startup_nodes_to_be_erased
;
for
(
auto
startup_op
:
*
potential_startup_nodes
)
{
auto
iter
=
input_target_nodes_inputmeta_map
->
find
(
startup_op
);
if
(
iter
!=
input_target_nodes_inputmeta_map
->
end
())
{
potential_startup_nodes_to_be_erased
.
emplace
(
iter
->
first
);
}
}
if
(
!
potential_startup_nodes_to_be_erased
.
empty
())
{
for
(
auto
nodes
:
potential_startup_nodes_to_be_erased
)
{
potential_startup_nodes
->
erase
(
nodes
);
}
}
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
RunBackward
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
,
// output
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
grad_tensors
,
bool
retain_graph
,
bool
create_graph
=
false
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
inputs
=
{},
bool
allow_unused
=
false
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
no_grad_vars
=
{})
{
VLOG
(
6
)
<<
"Start Backward"
;
VLOG
(
6
)
<<
"Start Backward"
;
// *Gradient Hook should happen at node-level
// *Gradient Hook should happen at node-level
// *Inplace version check should perform at node-level
// *Inplace version check should perform at node-level
// *Cross-batch accumulation happens at forward pass
// *Cross-batch accumulation happens at forward pass
std
::
unordered_map
<
GradNodeBase
*
,
AutogradMeta
*>
no_grad_var_nodes_inputmeta_map
;
// Get no_grad_vars's GradNodes and InputMeta Info
GetTargetNodesInfo
(
no_grad_vars
,
&
no_grad_var_nodes_inputmeta_map
);
/* --- Initialization --- */
/* --- Initialization --- */
// 1. Init queue with starting nodes
// 1. Init queue with starting nodes
// 2. Prepare initial input buffers
// 2. Prepare initial input buffers
std
::
queue
<
GradNodeBase
*>
queue
;
std
::
queue
<
GradNodeBase
*>
queue
;
std
::
unordered_map
<
GradNodeBase
*
,
std
::
unique_ptr
<
GradTensorHolder
>>
std
::
unordered_map
<
GradNodeBase
*
,
std
::
unique_ptr
<
GradTensorHolder
>>
node_input_buffers_dict
;
node_input_buffers_dict
;
std
::
unordered_set
<
GradNodeBase
*>
potential_startup_nodes
;
for
(
size_t
i
=
0
;
i
<
tensors
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
tensors
.
size
();
i
++
)
{
const
paddle
::
experimental
::
Tensor
&
tensor
=
tensors
[
i
];
const
paddle
::
experimental
::
Tensor
&
tensor
=
tensors
[
i
];
...
@@ -366,17 +132,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
...
@@ -366,17 +132,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
"size = 0 or same size as tensors"
));
"size = 0 or same size as tensors"
));
// Feed given tensor if it's provided
// Feed given tensor if it's provided
VLOG
(
6
)
<<
"Fill grad input tensor "
<<
i
<<
"with give grad tensor"
;
VLOG
(
6
)
<<
"Fill grad input tensor "
<<
i
<<
"with give grad tensor"
;
node_input_buffers_dict
[
grad_node
]
->
add
(
if
(
grad_tensors
[
i
].
is_initialized
())
{
input_info
.
first
,
input_info
.
second
,
grad_tensors
[
i
]);
// Deep copy
paddle
::
experimental
::
Tensor
tmp_tensor
;
tmp_tensor
.
copy_
(
grad_tensors
[
i
],
true
);
node_input_buffers_dict
[
grad_node
]
->
add
(
input_info
.
first
,
input_info
.
second
,
tmp_tensor
);
}
else
{
node_input_buffers_dict
[
grad_node
]
->
add
(
input_info
.
first
,
input_info
.
second
,
grad_tensors
[
i
]);
}
}
else
{
}
else
{
VLOG
(
6
)
<<
"Fill grad input tensor "
<<
i
<<
" with 1.0"
;
VLOG
(
6
)
<<
"Fill grad input tensor "
<<
i
<<
" with 1.0"
;
...
@@ -389,9 +146,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
...
@@ -389,9 +146,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
input_info
.
first
,
input_info
.
second
,
tensor
,
true
/*fill_one=true*/
);
input_info
.
first
,
input_info
.
second
,
tensor
,
true
/*fill_one=true*/
);
}
}
// Prepare queue
, potential startup_nodes
// Prepare queue
queue
.
push
(
grad_node
);
queue
.
push
(
grad_node
);
potential_startup_nodes
.
emplace
(
grad_node
);
}
}
VLOG
(
6
)
<<
"Update In degree Map for backward"
;
VLOG
(
6
)
<<
"Update In degree Map for backward"
;
...
@@ -399,74 +155,25 @@ std::vector<paddle::experimental::Tensor> RunBackward(
...
@@ -399,74 +155,25 @@ std::vector<paddle::experimental::Tensor> RunBackward(
std
::
unordered_map
<
GradNodeBase
*
,
int
>
node_in_degree_map
=
std
::
unordered_map
<
GradNodeBase
*
,
int
>
node_in_degree_map
=
getInDegreeMap
(
queue
);
getInDegreeMap
(
queue
);
// Get input's GradNodes and InputMeta Info
std
::
unordered_map
<
GradNodeBase
*
,
AutogradMeta
*
/* InputMeta */
>
input_target_nodes_inputmeta_map
;
GetTargetNodesInfo
(
inputs
,
&
input_target_nodes_inputmeta_map
);
// Purify potential_startup_ops, remove those nodes that are the same as
// input_target_nodes
PurifyPotentialStartUpNodes
(
&
potential_startup_nodes
,
&
input_target_nodes_inputmeta_map
);
// Get Graph Info Betweent input target gradnode and outputs
// Record the depending_nodes and potential_stop_nodes
std
::
unordered_map
<
GradNodeBase
*
/* child node */
,
std
::
unordered_set
<
GradNodeBase
*>
/* father node */
>
depending_nodes
;
std
::
unordered_set
<
GradNodeBase
*>
potential_stop_nodes
;
// std::unordered_set<GradNodeBase*> startup_ops;
GetGraphInfoBetweenTargets
(
queue
,
&
input_target_nodes_inputmeta_map
,
&
depending_nodes
,
&
potential_stop_nodes
,
&
potential_startup_nodes
);
// ready_queue store all startup nodes
std
::
queue
<
GradNodeBase
*>
ready_queue
;
// startup op's indegree should be 0
for
(
auto
node
:
potential_startup_nodes
)
{
if
(
node_in_degree_map
[
node
]
==
0
)
{
ready_queue
.
emplace
(
node
);
}
}
VLOG
(
1
)
<<
" startup_ops' size is :"
<<
ready_queue
.
size
();
std
::
unordered_map
<
GradNodeBase
*
,
paddle
::
experimental
::
Tensor
>
results_map
;
// read_queue is empty only when 1.input equals to output. 2.input can not
// reach to output.
if
(
ready_queue
.
size
()
==
0
)
{
for
(
auto
input_target_node
:
input_target_nodes_inputmeta_map
)
{
// out rank_info of forward op
auto
rank_info
=
input_target_node
.
second
->
OutRankInfo
();
if
(
node_input_buffers_dict
[
input_target_node
.
first
])
{
auto
&
target_result
=
node_input_buffers_dict
[
input_target_node
.
first
]
->
Buffers
()[
rank_info
.
first
][
rank_info
.
second
];
// save the target result
results_map
[
input_target_node
.
first
]
=
target_result
;
}
}
}
/* --- Topological Visit --- */
/* --- Topological Visit --- */
// 1. Pop queue
// 1. Pop queue
// 2. Run node
// 2. Run node
// |- Check and capture target result
// |- node(grads)
// |- node(grads)
// |- Prepare for next node
// |- Prepare for next node
// 3. Update queue
// 3. Update queue
VLOG
(
6
)
<<
"Run Backward"
;
VLOG
(
6
)
<<
"Run Backward"
;
while
(
!
ready_queue
.
empty
())
{
while
(
!
queue
.
empty
())
{
GradNodeBase
*
node
=
ready_queue
.
front
();
GradNodeBase
*
node
=
queue
.
front
();
VLOG
(
6
)
<<
"Running GradNode:"
<<
node
->
name
();
ready_queue
.
pop
();
paddle
::
platform
::
RecordEvent
node_record_event
(
paddle
::
platform
::
RecordEvent
node_record_event
(
std
::
string
(
typeid
(
*
node
).
name
())
+
" grad_node"
,
std
::
string
(
typeid
(
*
node
).
name
())
+
" grad_node"
,
paddle
::
platform
::
TracerEventType
::
Operator
,
1
);
paddle
::
platform
::
TracerEventType
::
Operator
,
1
);
if
(
queue
.
size
()
>
1
&&
node_in_degree_map
[
node
]
!=
0
)
{
queue
.
pop
();
continue
;
}
queue
.
pop
();
// Run node: This is where Hook happens
// Run node: This is where Hook happens
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
node_input_buffers_dict
.
count
(
node
),
node_input_buffers_dict
.
count
(
node
),
...
@@ -477,45 +184,10 @@ std::vector<paddle::experimental::Tensor> RunBackward(
...
@@ -477,45 +184,10 @@ std::vector<paddle::experimental::Tensor> RunBackward(
std
::
unique_ptr
<
GradTensorHolder
>
node_input_buffer
=
std
::
unique_ptr
<
GradTensorHolder
>
node_input_buffer
=
std
::
move
(
node_input_buffers_dict
[
node
]);
std
::
move
(
node_input_buffers_dict
[
node
]);
// get target grad_var from node_input_buffer by inputmeta
if
(
input_target_nodes_inputmeta_map
.
find
(
node
)
!=
input_target_nodes_inputmeta_map
.
end
())
{
VLOG
(
6
)
<<
"Get target result by by inputmeta"
;
// out rank_info of forward op
auto
rank_info
=
input_target_nodes_inputmeta_map
[
node
]
->
OutRankInfo
();
// rank_info is a pair, first means slot_id, second means rank.
auto
&
target_result
=
node_input_buffer
->
Buffers
()[
rank_info
.
first
][
rank_info
.
second
];
// save the target result
results_map
[
node
]
=
target_result
;
}
// no_grad_vars
if
(
no_grad_var_nodes_inputmeta_map
.
find
(
node
)
!=
no_grad_var_nodes_inputmeta_map
.
end
())
{
VLOG
(
6
)
<<
"Change the input buffer[slot][rank] by Zeros"
;
auto
rank_info
=
no_grad_var_nodes_inputmeta_map
[
node
]
->
OutRankInfo
();
node_input_buffer
->
SetBufferSlotRankZeros
(
rank_info
.
first
,
rank_info
.
second
);
}
VLOG
(
6
)
<<
"Running GradNode:"
<<
node
->
name
();
// check input
EnforceGradNodeHasInput
(
node
);
VLOG
(
6
)
<<
"Run Backward Kernel with GradTensorHolder"
;
VLOG
(
6
)
<<
"Run Backward Kernel with GradTensorHolder"
;
// Run Pre Backward Node and get outputs
// Run Pre Backward Node and get outputs
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
grad_output_tensors
=
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
grad_output_tensors
=
(
*
node
)(
node_input_buffer
->
Buffers
(),
create_graph
);
(
*
node
)(
node_input_buffer
->
Buffers
());
// retain_grad or not
if
(
!
retain_graph
)
{
VLOG
(
6
)
<<
"retain_graph is false, need to clear the TensorWrapper of nodes."
;
node
->
ClearTensorWrappers
();
}
// TODO(jiabin): Should we erase it or find a more efficient way.
// TODO(jiabin): Should we erase it or find a more efficient way.
node_input_buffers_dict
.
erase
(
node
);
node_input_buffers_dict
.
erase
(
node
);
...
@@ -580,44 +252,18 @@ std::vector<paddle::experimental::Tensor> RunBackward(
...
@@ -580,44 +252,18 @@ std::vector<paddle::experimental::Tensor> RunBackward(
// Update queue
// Update queue
node_in_degree_map
[
next_node
]
--
;
node_in_degree_map
[
next_node
]
--
;
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
node_in_degree_map
[
next_node
]
>=
0
,
node_in_degree_map
[
next_node
]
>=
0
,
paddle
::
platform
::
errors
::
Fatal
(
paddle
::
platform
::
errors
::
Fatal
(
"Detected in-degree value smaller than zero. For Node: %s"
"Detected in-degree value smaller than zero. For Node: %s"
"Node's in-degree cannot be negative"
,
"Node's in-degree cannot be negative"
,
next_node
->
name
()));
next_node
->
name
()));
if
(
node_in_degree_map
[
next_node
]
==
0
)
{
bool
is_potential_stop_node
=
potential_stop_nodes
.
count
(
next_node
);
queue
.
emplace
(
std
::
move
(
next_node
));
if
(
node_in_degree_map
[
next_node
]
==
0
&&
!
is_potential_stop_node
)
{
ready_queue
.
emplace
(
std
::
move
(
next_node
));
}
}
}
}
}
}
}
}
return
GetResults
(
inputs
,
&
results_map
,
allow_unused
,
create_graph
);
}
}
void
Backward
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
,
// output
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
grad_tensors
,
bool
retain_graph
)
{
VLOG
(
6
)
<<
"Run in Backward"
;
paddle
::
platform
::
RecordEvent
backward_record_event
(
"backward"
,
paddle
::
platform
::
TracerEventType
::
Operator
,
1
);
RunBackward
(
tensors
,
grad_tensors
,
retain_graph
);
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
Grad
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
,
// output
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
inputs
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
grad_tensors
,
bool
retain_graph
,
bool
create_graph
,
bool
only_inputs
,
bool
allow_unused
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
no_grad_vars
)
{
VLOG
(
6
)
<<
"Run in Grad"
;
return
RunBackward
(
tensors
,
grad_tensors
,
retain_graph
,
create_graph
,
inputs
,
allow_unused
,
no_grad_vars
);
}
}
// namespace egr
}
// namespace egr
paddle/fluid/eager/backward.h
浏览文件 @
4b269baa
...
@@ -19,20 +19,12 @@
...
@@ -19,20 +19,12 @@
namespace
egr
{
namespace
egr
{
//
B
ackward():
//
run_b
ackward():
// tensors corresponds to those lived in the backward graph
// tensors corresponds to those lived in the backward graph
// each grad_tensors[i] keeps the value for its corresponding tensors[i]
// each grad_tensors[i] keeps the value for its corresponding tensors[i]
void
Backward
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
,
void
RunBackward
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>
&
tensors
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
grad_tensors
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>
&
grad_tensors
,
bool
retain_graph
=
false
);
bool
retain_graph
=
false
);
std
::
vector
<
paddle
::
experimental
::
Tensor
>
Grad
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
tensors
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
inputs
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
grad_tensors
=
{},
bool
retain_graph
=
false
,
bool
create_graph
=
false
,
bool
only_inputs
=
false
,
bool
allow_unused
=
false
,
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
no_grad_vars
=
{});
// Reserved for gradient()
// Reserved for gradient()
...
...
paddle/fluid/eager/custom_operator/custom_operator_node.cc
浏览文件 @
4b269baa
...
@@ -20,8 +20,8 @@
...
@@ -20,8 +20,8 @@
namespace
egr
{
namespace
egr
{
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
RunCustomOpNode
::
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
RunCustomOpNode
::
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
operator
()(
bool
create_graph
)
{
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
{
paddle
::
CustomOpKernelContext
ctx
;
paddle
::
CustomOpKernelContext
ctx
;
auto
grad_inputs_name
=
paddle
::
framework
::
OpMetaInfoHelper
::
GetInputs
(
auto
grad_inputs_name
=
paddle
::
framework
::
OpMetaInfoHelper
::
GetInputs
(
egr
::
Controller
::
Instance
().
GetOpMetaInfoMap
().
at
(
op_type_
)[
1
]);
egr
::
Controller
::
Instance
().
GetOpMetaInfoMap
().
at
(
op_type_
)[
1
]);
...
...
paddle/fluid/eager/custom_operator/custom_operator_node.h
浏览文件 @
4b269baa
...
@@ -37,8 +37,8 @@ class RunCustomOpNode : public GradNodeBase {
...
@@ -37,8 +37,8 @@ class RunCustomOpNode : public GradNodeBase {
// Functor: perform backward computations
// Functor: perform backward computations
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
bool
create_graph
)
override
;
override
;
std
::
string
name
()
{
std
::
string
name
()
{
return
paddle
::
string
::
Sprintf
(
"RunCustomOpNode: %s_grad"
,
op_type_
);
return
paddle
::
string
::
Sprintf
(
"RunCustomOpNode: %s_grad"
,
op_type_
);
...
@@ -62,12 +62,6 @@ class RunCustomOpNode : public GradNodeBase {
...
@@ -62,12 +62,6 @@ class RunCustomOpNode : public GradNodeBase {
return
res
;
return
res
;
}
}
void
ClearTensorWrappers
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
}
bool
IsTensorWrappersCleared
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
return
false
;
}
void
SetAttrs
(
const
std
::
vector
<
paddle
::
any
>&
attr
)
{
attrs_
=
attr
;
}
void
SetAttrs
(
const
std
::
vector
<
paddle
::
any
>&
attr
)
{
attrs_
=
attr
;
}
public:
public:
...
...
paddle/fluid/eager/grad_node_info.h
浏览文件 @
4b269baa
...
@@ -95,12 +95,8 @@ class GradNodeBase {
...
@@ -95,12 +95,8 @@ class GradNodeBase {
* is better choice to fit this format.
* is better choice to fit this format.
* **/
* **/
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
=
0
;
bool
create_graph
=
false
)
=
0
;
virtual
void
ClearTensorWrappers
()
=
0
;
virtual
bool
IsTensorWrappersCleared
()
=
0
;
/**
/**
* AddEdges is designed to set input tensors' backward Node as current
* AddEdges is designed to set input tensors' backward Node as current
* node's Edges.
* node's Edges.
...
...
paddle/fluid/eager/grad_tensor_holder.cc
浏览文件 @
4b269baa
...
@@ -21,11 +21,6 @@
...
@@ -21,11 +21,6 @@
namespace
egr
{
namespace
egr
{
void
GradTensorHolder
::
SetBufferSlotRankZeros
(
size_t
slot_id
,
size_t
rank
)
{
buffer_
[
slot_id
][
rank
]
=
paddle
::
experimental
::
zeros_like
(
buffer_
[
slot_id
][
rank
]);
}
void
GradTensorHolder
::
add
(
size_t
slot_id
,
size_t
rank
,
void
GradTensorHolder
::
add
(
size_t
slot_id
,
size_t
rank
,
const
paddle
::
experimental
::
Tensor
&
t
,
const
paddle
::
experimental
::
Tensor
&
t
,
bool
fill_one
)
{
bool
fill_one
)
{
...
...
paddle/fluid/eager/grad_tensor_holder.h
浏览文件 @
4b269baa
...
@@ -56,8 +56,6 @@ class GradTensorHolder {
...
@@ -56,8 +56,6 @@ class GradTensorHolder {
return
buffer_
;
return
buffer_
;
}
}
void
SetBufferSlotRankZeros
(
size_t
slot_id
,
size_t
rank
);
private:
private:
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
buffer_
;
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
buffer_
;
};
};
...
...
paddle/fluid/eager/tensor_wrapper.h
浏览文件 @
4b269baa
...
@@ -98,8 +98,6 @@ class TensorWrapper {
...
@@ -98,8 +98,6 @@ class TensorWrapper {
}
}
}
}
void
clear
()
{
intermidiate_tensor_
.
reset
();
}
private:
private:
bool
full_reserved_
=
false
;
bool
full_reserved_
=
false
;
std
::
pair
<
size_t
,
size_t
>
out_rank_info_
;
std
::
pair
<
size_t
,
size_t
>
out_rank_info_
;
...
...
paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h
浏览文件 @
4b269baa
...
@@ -32,8 +32,8 @@ class GradTestNode : public egr::GradNodeBase {
...
@@ -32,8 +32,8 @@ class GradTestNode : public egr::GradNodeBase {
GradTestNode
()
:
GradNodeBase
()
{
val_
=
1.0
;
}
GradTestNode
()
:
GradNodeBase
()
{
val_
=
1.0
;
}
std
::
string
name
()
override
{
return
"GradTestNode"
;
}
std
::
string
name
()
override
{
return
"GradTestNode"
;
}
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
,
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>&
grads
)
bool
create_graph
=
false
)
override
{
override
{
val_
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
grads
[
0
][
0
].
impl
())
val_
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
grads
[
0
][
0
].
impl
())
->
data
<
float
>
()[
0
];
->
data
<
float
>
()[
0
];
phi
::
DenseTensorMeta
meta
=
phi
::
DenseTensorMeta
meta
=
...
@@ -49,11 +49,6 @@ class GradTestNode : public egr::GradNodeBase {
...
@@ -49,11 +49,6 @@ class GradTestNode : public egr::GradNodeBase {
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
res
=
{{
et1
}};
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
res
=
{{
et1
}};
return
res
;
return
res
;
}
}
void
ClearTensorWrappers
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
}
bool
IsTensorWrappersCleared
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
return
false
;
}
float
val_
;
float
val_
;
};
};
}
// namespace eager_test
}
// namespace eager_test
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
浏览文件 @
4b269baa
...
@@ -58,7 +58,7 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
...
@@ -58,7 +58,7 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
}
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
input_tensor
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
input_tensor
};
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 10)
// Examine Forward Grad (w.r.t max_num_runs = 10)
...
@@ -80,7 +80,7 @@ void benchmark_eager_matmul(const paddle::experimental::Tensor& X,
...
@@ -80,7 +80,7 @@ void benchmark_eager_matmul(const paddle::experimental::Tensor& X,
}
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
input_tensor0
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
input_tensor0
};
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 2)
// Examine Forward Grad (w.r.t max_num_runs = 2)
...
@@ -106,7 +106,7 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X,
...
@@ -106,7 +106,7 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X,
}
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
input_tensor0
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
input_tensor0
};
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
if
(
accuracy_check
)
{
// Examine Forward Grad (w.r.t max_num_runs = 2)
// Examine Forward Grad (w.r.t max_num_runs = 2)
...
@@ -137,7 +137,7 @@ void benchmark_eager_intermediate_mlp(
...
@@ -137,7 +137,7 @@ void benchmark_eager_intermediate_mlp(
reduce_sum_dygraph_function
(
input0
,
{{
"reduce_all"
,
true
}});
reduce_sum_dygraph_function
(
input0
,
{{
"reduce_all"
,
true
}});
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
Out
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
Out
};
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
if
(
accuracy_check
)
{
if
(
accuracy_check
)
{
std
::
unordered_map
<
std
::
string
,
float
>
result
=
std
::
unordered_map
<
std
::
string
,
float
>
result
=
...
...
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
浏览文件 @
4b269baa
...
@@ -5,7 +5,6 @@ cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_
...
@@ -5,7 +5,6 @@ cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_
cc_test
(
test_egr_task_hook SRCS hook_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
cc_test
(
test_egr_task_hook SRCS hook_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
cc_test
(
test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
cc_test
(
test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
cc_test
(
test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
cc_test
(
test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
cc_test
(
test_egr_task_grad SRCS grad_test.cc DEPS
${
eager_deps
}
${
fluid_deps
}
eager_scale scale_node
)
if
(
NOT
((
NOT WITH_PYTHON
)
AND ON_INFER
))
if
(
NOT
((
NOT WITH_PYTHON
)
AND ON_INFER
))
cc_test
(
test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS
${
eager_deps
}
${
fluid_deps
}
${
generated_deps
}
dygraph_node
)
cc_test
(
test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS
${
eager_deps
}
${
fluid_deps
}
${
generated_deps
}
dygraph_node
)
...
...
paddle/fluid/eager/tests/task_tests/backward_test.cc
浏览文件 @
4b269baa
...
@@ -33,7 +33,6 @@
...
@@ -33,7 +33,6 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL
(
full
,
CPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
full
,
CPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
copy
,
CPU
,
ALL_LAYOUT
);
namespace
egr
{
namespace
egr
{
...
@@ -80,7 +79,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
...
@@ -80,7 +79,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
}
}
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
target_tensor
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
target_tensor
};
// Run Backward
// Run Backward
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Check Output Value
// Check Output Value
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
5.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
5.0
);
...
@@ -139,7 +138,7 @@ TEST(Backward, SingleNodeCustomGrad) {
...
@@ -139,7 +138,7 @@ TEST(Backward, SingleNodeCustomGrad) {
}
}
// Run Backward
// Run Backward
Backward
(
target_tensors
,
grad_tensors
);
Run
Backward
(
target_tensors
,
grad_tensors
);
// Check Output Value
// Check Output Value
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
50.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
50.0
);
...
@@ -212,7 +211,7 @@ TEST(Backward, LinearNodes) {
...
@@ -212,7 +211,7 @@ TEST(Backward, LinearNodes) {
}
}
// Use Empty Grad Tensor
// Use Empty Grad Tensor
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
// Check Output Value
// Check Output Value
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
50.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
50.0
);
...
@@ -316,7 +315,7 @@ TEST(Backward, WithAccumulation) {
...
@@ -316,7 +315,7 @@ TEST(Backward, WithAccumulation) {
node2_ptr
->
AddEdges
(
&
res2
,
0
);
node2_ptr
->
AddEdges
(
&
res2
,
0
);
}
}
Backward
(
target_tensors
,
grad_tensors
);
Run
Backward
(
target_tensors
,
grad_tensors
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
2500.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
2500.0
);
}
}
...
...
paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
浏览文件 @
4b269baa
...
@@ -71,12 +71,12 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
...
@@ -71,12 +71,12 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
std
::
vector
<
egr
::
AutogradMeta
*>
res
=
{
meta
};
std
::
vector
<
egr
::
AutogradMeta
*>
res
=
{
meta
};
scale_node_ptr
->
AddEdges
(
&
res
,
0
);
scale_node_ptr
->
AddEdges
(
&
res
,
0
);
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
5.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
5.0
);
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
10.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
10.0
);
...
...
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
浏览文件 @
4b269baa
...
@@ -86,7 +86,7 @@ TEST(FwdBwdJoint, SingleNode) {
...
@@ -86,7 +86,7 @@ TEST(FwdBwdJoint, SingleNode) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
// 4. Run Backward
// 4. Run Backward
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
VLOG
(
7
)
<<
"Target Grad is: "
VLOG
(
7
)
<<
"Target Grad is: "
<<
std
::
static_pointer_cast
<
phi
::
DenseTensor
>
(
<<
std
::
static_pointer_cast
<
phi
::
DenseTensor
>
(
...
@@ -137,7 +137,7 @@ TEST(FwdBwdJoint, LinearNodes) {
...
@@ -137,7 +137,7 @@ TEST(FwdBwdJoint, LinearNodes) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
};
// 4. Run Backward
// 4. Run Backward
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
10.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
10.0
);
...
@@ -203,7 +203,7 @@ TEST(FwdBwdJoint, BranchedNodes) {
...
@@ -203,7 +203,7 @@ TEST(FwdBwdJoint, BranchedNodes) {
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
...
@@ -260,7 +260,7 @@ TEST(FwdBwdJoint, GradientHook) {
...
@@ -260,7 +260,7 @@ TEST(FwdBwdJoint, GradientHook) {
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
// leaf grad
// leaf grad
...
@@ -318,13 +318,13 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) {
...
@@ -318,13 +318,13 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) {
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
// Cross Batch Accumulation
// Cross Batch Accumulation
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
60.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
60.0
);
...
@@ -356,7 +356,7 @@ TEST(FwdBwdJoint, SingleNodeCUDA) {
...
@@ -356,7 +356,7 @@ TEST(FwdBwdJoint, SingleNodeCUDA) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
// 4. Run Backward
// 4. Run Backward
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
2.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
2.0
);
...
@@ -412,7 +412,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) {
...
@@ -412,7 +412,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) {
// TODO(jiabin): fix this with add functor
// TODO(jiabin): fix this with add functor
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Backward
(
outs
,
{});
Run
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
...
...
paddle/fluid/eager/tests/task_tests/generated_test.cc
浏览文件 @
4b269baa
...
@@ -57,7 +57,7 @@ TEST(Generated, Sigmoid) {
...
@@ -57,7 +57,7 @@ TEST(Generated, Sigmoid) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
output_tensor
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
output_tensor
};
VLOG
(
6
)
<<
"Runing Backward"
;
VLOG
(
6
)
<<
"Runing Backward"
;
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
VLOG
(
6
)
<<
"Finish Backward"
;
VLOG
(
6
)
<<
"Finish Backward"
;
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
0.25
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
0.25
);
...
@@ -89,7 +89,7 @@ TEST(Generated, Matmul_v2) {
...
@@ -89,7 +89,7 @@ TEST(Generated, Matmul_v2) {
eager_test
::
CompareTensorWithValue
<
float
>
(
output_tensor
,
96
);
eager_test
::
CompareTensorWithValue
<
float
>
(
output_tensor
,
96
);
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
output_tensor
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
output_tensor
};
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
2.0
*
20
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
2.0
*
20
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
Y
,
3.0
*
4
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
Y
,
3.0
*
4
);
...
@@ -120,7 +120,7 @@ TEST(Generated, ElementwiseAdd) {
...
@@ -120,7 +120,7 @@ TEST(Generated, ElementwiseAdd) {
eager_test
::
CompareTensorWithValue
<
float
>
(
output_tensor
,
5
);
eager_test
::
CompareTensorWithValue
<
float
>
(
output_tensor
,
5
);
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
output_tensor
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
target_tensors
=
{
output_tensor
};
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
Y
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
Y
,
1.0
);
...
...
paddle/fluid/eager/tests/task_tests/grad_test.cc
已删除
100644 → 0
浏览文件 @
06fee998
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/api/utils/tensor_utils.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_meta.h"
PD_DECLARE_KERNEL
(
full
,
CPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
copy
,
CPU
,
ALL_LAYOUT
);
namespace
egr
{
// Runs Grad() on a one-node graph with an empty grad_tensors list:
//   output --(scale node, scale = 5.0)--> accumulation node of the leaf.
// The gradient returned for the leaf is compared against 5.0.
TEST(Grad, SingleNodeEmptyGrad) {
  using Tensor = paddle::experimental::Tensor;

  // Initialise the environment for CPU.
  const auto cpu_place = paddle::platform::CPUPlace();
  eager_test::InitEnv(cpu_place);

  // Both tensors are 4x16x16x32 FLOAT32 NCHW tensors filled with 1.0; they
  // only differ in the is_leaf flag.
  const paddle::framework::DDim dims = phi::make_ddim({4, 16, 16, 32});
  const auto make_ones = [&](bool is_leaf) -> Tensor {
    return egr_utils_api::CreateTensorWithValue(
        dims, cpu_place, phi::DataType::FLOAT32, phi::DataLayout::NCHW,
        1.0 /*value*/, is_leaf);
  };

  Tensor output = make_ones(false /*is_leaf*/);
  const Tensor leaf = make_ones(true /*is_leaf*/);

  {
    // Backward node attached to the output; its scale attribute is 5.0.
    auto scale_node = std::make_shared<GradNodeScale>(1, 1);
    scale_node->SetAttributes_scale(5.0 /*scale*/);
    scale_node->SetDefaultGradInOutMeta();

    // Output tensor: grad node, out-rank (slot 0, rank 0), stop_gradient.
    AutogradMeta* output_meta = EagerUtils::autograd_meta(&output);
    output_meta->SetGradNode(scale_node);
    output_meta->SetSingleOutRankWithSlot(0, 0);
    output_meta->SetStopGradient(false);

    // The leaf is declared const, so its meta is fetched through
    // unsafe_autograd_meta, which takes the tensor itself.
    AutogradMeta* leaf_meta = EagerUtils::unsafe_autograd_meta(leaf);

    // Leaf tensor: accumulation node, out-rank, stop_gradient.
    auto accumulation_node =
        std::make_shared<egr::GradNodeAccumulation>(leaf_meta);
    leaf_meta->SetGradNode(accumulation_node);
    leaf_meta->SetSingleOutRankWithSlot(0, 0);
    leaf_meta->SetStopGradient(false);

    // Edge from slot 0 of the scale node to the leaf's grad node.
    std::vector<AutogradMeta*> next_metas{leaf_meta};
    scale_node->AddEdges(&next_metas, 0);
  }

  const std::vector<Tensor> outputs{output};

  // Run Grad with no initial gradients and check the value for the leaf.
  const auto grads = Grad(outputs, {leaf}, {});
  eager_test::CompareTensorWithValue<float>(grads[0], 5.0);
}
// Runs Grad() on a one-node graph with a caller-supplied gradient tensor:
//   target --(scale node, scale = 5.0)--> accumulation node of the leaf.
// The supplied gradient is filled with 10.0 and the result for the leaf is
// compared against 50.0.
TEST(Grad, SingleNodeCustomGrad) {
  using Tensor = paddle::experimental::Tensor;

  // Initialise the environment for CPU.
  const auto cpu_place = paddle::platform::CPUPlace();
  eager_test::InitEnv(cpu_place);

  // All tensors share shape 4x16x16x32, FLOAT32 and NCHW; only the fill
  // value and the is_leaf flag vary.
  const paddle::framework::DDim dims = phi::make_ddim({4, 16, 16, 32});
  const auto make_tensor = [&](float value, bool is_leaf) -> Tensor {
    return egr_utils_api::CreateTensorWithValue(
        dims, cpu_place, phi::DataType::FLOAT32, phi::DataLayout::NCHW, value,
        is_leaf);
  };

  // Target tensor (value 1.0, non-leaf).
  std::vector<Tensor> targets;
  targets.emplace_back(make_tensor(1.0, false /*is_leaf*/));

  // Gradient passed to Grad() for the target (value 10.0, non-leaf).
  std::vector<Tensor> initial_grads;
  initial_grads.emplace_back(make_tensor(10.0, false /*is_leaf*/));

  // Leaf tensor whose gradient is requested.
  Tensor leaf = make_tensor(1.0, true /*is_leaf*/);

  {
    // Backward node attached to the target; its scale attribute is 5.0.
    auto scale_node = std::make_shared<GradNodeScale>(1, 1);
    scale_node->SetAttributes_scale(5.0 /*scale*/);
    scale_node->SetDefaultGradInOutMeta();

    // Target tensor: grad node, out-rank (slot 0, rank 0), stop_gradient.
    AutogradMeta* target_meta = EagerUtils::autograd_meta(&(targets[0]));
    target_meta->SetGradNode(scale_node);
    target_meta->SetSingleOutRankWithSlot(0, 0);
    target_meta->SetStopGradient(false);

    // Leaf tensor: accumulation node, out-rank, stop_gradient.
    AutogradMeta* leaf_meta = EagerUtils::autograd_meta(&leaf);
    auto accumulation_node =
        std::make_shared<egr::GradNodeAccumulation>(leaf_meta);
    leaf_meta->SetGradNode(accumulation_node);
    leaf_meta->SetSingleOutRankWithSlot(0, 0);
    leaf_meta->SetStopGradient(false);

    // Edge from slot 0 of the scale node to the leaf's grad node.
    std::vector<AutogradMeta*> next_metas{leaf_meta};
    scale_node->AddEdges(&next_metas, 0);
  }

  const auto grads = Grad(targets, {leaf}, initial_grads);

  // Check the value returned for the leaf.
  eager_test::CompareTensorWithValue<float>(grads[0], 50.0);
}
/*
 Graph built by this test (backward direction):

   target -> Node0 (scale 5.0) -> Node1 (scale 10.0) -> accumulation(leaf)

 Grad() is called with an empty grad_tensors list and the result for the
 leaf is compared against 50.0.
*/
TEST(Grad, LinearNodes) {
  using Tensor = paddle::experimental::Tensor;

  // Initialise the environment for CPU.
  const auto cpu_place = paddle::platform::CPUPlace();
  eager_test::InitEnv(cpu_place);

  // Both tensors are 4x16x16x32 FLOAT32 NCHW tensors filled with 1.0; they
  // only differ in the is_leaf flag.
  const paddle::framework::DDim dims = phi::make_ddim({4, 16, 16, 32});
  const auto make_ones = [&](bool is_leaf) -> Tensor {
    return egr_utils_api::CreateTensorWithValue(
        dims, cpu_place, phi::DataType::FLOAT32, phi::DataLayout::NCHW,
        1.0 /*value*/, is_leaf);
  };

  std::vector<Tensor> targets;
  targets.emplace_back(make_ones(false /*is_leaf*/));

  Tensor leaf = make_ones(true /*is_leaf*/);

  {
    // Node0: scale attribute 5.0, default grad in/out meta.
    auto first_node = std::make_shared<GradNodeScale>(1, 1);
    first_node->SetAttributes_scale(5.0 /*scale*/);
    first_node->SetDefaultGradInOutMeta();

    // Node1: scale attribute 10.0, default grad in/out meta.
    auto second_node = std::make_shared<GradNodeScale>(1, 1);
    second_node->SetAttributes_scale(10.0 /*scale*/);
    second_node->SetDefaultGradInOutMeta();

    // Target tensor -> Node0.
    AutogradMeta* target_meta = EagerUtils::autograd_meta(&(targets[0]));
    target_meta->SetGradNode(first_node);
    target_meta->SetSingleOutRankWithSlot(0, 0);
    target_meta->SetStopGradient(false);

    // Node0 -> Node1: a stand-alone AutogradMeta that points at Node1 is
    // handed to AddEdges on Node0.
    AutogradMeta bridge_meta;
    bridge_meta.SetStopGradient(false);
    bridge_meta.SetSingleOutRankWithSlot(0, 0);
    bridge_meta.SetGradNode(second_node);
    std::vector<AutogradMeta*> first_next{&bridge_meta};
    first_node->AddEdges(&first_next, 0);

    // Leaf tensor: accumulation node, out-rank, stop_gradient.
    AutogradMeta* leaf_meta = EagerUtils::autograd_meta(&leaf);
    auto accumulation_node =
        std::make_shared<egr::GradNodeAccumulation>(leaf_meta);
    leaf_meta->SetGradNode(accumulation_node);
    leaf_meta->SetSingleOutRankWithSlot(0, 0);
    leaf_meta->SetStopGradient(false);

    // Node1 -> leaf's grad node.
    std::vector<AutogradMeta*> second_next{leaf_meta};
    second_node->AddEdges(&second_next, 0);
  }

  // No initial gradients are supplied.
  const auto grads = Grad(targets, {leaf}, {});

  // Check the value returned for the leaf.
  eager_test::CompareTensorWithValue<float>(grads[0], 50.0);
}
/*
 Graph built by this test (backward direction):

   target0 -> Node0 (scale 5.0)  \
                                  +-> Node2 (scale 20.0) -> accumulation(leaf)
   target1 -> Node1 (scale 10.0) /

 Initial gradients of 5.0 (target0) and 10.0 (target1) are supplied and the
 result for the leaf is compared against 2500.0.
 NOTE(review): presumably (5*5 + 10*10) * 20 -- confirm against the
 GradNodeScale / accumulation semantics.
*/
TEST(Grad, WithAccumulation) {
  using Tensor = paddle::experimental::Tensor;

  // Initialise the environment for CPU.
  const auto cpu_place = paddle::platform::CPUPlace();
  eager_test::InitEnv(cpu_place);

  // All created tensors are non-leaf 4x16x16x32 FLOAT32 NCHW tensors; only
  // the fill value varies.
  const paddle::framework::DDim dims = phi::make_ddim({4, 16, 16, 32});
  const auto make_tensor = [&](float value) -> Tensor {
    return egr_utils_api::CreateTensorWithValue(
        dims, cpu_place, phi::DataType::FLOAT32, phi::DataLayout::NCHW, value,
        false /*is_leaf*/);
  };

  // Two target tensors, both filled with 1.0.
  std::vector<Tensor> targets;
  targets.emplace_back(make_tensor(1.0));
  targets.emplace_back(make_tensor(1.0));

  // One gradient tensor per target: 5.0 and 10.0.
  std::vector<Tensor> initial_grads;
  initial_grads.emplace_back(make_tensor(5.0));
  initial_grads.emplace_back(make_tensor(10.0));

  // The leaf is a default-constructed tensor.
  Tensor leaf;

  {
    // Builds a GradNodeScale(1, 1) with the given scale attribute and the
    // default grad in/out meta.
    const auto make_scale_node = [](float scale) {
      auto node = std::make_shared<GradNodeScale>(1, 1);
      node->SetAttributes_scale(scale);
      node->SetDefaultGradInOutMeta();
      return node;
    };

    auto first_node = make_scale_node(5.0);   // Node0
    auto second_node = make_scale_node(10.0); // Node1
    auto merge_node = make_scale_node(20.0);  // Node2

    // target0 -> Node0.
    AutogradMeta* target0_meta = EagerUtils::autograd_meta(&(targets[0]));
    target0_meta->SetGradNode(first_node);
    target0_meta->SetSingleOutRankWithSlot(0, 0);
    target0_meta->SetStopGradient(false);

    // target1 -> Node1.
    AutogradMeta* target1_meta = EagerUtils::autograd_meta(&(targets[1]));
    target1_meta->SetGradNode(second_node);
    target1_meta->SetSingleOutRankWithSlot(0, 0);
    target1_meta->SetStopGradient(false);

    // Node0 -> Node2 through a stand-alone meta pointing at Node2.
    AutogradMeta first_bridge;
    first_bridge.SetStopGradient(false);
    first_bridge.SetSingleOutRankWithSlot(0, 0);
    first_bridge.SetGradNode(merge_node);
    std::vector<AutogradMeta*> first_next{&first_bridge};
    first_node->AddEdges(&first_next, 0);

    // Node1 -> Node2 through a second stand-alone meta pointing at Node2.
    AutogradMeta second_bridge;
    second_bridge.SetStopGradient(false);
    second_bridge.SetSingleOutRankWithSlot(0, 0);
    second_bridge.SetGradNode(merge_node);
    std::vector<AutogradMeta*> second_next{&second_bridge};
    second_node->AddEdges(&second_next, 0);

    // Leaf tensor: accumulation node, out-rank, stop_gradient.
    AutogradMeta* leaf_meta = EagerUtils::autograd_meta(&leaf);
    auto accumulation_node =
        std::make_shared<egr::GradNodeAccumulation>(leaf_meta);
    leaf_meta->SetGradNode(accumulation_node);
    leaf_meta->SetSingleOutRankWithSlot(0, 0);
    leaf_meta->SetStopGradient(false);

    // Node2 -> leaf's grad node.
    std::vector<AutogradMeta*> merge_next{leaf_meta};
    merge_node->AddEdges(&merge_next, 0);
  }

  const auto grads = Grad(targets, {leaf}, initial_grads);

  eager_test::CompareTensorWithValue<float>(grads[0], 2500.0);
}
}
// namespace egr
paddle/fluid/eager/tests/task_tests/hook_test.cc
浏览文件 @
4b269baa
...
@@ -132,7 +132,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
...
@@ -132,7 +132,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
leaf_tensor
);
// result: 4.0*5.0 + 3.0 = 23.0
leaf_tensor
);
// result: 4.0*5.0 + 3.0 = 23.0
}
}
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
4.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
4.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
23.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
23.0
);
...
@@ -199,7 +199,7 @@ TEST(RetainGrad, HookAfterRetainGrad) {
...
@@ -199,7 +199,7 @@ TEST(RetainGrad, HookAfterRetainGrad) {
leaf_tensor
,
std
::
make_shared
<
egr
::
CppTensorHook
>
(
hook_function
));
leaf_tensor
,
std
::
make_shared
<
egr
::
CppTensorHook
>
(
hook_function
));
}
}
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
target_tensor
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
23.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
leaf_tensor
,
23.0
);
}
}
...
...
paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
浏览文件 @
4b269baa
...
@@ -108,7 +108,7 @@ void test_sigmoid(bool is_remove_gradient_hook) {
...
@@ -108,7 +108,7 @@ void test_sigmoid(bool is_remove_gradient_hook) {
}
}
VLOG
(
6
)
<<
"Runing Backward"
;
VLOG
(
6
)
<<
"Runing Backward"
;
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
VLOG
(
6
)
<<
"Finish Backward"
;
VLOG
(
6
)
<<
"Finish Backward"
;
eager_test
::
CompareGradTensorWithValue
<
float
>
(
eager_test
::
CompareGradTensorWithValue
<
float
>
(
...
@@ -166,7 +166,7 @@ void test_elementwiseAdd(bool is_remove_gradient_hook) {
...
@@ -166,7 +166,7 @@ void test_elementwiseAdd(bool is_remove_gradient_hook) {
grad_node_tmp
->
RemoveGradientHook
(
hook_id
);
grad_node_tmp
->
RemoveGradientHook
(
hook_id
);
}
}
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
1.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
eager_test
::
CompareGradTensorWithValue
<
float
>
(
...
@@ -224,7 +224,7 @@ void test_matmul(bool is_remove_gradient_hook) {
...
@@ -224,7 +224,7 @@ void test_matmul(bool is_remove_gradient_hook) {
grad_node_tmp
->
RemoveGradientHook
(
hook_id
);
grad_node_tmp
->
RemoveGradientHook
(
hook_id
);
}
}
Backward
(
target_tensors
,
{});
Run
Backward
(
target_tensors
,
{});
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
2.0
*
20
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
X
,
2.0
*
20
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
eager_test
::
CompareGradTensorWithValue
<
float
>
(
...
...
paddle/fluid/eager/to_static/run_program_op_node.h
浏览文件 @
4b269baa
...
@@ -370,8 +370,8 @@ class GradNodeRunProgram : public egr::GradNodeBase {
...
@@ -370,8 +370,8 @@ class GradNodeRunProgram : public egr::GradNodeBase {
~
GradNodeRunProgram
()
override
=
default
;
~
GradNodeRunProgram
()
override
=
default
;
// Functor: perform backward computations
// Functor: perform backward computations
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
virtual
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
operator
()(
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
&
grads
,
const
std
::
vector
<
std
::
vector
<
paddle
::
experimental
::
Tensor
>>
&
grads
)
bool
create_graph
)
override
{
override
{
VLOG
(
3
)
<<
"Running Eager Backward Node: GradNodeRunProgram"
;
VLOG
(
3
)
<<
"Running Eager Backward Node: GradNodeRunProgram"
;
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
grads
.
size
(),
1
,
grads
.
size
(),
1
,
...
@@ -415,12 +415,6 @@ class GradNodeRunProgram : public egr::GradNodeBase {
...
@@ -415,12 +415,6 @@ class GradNodeRunProgram : public egr::GradNodeBase {
// return {x_grad, details::DereferenceTensors(params_grad_ptr)};
// return {x_grad, details::DereferenceTensors(params_grad_ptr)};
}
}
// Override of ClearTensorWrappers. The body only emits a VLOG(6) message;
// no other statement is executed here.
void
ClearTensorWrappers
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
}
// Override of IsTensorWrappersCleared. Emits a VLOG(6) message and always
// returns false.
bool
IsTensorWrappersCleared
()
override
{
VLOG
(
6
)
<<
"Do nothing here now"
;
return
false
;
}
// SetAttrMap
// SetAttrMap
void
SetAttrMap
(
const
paddle
::
framework
::
AttributeMap
&
attrs
)
{
void
SetAttrMap
(
const
paddle
::
framework
::
AttributeMap
&
attrs
)
{
attrs_
=
attrs
;
attrs_
=
attrs
;
...
...
paddle/fluid/pybind/eager_functions.cc
浏览文件 @
4b269baa
...
@@ -122,33 +122,13 @@ static PyObject* eager_api_run_backward(PyObject* self, PyObject* args,
...
@@ -122,33 +122,13 @@ static PyObject* eager_api_run_backward(PyObject* self, PyObject* args,
EAGER_TRY
EAGER_TRY
auto
tensors
=
CastPyArg2VectorOfTensor
(
PyTuple_GET_ITEM
(
args
,
0
),
0
);
auto
tensors
=
CastPyArg2VectorOfTensor
(
PyTuple_GET_ITEM
(
args
,
0
),
0
);
auto
grad_tensors
=
CastPyArg2VectorOfTensor
(
PyTuple_GET_ITEM
(
args
,
1
),
1
);
auto
grad_tensors
=
CastPyArg2VectorOfTensor
(
PyTuple_GET_ITEM
(
args
,
1
),
1
);
egr
::
Backward
(
tensors
,
grad_tensors
,
egr
::
Run
Backward
(
tensors
,
grad_tensors
,
CastPyArg2AttrBoolean
(
PyTuple_GET_ITEM
(
args
,
2
),
2
));
CastPyArg2AttrBoolean
(
PyTuple_GET_ITEM
(
args
,
2
),
2
));
Py_INCREF
(
Py_None
);
Py_INCREF
(
Py_None
);
return
Py_None
;
return
Py_None
;
EAGER_CATCH_AND_THROW_RETURN_NULL
EAGER_CATCH_AND_THROW_RETURN_NULL
}
}
// Python entry point that unpacks eight positional arguments from `args` and
// forwards them to egr::Grad:
//   0: tensors        1: inputs        2: grad_tensors   3: retain_graph
//   4: create_graph   5: only_inputs   6: allow_unused   7: no_grad_vars
// The resulting tensor vector is converted with ToPyObject, with the
// return_py_none_if_not_initialize flag set to true.
// `self` and `kwargs` are not read by this function.
static PyObject* eager_api_run_partial_grad(PyObject* self, PyObject* args,
                                            PyObject* kwargs) {
  EAGER_TRY
  // Small accessor for the positional argument at `pos`.
  const auto arg_at = [args](int pos) -> PyObject* {
    return PyTuple_GET_ITEM(args, pos);
  };

  // Arguments are converted one by one, in positional order.
  auto output_tensors = CastPyArg2VectorOfTensor(arg_at(0), 0);
  auto input_tensors = CastPyArg2VectorOfTensor(arg_at(1), 1);
  auto initial_grads = CastPyArg2VectorOfTensor(arg_at(2), 2);
  auto keep_graph = CastPyArg2AttrBoolean(arg_at(3), 3);
  auto build_graph = CastPyArg2AttrBoolean(arg_at(4), 4);
  auto inputs_only = CastPyArg2AttrBoolean(arg_at(5), 5);
  auto unused_allowed = CastPyArg2AttrBoolean(arg_at(6), 6);
  auto excluded_vars = CastPyArg2VectorOfTensor(arg_at(7), 7);

  std::vector<paddle::experimental::Tensor> grads =
      egr::Grad(output_tensors, input_tensors, initial_grads, keep_graph,
                build_graph, inputs_only, unused_allowed, excluded_vars);

  VLOG(1) << " in eager_api_run_partial_grad, after runing egr::Grad";
  return ToPyObject(grads, true /* return_py_none_if_not_initialize */);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}
static
PyObject
*
eager_api_tensor_copy
(
PyObject
*
self
,
PyObject
*
args
,
static
PyObject
*
eager_api_tensor_copy
(
PyObject
*
self
,
PyObject
*
args
,
PyObject
*
kwargs
)
{
PyObject
*
kwargs
)
{
EAGER_TRY
EAGER_TRY
...
@@ -472,9 +452,6 @@ PyMethodDef variable_functions[] = {
...
@@ -472,9 +452,6 @@ PyMethodDef variable_functions[] = {
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
{
"run_backward"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_run_backward
,
{
"run_backward"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_run_backward
,
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
{
"run_partial_grad"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_run_partial_grad
,
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
{
"_run_custom_op"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_run_costum_op
,
{
"_run_custom_op"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_run_costum_op
,
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
METH_VARARGS
|
METH_KEYWORDS
,
NULL
},
{
"tensor_copy"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_tensor_copy
,
{
"tensor_copy"
,
(
PyCFunction
)(
void
(
*
)(
void
))
eager_api_tensor_copy
,
...
...
paddle/fluid/pybind/eager_utils.cc
浏览文件 @
4b269baa
...
@@ -492,26 +492,20 @@ PyObject* ToPyObject(const std::vector<double>& value) {
...
@@ -492,26 +492,20 @@ PyObject* ToPyObject(const std::vector<double>& value) {
return
result
;
return
result
;
}
}
PyObject
*
ToPyObject
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
value
,
PyObject
*
ToPyObject
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
value
)
{
bool
return_py_none_if_not_initialize
)
{
PyObject
*
result
=
PyList_New
((
Py_ssize_t
)
value
.
size
());
PyObject
*
result
=
PyList_New
((
Py_ssize_t
)
value
.
size
());
for
(
size_t
i
=
0
;
i
<
value
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
value
.
size
();
i
++
)
{
if
(
!
value
[
i
].
initialized
()
&&
return_py_none_if_not_initialize
)
{
PyObject
*
obj
=
p_tensor_type
->
tp_alloc
(
p_tensor_type
,
0
);
Py_INCREF
(
Py_None
);
if
(
obj
)
{
PyList_SET_ITEM
(
result
,
static_cast
<
Py_ssize_t
>
(
i
),
Py_None
);
auto
v
=
reinterpret_cast
<
TensorObject
*>
(
obj
);
new
(
&
(
v
->
tensor
))
paddle
::
experimental
::
Tensor
();
v
->
tensor
=
value
[
i
];
}
else
{
}
else
{
PyObject
*
obj
=
p_tensor_type
->
tp_alloc
(
p_tensor_type
,
0
);
PADDLE_THROW
(
platform
::
errors
::
Fatal
(
if
(
obj
)
{
"tp_alloc return null, can not new a PyObject."
));
auto
v
=
reinterpret_cast
<
TensorObject
*>
(
obj
);
new
(
&
(
v
->
tensor
))
paddle
::
experimental
::
Tensor
();
v
->
tensor
=
value
[
i
];
}
else
{
PADDLE_THROW
(
platform
::
errors
::
Fatal
(
"tp_alloc return null, can not new a PyObject."
));
}
PyList_SET_ITEM
(
result
,
static_cast
<
Py_ssize_t
>
(
i
),
obj
);
}
}
PyList_SET_ITEM
(
result
,
static_cast
<
Py_ssize_t
>
(
i
),
obj
);
}
}
return
result
;
return
result
;
...
...
paddle/fluid/pybind/eager_utils.h
浏览文件 @
4b269baa
...
@@ -68,8 +68,7 @@ PyObject* ToPyObject(const std::vector<int>& value);
...
@@ -68,8 +68,7 @@ PyObject* ToPyObject(const std::vector<int>& value);
PyObject
*
ToPyObject
(
const
std
::
vector
<
int64_t
>&
value
);
PyObject
*
ToPyObject
(
const
std
::
vector
<
int64_t
>&
value
);
PyObject
*
ToPyObject
(
const
std
::
vector
<
float
>&
value
);
PyObject
*
ToPyObject
(
const
std
::
vector
<
float
>&
value
);
PyObject
*
ToPyObject
(
const
std
::
vector
<
double
>&
value
);
PyObject
*
ToPyObject
(
const
std
::
vector
<
double
>&
value
);
PyObject
*
ToPyObject
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
value
,
PyObject
*
ToPyObject
(
const
std
::
vector
<
paddle
::
experimental
::
Tensor
>&
value
);
bool
return_py_none_if_not_initialize
=
false
);
PyObject
*
ToPyObject
(
const
platform
::
Place
&
value
);
PyObject
*
ToPyObject
(
const
platform
::
Place
&
value
);
PyObject
*
ToPyObject
(
const
framework
::
LoDTensor
*
value
);
PyObject
*
ToPyObject
(
const
framework
::
LoDTensor
*
value
);
PyObject
*
ToPyObject
(
const
paddle
::
framework
::
proto
::
VarType
::
Type
&
dtype
);
PyObject
*
ToPyObject
(
const
paddle
::
framework
::
proto
::
VarType
::
Type
&
dtype
);
...
...
python/paddle/fluid/dygraph/base.py
浏览文件 @
4b269baa
...
@@ -565,25 +565,16 @@ def grad(outputs,
...
@@ -565,25 +565,16 @@ def grad(outputs,
if
isinstance
(
in_out_list
,
(
list
,
tuple
)):
if
isinstance
(
in_out_list
,
(
list
,
tuple
)):
assert
len
(
in_out_list
)
>
0
,
"{} cannot be empty"
.
format
(
name
)
assert
len
(
in_out_list
)
>
0
,
"{} cannot be empty"
.
format
(
name
)
for
each_var
in
in_out_list
:
for
each_var
in
in_out_list
:
if
core
.
_in_eager_mode
():
assert
isinstance
(
assert
isinstance
(
each_var
,
each_var
,
core
.
eager
.
core
.
VarBase
),
"Elements of {} must be Variable"
.
format
(
Tensor
),
"Elements of {} must be Tensor"
.
format
(
name
)
name
)
else
:
assert
isinstance
(
each_var
,
core
.
VarBase
),
"Elements of {} must be Variable"
.
format
(
name
)
return
in_out_list
return
in_out_list
else
:
else
:
if
core
.
_in_eager_mode
():
assert
isinstance
(
assert
isinstance
(
in_out_list
,
in_out_list
,
core
.
eager
.
core
.
VarBase
),
"{} must be Variable or list of Variable"
.
format
(
Tensor
),
"{} must be Tensor or list of Tensor"
.
format
(
name
)
name
)
else
:
assert
isinstance
(
in_out_list
,
core
.
VarBase
),
"{} must be Variable or list of Variable"
.
format
(
name
)
return
[
in_out_list
]
return
[
in_out_list
]
outputs
=
check_in_out
(
outputs
,
'outputs'
)
outputs
=
check_in_out
(
outputs
,
'outputs'
)
...
@@ -595,14 +586,9 @@ def grad(outputs,
...
@@ -595,14 +586,9 @@ def grad(outputs,
for
each_var
in
grad_outputs
:
for
each_var
in
grad_outputs
:
if
each_var
is
not
None
:
if
each_var
is
not
None
:
if
core
.
_in_eager_mode
():
assert
isinstance
(
assert
isinstance
(
each_var
,
core
.
VarBase
each_var
,
core
.
eager
.
Tensor
),
"grad_outputs must be None, a Variable or a list containing None or Variables"
),
"grad_outputs must be None, a Variable or a list containing None or Variables"
else
:
assert
isinstance
(
each_var
,
core
.
VarBase
),
"grad_outputs must be None, a Variable or a list containing None or Variables"
else
:
else
:
grad_outputs
=
[]
grad_outputs
=
[]
...
@@ -614,27 +600,14 @@ def grad(outputs,
...
@@ -614,27 +600,14 @@ def grad(outputs,
no_grad_vars
=
[]
no_grad_vars
=
[]
elif
isinstance
(
no_grad_vars
,
core
.
VarBase
):
elif
isinstance
(
no_grad_vars
,
core
.
VarBase
):
no_grad_vars
=
[
no_grad_vars
]
no_grad_vars
=
[
no_grad_vars
]
elif
isinstance
(
no_grad_vars
,
core
.
eager
.
Tensor
):
no_grad_vars
=
[
no_grad_vars
]
elif
isinstance
(
no_grad_vars
,
(
list
,
tuple
,
set
)):
elif
isinstance
(
no_grad_vars
,
(
list
,
tuple
,
set
)):
no_grad_vars
=
list
(
no_grad_vars
)
no_grad_vars
=
list
(
no_grad_vars
)
for
var
in
no_grad_vars
:
for
var
in
no_grad_vars
:
if
core
.
_in_eager_mode
():
assert
isinstance
(
assert
isinstance
(
var
,
core
.
VarBase
),
"no_grad_vars can only contains Variable"
var
,
core
.
eager
.
Tensor
),
"no_grad_vars can only contains Tensor"
else
:
assert
isinstance
(
var
,
core
.
VarBase
),
"no_grad_vars can only contains Variable"
else
:
else
:
if
core
.
_in_eager_mode
():
raise
AssertionError
(
raise
AssertionError
(
"no_grad_vars must be None, Variable or list/tuple/set of Variables"
)
"no_grad_vars must be None, Tensor or list/tuple/set of Tensors"
)
else
:
raise
AssertionError
(
"no_grad_vars must be None, Variable or list/tuple/set of Variables"
)
assert
isinstance
(
create_graph
,
bool
),
"create_graph must be True or False"
assert
isinstance
(
create_graph
,
bool
),
"create_graph must be True or False"
...
@@ -649,11 +622,6 @@ def grad(outputs,
...
@@ -649,11 +622,6 @@ def grad(outputs,
assert
isinstance
(
only_inputs
,
bool
),
"only_inputs must be True or False"
assert
isinstance
(
only_inputs
,
bool
),
"only_inputs must be True or False"
assert
only_inputs
,
"only_inputs=False is not supported yet"
assert
only_inputs
,
"only_inputs=False is not supported yet"
if
core
.
_in_eager_mode
():
return
core
.
eager
.
run_partial_grad
(
outputs
,
inputs
,
grad_outputs
,
retain_graph
,
create_graph
,
only_inputs
,
allow_unused
,
no_grad_vars
)
place
=
core
.
Place
()
place
=
core
.
Place
()
place
.
set_place
(
framework
.
_current_expected_place
())
place
.
set_place
(
framework
.
_current_expected_place
())
return
core
.
dygraph_partial_grad
(
inputs
,
outputs
,
grad_outputs
,
return
core
.
dygraph_partial_grad
(
inputs
,
outputs
,
grad_outputs
,
...
...
python/paddle/fluid/tests/unittests/test_egr_python_api.py
浏览文件 @
4b269baa
...
@@ -52,7 +52,7 @@ class EagerScaleTestCase(unittest.TestCase):
...
@@ -52,7 +52,7 @@ class EagerScaleTestCase(unittest.TestCase):
out_eager
=
core
.
eager
.
scale
(
data_eager
,
1.0
,
0.9
,
True
,
True
)
out_eager
=
core
.
eager
.
scale
(
data_eager
,
1.0
,
0.9
,
True
,
True
)
self
.
assertIsNone
(
data_eager
.
grad
)
self
.
assertIsNone
(
data_eager
.
grad
)
out_eager
.
backward
(
grad_eager
,
False
)
out_eager
.
backward
(
grad_eager
,
False
)
self
.
assert
IsNotNone
(
data_eager
.
grad
)
self
.
assert
True
(
data_eager
.
grad
.
_is_initialized
()
)
self
.
assertTrue
(
np
.
array_equal
(
data_eager
.
grad
.
numpy
(),
input_data
))
self
.
assertTrue
(
np
.
array_equal
(
data_eager
.
grad
.
numpy
(),
input_data
))
def
test_retain_grad_and_run_backward_raises
(
self
):
def
test_retain_grad_and_run_backward_raises
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_imperative_double_grad.py
浏览文件 @
4b269baa
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -19,9 +19,6 @@ from paddle.vision.models import resnet50, resnet101
...
@@ -19,9 +19,6 @@ from paddle.vision.models import resnet50, resnet101
import
unittest
import
unittest
from
unittest
import
TestCase
from
unittest
import
TestCase
import
numpy
as
np
import
numpy
as
np
import
paddle.compat
as
cpt
from
paddle.fluid.framework
import
_test_eager_guard
import
paddle.fluid.core
as
core
def
_dygraph_guard_
(
func
):
def
_dygraph_guard_
(
func
):
...
@@ -43,80 +40,6 @@ def random_var(size, low=-1, high=1, dtype='float32'):
...
@@ -43,80 +40,6 @@ def random_var(size, low=-1, high=1, dtype='float32'):
return
fluid
.
dygraph
.
to_variable
(
x_np
)
return
fluid
.
dygraph
.
to_variable
(
x_np
)
class TestEagerGrad(TestCase):
    # Exercises fluid.dygraph.grad on a small matmul graph (out = x @ y).
    # Every test_* method runs its func_* body twice: once inside
    # _test_eager_guard() and once outside of it.

    def func_simple_example_eager_grad(self):
        # Gradient of matmul(x, y) w.r.t. x with default arguments.
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        out = paddle.matmul(x, y)
        dx = fluid.dygraph.grad(out, x)

        # With an all-ones upstream gradient, d(out)/dx = ones(3, 1) @ y^T,
        # i.e. a (3, 3) matrix whose rows all equal y^T.
        dout = np.ones_like(np_y)
        expected_dx = np.matmul(dout, np.transpose(np_y))

        # stop_gradient = !create_graph, create_graph default false
        self.assertTrue(dx[0].stop_gradient)
        # Compare against the full expected matrix rather than a single row
        # that only matches through broadcasting.
        self.assertTrue(np.allclose(dx[0].numpy(), expected_dx))

    def test_simple_example_eager_grad(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad()
        self.func_simple_example_eager_grad()

    def func_simple_example_eager_grad_allow_unused(self):
        # Requests gradients for [x, z] with allow_unused=True, where z does
        # not take part in computing `out`.
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        np_z = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
        # out_z is intentionally not used by `out`.
        out_z = paddle.nn.functional.sigmoid(z)
        out = paddle.matmul(x, y)

        dx = fluid.dygraph.grad(out, [x, z], allow_unused=True)

        dout = np.ones_like(np_y)
        expected_dx = np.matmul(dout, np.transpose(np_y))
        self.assertTrue(np.allclose(dx[0].numpy(), expected_dx))
        # stop_gradient = !create_graph, create_graph default false
        self.assertTrue(dx[0].stop_gradient)
        # z is the unused input in the graph, so its gradient slot is None
        self.assertIsNone(dx[1])

    def test_simple_example_eager_grad_allow_unused(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad_allow_unused()
        self.func_simple_example_eager_grad_allow_unused()

    def func_simple_example_eager_grad_not_allow_unused(self):
        # Same graph as the allow_unused case, but with the default
        # allow_unused value.
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        np_z = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
        # out_z is intentionally not used by `out`.
        out_z = paddle.nn.functional.sigmoid(z)
        out = paddle.matmul(x, y)

        # NOTE(review): if no ValueError is raised at all this test still
        # passes; consider assertRaises once both modes are confirmed to
        # raise.
        try:
            # allow_unused is false in default
            dx = fluid.dygraph.grad(out, [x, z])
        except ValueError as e:
            error_msg = cpt.get_exception_message(e)
            # unittest assertion instead of a bare assert, which would be
            # stripped when Python runs with -O.
            self.assertGreater(error_msg.find("allow_unused"), 0)

    def test_simple_example_eager_grad_not_allow_unused(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad_not_allow_unused()
        self.func_simple_example_eager_grad_not_allow_unused()
class
TestDygraphDoubleGrad
(
TestCase
):
class
TestDygraphDoubleGrad
(
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
sort_sum_gradient
=
False
self
.
sort_sum_gradient
=
False
...
@@ -141,7 +64,7 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -141,7 +64,7 @@ class TestDygraphDoubleGrad(TestCase):
allow_unused
=
allow_unused
)
allow_unused
=
allow_unused
)
@
dygraph_guard
@
dygraph_guard
def
func
_exception
(
self
):
def
test
_exception
(
self
):
with
self
.
assertRaises
(
AssertionError
):
with
self
.
assertRaises
(
AssertionError
):
self
.
grad
(
None
,
None
)
self
.
grad
(
None
,
None
)
...
@@ -170,13 +93,8 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -170,13 +93,8 @@ class TestDygraphDoubleGrad(TestCase):
with
self
.
assertRaises
(
AssertionError
):
with
self
.
assertRaises
(
AssertionError
):
self
.
grad
([
random_var
(
shape
)],
[
random_var
(
shape
)],
no_grad_vars
=
1
)
self
.
grad
([
random_var
(
shape
)],
[
random_var
(
shape
)],
no_grad_vars
=
1
)
def
test_exception
(
self
):
with
_test_eager_guard
():
self
.
func_exception
()
self
.
func_exception
()
@
dygraph_guard
@
dygraph_guard
def
func
_simple_example
(
self
):
def
test
_simple_example
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x
.
stop_gradient
=
False
x
.
stop_gradient
=
False
y
=
x
+
1
y
=
x
+
1
...
@@ -205,44 +123,8 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -205,44 +123,8 @@ class TestDygraphDoubleGrad(TestCase):
self
.
assertNotEqual
(
grad_with_none_and_not_none
.
stop_gradient
,
self
.
assertNotEqual
(
grad_with_none_and_not_none
.
stop_gradient
,
create_graph
)
create_graph
)
def
test_simple_example
(
self
):
with
_test_eager_guard
():
self
.
func_simple_example
()
self
.
func_simple_example
()
@
dygraph_guard
@
dygraph_guard
def
func_example_no_grad_vars
(
self
):
def
test_none_one_initial_gradient
(
self
):
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
x
.
stop_gradient
=
False
y1
=
fluid
.
layers
.
relu
(
x
)
y2
=
fluid
.
layers
.
relu
(
x
)
z
=
y1
+
y2
w
=
z
*
z
w_mean
=
fluid
.
layers
.
reduce_mean
(
w
)
del
y1
,
z
,
w
dx_actual
,
=
self
.
grad
(
[
w_mean
],
[
x
],
create_graph
=
True
,
no_grad_vars
=
[
y2
])
self
.
assertFalse
(
y2
.
stop_gradient
)
self
.
assertFalse
(
dx_actual
.
stop_gradient
)
dx_expected
=
(
1.0
/
float
(
numel
)
*
(
np
.
maximum
(
x_np
,
0
)
+
y2
.
numpy
())
*
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
def
test_example_no_grad_vars
(
self
):
with
_test_eager_guard
():
self
.
func_example_no_grad_vars
()
self
.
func_example_no_grad_vars
()
@
dygraph_guard
def
func_none_one_initial_gradient
(
self
):
numel
=
1
numel
=
1
for
s
in
self
.
shape
:
for
s
in
self
.
shape
:
numel
*=
s
numel
*=
s
...
@@ -308,13 +190,8 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -308,13 +190,8 @@ class TestDygraphDoubleGrad(TestCase):
np
.
array_equal
(
grad_z
.
numpy
(),
np
.
array_equal
(
grad_z
.
numpy
(),
original_random_grad_z
))
original_random_grad_z
))
def
test_none_one_initial_gradient
(
self
):
with
_test_eager_guard
():
self
.
func_none_one_initial_gradient
()
self
.
func_none_one_initial_gradient
()
@
dygraph_guard
@
dygraph_guard
def
func
_example_with_gradient_accumulation_and_create_graph
(
self
):
def
test
_example_with_gradient_accumulation_and_create_graph
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -337,33 +214,25 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -337,33 +214,25 @@ class TestDygraphDoubleGrad(TestCase):
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
if
core
.
_in_eager_mode
():
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
pass
loss
.
backward
(
retain_graph
=
True
)
else
:
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
loss
.
backward
(
retain_graph
=
True
)
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
for
i
in
range
(
5
):
loss
.
backward
(
retain_graph
=
True
)
x_grad_actual
=
x
.
gradient
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_grad_expected
=
(
i
+
2
)
*
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
for
i
in
range
(
5
):
loss
.
backward
(
retain_graph
=
True
)
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
i
+
2
)
*
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
def
test_example_with_gradient_accumulation_and_create_graph
(
self
):
with
_test_eager_guard
():
self
.
func_example_with_gradient_accumulation_and_create_graph
()
self
.
func_example_with_gradient_accumulation_and_create_graph
()
@
dygraph_guard
@
dygraph_guard
def
func
_example_with_gradient_accumulation_and_no_grad_vars
(
self
):
def
test
_example_with_gradient_accumulation_and_no_grad_vars
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -387,25 +256,17 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -387,25 +256,17 @@ class TestDygraphDoubleGrad(TestCase):
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
if
core
.
_in_eager_mode
():
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
pass
loss
.
backward
()
else
:
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
loss
.
backward
()
x_grad_actual
=
x
.
gradient
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
x_np
+
dx_expected
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
4
/
float
(
numel
))).
astype
(
'float32'
)
(
x_np
>
0
)
*
4
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
def
test_example_with_gradient_accumulation_and_no_grad_vars
(
self
):
with
_test_eager_guard
():
self
.
func_example_with_gradient_accumulation_and_no_grad_vars
()
self
.
func_example_with_gradient_accumulation_and_no_grad_vars
()
@
dygraph_guard
@
dygraph_guard
def
func
_example_with_gradient_accumulation_and_not_create_graph
(
self
):
def
test
_example_with_gradient_accumulation_and_not_create_graph
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -428,20 +289,12 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -428,20 +289,12 @@ class TestDygraphDoubleGrad(TestCase):
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
if
core
.
_in_eager_mode
():
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
pass
loss
.
backward
()
else
:
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
loss
.
backward
()
x_grad_actual
=
x
.
gradient
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
*
x_np
/
float
(
numel
)).
astype
(
'float32'
)
x_grad_expected
=
(
2.0
*
x_np
/
float
(
numel
)).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
def
test_example_with_gradient_accumulation_and_not_create_graph
(
self
):
with
_test_eager_guard
():
self
.
func_example_with_gradient_accumulation_and_not_create_graph
()
self
.
func_example_with_gradient_accumulation_and_not_create_graph
()
class
TestDygraphDoubleGradSortGradient
(
TestDygraphDoubleGrad
):
class
TestDygraphDoubleGradSortGradient
(
TestDygraphDoubleGrad
):
...
@@ -451,7 +304,7 @@ class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
...
@@ -451,7 +304,7 @@ class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
class
TestDygraphDoubleGradVisitedUniq
(
TestCase
):
class
TestDygraphDoubleGradVisitedUniq
(
TestCase
):
def
func
_compare
(
self
):
def
test
_compare
(
self
):
value
=
np
.
random
.
uniform
(
-
0.5
,
0.5
,
100
).
reshape
(
10
,
2
,
value
=
np
.
random
.
uniform
(
-
0.5
,
0.5
,
100
).
reshape
(
10
,
2
,
5
).
astype
(
"float32"
)
5
).
astype
(
"float32"
)
...
@@ -496,11 +349,6 @@ class TestDygraphDoubleGradVisitedUniq(TestCase):
...
@@ -496,11 +349,6 @@ class TestDygraphDoubleGradVisitedUniq(TestCase):
self
.
assertTrue
(
np
.
array_equal
(
grad_1
,
grad_2
))
self
.
assertTrue
(
np
.
array_equal
(
grad_1
,
grad_2
))
def
test_compare
(
self
):
with
_test_eager_guard
():
self
.
func_compare
()
self
.
func_compare
()
class
TestRaiseNoDoubleGradOp
(
TestCase
):
class
TestRaiseNoDoubleGradOp
(
TestCase
):
def
raise_no_grad_op
(
self
):
def
raise_no_grad_op
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
浏览文件 @
4b269baa
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -18,8 +18,6 @@ import unittest
...
@@ -18,8 +18,6 @@ import unittest
from
unittest
import
TestCase
from
unittest
import
TestCase
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
from
paddle.fluid.framework
import
_test_eager_guard
import
paddle.fluid.core
as
core
def
_dygraph_guard_
(
func
):
def
_dygraph_guard_
(
func
):
...
@@ -64,7 +62,7 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -64,7 +62,7 @@ class TestDygraphDoubleGrad(TestCase):
allow_unused
=
allow_unused
)
allow_unused
=
allow_unused
)
@
dygraph_guard
@
dygraph_guard
def
func
_exception
(
self
):
def
test
_exception
(
self
):
with
self
.
assertRaises
(
AssertionError
):
with
self
.
assertRaises
(
AssertionError
):
self
.
grad
(
None
,
None
)
self
.
grad
(
None
,
None
)
...
@@ -93,13 +91,8 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -93,13 +91,8 @@ class TestDygraphDoubleGrad(TestCase):
with
self
.
assertRaises
(
AssertionError
):
with
self
.
assertRaises
(
AssertionError
):
self
.
grad
([
random_var
(
shape
)],
[
random_var
(
shape
)],
no_grad_vars
=
1
)
self
.
grad
([
random_var
(
shape
)],
[
random_var
(
shape
)],
no_grad_vars
=
1
)
def
test_exception
(
self
):
with
_test_eager_guard
():
self
.
func_exception
()
self
.
func_exception
()
@
dygraph_guard
@
dygraph_guard
def
func
_simple_example
(
self
):
def
test
_simple_example
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x
.
stop_gradient
=
False
x
.
stop_gradient
=
False
y
=
x
+
1
y
=
x
+
1
...
@@ -128,13 +121,8 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -128,13 +121,8 @@ class TestDygraphDoubleGrad(TestCase):
self
.
assertNotEqual
(
grad_with_none_and_not_none
.
stop_gradient
,
self
.
assertNotEqual
(
grad_with_none_and_not_none
.
stop_gradient
,
create_graph
)
create_graph
)
def
test_simple_example
(
self
):
with
_test_eager_guard
():
self
.
func_simple_example
()
self
.
func_simple_example
()
@
dygraph_guard
@
dygraph_guard
def
func
_none_one_initial_gradient
(
self
):
def
test
_none_one_initial_gradient
(
self
):
numel
=
1
numel
=
1
for
s
in
self
.
shape
:
for
s
in
self
.
shape
:
numel
*=
s
numel
*=
s
...
@@ -200,13 +188,8 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -200,13 +188,8 @@ class TestDygraphDoubleGrad(TestCase):
np
.
array_equal
(
grad_z
.
numpy
(),
np
.
array_equal
(
grad_z
.
numpy
(),
original_random_grad_z
))
original_random_grad_z
))
def
test_none_one_initial_gradient
(
self
):
with
_test_eager_guard
():
self
.
func_none_one_initial_gradient
()
self
.
func_none_one_initial_gradient
()
@
dygraph_guard
@
dygraph_guard
def
func
_example_with_gradient_accumulation_and_create_graph
(
self
):
def
test
_example_with_gradient_accumulation_and_create_graph
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -229,25 +212,17 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -229,25 +212,17 @@ class TestDygraphDoubleGrad(TestCase):
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
if
core
.
_in_eager_mode
():
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
pass
loss
.
backward
()
else
:
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
loss
.
backward
()
x_grad_actual
=
x
.
gradient
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
x_np
+
dx_expected
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
def
test_example_with_gradient_accumulation_and_create_graph
(
self
):
with
_test_eager_guard
():
self
.
func_example_with_gradient_accumulation_and_create_graph
()
self
.
func_example_with_gradient_accumulation_and_create_graph
()
@
dygraph_guard
@
dygraph_guard
def
func
_example_with_gradient_accumulation_and_no_grad_vars
(
self
):
def
test
_example_with_gradient_accumulation_and_no_grad_vars
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -271,25 +246,17 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -271,25 +246,17 @@ class TestDygraphDoubleGrad(TestCase):
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
if
core
.
_in_eager_mode
():
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
pass
loss
.
backward
()
else
:
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
loss
.
backward
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
4
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
def
test_example_with_gradient_accumulation_and_no_grad_vars
(
self
):
x_grad_actual
=
x
.
gradient
()
with
_test_eager_guard
():
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
self
.
func_example_with_gradient_accumulation_and_no_grad_vars
()
(
x_np
+
dx_expected
*
self
.
func_example_with_gradient_accumulation_and_no_grad_vars
()
(
x_np
>
0
)
*
4
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
@
dygraph_guard
@
dygraph_guard
def
func
_example_with_gradient_accumulation_and_not_create_graph
(
self
):
def
test
_example_with_gradient_accumulation_and_not_create_graph
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -312,20 +279,12 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -312,20 +279,12 @@ class TestDygraphDoubleGrad(TestCase):
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
if
core
.
_in_eager_mode
():
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
pass
loss
.
backward
()
else
:
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
loss
.
backward
()
x_grad_actual
=
x
.
gradient
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
2.0
*
x_np
/
float
(
numel
)).
astype
(
'float32'
)
x_grad_expected
=
(
2.0
*
x_np
/
float
(
numel
)).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
def
test_example_with_gradient_accumulation_and_not_create_graph
(
self
):
with
_test_eager_guard
():
self
.
func_example_with_gradient_accumulation_and_not_create_graph
()
self
.
func_example_with_gradient_accumulation_and_not_create_graph
()
class
TestDygraphDoubleGradSortGradient
(
TestDygraphDoubleGrad
):
class
TestDygraphDoubleGradSortGradient
(
TestDygraphDoubleGrad
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录