Unverified commit 83b953f5, authored by chentianyu03, committed by GitHub

add custom init grad for backward function (#31540)

* add custom init grad for backward function

* add custom init grad for backward function

* handle when the grad_tensor is none

* handle when the grad_tensor is none

* fix the args type error on windows platform

* modify the args order and doc

* format code

* add grad_tensor to xpu

* modify the grad_tensor type check

* add paddle.backward api to support multi tensors gradient compute

* add paddle.backward api to support multi tensors gradient compute

* add paddle.autograd module and backward api

* change tensor.backward func args

* modify tensor backward api

* remove create_graph inputs args

* add doc and example code for backward api

* throw an error when tensors contains duplicate tensors

* modify test Init func args

* modify the execute.Init func args in test files

* add paddle.autograd package in setup.py.in

* modify error msg, remove _run_backward method in class Tensor

* add test cases for backward api
Parent 9c5d0286
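For orientation, this is the user-facing behaviour the diff below introduces: Tensor.backward gains a grad_tensor argument for a custom initial gradient, and a new paddle.autograd.backward API runs backward for several root tensors in one call. A minimal sketch, assuming a Paddle build that contains this commit:

    import paddle

    x = paddle.to_tensor([[1., 2.], [3., 4.]], stop_gradient=False)
    y = paddle.to_tensor([[3., 2.], [3., 4.]])

    # Tensor.backward now accepts a custom initial gradient (the default is a tensor of ones).
    z = paddle.matmul(x, y)
    z.backward(paddle.full_like(z, 2.0))
    print(x.grad)  # twice the gradient that the default initial gradient would give

    # paddle.autograd.backward runs backward for several root tensors in one call;
    # a None entry in grad_tensors falls back to a tensor filled with 1.0.
    x.clear_grad()
    z1 = paddle.matmul(x, y)
    z2 = paddle.matmul(x, y)
    paddle.autograd.backward([z1, z2], [paddle.ones_like(z1), None])
    print(x.grad)  # gradients from z1 and z2 accumulate into x.grad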
@@ -36,48 +36,73 @@ DECLARE_bool(sort_sum_gradient);
 namespace paddle {
 namespace imperative {
 
-void BasicEngine::Init(VarBase* var, bool retain_graph) {
+void BasicEngine::Init(
+    const std::vector<std::shared_ptr<VarBase>>& tensors,
+    const std::vector<std::shared_ptr<VarBase>>& grad_tensors,
+    bool retain_graph) {
   retain_graph_ = retain_graph;
-  init_node_ = var->GradVarBase()->GradNode();
-  PADDLE_ENFORCE_EQ(var->GradVarBase()->GraphIsFreed(), false,
-                    platform::errors::Unavailable(
-                        "%s trying to backward through the same graph a second "
-                        "time, but this graph have already been freed. Please "
-                        "specify Tensor.backward(retain_graph=True) when "
-                        "calling backward at the first time.",
-                        var->Name()));
 
-  if (!retain_graph) {
-    VLOG(5) << "Clear the auto-grad graph from grad var " << var->Name()
-            << " because of retain_graph=False when calling backward";
-    var->GradVarBase()->SetGraphIsFreed(true);
-    var->GradVarBase()->ClearGradNode();
-  }
+  PADDLE_ENFORCE_EQ(
+      tensors.size(), grad_tensors.size(),
+      platform::errors::Unavailable(
+          "The size of tensors do not equal the size of grad_tensors,"
+          "the size of tensors is %s, but the size of grad_tensors is %s.",
+          tensors.size(), grad_tensors.size()));
 
-  if (init_node_ == nullptr || var->OverridedStopGradient()) {
-    VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
-               "stop_gradient=True: "
-            << var->Name();
-    return;
-  }
+  for (size_t i = 0; i < tensors.size(); ++i) {
+    auto var = tensors[i];
+    auto grad_tensor = grad_tensors[i];
 
-  VLOG(3) << "Init first node of backward";
+    auto init_node = var->GradVarBase()->GradNode();
+    PADDLE_ENFORCE_EQ(
+        var->GradVarBase()->GraphIsFreed(), false,
+        platform::errors::Unavailable(
+            "%s trying to backward through the same graph a second "
+            "time, but this graph have already been freed. Please "
+            "specify Tensor.backward(retain_graph=True) when "
+            "calling backward at the first time.",
+            var->Name()));
 
-  PADDLE_ENFORCE_EQ(
-      var->HasGradVar(), true,
-      platform::errors::NotFound("Grad variable not exist for variable %s",
-                                 var->Name()));
+    if (!retain_graph) {
+      VLOG(5) << "Clear the auto-grad graph from grad var " << var->Name()
+              << " because of retain_graph=False when calling backward";
+      var->GradVarBase()->SetGraphIsFreed(true);
+      var->GradVarBase()->ClearGradNode();
+    }
 
-  auto& fwd_var = var->Var().Get<framework::LoDTensor>();
-  auto* grad_var =
-      var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
-  VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
-          << " as stop_gradient false";
-  var->GradVarBase()->InnerSetOverridedStopGradient(false);
-  auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
-  grad_var->Resize(fwd_var.dims());
-  grad_var->mutable_data(fwd_var.place(), fwd_var.type());
-  operators::math::set_constant(*dev_ctx, grad_var, 1.0);
+    if (init_node == nullptr || var->OverridedStopGradient()) {
+      VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
+                 "stop_gradient=True: "
+              << var->Name();
+      continue;
+    }
+
+    VLOG(3) << "Init node of backward";
+
+    PADDLE_ENFORCE_EQ(
+        var->HasGradVar(), true,
+        platform::errors::NotFound("Tensor %s has no gradient", var->Name()));
+
+    auto& fwd_var = var->Var().Get<framework::LoDTensor>();
+    auto* grad_var =
+        var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
+    VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
+            << " as stop_gradient false";
+    var->GradVarBase()->InnerSetOverridedStopGradient(false);
+    auto* dev_ctx =
+        platform::DeviceContextPool::Instance().Get(fwd_var.place());
+    if (grad_tensor == nullptr) {
+      grad_var->Resize(fwd_var.dims());
+      grad_var->mutable_data(fwd_var.place(), fwd_var.type());
+      operators::math::set_constant(*dev_ctx, grad_var, 1.0);
+    } else {
+      paddle::framework::TensorCopy(
+          grad_tensor->Var().Get<framework::LoDTensor>(), fwd_var.place(),
+          *dev_ctx, grad_var);
+    }
+
+    init_nodes_.push_back(init_node);
+  }
 }
 
 void BasicEngine::CheckBackwardInputs(const OpBase& op) {
@@ -224,8 +249,10 @@ void BasicEngine::PrepareDeps() {
   std::queue<GradOpNode*> q;
   std::unordered_set<GradOpNode*> visited;
 
-  q.push(init_node_.get());
-  visited.insert(init_node_.get());
+  for (size_t i = 0; i < init_nodes_.size(); ++i) {
+    q.push(init_nodes_[i].get());
+    visited.insert(init_nodes_[i].get());
+  }
 
   while (!q.empty()) {
     auto* cur_node = q.front();
@@ -276,14 +303,16 @@ static std::shared_ptr<NameVarMap<VariableWrapper>> CallGradientHooks(
 }
 
 void BasicEngine::Execute() {
-  if (init_node_ == nullptr) {
+  if (init_nodes_.empty()) {
     return;
   }
 
   PrepareDeps();
   // Start execute Computation graph
   std::queue<std::shared_ptr<GradOpNode>> q;
-  q.push(std::move(init_node_));
+  for (size_t i = 0; i < init_nodes_.size(); ++i) {
+    q.push(std::move(init_nodes_[i]));
+  }
 
   size_t op_num = 0;
@@ -505,7 +534,7 @@ void BasicEngine::Execute() {
 }
 
 void BasicEngine::Clear() {
-  init_node_.reset();
+  init_nodes_.clear();
   node_deps_.clear();
   accumulators_.clear();
   accumulators_with_grad_node_.clear();
......
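In the new Init() above, a null grad_tensor keeps the old behaviour (the initial gradient is filled with 1.0 via set_constant), while a supplied grad_tensor is copied into the grad variable with TensorCopy. A small Python-level sketch of that equivalence, for illustration only:

    import paddle

    x = paddle.to_tensor([3.0], stop_gradient=False)

    y = x * x
    y.backward()                          # default path: initial gradient of ones
    grad_default = float(x.grad)          # 2 * x = 6.0

    x.clear_grad()
    y = x * x
    y.backward(paddle.to_tensor([1.0]))   # explicit path: the tensor is copied in
    assert float(x.grad) == grad_default  # an explicit gradient of ones matches the default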
@@ -30,7 +30,9 @@ class OpBase;
 
 class BasicEngine : public Engine {
  public:
-  void Init(VarBase* var, bool retain_graph = false);
+  void Init(const std::vector<std::shared_ptr<VarBase>>& tensors,
+            const std::vector<std::shared_ptr<VarBase>>& grad_tensors,
+            bool retain_graph = false);
 
   void Execute() override;
@@ -46,7 +48,7 @@ class BasicEngine : public Engine {
   void Clear();
 
  private:
-  std::shared_ptr<GradOpNode> init_node_;
+  std::vector<std::shared_ptr<GradOpNode>> init_nodes_;
   std::unordered_map<GradOpNode*, size_t> node_deps_;
   // The input and output of Inplace op are the same. If only `var` is used
   // as the key, then the input and output of inplace op must be gradient
......
@@ -92,8 +92,10 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
   ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL);
 
   // 3. backward
+  std::vector<std::shared_ptr<imperative::VarBase>> tensors{out};
+  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
   BasicEngine engine;
-  engine.Init(out.get());
+  engine.Init(tensors, grad_tensors);
   engine.Execute();
 
   framework::LoDTensor x_grad;
@@ -191,8 +193,10 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
   ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL);
 
   // 3. backward
+  std::vector<std::shared_ptr<imperative::VarBase>> tensors{out};
+  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
   BasicEngine engine;
-  engine.Init(out.get());
+  engine.Init(tensors, grad_tensors);
   engine.Execute();
 
   framework::LoDTensor x_grad;
......
@@ -250,7 +250,10 @@ TEST(test_tracer, test_trace_op_with_multi_device_inputs) {
   tracer.TraceOp("reduce_sum", reduce_in, reduce_out, reduce_attr_map,
                  gpu_place, true);
   imperative::BasicEngine engine;
-  engine.Init(reduce_sum_out.get());
+
+  std::vector<std::shared_ptr<imperative::VarBase>> tensors{reduce_sum_out};
+  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
+  engine.Init(tensors, grad_tensors);
   engine.Execute();
 
   framework::LoDTensor rlt;
@@ -376,8 +379,10 @@ TEST(test_tracer, test_var_without_grad_var) {
   ASSERT_EQ(y_in->GradVarBase()->GradOpNum(), 0UL);
   ASSERT_EQ(vout->GradVarBase()->GradOpNum(), 1UL);
 
+  std::vector<std::shared_ptr<imperative::VarBase>> tensors{vout};
+  std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
   imperative::BasicEngine engine;
-  engine.Init(vout.get());
+  engine.Init(tensors, grad_tensors);
   engine.Execute();
 
   // check the grad
......
@@ -805,6 +805,7 @@ void BindImperative(py::module *m_ptr) {
         Bump the version whenever the Tensor is modified through an inplace operation.
       )DOC")
+      .def("numpy",
           [](imperative::VarBase &self) -> py::array {
             const auto &tensor =
                 self.MutableVar()->Get<framework::LoDTensor>();
@@ -1003,18 +1004,6 @@ void BindImperative(py::module *m_ptr) {
                print(x.stop_gradient) # True
                print(x.grad)          # None
        )DOC")
-      .def("_run_backward",
-           [](imperative::VarBase &self, const imperative::Tracer &tracer,
-              bool retain_graph) {
-             // TODO(jiabin): when we impl more backward execution we can
-             // select them
-             auto *engine = tracer.GetEngine();
-             engine->Init(&self, retain_graph);
-             VLOG(3) << "Start backward";
-             engine->Execute();
-             VLOG(3) << "Finish backward";
-           },
-           py::call_guard<py::gil_scoped_release>())
       .def("_grad_name", &imperative::VarBase::GradVarName)
       .def("_grad_value",
            [](imperative::VarBase &self) {
@@ -1549,6 +1538,19 @@ void BindImperative(py::module *m_ptr) {
       },
       py::call_guard<py::gil_scoped_release>());
 
+  m.def(
+      "dygraph_run_backward",
+      [](const std::vector<std::shared_ptr<imperative::VarBase>> &tensors,
+         const std::vector<std::shared_ptr<imperative::VarBase>> &grad_tensors,
+         bool retain_graph, const imperative::Tracer &tracer) {
+        auto *engine = tracer.GetEngine();
+        engine->Init(tensors, grad_tensors, retain_graph);
+        VLOG(3) << "Start backward";
+        engine->Execute();
+        VLOG(3) << "Finish backward";
+      },
+      py::call_guard<py::gil_scoped_release>());
+
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
     defined(PADDLE_WITH_XPU_BKCL)
   py::class_<imperative::ParallelContext,
......
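The dygraph_run_backward binding added above replaces the per-Tensor _run_backward method; it takes the list of root tensors, an equal-length list of initial gradients (None entries mean "filled with 1.0"), the retain_graph flag, and the tracer. A hedged sketch of the low-level call pattern, mirroring the Python wrapper added later in this diff (user code should normally go through paddle.autograd.backward instead):

    import paddle
    from paddle.fluid import core, framework

    x = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
    z = paddle.sum(x * x)

    # Roots and initial gradients are passed as equal-length lists.
    core.dygraph_run_backward([z], [None], False, framework._dygraph_tracer())
    print(x.grad)  # [2. 4.]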
@@ -44,6 +44,7 @@ import paddle.metric
 import paddle.device
 import paddle.regularizer
 import paddle.incubate
+import paddle.autograd
 
 # TODO: define alias in tensor and framework directory
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..fluid.dygraph.base import grad #DEFINE_ALIAS
from . import backward_mode
from .backward_mode import backward
__all__ = ['grad']
__all__ += backward_mode.__all__
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid import core
from paddle.fluid import framework
import paddle
__all__ = ['backward']
@framework.dygraph_only
def backward(tensors, grad_tensors=None, retain_graph=False):
    """
    Compute the backward gradients of given tensors.

    Args:
        tensors(list of Tensors): the tensors whose gradients are to be computed.
            The list must not contain duplicate tensors.
        grad_tensors(list of Tensors or None, optional): the initial gradients of ``tensors``.
            If not None, it must have the same length as ``tensors``, and any element that is
            None falls back to the default initial gradient, a tensor filled with 1.0.
            If None, every initial gradient defaults to a tensor filled with 1.0.
            Defaults to None.
        retain_graph(bool, optional): If False, the graph used to compute grads will be freed.
            If you would like to add more ops to the built graph after calling this method
            (:code:`backward`), set the parameter :code:`retain_graph` to True, then the grads
            will be retained. Thus, setting it to False is much more memory-efficient.
            Defaults to False.

    Returns:
        NoneType: None

    Examples:
        .. code-block:: python

            import paddle
            x = paddle.to_tensor([[1, 2], [3, 4]], dtype='float32', stop_gradient=False)
            y = paddle.to_tensor([[3, 2], [3, 4]], dtype='float32')

            grad_tensor1 = paddle.to_tensor([[1,2], [2, 3]], dtype='float32')
            grad_tensor2 = paddle.to_tensor([[1,1], [1, 1]], dtype='float32')

            z1 = paddle.matmul(x, y)
            z2 = paddle.matmul(x, y)

            paddle.autograd.backward([z1, z2], [grad_tensor1, grad_tensor2], True)
            print(x.grad)
            #[[12. 18.]
            # [17. 25.]]

            x.clear_grad()

            paddle.autograd.backward([z1, z2], [grad_tensor1, None], True)
            print(x.grad)
            #[[12. 18.]
            # [17. 25.]]

            x.clear_grad()

            paddle.autograd.backward([z1, z2])
            print(x.grad)
            #[[10. 14.]
            # [10. 14.]]

    """

    def check_tensors(in_out_list, name):
        assert in_out_list is not None, "{} should not be None".format(name)

        if isinstance(in_out_list, (list, tuple)):
            assert len(in_out_list) > 0, "{} cannot be empty".format(name)
            for each_var in in_out_list:
                assert isinstance(
                    each_var,
                    paddle.Tensor), "Elements of {} must be paddle.Tensor".format(name)
            return in_out_list
        else:
            assert isinstance(
                in_out_list,
                paddle.Tensor), "{} must be Tensor or list of Tensor".format(name)
            return [in_out_list]

    tensors = check_tensors(tensors, "tensors")

    assert len(tensors) == len(set(tensors)), \
        "The argument 'tensors' of paddle.autograd.backward contains duplicate paddle.Tensor object."

    if grad_tensors is not None:
        if not isinstance(grad_tensors, (list, tuple)):
            grad_tensors = [grad_tensors]

        for each_tensor in grad_tensors:
            if each_tensor is not None:
                assert isinstance(
                    each_tensor, paddle.Tensor
                ), "The argument 'grad_tensors' of paddle.autograd.backward is invalid, it can be 'None', 'paddle.Tensor' or 'list[None/paddle.Tensor]'."
    else:
        grad_tensors = [None] * len(tensors)

    if len(grad_tensors) > 0:
        assert len(tensors) == len(grad_tensors), \
            "The length of grad_tensors must be equal to tensors"

    assert isinstance(retain_graph, bool), "retain_graph must be True or False"

    core.dygraph_run_backward(tensors, grad_tensors, retain_graph,
                              framework._dygraph_tracer())
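Two hedged examples of the argument handling above: a bare Tensor is wrapped into a list by check_tensors, and passing the same root tensor twice trips the duplicate-tensor assertion:

    import paddle

    x = paddle.to_tensor([2.0], stop_gradient=False)

    # A single Tensor (rather than a list) is accepted and wrapped internally.
    z = x * x
    paddle.autograd.backward(z, paddle.to_tensor([1.0]))
    print(x.grad)  # [4.]

    # Duplicate root tensors are rejected before any backward pass runs.
    z2 = x * x
    try:
        paddle.autograd.backward([z2, z2])
    except AssertionError as e:
        print("rejected:", e)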
@@ -26,6 +26,7 @@ import logging
 from ..data_feeder import convert_dtype
 import warnings
 from ..framework import _get_paddle_place
+import paddle
 
 __all__ = [
     'no_grad', 'no_grad_', 'grad', 'guard', 'enable_dygraph', 'disable_dygraph',
......
@@ -163,7 +163,7 @@ def monkey_patch_varbase():
                 framework._current_expected_place())
 
     @framework.dygraph_only
-    def backward(self, retain_graph=False):
+    def backward(self, grad_tensor=None, retain_graph=False):
         """
         Run backward of current Graph which starts from current Tensor.
@@ -172,17 +172,22 @@ def monkey_patch_varbase():
         You can clear gradient by ``Tensor.clear_grad()`` .
 
         Args:
+            grad_tensor(Tensor, optional): the initial gradient values of the current Tensor. If `grad_tensor` is None,
+                the initial gradient values of the current Tensor would be a Tensor filled with 1.0;
+                if `grad_tensor` is not None, it must have the same shape as the current Tensor.
+                The default value is None.
             retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
                 like to add more ops to the built graph after calling this method (:code:`backward`), set the parameter
                 :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
                 Defaults to False.
         Returns:
             NoneType: None
 
         Examples:
             .. code-block:: python
 
+                import paddle
                 x = paddle.to_tensor(5., stop_gradient=False)
                 for i in range(5):
                     y = paddle.pow(x, 4.0)
@@ -198,15 +203,36 @@ def monkey_patch_varbase():
                 print("{}".format(x.grad))
                 # 0.
 
+                grad_tensor=paddle.to_tensor(2.)
+                for i in range(5):
+                    y = paddle.pow(x, 4.0)
+                    y.backward(grad_tensor)
+                    print("{}: {}".format(i, x.grad))
+                # 0: [1000.]
+                # 1: [2000.]
+                # 2: [3000.]
+                # 3: [4000.]
+                # 4: [5000.]
+
         """
         if framework.in_dygraph_mode():
+            if grad_tensor is not None:
+                assert isinstance(
+                    grad_tensor, paddle.Tensor
+                ), "The type of grad_tensor must be paddle.Tensor"
+                assert grad_tensor.shape == self.shape, \
+                    "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
+                        grad_tensor.name, grad_tensor.shape, self.name, self.shape)
+
             if paddle.is_compiled_with_xpu():
                 # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                 scaled_loss = scale_loss(self)
-                scaled_loss._run_backward(framework._dygraph_tracer(),
-                                          retain_graph)
+                core.dygraph_run_backward([scaled_loss], [grad_tensor],
+                                          retain_graph,
+                                          framework._dygraph_tracer())
             else:
-                self._run_backward(framework._dygraph_tracer(), retain_graph)
+                core.dygraph_run_backward([self], [grad_tensor], retain_graph,
+                                          framework._dygraph_tracer())
         else:
             raise ValueError(
                 "Variable.backward() is only available in DyGraph mode")
......
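A short sketch of the checks Tensor.backward now performs: the custom gradient must be a paddle.Tensor whose shape matches the Tensor being differentiated, and the resulting gradients scale linearly with it:

    import paddle

    x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)

    y = 3.0 * x
    y.backward(paddle.full_like(y, 2.0))
    print(x.grad)  # [6. 6. 6.] -- twice the gradient the default ones would give

    # A shape mismatch is caught by the new assertion before backward runs.
    z = 3.0 * x
    try:
        z.backward(paddle.to_tensor([1.0]))  # shape [1] does not match shape [3]
    except AssertionError as e:
        print("shape mismatch:", e)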
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np

import paddle
import paddle.fluid.dygraph as dg
from op_test import OpTest


class TestTensorBackward(unittest.TestCase):
    def setUp(self):
        self._dtypes = ["float32", "float64"]
        self._places = [paddle.CPUPlace()]
        if paddle.is_compiled_with_cuda():
            self._places.append(paddle.CUDAPlace(0))

    def test_tensor_backward(self):
        for dtype in self._dtypes:
            x = np.random.random([2, 100]).astype(dtype)
            y = np.random.random([100, 2]).astype(dtype)
            z = np.matmul(x, y)

            grad = np.random.random(z.shape).astype(dtype)

            for place in self._places:
                with dg.guard(place):
                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
                    y_tensor = paddle.to_tensor(y)
                    z_tensor = paddle.matmul(x_tensor, y_tensor)

                    grad_tensor = paddle.to_tensor(grad)
                    z_tensor.backward(grad_tensor)

                    x_grad = np.matmul(grad, y.T)

                    self.assertTrue(np.allclose(x_grad, x_tensor.grad))


class TestBackwardAPI(unittest.TestCase):
    def setUp(self):
        self._dtypes = ["float32", "float64"]
        self._places = [paddle.CPUPlace()]
        if paddle.is_compiled_with_cuda():
            self._places.append(paddle.CUDAPlace(0))

    def test_backward_api(self):
        for dtype in self._dtypes:
            x = np.random.random([2, 2]).astype(dtype)
            y = np.random.random([2, 2]).astype(dtype)
            z = np.matmul(x, y)

            grad = np.random.random(z.shape).astype(dtype)

            for place in self._places:
                with dg.guard(place):
                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
                    y_tensor = paddle.to_tensor(y)
                    z_tensor1 = paddle.matmul(x_tensor, y_tensor)
                    z_tensor2 = paddle.matmul(x_tensor, y_tensor)

                    grad_tensor = paddle.to_tensor(grad)
                    paddle.autograd.backward([z_tensor1, z_tensor2],
                                             [grad_tensor, grad_tensor], True)

                    x_grad = np.matmul(grad, y.T)

                    self.assertTrue(np.allclose(x_grad * 2, x_tensor.grad))

    def test_backward_single_tensor(self):
        for dtype in self._dtypes:
            x = np.random.random([2, 2]).astype(dtype)
            y = np.random.random([2, 2]).astype(dtype)
            z = np.matmul(x, y)

            grad = np.random.random(z.shape).astype(dtype)

            for place in self._places:
                with dg.guard(place):
                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
                    y_tensor = paddle.to_tensor(y)
                    z_tensor1 = paddle.matmul(x_tensor, y_tensor)

                    grad_tensor = paddle.to_tensor(grad)
                    paddle.autograd.backward(z_tensor1, grad_tensor, True)

                    x_grad = np.matmul(grad, y.T)

                    self.assertTrue(np.allclose(x_grad, x_tensor.grad))

    def test_backward_none_grad_tensor(self):
        for dtype in self._dtypes:
            x = np.random.random([2, 2]).astype(dtype)
            y = np.random.random([2, 2]).astype(dtype)
            z = np.matmul(x, y)

            grad = np.ones(z.shape).astype(dtype)

            for place in self._places:
                with dg.guard(place):
                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
                    y_tensor = paddle.to_tensor(y)
                    z_tensor1 = paddle.matmul(x_tensor, y_tensor)

                    paddle.autograd.backward(z_tensor1, None)

                    x_grad = np.matmul(grad, y.T)

                    self.assertTrue(np.allclose(x_grad, x_tensor.grad))


if __name__ == '__main__':
    unittest.main()
@@ -216,6 +216,7 @@ packages=['paddle',
            'paddle.static.amp',
            'paddle.tensor',
            'paddle.onnx',
+           'paddle.autograd',
           ]
 
 with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
......