未验证 提交 68643a9e 编写于 作者: Zhanlue Yang 提交者: GitHub

[DoubleGrad] Enabled test_autograd_functional_dynamic.py under eager mode (#41668) (#41895)

* [DoubleGrad] Enabled double grad test cases in eager_mode for test_imperative_double_grad

* Fixed elementwise issue

* Addressed CI failures

* [DoubleGrad] Enabled test_imperative_triple_grad test cases under eager_mode

* [DoubleGrad] Enabled test_autograd_functional_dynamic.py under eager mode

* Enabled more test cases

* Fixed performance issues

* Fixed minor issue
上级 e568268b
...@@ -22,9 +22,16 @@ import os ...@@ -22,9 +22,16 @@ import os
### Global Variables ### ### Global Variables ###
######################## ########################
ops_to_fill_zero_for_empty_grads = set([ ops_to_fill_zero_for_empty_grads = set([
"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad", "split_grad",
"sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad", "rnn_grad",
"add_triple_grad" "matmul_double_grad",
"matmul_triple_grad",
"sigmoid_double_grad",
"sigmoid_triple_grad",
"add_double_grad",
"add_triple_grad",
"multiply_double_grad",
"multiply_triple_grad",
]) ])
# For API dispatch used at python-level # For API dispatch used at python-level
......
...@@ -107,6 +107,8 @@ class AutogradMeta : public AbstractAutogradMeta { ...@@ -107,6 +107,8 @@ class AutogradMeta : public AbstractAutogradMeta {
GradNodeBase* GradNode() const { return grad_node_.get(); } GradNodeBase* GradNode() const { return grad_node_.get(); }
void ResetGradNode() { grad_node_.reset(); }
void SetSingleOutRankWithSlot(size_t slot_id, size_t rank) { void SetSingleOutRankWithSlot(size_t slot_id, size_t rank) {
out_slot_id_ = slot_id; out_slot_id_ = slot_id;
out_rank_ = rank; out_rank_ = rank;
......
...@@ -53,7 +53,7 @@ class GeneralGrad { ...@@ -53,7 +53,7 @@ class GeneralGrad {
auto* target_node = auto_grad_meta->GetMutableGradNode().get(); auto* target_node = auto_grad_meta->GetMutableGradNode().get();
if (orig_to_copied_node_mapping_.count(target_node)) { if (orig_to_copied_node_mapping_.count(target_node)) {
target_node = orig_to_copied_node_mapping_[target_node]; target_node = orig_to_copied_node_mapping_[target_node].get();
} else { } else {
VLOG(6) << "Unable to find target node in " VLOG(6) << "Unable to find target node in "
"orig_to_copied_node_mapping_, likely indicating an " "orig_to_copied_node_mapping_, likely indicating an "
...@@ -261,7 +261,7 @@ class GeneralGrad { ...@@ -261,7 +261,7 @@ class GeneralGrad {
auto* target_node = auto_grad_meta->GetMutableGradNode().get(); auto* target_node = auto_grad_meta->GetMutableGradNode().get();
if (orig_to_copied_node_mapping_.count(target_node)) { if (orig_to_copied_node_mapping_.count(target_node)) {
target_node = orig_to_copied_node_mapping_[target_node]; target_node = orig_to_copied_node_mapping_[target_node].get();
} else { } else {
VLOG(6) << "Unable to find target node in " VLOG(6) << "Unable to find target node in "
"orig_to_copied_node_mapping_, likely indicating an unused " "orig_to_copied_node_mapping_, likely indicating an unused "
...@@ -349,12 +349,12 @@ class GeneralGrad { ...@@ -349,12 +349,12 @@ class GeneralGrad {
GradNodeBase* CopyGradNode(const std::shared_ptr<GradNodeBase>& orig_node) { GradNodeBase* CopyGradNode(const std::shared_ptr<GradNodeBase>& orig_node) {
if (orig_to_copied_node_mapping_.count(orig_node.get())) { if (orig_to_copied_node_mapping_.count(orig_node.get())) {
return orig_to_copied_node_mapping_[orig_node.get()]; return orig_to_copied_node_mapping_[orig_node.get()].get();
} }
std::shared_ptr<GradNodeBase> copied_node = orig_node->Copy(); std::shared_ptr<GradNodeBase> copied_node = orig_node->Copy();
// Save node and update mapping // Save node and update mapping
orig_to_copied_node_mapping_[orig_node.get()] = copied_node.get(); orig_to_copied_node_mapping_[orig_node.get()] = copied_node;
copied_grad_nodes_.push_back(copied_node); copied_grad_nodes_.push_back(copied_node);
return copied_node.get(); return copied_node.get();
...@@ -379,7 +379,7 @@ class GeneralGrad { ...@@ -379,7 +379,7 @@ class GeneralGrad {
paddle::platform::errors::Fatal( paddle::platform::errors::Fatal(
"Cannot reconstruct backward graph," "Cannot reconstruct backward graph,"
"unable to find copied target for certain grad node.")); "unable to find copied target for certain grad node."));
GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node]; GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node].get();
const std::vector<std::vector<Edge>>& orig_edges = orig_node->GetEdges(); const std::vector<std::vector<Edge>>& orig_edges = orig_node->GetEdges();
std::vector<std::vector<Edge>>& copied_edges = std::vector<std::vector<Edge>>& copied_edges =
...@@ -397,13 +397,12 @@ class GeneralGrad { ...@@ -397,13 +397,12 @@ class GeneralGrad {
std::shared_ptr<GradNodeBase> copied_next_node; std::shared_ptr<GradNodeBase> copied_next_node;
if (orig_to_copied_node_mapping_.count(orig_next_node.get())) { if (orig_to_copied_node_mapping_.count(orig_next_node.get())) {
copied_next_node = copied_next_node =
orig_to_copied_node_mapping_[orig_next_node.get()] orig_to_copied_node_mapping_[orig_next_node.get()];
->shared_from_this();
} else { } else {
copied_next_node = orig_next_node->Copy(); copied_next_node = orig_next_node->Copy();
orig_to_copied_node_mapping_[orig_next_node.get()] = orig_to_copied_node_mapping_[orig_next_node.get()] =
copied_next_node.get(); copied_next_node;
copied_grad_nodes_.push_back(copied_next_node); copied_grad_nodes_.push_back(copied_next_node);
} }
...@@ -436,7 +435,8 @@ class GeneralGrad { ...@@ -436,7 +435,8 @@ class GeneralGrad {
std::unordered_map<GradNodeBase*, paddle::experimental::Tensor> results_map; std::unordered_map<GradNodeBase*, paddle::experimental::Tensor> results_map;
std::vector<std::shared_ptr<GradNodeBase>> copied_grad_nodes_; std::vector<std::shared_ptr<GradNodeBase>> copied_grad_nodes_;
std::unordered_map<GradNodeBase*, GradNodeBase*> orig_to_copied_node_mapping_; std::unordered_map<GradNodeBase*, std::shared_ptr<GradNodeBase>>
orig_to_copied_node_mapping_;
DISABLE_COPY_AND_ASSIGN(GeneralGrad); DISABLE_COPY_AND_ASSIGN(GeneralGrad);
}; };
...@@ -534,6 +534,7 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -534,6 +534,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
// GeneralGrad // GeneralGrad
bool is_general_grad = !inputs.empty(); bool is_general_grad = !inputs.empty();
if (is_general_grad) GeneralGrad::Instance().Clear();
/* --- Initialization --- */ /* --- Initialization --- */
// 1. Init queue with starting nodes // 1. Init queue with starting nodes
...@@ -746,6 +747,7 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -746,6 +747,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
VLOG(6) << "We get grad_output_tensor with slot: " << i VLOG(6) << "We get grad_output_tensor with slot: " << i
<< ", rank: " << j << " as uninitialized or undefined tensor"; << ", rank: " << j << " as uninitialized or undefined tensor";
} }
VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i
<< ", rank: " << j << ", rank: " << j
<< " 's name is: " << grad_output_tensor.name(); << " 's name is: " << grad_output_tensor.name();
......
...@@ -87,7 +87,7 @@ class GradSlotMeta { ...@@ -87,7 +87,7 @@ class GradSlotMeta {
std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr; std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
}; };
class GradNodeBase : public std::enable_shared_from_this<GradNodeBase> { class GradNodeBase {
public: public:
GradNodeBase() { VLOG(6) << "Construct GradNodeBase"; } GradNodeBase() { VLOG(6) << "Construct GradNodeBase"; }
GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num); GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num);
......
...@@ -79,9 +79,9 @@ class TensorWrapper { ...@@ -79,9 +79,9 @@ class TensorWrapper {
auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor); auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor);
if (tensor_autograd_meta) { if (tensor_autograd_meta) {
auto autograd_meta = std::make_shared<AutogradMeta>( auto autograd_meta =
Edge(nullptr, EagerUtils::OutRankInfo(tensor))); std::make_shared<AutogradMeta>(*tensor_autograd_meta);
autograd_meta->SetStopGradient(tensor_autograd_meta->StopGradient()); autograd_meta->ResetGradNode();
intermidiate_tensor_.set_autograd_meta(autograd_meta); intermidiate_tensor_.set_autograd_meta(autograd_meta);
weak_grad_node_ = tensor_autograd_meta->GetMutableGradNode(); weak_grad_node_ = tensor_autograd_meta->GetMutableGradNode();
} }
...@@ -98,8 +98,11 @@ class TensorWrapper { ...@@ -98,8 +98,11 @@ class TensorWrapper {
check_inplace_version(); check_inplace_version();
// if it's full_reserved just return the full copy of tensor // if it's full_reserved just return the full copy of tensor
paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_; if (full_reserved_) {
if (!full_reserved_) { return intermidiate_tensor_;
} else {
paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_;
std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock(); std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock();
if (new_grad_node) { if (new_grad_node) {
VLOG(3) << "Recovered TensorWrapper with GradNode " VLOG(3) << "Recovered TensorWrapper with GradNode "
...@@ -109,17 +112,15 @@ class TensorWrapper { ...@@ -109,17 +112,15 @@ class TensorWrapper {
} }
auto* intermediate_autograd_meta = auto* intermediate_autograd_meta =
EagerUtils::unsafe_autograd_meta(intermidiate_tensor_); EagerUtils::unsafe_autograd_meta(intermidiate_tensor_);
auto p_ab_autograd_meta = std::make_shared<AutogradMeta>( auto p_ab_autograd_meta =
Edge(new_grad_node, intermediate_autograd_meta->OutRankInfo())); std::make_shared<AutogradMeta>(*intermediate_autograd_meta);
p_ab_autograd_meta->SetStopGradient( if (new_grad_node) {
intermediate_autograd_meta->StopGradient()); p_ab_autograd_meta->SetGradNode(new_grad_node);
}
recovered_tensor.set_autograd_meta( recovered_tensor.set_autograd_meta(p_ab_autograd_meta);
std::static_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
p_ab_autograd_meta));
}
return recovered_tensor; return recovered_tensor;
}
} }
void check_inplace_version() { void check_inplace_version() {
......
...@@ -100,6 +100,8 @@ void GatherNdGradInferMeta(const MetaTensor& x, ...@@ -100,6 +100,8 @@ void GatherNdGradInferMeta(const MetaTensor& x,
const MetaTensor& out_grad, const MetaTensor& out_grad,
MetaTensor* x_grad); MetaTensor* x_grad);
void GeneralUnaryGradInferMeta(const MetaTensor& x, MetaTensor* dx);
void GeneralBinaryGradInferMeta(const MetaTensor& x, void GeneralBinaryGradInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
MetaTensor* dx, MetaTensor* dx,
...@@ -132,8 +134,6 @@ void GeneralQuinaryGradInferMeta(const MetaTensor& x, ...@@ -132,8 +134,6 @@ void GeneralQuinaryGradInferMeta(const MetaTensor& x,
MetaTensor* dk, MetaTensor* dk,
MetaTensor* dl); MetaTensor* dl);
void GeneralUnaryGradInferMeta(const MetaTensor& x, MetaTensor* dx);
void GumbelSoftmaxGradInferMeta(const MetaTensor& out, void GumbelSoftmaxGradInferMeta(const MetaTensor& out,
const MetaTensor& dout, const MetaTensor& dout,
int axis, int axis,
......
...@@ -943,8 +943,10 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False): ...@@ -943,8 +943,10 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
# [0., 1., 0., 1., 0., 1., 0., 1.]])) # [0., 1., 0., 1., 0., 1., 0., 1.]]))
''' '''
inputs = _as_tensors(inputs) inputs = _as_tensors(inputs)
outputs = _as_tensors(func(*inputs)) outputs = _as_tensors(func(*inputs))
batch_size = inputs[0].shape[0] batch_size = inputs[0].shape[0]
for input in inputs: for input in inputs:
assert input.shape[ assert input.shape[
...@@ -961,12 +963,14 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False): ...@@ -961,12 +963,14 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
for i, flat_output in enumerate(flat_outputs): for i, flat_output in enumerate(flat_outputs):
jac_i = list([] for _ in range(fin_size)) jac_i = list([] for _ in range(fin_size))
for k in range(flat_output.shape[1]): for k in range(flat_output.shape[1]):
row_k = paddle.grad( row_k = paddle.grad(
flat_output[:, k], flat_output[:, k],
inputs, inputs,
create_graph=create_graph, create_graph=create_graph,
retain_graph=True, retain_graph=True,
allow_unused=allow_unused) allow_unused=allow_unused)
for j in range(fin_size): for j in range(fin_size):
jac_i[j].append( jac_i[j].append(
paddle.reshape( paddle.reshape(
......
...@@ -205,7 +205,7 @@ class TestVJP(TestAutogradFunctional): ...@@ -205,7 +205,7 @@ class TestVJP(TestAutogradFunctional):
self.check_results(ref_result, aliased_result) self.check_results(ref_result, aliased_result)
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.func_vjp_i1o1() self.func_vjp_i1o1()
self.func_vjp_i2o1() self.func_vjp_i2o1()
self.func_vjp_i2o2() self.func_vjp_i2o2()
...@@ -213,6 +213,13 @@ class TestVJP(TestAutogradFunctional): ...@@ -213,6 +213,13 @@ class TestVJP(TestAutogradFunctional):
self.func_vjp_nested() self.func_vjp_nested()
self.func_vjp_aliased_input() self.func_vjp_aliased_input()
self.func_vjp_i1o1()
self.func_vjp_i2o1()
self.func_vjp_i2o2()
self.func_vjp_i2o2_omitting_v()
self.func_vjp_nested()
self.func_vjp_aliased_input()
@utils.place(config.DEVICES) @utils.place(config.DEVICES)
@utils.parameterize( @utils.parameterize(
...@@ -227,8 +234,9 @@ class TestVJPException(unittest.TestCase): ...@@ -227,8 +234,9 @@ class TestVJPException(unittest.TestCase):
paddle.to_tensor(self.v)) paddle.to_tensor(self.v))
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.func_vjp() self.func_vjp()
self.func_vjp()
def jac(grad_fn, f, inputs): def jac(grad_fn, f, inputs):
...@@ -303,11 +311,15 @@ class TestJVP(TestAutogradFunctional): ...@@ -303,11 +311,15 @@ class TestJVP(TestAutogradFunctional):
self.check_results(results_omitting_v, results_with_v) self.check_results(results_omitting_v, results_with_v)
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.func_jvp_i1o1() self.func_jvp_i1o1()
self.func_jvp_i2o1() self.func_jvp_i2o1()
self.func_jvp_i2o2() self.func_jvp_i2o2()
self.func_jvp_i2o2_omitting_v() self.func_jvp_i2o2_omitting_v()
self.func_jvp_i1o1()
self.func_jvp_i2o1()
self.func_jvp_i2o2()
self.func_jvp_i2o2_omitting_v()
@utils.place(config.DEVICES) @utils.place(config.DEVICES)
...@@ -328,12 +340,12 @@ class TestJacobianClassNoBatch(unittest.TestCase): ...@@ -328,12 +340,12 @@ class TestJacobianClassNoBatch(unittest.TestCase):
self._atol = config.TOLERANCE.get(str(self._dtype)).get( self._atol = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("atol") "first_order_grad").get("atol")
self.xs = [paddle.to_tensor(x) for x in self.xs] if isinstance( def func_jacobian(self):
xs = [paddle.to_tensor(x) for x in self.xs] if isinstance(
self.xs, typing.Sequence) else paddle.to_tensor(self.xs) self.xs, typing.Sequence) else paddle.to_tensor(self.xs)
self._actual = paddle.autograd.Jacobian(self.func, self.xs, False) self._actual = paddle.autograd.Jacobian(self.func, xs, False)
self._expected = self._expected() self._expected = self._get_expected()
def func_jacobian(self):
Index = collections.namedtuple('Index', ('type', 'value')) Index = collections.namedtuple('Index', ('type', 'value'))
indexes = (Index('all', (slice(0, None, None), slice(0, None, None))), indexes = (Index('all', (slice(0, None, None), slice(0, None, None))),
Index('row', (0, slice(0, None, None))), Index('row', (0, slice(0, None, None))),
...@@ -349,14 +361,17 @@ class TestJacobianClassNoBatch(unittest.TestCase): ...@@ -349,14 +361,17 @@ class TestJacobianClassNoBatch(unittest.TestCase):
err_msg=f'Testcase {index.type} index not passed, value is {index.value}' err_msg=f'Testcase {index.type} index not passed, value is {index.value}'
) )
def _expected(self): def _get_expected(self):
jac = utils._compute_numerical_jacobian(self.func, self.xs, self._eps, xs = [paddle.to_tensor(x) for x in self.xs] if isinstance(
self.xs, typing.Sequence) else paddle.to_tensor(self.xs)
jac = utils._compute_numerical_jacobian(self.func, xs, self._eps,
self._dtype) self._dtype)
return utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NM) return utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NM)
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.func_jacobian() self.func_jacobian()
self.func_jacobian()
@utils.place(config.DEVICES) @utils.place(config.DEVICES)
...@@ -375,12 +390,12 @@ class TestJacobianClassBatchFirst(unittest.TestCase): ...@@ -375,12 +390,12 @@ class TestJacobianClassBatchFirst(unittest.TestCase):
self._atol = config.TOLERANCE.get(str(self._dtype)).get( self._atol = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("atol") "first_order_grad").get("atol")
self.xs = [paddle.to_tensor(x) for x in self.xs] if isinstance( def func_jacobian(self):
xs = [paddle.to_tensor(x) for x in self.xs] if isinstance(
self.xs, typing.Sequence) else paddle.to_tensor(self.xs) self.xs, typing.Sequence) else paddle.to_tensor(self.xs)
self._actual = paddle.autograd.Jacobian(self.func, self.xs, True) self._actual = paddle.autograd.Jacobian(self.func, xs, True)
self._expected = self._expected() self._expected = self._get_expected()
def func_jacobian(self):
Index = collections.namedtuple('Index', ('type', 'value')) Index = collections.namedtuple('Index', ('type', 'value'))
indexes = ( indexes = (
Index('all', (slice(0, None, None), slice(0, None, None), Index('all', (slice(0, None, None), slice(0, None, None),
...@@ -402,16 +417,19 @@ class TestJacobianClassBatchFirst(unittest.TestCase): ...@@ -402,16 +417,19 @@ class TestJacobianClassBatchFirst(unittest.TestCase):
err_msg=f'Testcase {index.type} index not passed, value is {index.value}' err_msg=f'Testcase {index.type} index not passed, value is {index.value}'
) )
def _expected(self): def _get_expected(self):
jac = utils._compute_numerical_batch_jacobian( xs = [paddle.to_tensor(x) for x in self.xs] if isinstance(
self.func, self.xs, self._eps, self._dtype, False) self.xs, typing.Sequence) else paddle.to_tensor(self.xs)
jac = utils._compute_numerical_batch_jacobian(self.func, xs, self._eps,
self._dtype, False)
jac = utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NBM) jac = utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NBM)
return utils._np_transpose_matrix_format(jac, utils.MatrixFormat.NBM, return utils._np_transpose_matrix_format(jac, utils.MatrixFormat.NBM,
utils.MatrixFormat.BNM) utils.MatrixFormat.BNM)
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.func_jacobian() self.func_jacobian()
self.func_jacobian()
class TestHessianClassNoBatch(unittest.TestCase): class TestHessianClassNoBatch(unittest.TestCase):
...@@ -492,12 +510,19 @@ class TestHessianClassNoBatch(unittest.TestCase): ...@@ -492,12 +510,19 @@ class TestHessianClassNoBatch(unittest.TestCase):
paddle.autograd.Hessian(func, paddle.ones([3])) paddle.autograd.Hessian(func, paddle.ones([3]))
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_single_input() self.func_single_input()
self.func_multi_input() self.func_multi_input()
self.func_allow_unused_true() self.func_allow_unused_true()
self.func_create_graph_true() self.func_create_graph_true()
self.func_out_not_single() self.func_out_not_single()
self.setUpClass()
self.func_single_input()
self.func_multi_input()
self.func_allow_unused_true()
self.func_create_graph_true()
self.func_out_not_single()
class TestHessianClassBatchFirst(unittest.TestCase): class TestHessianClassBatchFirst(unittest.TestCase):
...@@ -599,12 +624,19 @@ class TestHessianClassBatchFirst(unittest.TestCase): ...@@ -599,12 +624,19 @@ class TestHessianClassBatchFirst(unittest.TestCase):
paddle.autograd.Hessian(func, paddle.ones((3, 3)), is_batched=True) paddle.autograd.Hessian(func, paddle.ones((3, 3)), is_batched=True)
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_single_input() self.func_single_input()
self.func_multi_input() self.func_multi_input()
self.func_allow_unused() self.func_allow_unused()
self.func_stop_gradient() self.func_stop_gradient()
self.func_out_not_single() self.func_out_not_single()
self.setUpClass()
self.func_single_input()
self.func_multi_input()
self.func_allow_unused()
self.func_stop_gradient()
self.func_out_not_single()
class TestHessian(unittest.TestCase): class TestHessian(unittest.TestCase):
...@@ -619,6 +651,7 @@ class TestHessian(unittest.TestCase): ...@@ -619,6 +651,7 @@ class TestHessian(unittest.TestCase):
"second_order_grad").get("rtol") "second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get( self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol") "second_order_grad").get("atol")
self.x = paddle.rand(shape=self.shape, dtype=self.dtype) self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype) self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
...@@ -694,9 +727,10 @@ class TestHessian(unittest.TestCase): ...@@ -694,9 +727,10 @@ class TestHessian(unittest.TestCase):
self.rtol, self.atol) self.rtol, self.atol)
try: try:
paddle.grad(hessian, self.x) paddle.grad(hessian, self.x)
except RuntimeError as e: except Exception as e:
error_msg = cpt.get_exception_message(e) error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0 assert error_msg.find("has no gradient") > 0 or error_msg.find(
"does not appear") > 0
def func_create_graph_true(self): def func_create_graph_true(self):
def func(x): def func(x):
...@@ -713,13 +747,21 @@ class TestHessian(unittest.TestCase): ...@@ -713,13 +747,21 @@ class TestHessian(unittest.TestCase):
assert triple_grad is not None assert triple_grad is not None
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_single_input() self.func_single_input()
self.func_multi_input() self.func_multi_input()
self.func_allow_unused_false() self.func_allow_unused_false()
self.func_allow_unused_true() self.func_allow_unused_true()
self.func_create_graph_false() self.func_create_graph_false()
self.func_create_graph_true() self.func_create_graph_true()
self.setUpClass()
self.func_single_input()
self.func_multi_input()
self.func_allow_unused_false()
self.func_allow_unused_true()
self.func_create_graph_false()
self.func_create_graph_true()
class TestHessianFloat64(TestHessian): class TestHessianFloat64(TestHessian):
...@@ -830,9 +872,10 @@ class TestBatchHessian(unittest.TestCase): ...@@ -830,9 +872,10 @@ class TestBatchHessian(unittest.TestCase):
self.rtol, self.atol) self.rtol, self.atol)
try: try:
paddle.grad(hessian, self.x) paddle.grad(hessian, self.x)
except RuntimeError as e: except Exception as e:
error_msg = cpt.get_exception_message(e) error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0 assert error_msg.find("has no gradient") > 0 or error_msg.find(
"does not appear") > 0
def func_create_graph_true(self): def func_create_graph_true(self):
def func(x): def func(x):
...@@ -849,13 +892,21 @@ class TestBatchHessian(unittest.TestCase): ...@@ -849,13 +892,21 @@ class TestBatchHessian(unittest.TestCase):
assert triple_grad is not None assert triple_grad is not None
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_single_input() self.func_single_input()
self.func_multi_input() self.func_multi_input()
self.func_allow_unused_false() self.func_allow_unused_false()
self.func_allow_unused_true() self.func_allow_unused_true()
self.func_create_graph_false() self.func_create_graph_false()
self.func_create_graph_true() self.func_create_graph_true()
self.setUpClass()
self.func_single_input()
self.func_multi_input()
self.func_allow_unused_false()
self.func_allow_unused_true()
self.func_create_graph_false()
self.func_create_graph_true()
class TestBatchHessianFloat64(TestBatchHessian): class TestBatchHessianFloat64(TestBatchHessian):
...@@ -985,12 +1036,19 @@ class TestVHP(unittest.TestCase): ...@@ -985,12 +1036,19 @@ class TestVHP(unittest.TestCase):
assert triple_grad is not None assert triple_grad is not None
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_v_default() self.func_v_default()
self.func_multi_input() self.func_multi_input()
self.func_single_input() self.func_single_input()
self.func_allow_unused_true() self.func_allow_unused_true()
self.func_create_graph_true() self.func_create_graph_true()
self.setUpClass()
self.func_v_default()
self.func_multi_input()
self.func_single_input()
self.func_allow_unused_true()
self.func_create_graph_true()
class TestJacobian(unittest.TestCase): class TestJacobian(unittest.TestCase):
...@@ -1100,9 +1158,10 @@ class TestJacobian(unittest.TestCase): ...@@ -1100,9 +1158,10 @@ class TestJacobian(unittest.TestCase):
self.atol) self.atol)
try: try:
paddle.grad(jacobian[0], [self.x, self.y]) paddle.grad(jacobian[0], [self.x, self.y])
except RuntimeError as e: except Exception as e:
error_msg = cpt.get_exception_message(e) error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0 assert error_msg.find("has no gradient") > 0 or error_msg.find(
"does not appear") > 0
def func_create_graph_true(self): def func_create_graph_true(self):
def func(x, y): def func(x, y):
...@@ -1123,7 +1182,8 @@ class TestJacobian(unittest.TestCase): ...@@ -1123,7 +1182,8 @@ class TestJacobian(unittest.TestCase):
assert double_grad is not None assert double_grad is not None
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_multi_input_and_multi_output() self.func_multi_input_and_multi_output()
self.func_multi_input_and_single_output() self.func_multi_input_and_single_output()
self.func_single_input_and_multi_output() self.func_single_input_and_multi_output()
...@@ -1132,6 +1192,15 @@ class TestJacobian(unittest.TestCase): ...@@ -1132,6 +1192,15 @@ class TestJacobian(unittest.TestCase):
self.func_allow_unused_true() self.func_allow_unused_true()
self.func_create_graph_false() self.func_create_graph_false()
self.func_create_graph_true() self.func_create_graph_true()
self.setUpClass()
self.func_multi_input_and_multi_output()
self.func_multi_input_and_single_output()
self.func_single_input_and_multi_output()
self.func_single_input_and_single_output()
self.func_allow_unused_false()
self.func_allow_unused_true()
self.func_create_graph_false()
self.func_create_graph_true()
class TestJacobianFloat64(TestJacobian): class TestJacobianFloat64(TestJacobian):
...@@ -1269,9 +1338,10 @@ class TestJacobianBatch(unittest.TestCase): ...@@ -1269,9 +1338,10 @@ class TestJacobianBatch(unittest.TestCase):
self.atol) self.atol)
try: try:
paddle.grad(jacobian[0], [self.x, self.y]) paddle.grad(jacobian[0], [self.x, self.y])
except RuntimeError as e: except Exception as e:
error_msg = cpt.get_exception_message(e) error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0 assert error_msg.find("has no gradient") > 0 or error_msg.find(
"does not appear") > 0
def func_create_graph_true(self): def func_create_graph_true(self):
def func(x, y): def func(x, y):
...@@ -1292,7 +1362,8 @@ class TestJacobianBatch(unittest.TestCase): ...@@ -1292,7 +1362,8 @@ class TestJacobianBatch(unittest.TestCase):
assert double_grad is not None assert double_grad is not None
def test_all_cases(self): def test_all_cases(self):
if _in_legacy_dygraph(): with _test_eager_guard():
self.setUpClass()
self.func_batch_single_input_and_batch_single_output() self.func_batch_single_input_and_batch_single_output()
self.func_batch_single_input_and_batch_multi_output() self.func_batch_single_input_and_batch_multi_output()
self.func_batch_multi_input_and_batch_single_output() self.func_batch_multi_input_and_batch_single_output()
...@@ -1301,6 +1372,15 @@ class TestJacobianBatch(unittest.TestCase): ...@@ -1301,6 +1372,15 @@ class TestJacobianBatch(unittest.TestCase):
self.func_allow_unused_true() self.func_allow_unused_true()
self.func_create_graph_false() self.func_create_graph_false()
self.func_create_graph_true() self.func_create_graph_true()
self.setUpClass()
self.func_batch_single_input_and_batch_single_output()
self.func_batch_single_input_and_batch_multi_output()
self.func_batch_multi_input_and_batch_single_output()
self.func_batch_multi_input_and_batch_multi_output()
self.func_allow_unused_false()
self.func_allow_unused_true()
self.func_create_graph_false()
self.func_create_graph_true()
class TestJacobianBatchFloat64(TestJacobianBatch): class TestJacobianBatchFloat64(TestJacobianBatch):
......
...@@ -1103,7 +1103,15 @@ def t(input, name=None): ...@@ -1103,7 +1103,15 @@ def t(input, name=None):
"Input(input) only support N-D (N<=2) tensor, but received " "Input(input) only support N-D (N<=2) tensor, but received "
"length of Input(input) is %s. Perhaps you can use paddle." "length of Input(input) is %s. Perhaps you can use paddle."
"tensor.transpose() instead." % len(input.shape)) "tensor.transpose() instead." % len(input.shape))
if paddle.in_dynamic_mode(): if in_dygraph_mode():
if len(input.shape) == 1:
return input
# 2-D tensor
perm = [1, 0]
out = _C_ops.final_state_transpose(input, perm)
return out
if _in_legacy_dygraph():
if len(input.shape) == 1: if len(input.shape) == 1:
return input return input
# 2-D tensor # 2-D tensor
......
...@@ -1060,6 +1060,7 @@ ...@@ -1060,6 +1060,7 @@
kernel : kernel :
func : multiply_double_grad func : multiply_double_grad
optional : grad_x_grad, grad_y_grad optional : grad_x_grad, grad_y_grad
backward : multiply_triple_grad
- backward_api : multiply_grad - backward_api : multiply_grad
forward : multiply (Tensor x, Tensor y) -> Tensor(out) forward : multiply (Tensor x, Tensor y) -> Tensor(out)
...@@ -1072,6 +1073,17 @@ ...@@ -1072,6 +1073,17 @@
func : multiply_grad func : multiply_grad
backward : multiply_double_grad backward : multiply_double_grad
- backward_api : multiply_triple_grad
forward : multiply_double_grad (Tensor x, Tensor y, Tensor fwd_grad_out, Tensor fwd_grad_grad_x, Tensor fwd_grad_grad_y, int aixs = -1) -> Tensor(grad_x), Tensor(grad_y), Tensor(grad_grad_out)
args : (Tensor x, Tensor y, Tensor fwd_grad_out, Tensor fwd_grad_grad_x, Tensor fwd_grad_grad_y, Tensor grad_x_grad, Tensor grad_y_grad, Tensor grad_grad_out_grad, int axis = -1)
output : Tensor(x_grad), Tensor(y_grad), Tensor(fwd_grad_out_grad), Tensor(fwd_grad_grad_x_grad), Tensor(fwd_grad_grad_y_grad)
infer_meta :
func : GeneralQuinaryGradInferMeta
param : [x, y, fwd_grad_out, x, y]
kernel :
func : multiply_triple_grad
optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_grad_out_grad
- backward_api : mv_grad - backward_api : mv_grad
forward : mv (Tensor x, Tensor vec) -> Tensor(out) forward : mv (Tensor x, Tensor vec) -> Tensor(out)
args : (Tensor x, Tensor vec, Tensor out_grad) args : (Tensor x, Tensor vec, Tensor out_grad)
...@@ -1239,6 +1251,16 @@ ...@@ -1239,6 +1251,16 @@
func : relu_grad func : relu_grad
backward: relu_double_grad backward: relu_double_grad
- backward_api : reshape_double_grad
forward : reshape_grad (Tensor xshape, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor grad_out, Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_out]
kernel :
func : reshape_double_grad
- backward_api : reshape_grad - backward_api : reshape_grad
forward : reshape_with_xshape (Tensor x, IntArray shape) -> Tensor(out), Tensor(xshape) forward : reshape_with_xshape (Tensor x, IntArray shape) -> Tensor(out), Tensor(xshape)
args : (Tensor xshape, Tensor out_grad) args : (Tensor xshape, Tensor out_grad)
...@@ -1252,6 +1274,7 @@ ...@@ -1252,6 +1274,7 @@
data_type: out_grad data_type: out_grad
backend: out_grad backend: out_grad
layout: out_grad layout: out_grad
backward : reshape_double_grad
- backward_api : roi_align_grad - backward_api : roi_align_grad
forward : roi_align (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned) -> Tensor(out) forward : roi_align (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned) -> Tensor(out)
...@@ -1540,6 +1563,13 @@ ...@@ -1540,6 +1563,13 @@
func : subtract_grad func : subtract_grad
no_need_buffer : x, y no_need_buffer : x, y
- backward_api : sum_double_grad
forward : sum_grad (Tensor x, Tensor grad_out, int64_t[] dims, bool keep_dim, bool reduce_all=false) -> Tensor(grad_x)
args : (Tensor grad_x_grad, int64_t[] dims={}, bool keep_dim=false)
output : Tensor(grad_out_grad)
invoke : sum(grad_x_grad, dims, grad_x_grad.dtype(), keep_dim)
backward : sum_triple_grad
- backward_api : sum_grad - backward_api : sum_grad
forward : sum (Tensor x, int64_t[] dims={}, DataType out_dtype=paddle::experimental::DataType::UNDEFINED, bool keep_dim=false) -> Tensor(out) forward : sum (Tensor x, int64_t[] dims={}, DataType out_dtype=paddle::experimental::DataType::UNDEFINED, bool keep_dim=false) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int64_t[] dims, bool keep_dim, bool reduce_all=false) args : (Tensor x, Tensor out_grad, int64_t[] dims, bool keep_dim, bool reduce_all=false)
...@@ -1549,6 +1579,13 @@ ...@@ -1549,6 +1579,13 @@
param : [x] param : [x]
kernel : kernel :
func : sum_grad func : sum_grad
backward : sum_double_grad
- backward_api : sum_triple_grad
forward : sum_double_grad (Tensor grad_grad_x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(grad_grad_out)
args : (Tensor grad_grad_x, Tensor grad_grad_out_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false)
output : Tensor(grad_grad_x_grad)
invoke : sum_grad(grad_grad_x, grad_grad_out_grad, dims, keep_dim, reduce_all)
no_need_buffer : x no_need_buffer : x
- backward_api : swish_grad - backward_api : swish_grad
...@@ -1643,6 +1680,12 @@ ...@@ -1643,6 +1680,12 @@
func : trace_grad func : trace_grad
no_need_buffer : x no_need_buffer : x
- backward_api : transpose_double_grad
forward : transpose_grad (Tensor grad_out, int[] axis) -> Tensor(grad_x)
args : (Tensor grad_x_grad, int[] axis)
output : Tensor(grad_out_grad)
invoke : transpose(grad_x_grad, axis)
- backward_api : transpose_grad - backward_api : transpose_grad
forward : transpose (Tensor x, int[] axis) -> Tensor(out) forward : transpose (Tensor x, int[] axis) -> Tensor(out)
args : (Tensor out_grad, int[] axis) args : (Tensor out_grad, int[] axis)
...@@ -1652,6 +1695,7 @@ ...@@ -1652,6 +1695,7 @@
param : [out_grad, axis] param : [out_grad, axis]
kernel : kernel :
func : transpose_grad func : transpose_grad
backward : transpose_double_grad
- backward_api : tril_triu_grad - backward_api : tril_triu_grad
forward : tril_triu(Tensor x, int diagonal, bool lower) -> Tensor(out) forward : tril_triu(Tensor x, int diagonal, bool lower) -> Tensor(out)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册