Unverified · commit 2421a25a · authored by Jiabin Yang · committed by GitHub

Support test imperative basic with fixed retain grad interface (#38548)

* Rearranged Eager AutoCodeGen directory structure

* Removed USE_OP in Eager AutoCodeGen

* Enabled generation for Operators without Grad/Inputs/Outputs

* Resolved operators without input

* Fixed merge conflicts

* Enabled Eager AutoCodeGen for 10+ more operators

* Refactored Eager AutoCodeGen with more organized helper objects

* Enabled Eager AutoCodeGen for operators with multiple OpBases

* Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument

* Handled Dispensable Inputs/Outputs in Eager AutoCodeGen

* Adjusted function generation/call between Python-C API & Dygraph API

* Synchronized auto-generated Python-C API with Dygraph Forward Functions

* support more eager tensor api

* fix merge compile error

* fix compile error and fit develop code

* support pure CPU

* fix some logic error in eager_mode

* support _varbase_creator in eager mode

* Added safe_initialized interface to EagerTensor for use in processing dispensable inputs

* for eager mode

* refine

* support multiple constructor for eager tensor

* add place related code

* polish code

* specify randint with dtype of int64

* Support pure cpu test

* eager logic

* refine test in pure cpu

* eager logic

* eager logic

* eager logic, test=develop

* skip core.eager when in inference, test=develop

* refine, test=develop

* refine, test=develop

* call RetainGrad after run forward kernel, test=develop

* refine, test=develop

* support dygraph util, meta, guard test

* support inference test

* refine test and fix initializer failure

* support create varbase and fix retain grad error

* fix windows error

* support test_imperative_basic test in eager mode

* remove additional log in variable.h

* remove additional log in variable.h

* remove additional code create in merge
Co-authored-by: jim19930609 <jim19930609@gmail.com>
Co-authored-by: Wang Huan <wanghuan29@baidu.com>
Parent 339c34e6
......@@ -43,33 +43,37 @@ void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
void RetainGradForTensor(const egr::EagerTensor& tensor) {
// TODO(jiabin): Support More Tensor type here
AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
egr::EagerTensor* grad_tensor = meta->MutableGrad();
std::weak_ptr<egr::EagerTensor> weak_grad_tensor = meta->WeakGrad();
// Define Hook
std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
[grad_tensor](const egr::EagerTensor& t) {
if (!grad_tensor) {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Detected null grad_tensor."
"Grad tensor in AutogradMeta of should not be nullptr"));
}
if (t.defined()) {
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor;
[weak_grad_tensor](const egr::EagerTensor& t) {
if (!weak_grad_tensor.expired()) {
auto grad_tensor = weak_grad_tensor.lock();
if (t.defined()) {
VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor.get();
} else {
VLOG(7) << "Set Var for RetainGrad Hook for tensor: " << t.name();
PADDLE_ENFORCE_EQ(
t.Var().IsInitialized(), true,
paddle::platform::errors::Fatal(
"Detected uninitialized variable, causing segmentation "
"fault "
"inside the hook."
"Variable %s has to be initialized while we need to set it."
"please check tensor initialization status.",
t.name()));
grad_tensor->MutableVar()
->GetMutable<paddle::framework::LoDTensor>()
->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
return *grad_tensor.get();
}
} else {
PADDLE_ENFORCE_EQ(
t.Var().IsInitialized(), true,
paddle::platform::errors::Fatal(
"Detected uninitialized variable, causing segmentation fault "
"inside the hook."
"Variable %s has to be initialized while we need to set it."
"please check tensor initialization status.",
t.name()));
grad_tensor->MutableVar()
->GetMutable<paddle::framework::LoDTensor>()
->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
return *grad_tensor;
VLOG(7) << "Retain NULL EagerTensor in Grad Hook";
return EagerTensor();
}
};
......
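The change above swaps the raw `grad_tensor` pointer captured by the retain-grad hook for a `std::weak_ptr` obtained from `meta->WeakGrad()`, so the hook can detect that the grad slot has been released instead of dereferencing a dangling pointer. Below is a minimal, self-contained sketch of that capture pattern; the `Tensor` struct and `value` field are illustrative stand-ins, not Paddle types.

```cpp
#include <functional>
#include <iostream>
#include <memory>

// Illustrative stand-in for a gradient tensor (not a Paddle type).
struct Tensor {
  float value = 0.f;
};

int main() {
  auto grad = std::make_shared<Tensor>();
  std::weak_ptr<Tensor> weak_grad = grad;

  // The hook captures a weak_ptr: it neither keeps the grad tensor alive
  // nor risks touching freed memory after the owner releases it.
  std::function<Tensor(const Tensor&)> hook = [weak_grad](const Tensor& t) {
    if (auto g = weak_grad.lock()) {
      g->value = t.value;  // copy the incoming gradient into the retained slot
      return *g;
    }
    std::cout << "grad slot already released, returning empty tensor\n";
    return Tensor();
  };

  hook(Tensor{3.14f});  // writes 3.14 into *grad
  grad.reset();         // owner drops the grad tensor
  hook(Tensor{1.0f});   // safely takes the "expired" branch
  return 0;
}
```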
......@@ -972,11 +972,16 @@ static std::string GenerateGradNodeCreationContent(
iter.GetGradInsFwdSlotnameMap();
for (auto& kv : grad_ins_fwd_slotname_map) {
const std::string& tensor_wrapper_name = kv.second;
std::string full_reserved = "false";
if (fwd_outputs_name_pos_map.find(tensor_wrapper_name) ==
fwd_outputs_name_pos_map.end()) {
full_reserved = "true";
}
const char* SET_TENSOR_WRAPPER_TEMPLATE =
" grad_node->SetTensorWrapper%s(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(SET_TENSOR_WRAPPER_TEMPLATE,
tensor_wrapper_name, tensor_wrapper_name);
" grad_node->SetTensorWrapper%s(%s, %s);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, tensor_wrapper_name,
full_reserved);
}
}
grad_node_creation_str += "\n";
......@@ -1017,11 +1022,6 @@ static std::string GenerateGradNodeCreationContent(
grad_node_creation_str += paddle::string::Sprintf(
ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
}
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, input_name);
}
// [GradOpNode] SetGradInMeta
......@@ -1048,6 +1048,12 @@ static std::string GenerateGradNodeCreationContent(
" egr::EagerUtils::SetHistory(&%s, grad_node);\n";
grad_node_creation_str +=
paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name);
}
VLOG(6) << "Generated SetGradIn/OutMeta";
......@@ -1771,6 +1777,7 @@ static std::string GenerateGradNodeHeaderContents(
std::string tensor_wrapper_arg_str;
std::string tensor_wrapper_body_str;
std::string full_reserved_str = "full_reserved";
if (duplicable_tensors.count(tensor_wrapper_name)) {
const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE =
"const std::vector<egr::EagerTensor>& %s";
......@@ -1803,17 +1810,18 @@ static std::string GenerateGradNodeHeaderContents(
TENSOR_WRAPPER_MEMBER_TEMPLATE, struct_tensor_wrapper_name);
const char* SET_TENSOR_WRAPPER_BODY_TEMPLATE =
"%s = egr::TensorWrapper(%s, true /*full_reserved*/);";
"%s = egr::TensorWrapper(%s, %s /*full_reserved*/);";
tensor_wrapper_body_str = paddle::string::Sprintf(
SET_TENSOR_WRAPPER_BODY_TEMPLATE, struct_tensor_wrapper_name,
tensor_wrapper_name);
tensor_wrapper_name, full_reserved_str);
}
std::string full_reserved_signature_str = "bool full_reserved";
const char* SET_TENSOR_WRAPPER_TEMPLATE =
" void SetTensorWrapper%s(%s) {\n %s\n }\n";
" void SetTensorWrapper%s(%s, %s) {\n %s\n }\n";
set_tensor_wrappers_str += paddle::string::Sprintf(
SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name,
tensor_wrapper_arg_str, tensor_wrapper_body_str);
tensor_wrapper_arg_str, full_reserved_signature_str,
tensor_wrapper_body_str);
}
}
VLOG(6) << "Generated TensorWrapper";
......
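As a rough illustration of the generation step above: `full_reserved` is chosen per wrapped tensor (true only when the tensor is not also a forward output) and spliced into the emitted `SetTensorWrapper` call. The toy program below mirrors that string-templating logic with made-up names; it is not the real generator.

```cpp
#include <cstdio>
#include <iostream>
#include <set>
#include <string>

int main() {
  // Hypothetical forward-output set and wrapped-tensor name, for illustration.
  std::set<std::string> fwd_output_names = {"Out"};
  std::string tensor_wrapper_name = "X";

  // full_reserved is "true" only when the wrapped tensor is NOT a fwd output.
  std::string full_reserved =
      fwd_output_names.count(tensor_wrapper_name) ? "false" : "true";

  // Same shape as the SET_TENSOR_WRAPPER_TEMPLATE used in the hunk above.
  const char* set_tensor_wrapper_template =
      "  grad_node->SetTensorWrapper%s(%s, %s);\n";
  char buf[128];
  std::snprintf(buf, sizeof(buf), set_tensor_wrapper_template,
                tensor_wrapper_name.c_str(), tensor_wrapper_name.c_str(),
                full_reserved.c_str());
  std::cout << buf;  // prints:   grad_node->SetTensorWrapperX(X, true);
  return 0;
}
```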
......@@ -14,8 +14,8 @@
#pragma once
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/grad_node_info.h"
namespace egr {
using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
......@@ -75,9 +75,20 @@ class AutogradMeta : public AbstractAutogradMeta {
~AutogradMeta() override = default;
const egr::EagerTensor& Grad() const { return grad_; }
const egr::EagerTensor& Grad() const {
PADDLE_ENFORCE_NOT_NULL(
grad_.get(),
paddle::platform::errors::InvalidArgument(
"Should Not get NULL from Grad pointer, since "
"we should have default EagerTensor once we init AutoGradMeta. "
"if you got this error may indicates framework error in "
"PaddlePaddle"));
return *(grad_.get());
}
egr::EagerTensor* MutableGrad() { return grad_.get(); }
egr::EagerTensor* MutableGrad() { return &grad_; }
std::weak_ptr<egr::EagerTensor> WeakGrad() { return grad_; }
void SetGradNode(const std::shared_ptr<GradNodeBase>& grad_node) {
PADDLE_ENFORCE_NOT_NULL(
......@@ -126,12 +137,13 @@ class AutogradMeta : public AbstractAutogradMeta {
private:
// TODO(jiabin) :Should we use pointer instead of object?
egr::EagerTensor grad_;
std::shared_ptr<egr::EagerTensor> grad_{std::make_shared<egr::EagerTensor>(
egr::Controller::Instance().GenerateUniqueName("@grad"))};
// GradNodeBase is base class of all grad op which is a
// wrapper for grad op. This class will make grad op easy
// to be traced.
std::shared_ptr<GradNodeBase> grad_node_;
std::shared_ptr<GradNodeBase> grad_node_ = nullptr;
/**
* Why we need slot id here?
......
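The hunk above turns `grad_` from a plain member into a `std::shared_ptr`, so `WeakGrad()` can hand a non-owning handle to hooks while `MutableGrad()` still returns a raw pointer. A minimal sketch of that ownership layout, with illustrative `Tensor`/`Meta` stand-ins rather than the real AutogradMeta:

```cpp
#include <memory>
#include <string>

// Illustrative stand-ins; not Paddle's EagerTensor/AutogradMeta.
struct Tensor {
  explicit Tensor(std::string n) : name(std::move(n)) {}
  std::string name;
};

class Meta {
 public:
  // The grad slot is default-constructed eagerly, so Grad() never sees null.
  Meta() : grad_(std::make_shared<Tensor>("x@grad")) {}

  const Tensor& Grad() const { return *grad_; }
  Tensor* MutableGrad() { return grad_.get(); }

  // Non-owning handle for hooks: observes the grad tensor without
  // extending its lifetime (mirrors the new WeakGrad() accessor).
  std::weak_ptr<Tensor> WeakGrad() const { return grad_; }

 private:
  std::shared_ptr<Tensor> grad_;
};

int main() {
  Meta meta;
  std::weak_ptr<Tensor> weak = meta.WeakGrad();
  return weak.expired() ? 1 : 0;  // 0: grad slot alive while meta exists
}
```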
......@@ -71,6 +71,14 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
return node_in_degree_map;
}
void RunBackwardHooks(
const std::vector<std::vector<egr::EagerTensor>>& grad_tensors,
egr::GradNodeBase* grad_node) {
grad_node->ApplyGradientHooks(grad_tensors);
VLOG(6) << "Apply Reduce Hooks for node";
grad_node->ApplyReduceHooks();
}
void RunBackward(const std::vector<egr::EagerTensor>& tensors,
const std::vector<egr::EagerTensor>& grad_tensors,
bool retain_graph) {
......@@ -157,7 +165,11 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
std::unique_ptr<GradTensorHolder> node_input_buffer =
std::move(node_input_buffers_dict[node]);
VLOG(6) << "Run Backward Kernel with input_buffer";
// Run Backward Node and get outputs
RunBackwardHooks(node_input_buffer->Buffers(), node);
// TODO(jiabin): Support post hook here and make hook run in separate
// operator
// Run Pre Backward Node and get outputs
std::vector<std::vector<egr::EagerTensor>> grad_output_tensors =
(*node)(node_input_buffer->Buffers());
// TODO(jiabin): Should we erase it or find a more efficient way.
......
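For context, the new `RunBackwardHooks` call applies gradient (and reduce) hooks to the accumulated input buffer before the grad node itself runs. A simplified, illustrative sketch of that ordering (the types here are stand-ins, not the real GradTensorHolder/GradNodeBase):

```cpp
#include <functional>
#include <iostream>
#include <vector>

// Illustrative stand-ins: a "grad node" that doubles its inputs, and hooks
// applied to the buffered gradients before the node runs, mirroring the
// RunBackwardHooks-then-(*node)(...) ordering above.
using Grads = std::vector<float>;
using Hook = std::function<void(Grads&)>;

Grads RunNode(const Grads& in) {
  Grads out(in);
  for (float& v : out) v *= 2.f;
  return out;
}

int main() {
  Grads input_buffer = {1.f, 2.f, 3.f};
  std::vector<Hook> hooks = {
      [](Grads& g) { for (float& v : g) v += 0.5f; }  // e.g. a gradient hook
  };

  for (Hook& h : hooks) h(input_buffer);  // 1) hooks on the input buffer
  Grads out = RunNode(input_buffer);      // 2) then the backward node

  for (float v : out) std::cout << v << ' ';  // prints: 3 5 7
  std::cout << '\n';
  return 0;
}
```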
......@@ -47,14 +47,18 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
// adj_edges has the same rank as fwd inputs, and records its output rank
// from
// its pre-ops
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
if (meta) {
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!meta->StopGradient()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
}
}
......@@ -71,14 +75,18 @@ void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
// adj_edges has the same rank as fwd inputs, and records its output rank
// from
// its pre-ops
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
if (meta) {
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!meta->StopGradient()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
}
}
......@@ -90,14 +98,18 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
if (meta) {
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!meta->StopGradient()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
}
......@@ -127,6 +139,11 @@ void GradNodeBase::SetGradInMeta(const std::vector<AutogradMeta*>& fwd_out,
// Init stop gradient vector before use to avoid push back
meta.Init(slot_size);
for (size_t i = 0; i < slot_size; i++) {
PADDLE_ENFORCE_NOT_NULL(fwd_out[i],
paddle::platform::errors::PreconditionNotMet(
"Bwd_in_meta should only be called while "
"autograd_meta is not null. If you got this "
"error, it indicates bugs in framework."));
if (fwd_out[i]->StopGradient()) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
......@@ -173,6 +190,10 @@ void GradNodeBase::SetGradOutMeta(const std::vector<AutogradMeta*>& fwd_in,
// Init stop gradient vector before use to avoid push back
meta.Init(slot_size);
for (size_t i = 0; i < slot_size; i++) {
if (!fwd_in[i]) {
meta.SetStopGradient(i, true);
continue;
}
if (fwd_in[i]->StopGradient()) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
......@@ -249,6 +270,7 @@ std::vector<std::vector<egr::EagerTensor>> GradNodeBase::ApplyGradientHooks(
slot_out.resize(tensors[slot_id].size());
egr::EagerTensor& out = slot_out[rank];
if (!out.defined() || !out.initialized()) {
VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
out = hook(tensors[slot_id][rank]);
} else {
// TODO(jiabin): Why this?
......
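The repeated AddEdges changes above add two guards: a null `meta` (dispensable input) is skipped entirely, and an accumulation node is only created for leaves whose `stop_gradient` is false. A compact, illustrative sketch of that decision (stand-in types, not Paddle's AutogradMeta/GradNodeAccumulation):

```cpp
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

// Illustrative stand-ins for AutogradMeta / GradNodeBase; not Paddle classes.
struct Node {};
struct Meta {
  bool stop_gradient = false;
  std::shared_ptr<Node> grad_node;
};

using Edge = std::pair<std::shared_ptr<Node>, size_t>;

// Mirrors the guarded AddEdges logic: ignore null metas, and only create an
// accumulation node for leaf tensors that still require a gradient.
void AddEdge(Meta* meta, size_t rank, std::vector<Edge>* edges) {
  if (!meta) return;  // dispensable input: no edge
  if (meta->grad_node) {
    edges->emplace_back(meta->grad_node, rank);
  } else if (!meta->stop_gradient) {
    meta->grad_node = std::make_shared<Node>();  // accumulation-node stand-in
    edges->emplace_back(meta->grad_node, rank);
  }
}

int main() {
  std::vector<Edge> edges;
  Meta leaf;    // trainable leaf: gets an accumulation node
  Meta frozen;  // stop_gradient leaf: no edge created
  frozen.stop_gradient = true;

  AddEdge(&leaf, 0, &edges);
  AddEdge(&frozen, 1, &edges);
  AddEdge(nullptr, 2, &edges);  // dispensable input

  std::cout << edges.size() << "\n";  // prints: 1
  return 0;
}
```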
......@@ -266,6 +266,7 @@ std::vector<EagerTensor> EagerUtils::RecoverTensorWrapper(
void EagerUtils::CheckAndRetainGrad(const egr::EagerTensor& tensor) {
VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
if (FLAGS_retain_grad_for_all_tensor) {
VLOG(6) << "RetainGradForTensor: " << tensor.name();
egr::egr_utils_api::RetainGradForTensor(tensor);
}
}
......@@ -274,7 +275,7 @@ void EagerUtils::CheckAndRetainGrad(
const std::vector<egr::EagerTensor>& tensors) {
if (FLAGS_retain_grad_for_all_tensor) {
for (auto& tensor : tensors) {
VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
VLOG(6) << "RetainGradForTensor: " << tensor.name();
egr::egr_utils_api::RetainGradForTensor(tensor);
}
}
......
......@@ -62,6 +62,13 @@ void EmptyEagerTensorInitializer(
const std::vector<int>& dims = {},
framework::proto::VarType::Type var_type =
paddle::framework::proto::VarType::LOD_TENSOR) {
auto ddims = paddle::framework::make_ddim(dims);
PADDLE_ENFORCE_GE(
paddle::framework::product(ddims), 0,
paddle::platform::errors::InvalidArgument(
"Create Eager Tensor with dims contain minus num is ilegal"
"Please check your code and make sure you new a "
"eager tensor with fixed shape instead of using -1."));
self->eager_tensor.set_name(name);
auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->eager_tensor));
autograd_meta->SetPersistable(persistable);
......@@ -71,8 +78,7 @@ void EmptyEagerTensorInitializer(
std::shared_ptr<pten::DenseTensor> dense_tensor =
std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(place),
pten::DenseTensorMeta(pten::TransToPtenDataType(dtype),
paddle::framework::make_ddim(dims)));
pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims));
self->eager_tensor.set_impl(dense_tensor);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
......
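The added check rejects creating an eager tensor whose dims contain -1 by requiring the element-count product to be non-negative. A standalone sketch of the same idea (the helper name `CheckFixedShape` is made up for illustration):

```cpp
#include <numeric>
#include <stdexcept>
#include <vector>

// Illustrative helper (name made up): mirrors the PADDLE_ENFORCE_GE above by
// rejecting shapes whose element-count product is negative, which is what a
// single dynamic -1 dimension produces.
void CheckFixedShape(const std::vector<long long>& dims) {
  long long numel = std::accumulate(
      dims.begin(), dims.end(), 1LL,
      [](long long acc, long long d) { return acc * d; });
  if (numel < 0) {
    throw std::invalid_argument(
        "eager tensors must be created with a fixed shape, not -1");
  }
}

int main() {
  CheckFixedShape({4, 16, 16, 32});  // ok: the shape used by the new test
  try {
    CheckFixedShape({-1, 23, 48});   // rejected, like the -1 shape in dygraph
  } catch (const std::invalid_argument&) {
    return 0;
  }
  return 1;
}
```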
......@@ -39,10 +39,12 @@ extern PyTypeObject* pEagerTensorType;
static PyObject* eager_tensor_method_numpy(EagerTensorObject* self,
PyObject* args, PyObject* kwargs) {
EAGER_SYNC_TRY
if (!self->eager_tensor.initialized()) {
Py_INCREF(Py_None);
return Py_None;
}
PADDLE_ENFORCE_EQ(
self->eager_tensor.initialized(), true,
platform::errors::InvalidArgument(
"Tensor data of %s is Empty that indicates we have null tensor for "
"now, please check if it has no data and initialize it first.",
self->eager_tensor.name()));
auto tensor_dims = self->eager_tensor.shape();
auto numpy_dtype = TensorDtype2NumpyDtype(self->eager_tensor.type());
auto sizeof_dtype = pten::DataTypeSize(self->eager_tensor.type());
......
......@@ -75,6 +75,7 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
return ToPyObject(accumulation_grad_node->Grad());
} else {
VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name();
auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
return ToPyObject(meta->Grad());
}
......
......@@ -123,10 +123,11 @@ def monkey_patch_eagertensor():
# [500.]
"""
if self.grad is None:
if self.grad._is_initialized():
return self.grad.numpy()
else:
return None
# TODO(wanghuancoder) support SELECTED_ROWS
return self.grad.numpy()
if hasattr(core, "eager"):
setattr(core.eager.EagerTensor, "__str__", __str__)
......
......@@ -452,6 +452,22 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
self.assertTrue(np.array_equal(res1, res2))
self.assertTrue(np.array_equal(res3, res4))
def test_backward_with_single_tensor(self):
arr4 = np.random.rand(4, 16, 16, 32).astype('float32')
egr_tensor12 = core.eager.EagerTensor(arr4, core.CPUPlace())
egr_tensor12.retain_grads()
arr = np.ones([4, 16, 16, 32]).astype('float32')
self.assertEqual(egr_tensor12.persistable, False)
self.assertTrue("generated_tensor" in egr_tensor12.name)
self.assertEqual(egr_tensor12.shape, [4, 16, 16, 32])
self.assertEqual(egr_tensor12.dtype, core.VarDesc.VarType.FP32)
self.assertEqual(egr_tensor12.stop_gradient, True)
self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
egr_tensor12.backward()
self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
class EagerGuardTestCase(unittest.TestCase):
def test__test_eager_guard(self):
......
......@@ -24,7 +24,7 @@ from test_imperative_base import new_program_scope
import paddle.fluid.dygraph_utils as dygraph_utils
from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
import paddle
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode, in_dygraph_mode
class MyLayer(fluid.Layer):
......@@ -94,58 +94,13 @@ class SimpleRNNCell(fluid.Layer):
is_bias=False)
def forward(self, input, pre_hidden):
tmp_i2h = self.create_variable(dtype=self._dtype)
tmp_h2h = self.create_variable(dtype=self._dtype)
hidden = self.create_variable(dtype=self._dtype)
out = self.create_variable(dtype=self._dtype)
softmax_out = self.create_variable(dtype=self._dtype)
reduce_out = self.create_variable(dtype=self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": input,
"Y": self._i2h_w},
outputs={"Out": tmp_i2h},
attrs={"x_num_col_dims": 1,
"y_num_col_dims": 1})
self._helper.append_op(
type="mul",
inputs={"X": pre_hidden,
"Y": self._h2h_w},
outputs={"Out": tmp_h2h},
attrs={"x_num_col_dims": 1,
"y_num_col_dims": 1})
self._helper.append_op(
type="elementwise_add",
inputs={'X': tmp_h2h,
'Y': tmp_i2h},
outputs={'Out': hidden},
attrs={'axis': -1,
'use_mkldnn': False})
tmp_i2h = paddle.fluid.layers.nn.mul(input, self._i2h_w)
tmp_h2h = paddle.fluid.layers.nn.mul(pre_hidden, self._h2h_w)
hidden = paddle.add(tmp_h2h, tmp_i2h)
hidden = self._helper.append_activation(hidden, act='tanh')
self._helper.append_op(
type="mul",
inputs={"X": hidden,
"Y": self._h2o_w},
outputs={"Out": out},
attrs={"x_num_col_dims": 1,
"y_num_col_dims": 1})
self._helper.append_op(
type="softmax",
inputs={"X": out},
outputs={"Out": softmax_out},
attrs={"use_cudnn": False})
self._helper.append_op(
type='reduce_sum',
inputs={'X': softmax_out},
outputs={'Out': reduce_out},
attrs={'keep_dim': False,
'reduce_all': True})
out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
softmax_out = paddle.nn.functional.softmax(out)
reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out)
return reduce_out, hidden
......@@ -394,12 +349,17 @@ class TestImperative(unittest.TestCase):
a = inputs2[0].gradient()
self.assertTrue(np.allclose(inputs2[0].gradient(), x))
def test_empty_var(self):
def func_empty_var(self):
with fluid.dygraph.guard():
cur_program = fluid.Program()
cur_block = cur_program.current_block()
new_variable = cur_block.create_var(
name="X", shape=[-1, 23, 48], dtype='float32')
# Normally, we don't allow tensors with a -1 shape to be created in dygraph mode, so this test is not ideal.
if not _in_eager_mode():
new_variable = cur_block.create_var(
name="X", shape=[-1, 23, 48], dtype='float32')
else:
new_variable = cur_block.create_var(
name="X", shape=[1, 23, 48], dtype='float32')
try:
new_variable.numpy()
except Exception as e:
......@@ -409,37 +369,51 @@ class TestImperative(unittest.TestCase):
new_variable.backward()
except Exception as e:
assert type(e) == core.EnforceNotMet
# TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
if not _in_eager_mode():
try:
new_variable.clear_gradient()
except Exception as e:
assert type(e) == core.EnforceNotMet
try:
new_variable.clear_gradient()
except Exception as e:
assert type(e) == core.EnforceNotMet
def test_empty_var(self):
with _test_eager_guard():
self.func_empty_var()
self.func_empty_var()
def test_empty_grad(self):
def func_empty_grad(self):
with fluid.dygraph.guard():
x = np.ones([2, 2], np.float32)
new_var = paddle.to_tensor(x)
try:
new_var.gradient()
except Exception as e:
assert type(e) == ValueError
try:
new_var.clear_gradient()
except Exception as e:
assert type(e) == core.EnforceNotMet
self.assertIsNone(new_var.gradient())
# TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
if not _in_eager_mode():
try:
new_var.clear_gradient()
except Exception as e:
assert type(e) == core.EnforceNotMet
with fluid.dygraph.guard():
cur_program = fluid.Program()
cur_block = cur_program.current_block()
new_variable = cur_block.create_var(
name="X", shape=[-1, 23, 48], dtype='float32')
# Normally, we don't allow tensors with a -1 shape to be created in dygraph mode, so this test is not ideal.
if not _in_eager_mode():
new_variable = cur_block.create_var(
name="X", shape=[-1, 23, 48], dtype='float32')
else:
new_variable = cur_block.create_var(
name="X", shape=[1, 23, 48], dtype='float32')
try:
new_variable.gradient()
except Exception as e:
assert type(e) == ValueError
def test_set_persistable(self):
def test_empty_grad(self):
with _test_eager_guard():
self.func_empty_grad()
self.func_empty_grad()
def func_set_persistable(self):
with fluid.dygraph.guard():
x = np.ones([2, 2], np.float32)
new_var = paddle.to_tensor(x)
......@@ -447,12 +421,22 @@ class TestImperative(unittest.TestCase):
new_var.persistable = True
self.assertTrue(new_var.persistable)
def test_layer(self):
def test_set_persistable(self):
with _test_eager_guard():
self.func_set_persistable()
self.func_set_persistable()
def func_layer(self):
with fluid.dygraph.guard():
l = fluid.Layer("l")
self.assertRaises(NotImplementedError, l.forward, [])
def test_layer_in_out(self):
def test_layer(self):
with _test_eager_guard():
self.func_layer()
self.func_layer()
def func_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.dygraph.guard():
var_inp = paddle.to_tensor(np_inp)
......@@ -489,12 +473,17 @@ class TestImperative(unittest.TestCase):
feed={inp.name: np_inp},
fetch_list=[x.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad))
self.assertTrue(np.allclose(dy_out2, static_out))
self.assertTrue(np.allclose(dy_grad2, static_grad))
self.assertTrue(np.array_equal(dy_out, static_out))
self.assertTrue(np.array_equal(dy_grad, static_grad))
self.assertTrue(np.array_equal(dy_out2, static_out))
self.assertTrue(np.array_equal(dy_grad2, static_grad))
def test_mlp(self):
def test_layer_in_out(self):
with _test_eager_guard():
self.func_layer_in_out()
self.func_layer_in_out()
def func_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.dygraph.guard():
var_inp = paddle.to_tensor(np_inp)
......@@ -545,6 +534,11 @@ class TestImperative(unittest.TestCase):
self.assertEqual(mlp._linear2, sublayers[1])
self.assertEqual(len(sublayers), 2)
def test_mlp(self):
with _test_eager_guard():
self.func_mlp()
self.func_mlp()
def test_gradient_accumulation(self):
def test_single_api(sort_sum_gradient):
fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
......@@ -677,7 +671,7 @@ class TestImperative(unittest.TestCase):
test_mlp(False)
test_mlp(True)
def test_dygraph_vs_static(self):
def func_dygraph_vs_static(self):
np_inp1 = np.random.rand(4, 3, 3)
np_inp2 = np.random.rand(4, 3, 3)
......@@ -728,7 +722,12 @@ class TestImperative(unittest.TestCase):
fetch_list=out)[0]
self.assertTrue(np.allclose(dygraph_result, static_result))
def test_rnn(self):
def test_dygraph_vs_static(self):
with _test_eager_guard():
self.func_dygraph_vs_static()
self.func_dygraph_vs_static()
def func_rnn(self):
np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
[10.0, 11.0, 12.0]])
np_inp = np_inp.reshape((1, 4, 3))
......@@ -771,14 +770,19 @@ class TestImperative(unittest.TestCase):
param_grads[1][1].name, param_grads[2][1].name
])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
self.assertTrue(np.allclose(dy_out2, static_out))
self.assertTrue(np.allclose(dy_grad_h2o2, static_grad_h2o))
self.assertTrue(np.allclose(dy_grad_h2h2, static_grad_h2h))
self.assertTrue(np.allclose(dy_grad_i2h2, static_grad_i2h))
self.assertTrue(np.array_equal(dy_out, static_out))
self.assertTrue(np.array_equal(dy_grad_h2o, static_grad_h2o))
self.assertTrue(np.array_equal(dy_grad_h2h, static_grad_h2h))
self.assertTrue(np.array_equal(dy_grad_i2h, static_grad_i2h))
self.assertTrue(np.array_equal(dy_out2, static_out))
self.assertTrue(np.array_equal(dy_grad_h2o2, static_grad_h2o))
self.assertTrue(np.array_equal(dy_grad_h2h2, static_grad_h2h))
self.assertTrue(np.array_equal(dy_grad_i2h2, static_grad_i2h))
def test_rnn(self):
with _test_eager_guard():
self.func_rnn()
self.func_rnn()
def func_layer_attrs(self):
layer = fluid.dygraph.Layer("test")
......