Unverified commit 2421a25a, authored by Jiabin Yang, committed by GitHub

Support test imperative basic with fixed retain grad interface (#38548)

* Rearranged Eager AutoCodeGen directory structure

* Removed USE_OP in Eager AutoCodeGen

* Enabled generation for Operators without Grad/Inputs/Outputs

* Resolved operators without input

* Fixed merge conflicts

* Enabled Eager AutoCodeGen for 10+ more operators

* Refactored Eager AutoCodeGen with more organized helper objects

* Enabled Eager AutoCodeGen for operators with multiple OpBases

* Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument

* Handled Dispensable Inputs/Outputs in Eager AutoCodeGen

* Adjusted function generation/call between Python-C API & Dygraph API

* Synchronized auto-generated Python-C API with Dygraph Forward Functions

* support more eager tensor api

* fix merge compile error

* fix compile error and fit develop code

* support pure CPU

* fix some logic error in eager_mode

* support _varbase_creator in eager mode

* Added safe_initialized interface to EagerTensor for use in processing dispensable inputs

* for eager mode

* refine

* support multiple constructor for eager tensor

* add place related code

* polish code

* specific randint with dtype of int64

* Support pure cpu test

* eager logic

* refine test in pure cpu

* eager logic

* eager logic

* eager logic, test=develop

* skip core.eager when in inference, test=develop

* refine, test=develop

* refine, test=develop

* call RetainGrad after run forward kernel, test=develop

* refine, test=develop

* support dygraph util, meta, guard test

* support inference test

* refine test and fix initializer failed

* support create varbase and fix retain grad error

* fix windows error

* support test_imperative_basic test in eager mode

* remove additional log in variable.h

* remove additional log in variable.h

* remove additional code create in merge
Co-authored-by: jim19930609 <jim19930609@gmail.com>
Co-authored-by: Wang Huan <wanghuan29@baidu.com>
Parent: 339c34e6
@@ -43,25 +43,25 @@ void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
 void RetainGradForTensor(const egr::EagerTensor& tensor) {
   // TODO(jiabin): Support More Tensor type here
   AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
-  egr::EagerTensor* grad_tensor = meta->MutableGrad();
+  std::weak_ptr<egr::EagerTensor> weak_grad_tensor = meta->WeakGrad();

   // Define Hook
   std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
-      [grad_tensor](const egr::EagerTensor& t) {
-        if (!grad_tensor) {
-          PADDLE_THROW(paddle::platform::errors::Fatal(
-              "Detected null grad_tensor."
-              "Grad tensor in AutogradMeta of should not be nullptr"));
-        }
-        if (t.defined()) {
-          // Simply Copy impl() to grad_tensor
-          grad_tensor->set_impl(t.impl());
-          return *grad_tensor;
-        } else {
-          PADDLE_ENFORCE_EQ(
-              t.Var().IsInitialized(), true,
-              paddle::platform::errors::Fatal(
-                  "Detected uninitialized variable, causing segmentation fault "
-                  "inside the hook."
-                  "Variable %s has to be initialized while we need to set it."
-                  "please check tensor initialization status.",
+      [weak_grad_tensor](const egr::EagerTensor& t) {
+        if (!weak_grad_tensor.expired()) {
+          auto grad_tensor = weak_grad_tensor.lock();
+          if (t.defined()) {
+            VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
+            // Simply Copy impl() to grad_tensor
+            grad_tensor->set_impl(t.impl());
+            return *grad_tensor.get();
+          } else {
+            VLOG(7) << "Set Var for RetainGrad Hook for tensor: " << t.name();
+            PADDLE_ENFORCE_EQ(
+                t.Var().IsInitialized(), true,
+                paddle::platform::errors::Fatal(
+                    "Detected uninitialized variable, causing segmentation "
+                    "fault "
+                    "inside the hook."
+                    "Variable %s has to be initialized while we need to set it."
+                    "please check tensor initialization status.",
@@ -69,7 +69,11 @@ void RetainGradForTensor(const egr::EagerTensor& tensor) {
-          grad_tensor->MutableVar()
-              ->GetMutable<paddle::framework::LoDTensor>()
-              ->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
-          return *grad_tensor;
+            grad_tensor->MutableVar()
+                ->GetMutable<paddle::framework::LoDTensor>()
+                ->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
+            return *grad_tensor.get();
+          }
+        } else {
+          VLOG(7) << "Retain NULL EagerTensor in Grad Hook";
+          return EagerTensor();
         }
       };
...
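
Note on the hunk above: the retain-grad hook used to capture a raw `grad_tensor` pointer into the `AutogradMeta`, which can dangle once the meta and its grad storage are released; the fix is to capture a `std::weak_ptr` obtained from `WeakGrad()`. The following is a minimal, self-contained C++ sketch of that pattern, using hypothetical stand-in types (`Tensor`, `Meta`, `MakeRetainGradHook`) rather than Paddle's `egr::EagerTensor`/`AutogradMeta`, so the lifetime behaviour is easy to see in isolation.

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <string>

// Stand-in for a tensor that a retain-grad hook would write into.
struct Tensor {
  std::string data;
};

// Stand-in for AutogradMeta: owns the grad tensor, hands out weak observers.
struct Meta {
  std::shared_ptr<Tensor> grad{std::make_shared<Tensor>()};
  std::weak_ptr<Tensor> WeakGrad() { return grad; }
};

std::function<Tensor(const Tensor&)> MakeRetainGradHook(Meta* meta) {
  std::weak_ptr<Tensor> weak_grad = meta->WeakGrad();
  return [weak_grad](const Tensor& t) {
    if (auto grad = weak_grad.lock()) {  // owner still alive: retain the grad
      grad->data = t.data;
      return *grad;
    }
    // Owner already destroyed: degrade to a no-op instead of dereferencing
    // freed memory, mirroring the "Retain NULL EagerTensor" branch above.
    return Tensor{};
  };
}

int main() {
  auto meta = std::make_unique<Meta>();
  auto hook = MakeRetainGradHook(meta.get());
  std::cout << hook(Tensor{"g1"}).data << "\n";  // "g1": grad retained
  meta.reset();                                  // grad storage released
  std::cout << hook(Tensor{"g2"}).data << "\n";  // "": hook is now harmless
}
```

The design choice is the same as in the hunk: the hook never owns the grad tensor, it only observes it, so a hook that outlives its tensor cannot crash the backward pass.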
@@ -972,11 +972,16 @@ static std::string GenerateGradNodeCreationContent(
         iter.GetGradInsFwdSlotnameMap();
     for (auto& kv : grad_ins_fwd_slotname_map) {
       const std::string& tensor_wrapper_name = kv.second;
+      std::string full_reserved = "false";
+      if (fwd_outputs_name_pos_map.find(tensor_wrapper_name) ==
+          fwd_outputs_name_pos_map.end()) {
+        full_reserved = "true";
+      }
       const char* SET_TENSOR_WRAPPER_TEMPLATE =
-          "  grad_node->SetTensorWrapper%s(%s);\n";
-      grad_node_creation_str +=
-          paddle::string::Sprintf(SET_TENSOR_WRAPPER_TEMPLATE,
-                                  tensor_wrapper_name, tensor_wrapper_name);
+          "  grad_node->SetTensorWrapper%s(%s, %s);\n";
+      grad_node_creation_str += paddle::string::Sprintf(
+          SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, tensor_wrapper_name,
+          full_reserved);
     }
   }
   grad_node_creation_str += "\n";
@@ -1017,11 +1022,6 @@ static std::string GenerateGradNodeCreationContent(
       grad_node_creation_str += paddle::string::Sprintf(
           ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
     }
-    VLOG(6) << "Generated Call RetainGradForTensor";
-    const char* RETAIN_GRAD_TEMPLATE =
-        "  egr::EagerUtils::CheckAndRetainGrad(%s);\n";
-    grad_node_creation_str +=
-        paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, input_name);
   }
   // [GradOpNode] SetGradInMeta
@@ -1048,6 +1048,12 @@ static std::string GenerateGradNodeCreationContent(
         "    egr::EagerUtils::SetHistory(&%s, grad_node);\n";
     grad_node_creation_str +=
         paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+
+    VLOG(6) << "Generated Call RetainGradForTensor";
+    const char* RETAIN_GRAD_TEMPLATE =
+        "  egr::EagerUtils::CheckAndRetainGrad(%s);\n";
+    grad_node_creation_str +=
+        paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name);
   }
   VLOG(6) << "Generated SetGradIn/OutMeta";
@@ -1771,6 +1777,7 @@ static std::string GenerateGradNodeHeaderContents(
       std::string tensor_wrapper_arg_str;
       std::string tensor_wrapper_body_str;
+      std::string full_reserved_str = "full_reserved";
       if (duplicable_tensors.count(tensor_wrapper_name)) {
         const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE =
             "const std::vector<egr::EagerTensor>& %s";
@@ -1803,17 +1810,18 @@ static std::string GenerateGradNodeHeaderContents(
             TENSOR_WRAPPER_MEMBER_TEMPLATE, struct_tensor_wrapper_name);
         const char* SET_TENSOR_WRAPPER_BODY_TEMPLATE =
-            "%s = egr::TensorWrapper(%s, true /*full_reserved*/);";
+            "%s = egr::TensorWrapper(%s, %s /*full_reserved*/);";
         tensor_wrapper_body_str = paddle::string::Sprintf(
             SET_TENSOR_WRAPPER_BODY_TEMPLATE, struct_tensor_wrapper_name,
-            tensor_wrapper_name);
+            tensor_wrapper_name, full_reserved_str);
       }
+      std::string full_reserved_signature_str = "bool full_reserved";
       const char* SET_TENSOR_WRAPPER_TEMPLATE =
-          " void SetTensorWrapper%s(%s) {\n %s\n }\n";
+          " void SetTensorWrapper%s(%s, %s) {\n %s\n }\n";
       set_tensor_wrappers_str += paddle::string::Sprintf(
          SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name,
-          tensor_wrapper_arg_str, tensor_wrapper_body_str);
+          tensor_wrapper_arg_str, full_reserved_signature_str,
+          tensor_wrapper_body_str);
     }
   }
   VLOG(6) << "Generated TensorWrapper";
...
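
For readability, here is a hedged, hand-written illustration of the code shape the changed templates would emit for a tensor wrapper named `X`. The types below are simplified stand-ins, not Paddle's real `egr::TensorWrapper` or a real generated GradNode; the only point is the new signature: the generated `SetTensorWrapperX` now receives `full_reserved` from the call site, which passes `true` only when `X` is not also a forward output (per the `fwd_outputs_name_pos_map` check added above).

```cpp
#include <iostream>

namespace sketch {
struct EagerTensor {};

// Stand-in for egr::TensorWrapper; only records the flag we care about here.
struct TensorWrapper {
  TensorWrapper() = default;
  TensorWrapper(const EagerTensor&, bool full_reserved)
      : full_reserved_(full_reserved) {}
  bool full_reserved_ = false;
};

struct GeneratedGradNode {
  // Shape emitted by the new SET_TENSOR_WRAPPER_TEMPLATE /
  // SET_TENSOR_WRAPPER_BODY_TEMPLATE pair.
  void SetTensorWrapperX(const EagerTensor& X, bool full_reserved) {
    X_ = TensorWrapper(X, full_reserved /*full_reserved*/);
  }
  TensorWrapper X_;
};
}  // namespace sketch

int main() {
  sketch::EagerTensor X;
  sketch::GeneratedGradNode grad_node;
  // Shape emitted by the new call-site SET_TENSOR_WRAPPER_TEMPLATE: the flag
  // is computed per wrapper instead of being hard-coded to true.
  grad_node.SetTensorWrapperX(X, /*full_reserved=*/true);
  std::cout << grad_node.X_.full_reserved_ << "\n";  // 1
}
```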
@@ -14,8 +14,8 @@
 #pragma once

+#include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/eager/grad_node_info.h"

 namespace egr {

 using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
@@ -75,9 +75,20 @@ class AutogradMeta : public AbstractAutogradMeta {
   ~AutogradMeta() override = default;

-  const egr::EagerTensor& Grad() const { return grad_; }
+  const egr::EagerTensor& Grad() const {
+    PADDLE_ENFORCE_NOT_NULL(
+        grad_.get(),
+        paddle::platform::errors::InvalidArgument(
+            "Should Not get NULL from Grad pointer, since "
+            "we should have default EagerTensor once we init AutoGradMeta. "
+            "if you got this error may indicates framework error in "
+            "PaddlePaddle"));
+    return *(grad_.get());
+  }

-  egr::EagerTensor* MutableGrad() { return &grad_; }
+  egr::EagerTensor* MutableGrad() { return grad_.get(); }
+
+  std::weak_ptr<egr::EagerTensor> WeakGrad() { return grad_; }

   void SetGradNode(const std::shared_ptr<GradNodeBase>& grad_node) {
     PADDLE_ENFORCE_NOT_NULL(
@@ -126,12 +137,13 @@ class AutogradMeta : public AbstractAutogradMeta {
  private:
   // TODO(jiabin) :Should we use pointer instead of object?
-  egr::EagerTensor grad_;
+  std::shared_ptr<egr::EagerTensor> grad_{std::make_shared<egr::EagerTensor>(
+      egr::Controller::Instance().GenerateUniqueName("@grad"))};

   // GradNodeBase is base class of all grad op which is a
   // wrapper for grad op. This class will make grad op easy
   // to be traced.
-  std::shared_ptr<GradNodeBase> grad_node_;
+  std::shared_ptr<GradNodeBase> grad_node_ = nullptr;

   /**
    * Why we need slot id here?
...
@@ -71,6 +71,14 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
   return node_in_degree_map;
 }

+void RunBackwardHooks(
+    const std::vector<std::vector<egr::EagerTensor>>& grad_tensors,
+    egr::GradNodeBase* grad_node) {
+  grad_node->ApplyGradientHooks(grad_tensors);
+  VLOG(6) << "Apply Reduce Hooks for node";
+  grad_node->ApplyReduceHooks();
+}
+
 void RunBackward(const std::vector<egr::EagerTensor>& tensors,
                  const std::vector<egr::EagerTensor>& grad_tensors,
                  bool retain_graph) {
@@ -157,7 +165,11 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
     std::unique_ptr<GradTensorHolder> node_input_buffer =
         std::move(node_input_buffers_dict[node]);
     VLOG(6) << "Run Backward Kernel with input_buffer";
-    // Run Backward Node and get outputs
+
+    RunBackwardHooks(node_input_buffer->Buffers(), node);
+    // TODO(jiabin): Support post hook here and make hook run in seperate
+    // operator
+    // Run Pre Backward Node and get outputs
     std::vector<std::vector<egr::EagerTensor>> grad_output_tensors =
         (*node)(node_input_buffer->Buffers());
     // TODO(jiabin): Should we erase it or find a more efficient way.
...
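
A minimal sketch of the ordering this hunk establishes, with stand-in types rather than Paddle's `GradNodeBase`/`EagerTensor`: gradient hooks and reduce hooks are applied to the node's accumulated input buffer first, and only then is the grad node itself invoked. The names below (`GradNode`, `Buffers`) are illustrative only.

```cpp
#include <functional>
#include <iostream>
#include <vector>

using Tensor = double;                                // stand-in gradient value
using Buffers = std::vector<std::vector<Tensor>>;     // per-slot gradient buffers

struct GradNode {
  std::vector<std::function<void(const Tensor&)>> gradient_hooks;
  std::vector<std::function<void()>> reduce_hooks;

  void ApplyGradientHooks(const Buffers& grads) {
    for (const auto& slot : grads)
      for (const auto& g : slot)
        for (const auto& hook : gradient_hooks) hook(g);
  }
  void ApplyReduceHooks() {
    for (const auto& hook : reduce_hooks) hook();
  }
  // Stub for the backward kernel the real node would run.
  Buffers operator()(const Buffers& grads) { return grads; }
};

// Mirrors the RunBackwardHooks helper introduced above: hooks fire first.
void RunBackwardHooks(const Buffers& grad_tensors, GradNode* grad_node) {
  grad_node->ApplyGradientHooks(grad_tensors);
  grad_node->ApplyReduceHooks();
}

int main() {
  GradNode node;
  double retained = 0.0;  // what a retain-grad style hook would capture
  node.gradient_hooks.push_back([&](const Tensor& g) { retained = g; });
  node.reduce_hooks.push_back([] { std::cout << "reduce hook fired\n"; });

  Buffers input_buffer{{3.5}};
  RunBackwardHooks(input_buffer, &node);      // hooks on the input buffer ...
  Buffers grad_outputs = node(input_buffer);  // ... then the node runs
  std::cout << retained << " " << grad_outputs[0][0] << "\n";  // 3.5 3.5
}
```

Calling the hooks before the node runs is what lets the RetainGrad hook above observe the accumulated gradient for each tensor before it is consumed by the next backward kernel.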
@@ -47,16 +47,20 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    auto node = meta->GetMutableGradNode();
-    if (node) {
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
-    } else {
-      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
+    if (meta) {
+      auto node = meta->GetMutableGradNode();
+      if (node) {
+        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                         meta->OutRankInfo());
+      } else {
+        if (!meta->StopGradient()) {
+          meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                           meta->OutRankInfo());
+        }
+      }
     }
   }
 }

 void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
@@ -71,16 +75,20 @@ void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    auto node = meta->GetMutableGradNode();
-    if (node) {
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
-    } else {
-      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
+    if (meta) {
+      auto node = meta->GetMutableGradNode();
+      if (node) {
+        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                         meta->OutRankInfo());
+      } else {
+        if (!meta->StopGradient()) {
+          meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                           meta->OutRankInfo());
+        }
+      }
     }
   }
 }

 void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
@@ -90,15 +98,19 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
                         "Given slot id is out of range of adj_edges outter size, "
                         "adj_edges is designed to has the same size of grad "
                         "inputs's slot num."));
-  auto node = meta->GetMutableGradNode();
-  if (node) {
-    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                     meta->OutRankInfo());
-  } else {
-    meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
-    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                     meta->OutRankInfo());
+  if (meta) {
+    auto node = meta->GetMutableGradNode();
+    if (node) {
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
+    } else {
+      if (!meta->StopGradient()) {
+        meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                         meta->OutRankInfo());
+      }
+    }
   }
 }

 const std::vector<GradSlotMeta>& GradNodeBase::InputMeta() const {
@@ -127,6 +139,11 @@ void GradNodeBase::SetGradInMeta(const std::vector<AutogradMeta*>& fwd_out,
   // Init stop gradient vector before use to avoid push back
   meta.Init(slot_size);
   for (size_t i = 0; i < slot_size; i++) {
+    PADDLE_ENFORCE_NOT_NULL(fwd_out[i],
+                            paddle::platform::errors::PreconditionNotMet(
+                                "Bwd_in_meta should only be called while "
+                                "autograd_meta is not null. If you got this "
+                                "error, it indicates bugs in framework."));
     if (fwd_out[i]->StopGradient()) {
       // Set Stop Gradient only when its true or non-initialized autograd_meta,
       // since all default value is false.
@@ -173,6 +190,10 @@ void GradNodeBase::SetGradOutMeta(const std::vector<AutogradMeta*>& fwd_in,
   // Init stop gradient vector before use to avoid push back
   meta.Init(slot_size);
   for (size_t i = 0; i < slot_size; i++) {
+    if (!fwd_in[i]) {
+      meta.SetStopGradient(i, true);
+      continue;
+    }
     if (fwd_in[i]->StopGradient()) {
       // Set Stop Gradient only when its true or non-initialized autograd_meta,
      // since all default value is false.
@@ -249,6 +270,7 @@ std::vector<std::vector<egr::EagerTensor>> GradNodeBase::ApplyGradientHooks(
       slot_out.resize(tensors[slot_id].size());
       egr::EagerTensor& out = slot_out[rank];
       if (!out.defined() || !out.initialized()) {
+        VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
         out = hook(tensors[slot_id][rank]);
       } else {
         // TODO(jiabin): Why this?
...
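
A compact sketch, with stand-in types rather than Paddle's, of the guard that all three `AddEdges` overloads above now share: dispensable inputs whose autograd meta is null are skipped, and a fresh accumulation node is only created for leaves that do not have stop-gradient set. Names here (`AddEdge`, `Edge`) are illustrative only.

```cpp
#include <memory>
#include <vector>

// Hypothetical stand-ins for GradNodeBase / GradNodeAccumulation / AutogradMeta.
struct GradNodeBase {
  virtual ~GradNodeBase() = default;
};
struct GradNodeAccumulation : GradNodeBase {};

struct AutogradMeta {
  std::shared_ptr<GradNodeBase> grad_node;
  bool stop_gradient = false;
};

using Edge = std::shared_ptr<GradNodeBase>;

void AddEdge(std::vector<Edge>* edges, AutogradMeta* meta) {
  if (!meta) return;  // dispensable input without autograd meta: no edge
  if (meta->grad_node) {
    edges->push_back(meta->grad_node);  // link to the producer's grad node
  } else if (!meta->stop_gradient) {
    // Leaf that still wants a gradient: give it an accumulation node.
    meta->grad_node = std::make_shared<GradNodeAccumulation>();
    edges->push_back(meta->grad_node);
  }
  // Stop-gradient leaves fall through and get no edge at all.
}

int main() {
  std::vector<Edge> edges;
  AutogradMeta leaf;                // trainable leaf: gets an accumulation edge
  AutogradMeta frozen;
  frozen.stop_gradient = true;      // frozen leaf: no edge
  AddEdge(&edges, &leaf);
  AddEdge(&edges, &frozen);
  AddEdge(&edges, nullptr);         // dispensable input: no edge
  return edges.size() == 1 ? 0 : 1;
}
```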
@@ -266,6 +266,7 @@ std::vector<EagerTensor> EagerUtils::RecoverTensorWrapper(
 void EagerUtils::CheckAndRetainGrad(const egr::EagerTensor& tensor) {
   VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
   if (FLAGS_retain_grad_for_all_tensor) {
+    VLOG(6) << "RetainGradForTensor: " << tensor.name();
     egr::egr_utils_api::RetainGradForTensor(tensor);
   }
 }
@@ -274,7 +275,7 @@ void EagerUtils::CheckAndRetainGrad(
     const std::vector<egr::EagerTensor>& tensors) {
   if (FLAGS_retain_grad_for_all_tensor) {
     for (auto& tensor : tensors) {
-      VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
+      VLOG(6) << "RetainGradForTensor: " << tensor.name();
       egr::egr_utils_api::RetainGradForTensor(tensor);
     }
   }
...
@@ -62,6 +62,13 @@ void EmptyEagerTensorInitializer(
     const std::vector<int>& dims = {},
     framework::proto::VarType::Type var_type =
         paddle::framework::proto::VarType::LOD_TENSOR) {
+  auto ddims = paddle::framework::make_ddim(dims);
+  PADDLE_ENFORCE_GE(
+      paddle::framework::product(ddims), 0,
+      paddle::platform::errors::InvalidArgument(
+          "Create Eager Tensor with dims contain minus num is ilegal"
+          "Please check your code and make sure you new a "
+          "eager tensor with fixed shape instead of using -1."));
   self->eager_tensor.set_name(name);
   auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->eager_tensor));
   autograd_meta->SetPersistable(persistable);
@@ -71,8 +78,7 @@ void EmptyEagerTensorInitializer(
     std::shared_ptr<pten::DenseTensor> dense_tensor =
         std::make_shared<pten::DenseTensor>(
             pten::make_intrusive<paddle::experimental::SharedStorage>(place),
-            pten::DenseTensorMeta(pten::TransToPtenDataType(dtype),
-                                  paddle::framework::make_ddim(dims)));
+            pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims));
     self->eager_tensor.set_impl(dense_tensor);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
...
@@ -39,10 +39,12 @@ extern PyTypeObject* pEagerTensorType;
 static PyObject* eager_tensor_method_numpy(EagerTensorObject* self,
                                            PyObject* args, PyObject* kwargs) {
   EAGER_SYNC_TRY
-  if (!self->eager_tensor.initialized()) {
-    Py_INCREF(Py_None);
-    return Py_None;
-  }
+  PADDLE_ENFORCE_EQ(
+      self->eager_tensor.initialized(), true,
+      platform::errors::InvalidArgument(
+          "Tensor data of %s is Empty that indicates we have null tensor for "
+          "now, please check if it has no data and initialize it first.",
+          self->eager_tensor.name()));
   auto tensor_dims = self->eager_tensor.shape();
   auto numpy_dtype = TensorDtype2NumpyDtype(self->eager_tensor.type());
   auto sizeof_dtype = pten::DataTypeSize(self->eager_tensor.type());
...
@@ -75,6 +75,7 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
         std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
     return ToPyObject(accumulation_grad_node->Grad());
   } else {
+    VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name();
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
     return ToPyObject(meta->Grad());
   }
...
@@ -123,10 +123,11 @@ def monkey_patch_eagertensor():
                 #        [500.]
         """
-        if self.grad is None:
-            return None
-        # TODO(wanghuancoder) support SELECTED_ROWS
-        return self.grad.numpy()
+        if self.grad._is_initialized():
+            return self.grad.numpy()
+        else:
+            return None

     if hasattr(core, "eager"):
         setattr(core.eager.EagerTensor, "__str__", __str__)
...
@@ -452,6 +452,22 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
         self.assertTrue(np.array_equal(res1, res2))
         self.assertTrue(np.array_equal(res3, res4))

+    def test_backward_with_single_tensor(self):
+        arr4 = np.random.rand(4, 16, 16, 32).astype('float32')
+        egr_tensor12 = core.eager.EagerTensor(arr4, core.CPUPlace())
+        egr_tensor12.retain_grads()
+        arr = np.ones([4, 16, 16, 32]).astype('float32')
+        self.assertEqual(egr_tensor12.persistable, False)
+        self.assertTrue("generated_tensor" in egr_tensor12.name)
+        self.assertEqual(egr_tensor12.shape, [4, 16, 16, 32])
+        self.assertEqual(egr_tensor12.dtype, core.VarDesc.VarType.FP32)
+        self.assertEqual(egr_tensor12.stop_gradient, True)
+        self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
+        self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
+        self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
+        egr_tensor12.backward()
+        self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
+

 class EagerGuardTestCase(unittest.TestCase):
     def test__test_eager_guard(self):
...
@@ -24,7 +24,7 @@ from test_imperative_base import new_program_scope
 import paddle.fluid.dygraph_utils as dygraph_utils
 from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
 import paddle
-from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
+from paddle.fluid.framework import _test_eager_guard, _in_eager_mode, in_dygraph_mode


 class MyLayer(fluid.Layer):
@@ -94,58 +94,13 @@ class SimpleRNNCell(fluid.Layer):
             is_bias=False)

     def forward(self, input, pre_hidden):
-        tmp_i2h = self.create_variable(dtype=self._dtype)
-        tmp_h2h = self.create_variable(dtype=self._dtype)
-        hidden = self.create_variable(dtype=self._dtype)
-        out = self.create_variable(dtype=self._dtype)
-        softmax_out = self.create_variable(dtype=self._dtype)
-        reduce_out = self.create_variable(dtype=self._dtype)
-        self._helper.append_op(
-            type="mul",
-            inputs={"X": input,
-                    "Y": self._i2h_w},
-            outputs={"Out": tmp_i2h},
-            attrs={"x_num_col_dims": 1,
-                   "y_num_col_dims": 1})
-
-        self._helper.append_op(
-            type="mul",
-            inputs={"X": pre_hidden,
-                    "Y": self._h2h_w},
-            outputs={"Out": tmp_h2h},
-            attrs={"x_num_col_dims": 1,
-                   "y_num_col_dims": 1})
-
-        self._helper.append_op(
-            type="elementwise_add",
-            inputs={'X': tmp_h2h,
-                    'Y': tmp_i2h},
-            outputs={'Out': hidden},
-            attrs={'axis': -1,
-                   'use_mkldnn': False})
-
+        tmp_i2h = paddle.fluid.layers.nn.mul(input, self._i2h_w)
+        tmp_h2h = paddle.fluid.layers.nn.mul(pre_hidden, self._h2h_w)
+        hidden = paddle.add(tmp_h2h, tmp_i2h)
         hidden = self._helper.append_activation(hidden, act='tanh')
-
-        self._helper.append_op(
-            type="mul",
-            inputs={"X": hidden,
-                    "Y": self._h2o_w},
-            outputs={"Out": out},
-            attrs={"x_num_col_dims": 1,
-                   "y_num_col_dims": 1})
-
-        self._helper.append_op(
-            type="softmax",
-            inputs={"X": out},
-            outputs={"Out": softmax_out},
-            attrs={"use_cudnn": False})
-
-        self._helper.append_op(
-            type='reduce_sum',
-            inputs={'X': softmax_out},
-            outputs={'Out': reduce_out},
-            attrs={'keep_dim': False,
-                   'reduce_all': True})
-
+        out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
+        softmax_out = paddle.nn.functional.softmax(out)
+        reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out)
         return reduce_out, hidden
@@ -394,12 +349,17 @@ class TestImperative(unittest.TestCase):
             a = inputs2[0].gradient()
             self.assertTrue(np.allclose(inputs2[0].gradient(), x))

-    def test_empty_var(self):
+    def func_empty_var(self):
         with fluid.dygraph.guard():
             cur_program = fluid.Program()
             cur_block = cur_program.current_block()
-            new_variable = cur_block.create_var(
-                name="X", shape=[-1, 23, 48], dtype='float32')
+            # Normally, we don't allow tensor with -1 shape being created in dygraph mode, this test is not good.
+            if not _in_eager_mode():
+                new_variable = cur_block.create_var(
+                    name="X", shape=[-1, 23, 48], dtype='float32')
+            else:
+                new_variable = cur_block.create_var(
+                    name="X", shape=[1, 23, 48], dtype='float32')
             try:
                 new_variable.numpy()
             except Exception as e:
@@ -409,21 +369,25 @@ class TestImperative(unittest.TestCase):
                 new_variable.backward()
             except Exception as e:
                 assert type(e) == core.EnforceNotMet
-            try:
-                new_variable.clear_gradient()
-            except Exception as e:
-                assert type(e) == core.EnforceNotMet
+            # TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
+            if not _in_eager_mode():
+                try:
+                    new_variable.clear_gradient()
+                except Exception as e:
+                    assert type(e) == core.EnforceNotMet

-    def test_empty_grad(self):
+    def test_empty_var(self):
+        with _test_eager_guard():
+            self.func_empty_var()
+        self.func_empty_var()
+
+    def func_empty_grad(self):
         with fluid.dygraph.guard():
             x = np.ones([2, 2], np.float32)
             new_var = paddle.to_tensor(x)
-            try:
-                new_var.gradient()
-            except Exception as e:
-                assert type(e) == ValueError
-            try:
-                new_var.clear_gradient()
-            except Exception as e:
+            self.assertIsNone(new_var.gradient())
+            # TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
+            if not _in_eager_mode():
+                try:
+                    new_var.clear_gradient()
+                except Exception as e:
@@ -432,14 +396,24 @@ class TestImperative(unittest.TestCase):
         with fluid.dygraph.guard():
             cur_program = fluid.Program()
             cur_block = cur_program.current_block()
-            new_variable = cur_block.create_var(
-                name="X", shape=[-1, 23, 48], dtype='float32')
+            # Normally, we don't allow tensor with -1 shape being created in dygraph mode, this test is not good.
+            if not _in_eager_mode():
+                new_variable = cur_block.create_var(
+                    name="X", shape=[-1, 23, 48], dtype='float32')
+            else:
+                new_variable = cur_block.create_var(
+                    name="X", shape=[1, 23, 48], dtype='float32')
             try:
                 new_variable.gradient()
             except Exception as e:
                 assert type(e) == ValueError

-    def test_set_persistable(self):
+    def test_empty_grad(self):
+        with _test_eager_guard():
+            self.func_empty_grad()
+        self.func_empty_grad()
+
+    def func_set_persistable(self):
         with fluid.dygraph.guard():
             x = np.ones([2, 2], np.float32)
             new_var = paddle.to_tensor(x)
@@ -447,12 +421,22 @@ class TestImperative(unittest.TestCase):
             new_var.persistable = True
             self.assertTrue(new_var.persistable)

-    def test_layer(self):
+    def test_set_persistable(self):
+        with _test_eager_guard():
+            self.func_set_persistable()
+        self.func_set_persistable()
+
+    def func_layer(self):
         with fluid.dygraph.guard():
             l = fluid.Layer("l")
             self.assertRaises(NotImplementedError, l.forward, [])

-    def test_layer_in_out(self):
+    def test_layer(self):
+        with _test_eager_guard():
+            self.func_layer()
+        self.func_layer()
+
+    def func_layer_in_out(self):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.dygraph.guard():
             var_inp = paddle.to_tensor(np_inp)
@@ -489,12 +473,17 @@ class TestImperative(unittest.TestCase):
                 feed={inp.name: np_inp},
                 fetch_list=[x.name, param_grads[1].name])

-        self.assertTrue(np.allclose(dy_out, static_out))
-        self.assertTrue(np.allclose(dy_grad, static_grad))
-        self.assertTrue(np.allclose(dy_out2, static_out))
-        self.assertTrue(np.allclose(dy_grad2, static_grad))
+        self.assertTrue(np.array_equal(dy_out, static_out))
+        self.assertTrue(np.array_equal(dy_grad, static_grad))
+        self.assertTrue(np.array_equal(dy_out2, static_out))
+        self.assertTrue(np.array_equal(dy_grad2, static_grad))

-    def test_mlp(self):
+    def test_layer_in_out(self):
+        with _test_eager_guard():
+            self.func_layer_in_out()
+        self.func_layer_in_out()
+
+    def func_mlp(self):
         np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
         with fluid.dygraph.guard():
             var_inp = paddle.to_tensor(np_inp)
@@ -545,6 +534,11 @@ class TestImperative(unittest.TestCase):
         self.assertEqual(mlp._linear2, sublayers[1])
         self.assertEqual(len(sublayers), 2)

+    def test_mlp(self):
+        with _test_eager_guard():
+            self.func_mlp()
+        self.func_mlp()
+
     def test_gradient_accumulation(self):
         def test_single_api(sort_sum_gradient):
             fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
@@ -677,7 +671,7 @@ class TestImperative(unittest.TestCase):
         test_mlp(False)
         test_mlp(True)

-    def test_dygraph_vs_static(self):
+    def func_dygraph_vs_static(self):
         np_inp1 = np.random.rand(4, 3, 3)
         np_inp2 = np.random.rand(4, 3, 3)
@@ -728,7 +722,12 @@ class TestImperative(unittest.TestCase):
                 fetch_list=out)[0]
             self.assertTrue(np.allclose(dygraph_result, static_result))

-    def test_rnn(self):
+    def test_dygraph_vs_static(self):
+        with _test_eager_guard():
+            self.func_dygraph_vs_static()
+        self.func_dygraph_vs_static()
+
+    def func_rnn(self):
         np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
                            [10.0, 11.0, 12.0]])
         np_inp = np_inp.reshape((1, 4, 3))
@@ -771,14 +770,19 @@ class TestImperative(unittest.TestCase):
                 param_grads[1][1].name, param_grads[2][1].name
             ])

-        self.assertTrue(np.allclose(dy_out, static_out))
-        self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
-        self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
-        self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
-        self.assertTrue(np.allclose(dy_out2, static_out))
-        self.assertTrue(np.allclose(dy_grad_h2o2, static_grad_h2o))
-        self.assertTrue(np.allclose(dy_grad_h2h2, static_grad_h2h))
-        self.assertTrue(np.allclose(dy_grad_i2h2, static_grad_i2h))
+        self.assertTrue(np.array_equal(dy_out, static_out))
+        self.assertTrue(np.array_equal(dy_grad_h2o, static_grad_h2o))
+        self.assertTrue(np.array_equal(dy_grad_h2h, static_grad_h2h))
+        self.assertTrue(np.array_equal(dy_grad_i2h, static_grad_i2h))
+        self.assertTrue(np.array_equal(dy_out2, static_out))
+        self.assertTrue(np.array_equal(dy_grad_h2o2, static_grad_h2o))
+        self.assertTrue(np.array_equal(dy_grad_h2h2, static_grad_h2h))
+        self.assertTrue(np.array_equal(dy_grad_i2h2, static_grad_i2h))
+
+    def test_rnn(self):
+        with _test_eager_guard():
+            self.func_rnn()
+        self.func_rnn()

     def func_layer_attrs(self):
         layer = fluid.dygraph.Layer("test")
...