Unverified commit d48f7c89, authored by Jiabin Yang, committed by GitHub

Support test imperative basic in eager (#38313)

* Rearranged Eager AutoCodeGen directory structure

* Removed USE_OP in Eager AutoCodeGen

* Enabled generation for Operators without Grad/Inputs/Outputs

* Resolved operators without input

* Fixed merge conflicts

* Enabled Eager AutoCodeGen for 10+ more operators

* Refactored Eager AutoCodeGen with more organized helper objects

* Enabled Eager AutoCodeGen for operators with multiple OpBases

* Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument

* Handled Dispensable Inputs/Outputs in Eager AutoCodeGen

* Adjusted function generation/call between Python-C API & Dygraph API

* Synchronized auto-generated Python-C API with Dygraph Forward Functions

* support more eager tensor api

* fix merge compile error

* fix compile error and fit develop code

* support pure CPU

* fix some logic error in eager_mode

* support _varbase_creator in eager mode

* Added safe_initialized interface to EagerTensor for use in processing dispensable inputs

* for eager mode

* refine

* support multiple constructor for eager tensor

* add place related code

* polish code

* specific randint with dtype of int64

* Support pure cpu test

* eager logic

* refine test in pure cpu

* eager logic

* eager logic

* eager logic, test=develop

* skip core.eager when in inference, test=develop

* refine, test=develop

* refine, test=develop

* call RetainGrad after run forward kernel, test=develop

* refine, test=develop

* support dygraph util, meta, guard test

* support inference test

* refine test and fix initializer failed
Co-authored-by: jim19930609 <jim19930609@gmail.com>
Co-authored-by: Wang Huan <wanghuan29@baidu.com>
Parent 8da9eff4
@@ -15,7 +15,7 @@ cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api)
 cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation)
 cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api)
-cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta)
+cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta hook_utils)
 cc_library(legacy SRCS ${DYGRAPH_LEGACY} DEPS global_utils proto_desc operator pten pten_api op_registry variable_helper memcpy)
 cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
......
@@ -32,6 +32,8 @@ class GradNodeAccumulation : public GradNodeBase {
   void RetainGrad(
       const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);
+  egr::EagerTensor Grad() { return accumulated_grad; }
 private:
   egr::EagerTensor accumulated_grad;
......
@@ -80,7 +80,7 @@ egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias,
   scale_node->SetAttributes_scale(scale);
   // Set Next Edges
-  scale_node->AddEdges(*p_autograd_in, /*slot id*/ 0);
+  scale_node->AddEdges(p_autograd_in, /*slot id*/ 0);
   // Set TensorWrappers
   scale_node->SetTensorWrappers_X({x});
......
@@ -63,7 +63,7 @@ class Controller {
   void SetCurrentTracer(
       const std::shared_ptr<paddle::imperative::Tracer>& tracer) {
     tracer_ = tracer;
-    VLOG(6) << "Set current tracer: " << tracer_;
+    VLOG(6) << "Set current tracer for Controller: " << tracer_;
   }
   bool InEagerMode() const { return in_eager_mode_; }
......
@@ -888,7 +888,7 @@ static std::string GenerateGradNodeCreationContent(
     if (input.duplicable()) {
       const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
           " std::vector<egr::AutogradMeta*> %s = "
-          "egr::EagerUtils::unsafe_autograd_meta(%s);\n";
+          "egr::EagerUtils::nullable_autograd_meta(%s);\n";
       get_autograd_meta_str += paddle::string::Sprintf(
           GET_MULTI_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name);
@@ -902,7 +902,7 @@ static std::string GenerateGradNodeCreationContent(
     } else {
       const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
           " egr::AutogradMeta& %s = "
-          "*egr::EagerUtils::unsafe_autograd_meta(%s);\n";
+          "*egr::EagerUtils::nullable_autograd_meta(%s);\n";
       get_autograd_meta_str += paddle::string::Sprintf(
           GET_SINGLE_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name);
     }
@@ -999,11 +999,17 @@ static std::string GenerateGradNodeCreationContent(
                                   input_position);
       const char* ADD_EDGES_TEMPLATE =
-          " if(%s) grad_node->AddEdges(*%s, %d);\n";
+          " if(%s) grad_node->AddEdges(%s, %d);\n";
       grad_node_creation_str +=
           paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name,
                                   input_autograd_name, input_position);
+      VLOG(6) << "Generated Call RetainGradForTensor";
+      const char* RETAIN_GRAD_TEMPLATE =
+          " egr::EagerUtils::CheckAndRetainGrad(%s);\n";
+      grad_node_creation_str +=
+          paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, input_name);
     } else {
       compute_require_grad_args += ", &" + input_autograd_name;
       size_t input_position = fwd_inputs_name_pos_map.at(input_name);
@@ -1013,7 +1019,7 @@ static std::string GenerateGradNodeCreationContent(
       grad_node_creation_str += paddle::string::Sprintf(
           SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position);
-      const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(%s, %d);\n";
+      const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
           ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
     }
@@ -1197,23 +1203,23 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
     if (op_passing_outs_map[op_type].count(output_name)) {
       const std::string output_var_name = output_name + "Var";
-      // Pass Output from function argument,
+      // Pass Output from function argument(EagerTensor*/vector<EagerTensor*>&),
       // in form of shared_ptr<EagerTensor>/vector<shared_ptr<EagerTensor>>
       if (output.duplicable()) {
         const char* FWD_NUM_ARG_TEMPLATE =
-            ", std::vector<egr::EagerTensor>& %s";
+            ", std::vector<egr::EagerTensor*>& %s";
         std::string arg_str =
             paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
         dygraph_function_args_str += arg_str;
       } else {
-        const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor& %s";
+        const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor* %s";
         std::string arg_str =
             paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
         dygraph_function_args_str += arg_str;
       }
       const char* FWD_OUTS_CONTENT_TEMPLATE =
-          "{ \"%s\", egr::EagerUtils::TrySyncToVars(&%s) },";
+          "{ \"%s\", egr::EagerUtils::TrySyncToVars(%s) },";
       outs_contents_str += paddle::string::Sprintf(
           FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
@@ -1315,6 +1321,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
         GenerateGradNodeCreationContent(fwd_info, bwd_info);
     generated_function_body += grad_node_creation_body_str;
     generated_function_body += "\n";
+    // [Generation] Call RetainGradForTensor
     VLOG(6) << "Generated GradNode Creation codes";
   }
......
@@ -120,6 +120,10 @@ class AutogradMeta : public AbstractAutogradMeta {
   void SetPersistable(bool persistable) { persistable_ = persistable; }
+  bool RetainGrads() { return retain_grads_; }
+  void SetRetainGrads(bool value) { retain_grads_ = value; }
 private:
   // TODO(jiabin) :Should we use pointer instead of object?
   egr::EagerTensor grad_;
@@ -149,6 +153,8 @@ class AutogradMeta : public AbstractAutogradMeta {
   bool persistable_{false};
+  bool retain_grads_{false};
   // TODO(jiabin) :Support Quantum here and add cache mechanism as
   // VarCache defined in VarBase
 };
......
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/eager/grad_node_info.h"
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/dense_tensor.h"
@@ -35,6 +36,29 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
   adj_edges_.resize(bwd_out_slot_num);
 }
+void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
+  PADDLE_ENFORCE_LT(
+      slot_id, adj_edges_.size(),
+      paddle::platform::errors::InvalidArgument(
+          "Given slot id is out of range of adj_edges outter size, "
+          "adj_edges is designed to has the same size of grad "
+          "inputs's slot num."));
+  for (const auto& meta : *metas) {
+    // adj_edges has as same rank as fwd inputs, and record it's output rank
+    // from
+    // its pre-ops
+    auto node = meta->GetMutableGradNode();
+    if (node) {
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
+    } else {
+      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
+    }
+  }
+}
 void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
                             size_t slot_id) {
   PADDLE_ENFORCE_LT(
@@ -47,20 +71,34 @@ void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                     meta->OutRankInfo());
+    auto node = meta->GetMutableGradNode();
+    if (node) {
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
+    } else {
+      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
+    }
   }
 }
-void GradNodeBase::AddEdges(const AutogradMeta& meta, size_t slot_id) {
+void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
   PADDLE_ENFORCE_LT(
       slot_id, adj_edges_.size(),
       paddle::platform::errors::InvalidArgument(
          "Given slot id is out of range of adj_edges outter size, "
          "adj_edges is designed to has the same size of grad "
          "inputs's slot num."));
-  adj_edges_[slot_id].emplace_back(meta.GetMutableGradNode(),
-                                   meta.OutRankInfo());
+  auto node = meta->GetMutableGradNode();
+  if (node) {
+    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                     meta->OutRankInfo());
+  } else {
+    meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                     meta->OutRankInfo());
+  }
 }
 const std::vector<GradSlotMeta>& GradNodeBase::InputMeta() const {
......
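Note: all three AddEdges overloads above now share the same fallback — if the producing AutogradMeta has no grad node yet (a leaf tensor), a GradNodeAccumulation is attached on the spot so gradients have somewhere to accumulate. A standalone sketch of that get-or-create idiom, using stand-in types rather than the real egr classes:

    #include <memory>
    #include <vector>

    // Stand-ins for GradNodeBase / GradNodeAccumulation / AutogradMeta; illustrative only.
    struct Node { virtual ~Node() = default; };
    struct AccumulationNode : Node {};

    struct Meta {
      std::shared_ptr<Node> grad_node;
      std::shared_ptr<Node> GetMutableGradNode() { return grad_node; }
      void SetGradNode(std::shared_ptr<Node> n) { grad_node = std::move(n); }
    };

    // Mirrors the new AddEdges fallback: link to the existing grad node,
    // or lazily attach an accumulation node when the meta is a leaf.
    void AddEdge(std::vector<std::shared_ptr<Node>>* edges, Meta* meta) {
      if (!meta->GetMutableGradNode()) {
        meta->SetGradNode(std::make_shared<AccumulationNode>());
      }
      edges->push_back(meta->GetMutableGradNode());
    }

    int main() {
      Meta leaf;  // no grad node yet
      std::vector<std::shared_ptr<Node>> slot;
      AddEdge(&slot, &leaf);  // an AccumulationNode is created here
      return slot.size() == 1 ? 0 : 1;
    }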
@@ -105,8 +105,9 @@ class GradNodeBase {
    *
    * This one is called slot by slot
    * **/
+  void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
   void AddEdges(const std::vector<AutogradMeta*>& metas, size_t slot_id);
-  void AddEdges(const AutogradMeta& meta, size_t slot_id);
+  void AddEdges(AutogradMeta* meta, size_t slot_id);
   /**
    * GetEdges is designed to get all edges of current node**/
......
@@ -153,7 +153,8 @@ class TensorRuntimeInferVarTypeContext
   paddle::framework::proto::VarType::Type GetOutputType(
       const std::string& name, const int& index = 0) const override {
-    return paddle::framework::ToVarType(outputs_.at(name)[index]->Var().Type());
+    // TODO(jiabin): Support SelectedRows when we have it.
+    return paddle::framework::proto::VarType::LOD_TENSOR;
   }
   paddle::framework::proto::VarType::Type GetInputDataType(
......
@@ -37,6 +37,7 @@ void OpRunImpl(const paddle::framework::OperatorBase& op,
                const paddle::framework::AttributeMap& attrs,
                const paddle::framework::AttributeMap& default_attrs,
                const paddle::platform::Place& place) {
+  VLOG(6) << "Get Opertor With Kernel";
   auto* op_kernel =
       dynamic_cast<const paddle::framework::OperatorWithKernel*>(&op);
   PADDLE_ENFORCE_NOT_NULL(
@@ -44,11 +45,13 @@ void OpRunImpl(const paddle::framework::OperatorBase& op,
           "Only support operator with kernel in Dygraph mode."));
   auto& info = op.Info();
   if (info.infer_var_type_) {
+    VLOG(6) << "Run InferVarType";
     egr::legacy::TensorRuntimeInferVarTypeContext infer_var_type_ctx(
         ins, outs, attrs, default_attrs);
+    VLOG(9) << "Actual Run InferVarType";
     info.infer_var_type_(&infer_var_type_ctx);
   }
+  VLOG(6) << "Initialize output tensor";
   // Initialize output tensor
   for (auto& tensor_pair : outs) {
     for (auto& tensor : tensor_pair.second) {
@@ -77,10 +80,13 @@ void OpRunImpl(const paddle::framework::OperatorBase& op,
    * after the execution of op, but the original input is directly
    * overwritten in the previous dynamic graph implemention.
    */
+  VLOG(6) << "Prepare Op";
   auto prepared_op = egr::legacy::PreparedOp::Prepare(
       ins, outs, *op_kernel, place, attrs, default_attrs);
+  VLOG(6) << "Prepare Data";
   auto tmp_ins_ptr =
       egr::legacy::PrepareData(*op_kernel, ins, prepared_op.kernel_type());
+  VLOG(6) << "Run Prepared Op";
   if (tmp_ins_ptr == nullptr) {
     prepared_op.Run(ins, outs, attrs, default_attrs);
   } else {
@@ -130,6 +136,7 @@ void RunOp(const std::string& type, const NameTensorMap& ins,
   }
   auto amp_level = egr::Controller::Instance().GetAMPLevel();
+  VLOG(6) << "Check AMP status";
   NameTensorMap new_ins = ins;
   if (amp_level == paddle::imperative::AmpLevel::O1) {
     VLOG(5) << "Auto mixed precision run operator: " << type;
@@ -140,6 +147,7 @@ void RunOp(const std::string& type, const NameTensorMap& ins,
   }
   try {
+    VLOG(6) << "Get Device id";
     if (paddle::platform::is_gpu_place(place)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
       paddle::platform::SetDeviceId(
@@ -165,7 +173,7 @@ void RunOp(const std::string& type, const NameTensorMap& ins,
           "PaddlePaddle should compile with NPU if use NPUPlace."));
 #endif
     }
+    VLOG(6) << "Step in OpRunImpl";
     OpRunImpl(*op, new_ins, outs, attrs, *default_attrs, place);
   } catch (paddle::platform::EnforceNotMet& exception) {
     paddle::framework::AppendErrorOpHint(type, &exception);
@@ -182,7 +190,7 @@ void RunOp(const std::string& type, const NameTensorMap& ins,
     PADDLE_THROW(paddle::platform::errors::Fatal(
         "Operator %s raises an unknown exception.", type));
   }
+  VLOG(6) << "Finish Run Op";
   // TODO(jiabin): Support this later
   // if (enable_program_desc_tracing_) {
   //   VLOG(5) << "Trace op " << type << " into ProgramDesc";
......
@@ -76,6 +76,7 @@ PreparedOp PrepareImpl(const NameTensorMap& ins, const NameTensorMap& outs,
                        const paddle::platform::Place& place,
                        const paddle::framework::AttributeMap& attrs,
                        const paddle::framework::AttributeMap& default_attrs) {
+  VLOG(6) << "Preparing an Op";
   paddle::platform::DeviceContextPool& pool =
       paddle::platform::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);
@@ -146,7 +147,7 @@ PreparedOp PrepareImpl(const NameTensorMap& ins, const NameTensorMap& outs,
   if (!(expected_kernel_key.place_ == place)) {
     dev_ctx = pool.Get(expected_kernel_key.place_);
   }
+  VLOG(6) << "Construct Prepared Op";
   return PreparedOp(op, ctx, expected_kernel_key, kernel_iter->second, dev_ctx);
 }
@@ -168,6 +169,7 @@ static void PreparedOpRunImpl(
     const NameTensorMap& outs, const paddle::framework::AttributeMap& attrs,
     const paddle::framework::AttributeMap& default_attrs) {
   // TODO(zjl): remove scope in dygraph
+  VLOG(6) << "Runing Prepared Op";
   paddle::framework::Scope scope;
   EagerInferShapeContext infer_shape_ctx(&ins, &outs, &attrs, &default_attrs,
@@ -198,6 +200,7 @@ static void PreparedOpRunImpl(
   if (paddle::framework::IsComplexType(kernel_type.data_type_)) {
     HandleComplexGradToRealGrad(outs);
   }
+  VLOG(6) << "Finish Runing Prepared Op";
 }
 void PreparedOp::Run(const NameTensorMap& ins, const NameTensorMap& outs,
......
@@ -58,7 +58,7 @@ TEST(GradNodeInfo, GradNodeBase) {
   auto auto_grad0 = std::make_shared<egr::AutogradMeta>(edge0);
   egr::Edge edge1(grad_test_node1, 3, 4);
   auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);
-  grad_test_node0->AddEdges((*auto_grad0.get()), 0);
+  grad_test_node0->AddEdges(auto_grad0.get(), 0);
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
            size_t(1));
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
......
@@ -14,6 +14,7 @@
 #include "paddle/fluid/eager/utils.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
+#include "paddle/fluid/eager/api/utils/hook_utils.h"
 #include "paddle/fluid/eager/tensor_wrapper.h"
 #include "paddle/pten/api/all.h"
@@ -24,6 +25,9 @@
 #include "paddle/fluid/framework/pten_utils.h"
 #include "paddle/fluid/framework/variable.h"
+PADDLE_DEFINE_EXPORTED_bool(retain_grad_for_all_tensor, true,
+                            "retain grad for all tensor");
 namespace egr {
 /**
  * Implementation of Eager Utils.
@@ -50,8 +54,9 @@ AutogradMeta* EagerUtils::unsafe_autograd_meta(const egr::EagerTensor& target) {
 std::vector<AutogradMeta*> EagerUtils::unsafe_autograd_meta(
     const std::vector<egr::EagerTensor>& targets) {
   std::vector<AutogradMeta*> metas;
+  metas.reserve(targets.size());
   for (const egr::EagerTensor& t : targets) {
-    metas.push_back(unsafe_autograd_meta(t));
+    metas.emplace_back(unsafe_autograd_meta(t));
   }
   return metas;
 }
@@ -64,6 +69,16 @@ AutogradMeta* EagerUtils::nullable_autograd_meta(
   return static_cast<AutogradMeta*>(p_autograd_meta);
 }
+std::vector<AutogradMeta*> EagerUtils::nullable_autograd_meta(
+    const std::vector<egr::EagerTensor>& targets) {
+  std::vector<AutogradMeta*> metas;
+  metas.reserve(targets.size());
+  for (const egr::EagerTensor& t : targets) {
+    metas.emplace_back(nullable_autograd_meta(t));
+  }
+  return metas;
+}
 std::vector<AutogradMeta*> EagerUtils::multi_autograd_meta(
     std::vector<egr::EagerTensor>* targets) {
   std::vector<AutogradMeta*> ret;
@@ -140,7 +155,8 @@ static std::shared_ptr<egr::EagerTensor> TrySyncToVar(
   if (tensor->initialized() || tensor->Var().IsInitialized()) {
     tensor->SyncToVar(paddle::framework::proto::VarType_Type_LOD_TENSOR);
   }
-  return std::make_shared<EagerTensor>(*tensor);
+  return std::shared_ptr<egr::EagerTensor>(tensor,
+                                           [&](egr::EagerTensor* ptr) {});
 }
 std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
@@ -159,6 +175,17 @@ std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
   return res;
 }
+std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
+    const std::vector<egr::EagerTensor*>& tensors) {
+  std::vector<std::shared_ptr<EagerTensor>> res;
+  size_t num = tensors.size();
+  res.reserve(num);
+  for (size_t i = 0; i < num; i++) {
+    res.emplace_back(TrySyncToVar(tensors[i]));
+  }
+  return res;
+}
 /* ---- VarBase -> Tensor ---- */
 std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::SyncToTensors(
     const egr::EagerTensor& tensor) {
@@ -236,4 +263,21 @@ std::vector<EagerTensor> EagerUtils::RecoverTensorWrapper(
   return ret;
 }
+void EagerUtils::CheckAndRetainGrad(const egr::EagerTensor& tensor) {
+  VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
+  if (FLAGS_retain_grad_for_all_tensor) {
+    egr::egr_utils_api::RetainGradForTensor(tensor);
+  }
+}
+void EagerUtils::CheckAndRetainGrad(
+    const std::vector<egr::EagerTensor>& tensors) {
+  if (FLAGS_retain_grad_for_all_tensor) {
+    for (auto& tensor : tensors) {
+      VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
+      egr::egr_utils_api::RetainGradForTensor(tensor);
+    }
+  }
+}
 }  // namespace egr
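Note: TrySyncToVar used to copy the tensor into a fresh shared_ptr; it now wraps the caller's raw pointer in a shared_ptr with a no-op deleter, so the outs map aliases the passed-in output instead of a copy. A minimal, standalone illustration of that pattern (plain stand-in struct, not Paddle types):

    #include <cassert>
    #include <memory>

    struct Tensor { int value = 0; };

    // Wrap a caller-owned object in a shared_ptr that never deletes it,
    // so writes through the shared_ptr are visible to the caller.
    std::shared_ptr<Tensor> NonOwning(Tensor* t) {
      return std::shared_ptr<Tensor>(t, [](Tensor*) {});
    }

    int main() {
      Tensor out;
      auto alias = NonOwning(&out);
      alias->value = 42;        // mutates the caller's tensor directly
      assert(out.value == 42);  // no copy was made
    }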
@@ -116,6 +116,8 @@ class EagerUtils {
   // This method will return an AutogradMeta pointer unsafely.
   static AutogradMeta* nullable_autograd_meta(const egr::EagerTensor& target);
+  static std::vector<AutogradMeta*> nullable_autograd_meta(
+      const std::vector<egr::EagerTensor>& targets);
   static AutogradMeta* unsafe_autograd_meta(const egr::EagerTensor& target);
   static std::vector<AutogradMeta*> unsafe_autograd_meta(
       const std::vector<egr::EagerTensor>& targets);
@@ -149,6 +151,8 @@ class EagerUtils {
       egr::EagerTensor* tensor);
   static std::vector<std::shared_ptr<egr::EagerTensor>> TrySyncToVars(
       std::vector<egr::EagerTensor>* tensors);
+  static std::vector<std::shared_ptr<egr::EagerTensor>> TrySyncToVars(
+      const std::vector<egr::EagerTensor*>& tensors);
   static std::vector<std::shared_ptr<egr::EagerTensor>> SyncToVars(
       const egr::EagerTensor& tensor);
@@ -163,6 +167,9 @@ class EagerUtils {
   static std::vector<egr::EagerTensor> GetOutputs(
       const std::vector<std::shared_ptr<EagerTensor>>& outs);
   static egr::EagerTensor GetOutput(const std::shared_ptr<EagerTensor>& outs);
+  static void CheckAndRetainGrad(const egr::EagerTensor& tensor);
+  static void CheckAndRetainGrad(const std::vector<egr::EagerTensor>& tensors);
 };
 }  // namespace egr
@@ -14,6 +14,7 @@ limitations under the License. */
 #include <string>
 #include <vector>
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/eager/api/all.h"
 #include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/fluid/eager/utils.h"
@@ -72,6 +73,17 @@ void EmptyEagerTensorInitializer(
         pten::DenseTensorMeta(pten::TransToPtenDataType(dtype),
                               paddle::framework::make_ddim(dims)));
     self->eager_tensor.set_impl(dense_tensor);
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "We only support LoDTensor to be constructed by this initializer, "
+        "please check your var type first and make sure you are going to "
+        "construct LoDTensor."));
+  }
+  if (!autograd_meta->GetMutableGradNode()) {
+    VLOG(3) << "Tensor(" << name
+            << ") have not GradNode, add GradNodeAccumulation for it.";
+    autograd_meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
   }
 }
......
@@ -112,84 +112,6 @@ static PyObject* eager_api_scale(PyObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
-static PyObject* eager_api_numpy_to_tensor(PyObject* numpy_data,
-                                           pten::DataType dtype,
-                                           const paddle::platform::Place& place,
-                                           bool stop_gradient) {
-  std::vector<int64_t> vec_dims;
-  auto numpy_shape = pybind11::detail::array_proxy(numpy_data)->dimensions;
-  int rank = pybind11::detail::array_proxy(numpy_data)->nd;
-  for (int i = 0; i < rank; i++) {
-    vec_dims.push_back(static_cast<int64_t>(numpy_shape[i]));
-  }
-  paddle::framework::DDim dims = paddle::framework::make_ddim(vec_dims);
-  // TODO(jiabin): Support GPU later
-  auto meta = pten::DenseTensorMeta(dtype, dims);
-  auto holder = std::make_shared<EagerNumpyAllocation>(numpy_data, dtype);
-  auto shared_storage =
-      pten::make_intrusive<paddle::experimental::SharedStorage>(holder, 0);
-  std::shared_ptr<pten::DenseTensor> densetensor(
-      new pten::DenseTensor(std::move(shared_storage), std::move(meta)));
-  PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0);
-  if (obj) {
-    auto v = reinterpret_cast<EagerTensorObject*>(obj);
-    new (&(v->eager_tensor)) egr::EagerTensor();
-    v->eager_tensor.set_impl(densetensor);
-    v->eager_tensor.set_name(egr::Controller::Instance().GenerateUniqueName());
-    auto meta = egr::EagerUtils::autograd_meta(&(v->eager_tensor));
-    meta->SetStopGradient(stop_gradient);
-    // Created tensor will be leaf tensor
-    // So we append AccumulationNode to it.
-    auto accumulation_node = std::make_shared<egr::GradNodeAccumulation>();
-    meta->SetGradNode(accumulation_node);
-    // TODO(jiabin): Shall we increase ref cnt here to make python ref cnt num
-    // correctly?
-  } else {
-    PADDLE_THROW(platform::errors::Fatal(
-        "tp_alloc return null, can not new a PyObject."));
-  }
-  return obj;
-}
-static PyObject* eager_api_to_tensor(PyObject* self, PyObject* args,
-                                     PyObject* kwargs) {
-  EAGER_TRY
-  // TODO(jiabin): Support Kwargs here
-  PyObject* data = PyTuple_GET_ITEM(args, 0);
-  auto str_dtype = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 1), 1);
-  pten::DataType dtype = pten::String2DataType(str_dtype);
-  auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 2), 2);
-  bool stop_gradient = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);
-  // TODO(jiabin): Support this when python given name
-  // auto str_name = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 4), 4);
-  if (pybind11::detail::npy_api::get().PyArray_Check_(data)) {
-    return eager_api_numpy_to_tensor(data, dtype, place, stop_gradient);
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "Eater to_tensor only support numpy to tensor."));
-    Py_INCREF(Py_None);
-    return Py_None;
-  }
-  EAGER_CATCH_AND_THROW_RETURN_NULL
-}
-static PyObject* eager_api_retain_grad_for_tensor(PyObject* self,
-                                                  PyObject* args,
-                                                  PyObject* kwargs) {
-  EAGER_TRY
-  egr::egr_utils_api::RetainGradForTensor(
-      CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0));
-  Py_INCREF(Py_None);
-  return Py_None;
-  EAGER_CATCH_AND_THROW_RETURN_NULL
-}
 static PyObject* eager_api_run_backward(PyObject* self, PyObject* args,
                                         PyObject* kwargs) {
   EAGER_TRY
@@ -203,9 +125,29 @@ static PyObject* eager_api_run_backward(PyObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
+static PyObject* eager_api_tensor_copy(PyObject* self, PyObject* args,
+                                       PyObject* kwargs) {
+  EAGER_TRY
+  egr::EagerTensor& src =
+      reinterpret_cast<EagerTensorObject*>(PyTuple_GET_ITEM(args, 0))
+          ->eager_tensor;
+  egr::EagerTensor& dst =
+      reinterpret_cast<EagerTensorObject*>(PyTuple_GET_ITEM(args, 1))
+          ->eager_tensor;
+  auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 2), 2);
+  bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);
+  dst = src.copy_to(pten::TransToPtenBackend(place), blocking);
+  egr::EagerUtils::autograd_meta(&dst)->SetStopGradient(
+      egr::EagerUtils::autograd_meta(&(src))->StopGradient());
+  egr::EagerUtils::autograd_meta(&dst)->SetPersistable(
+      egr::EagerUtils::autograd_meta(&(src))->Persistable());
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
 PyMethodDef variable_functions[] = {
-    {"to_tensor", (PyCFunction)(void (*)(void))eager_api_to_tensor,
-     METH_VARARGS | METH_KEYWORDS, NULL},
     {"scale", (PyCFunction)(void (*)(void))eager_api_scale,
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"_set_expected_place",
@@ -214,11 +156,10 @@ PyMethodDef variable_functions[] = {
    {"_get_expected_place",
     (PyCFunction)(void (*)(void))eager_api_get_expected_place,
     METH_VARARGS | METH_KEYWORDS, NULL},
-    {"retain_grad_for_tensor",
-     (PyCFunction)(void (*)(void))eager_api_retain_grad_for_tensor,
-     METH_VARARGS | METH_KEYWORDS, NULL},
    {"run_backward", (PyCFunction)(void (*)(void))eager_api_run_backward,
     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"tensor_copy", (PyCFunction)(void (*)(void))eager_api_tensor_copy,
+     METH_VARARGS | METH_KEYWORDS, NULL},
    {NULL, NULL, 0, NULL}};
 void BindFunctions(PyObject* module) {
......
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/eager/api/all.h"
 #include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/fluid/eager/utils.h"
@@ -120,6 +121,8 @@ static PyObject* eager_tensor_method_copy_(EagerTensorObject* self,
   egr::EagerTensor src_tensor =
       CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0);
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
+  VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
+          << self->eager_tensor.name();
   self->eager_tensor.copy_(src_tensor, blocking);
   egr::EagerUtils::autograd_meta(&(self->eager_tensor))
       ->SetStopGradient(
@@ -127,6 +130,23 @@ static PyObject* eager_tensor_method_copy_(EagerTensorObject* self,
   egr::EagerUtils::autograd_meta(&(self->eager_tensor))
       ->SetPersistable(
           egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+  VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
+          << self->eager_tensor.name();
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+static PyObject* eager_tensor_retain_grads(EagerTensorObject* self,
+                                           PyObject* args, PyObject* kwargs) {
+  EAGER_TRY
+  auto meta = egr::EagerUtils::autograd_meta(&(self->eager_tensor));
+  if (!meta->GetMutableGradNode()) {
+    VLOG(6) << "Make grad node of tensor: " << self->eager_tensor.name()
+            << "become accumulation node";
+    meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+  }
+  egr::egr_utils_api::RetainGradForTensor(self->eager_tensor);
   Py_INCREF(Py_None);
   return Py_None;
   EAGER_CATCH_AND_THROW_RETURN_NULL
@@ -142,6 +162,8 @@ PyMethodDef variable_methods[] = {
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"copy_", (PyCFunction)(void (*)(void))eager_tensor_method_copy_,
     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"retain_grads", (PyCFunction)(void (*)(void))eager_tensor_retain_grads,
+     METH_VARARGS | METH_KEYWORDS, NULL},
    {NULL, NULL, 0, NULL}};
 }  // namespace pybind
......
@@ -70,11 +70,17 @@ const char* OUT_VAR_TYPE = R"(std::shared_ptr<imperative::VarBase>)";
 const char* OUT_VAR_LIST_TYPE = R"(std::vector<std::shared_ptr<imperative::VarBase>>)";
 const char* CAST_VAR_TEMPLATE = R"(
-  auto %s = GetEagerTensorFromArgs("%s", "%s", args, %d, %s);)";
+  auto& %s = GetEagerTensorFromArgs("%s", "%s", args, %d, %s);)";
 const char* CAST_VAR_LIST_TEMPLATE = R"(
   auto %s = GetEagerTensorListFromArgs("%s", "%s", args, %d, %s);)";
+const char* CAST_VAR_PTR_TEMPLATE = R"(
+  auto %s = GetEagerTensorPtrFromArgs("%s", "%s", args, %d, %s);)";
+const char* CAST_VAR_PTR_LIST_TEMPLATE = R"(
+  auto %s = GetEagerTensorPtrListFromArgs("%s", "%s", args, %d, %s);)";
 const char* CAST_SIZE_T_TEMPLATE = R"(
   auto %s = GetUnsignedLongFromArgs("%s", "%s", args, %d, %s);)";
@@ -221,8 +227,8 @@ std::string GenerateOpFunctionsBody(
       outs_initializer += ",";
     }
-    const auto in_cast_type =
-        output.duplicable() ? CAST_VAR_LIST_TEMPLATE : CAST_VAR_TEMPLATE;
+    const auto in_cast_type = output.duplicable() ? CAST_VAR_PTR_LIST_TEMPLATE
+                                                  : CAST_VAR_PTR_TEMPLATE;
     auto dispensable = output.dispensable() ? "true" : "false";
     ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type,
                                             out_name, arg_idx++, dispensable);
......
@@ -14,7 +14,9 @@ limitations under the License. */
 #include <string>
 #include <vector>
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/eager/api/all.h"
+#include "paddle/fluid/eager/api/utils/tensor_utils.h"
 #include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/fluid/eager/utils.h"
 #include "paddle/fluid/memory/allocation/allocator.h"
@@ -60,8 +62,22 @@ PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self,
 PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
                                            void* closure) {
   EAGER_SYNC_TRY
-  auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
-  return ToPyObject(meta->Grad());
+  if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
+    // Add RetainGrad as PostHook to AccumulationNode
+    std::shared_ptr<egr::GradNodeBase> grad_node =
+        egr::EagerUtils::grad_node(self->eager_tensor);
+    PADDLE_ENFORCE(
+        grad_node.get() != nullptr,
+        paddle::platform::errors::Fatal("Detected NULL grad_node"
+                                        "Leaf tensor should have had grad_node "
+                                        "with type: GradNodeAccumulation"));
+    auto accumulation_grad_node =
+        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
+    return ToPyObject(accumulation_grad_node->Grad());
+  } else {
+    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
+    return ToPyObject(meta->Grad());
+  }
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
......
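Note: for leaf tensors the gradient now lives on the GradNodeAccumulation attached to the tensor, so the grad getter above downcasts the stored GradNodeBase pointer with std::dynamic_pointer_cast before calling Grad(). A standalone illustration of that retrieval pattern (stand-in types, not the real egr classes):

    #include <cassert>
    #include <memory>

    struct NodeBase { virtual ~NodeBase() = default; };
    struct AccumulationNode : NodeBase {
      int grad = 7;
      int Grad() const { return grad; }  // accumulated gradient lives here
    };

    int main() {
      // A leaf tensor's grad node, stored behind the base-class pointer.
      std::shared_ptr<NodeBase> node = std::make_shared<AccumulationNode>();

      // Downcast to reach the accumulation-specific state; yields null if
      // the node is not actually an AccumulationNode.
      auto acc = std::dynamic_pointer_cast<AccumulationNode>(node);
      assert(acc != nullptr);
      assert(acc->Grad() == 7);
    }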
@@ -199,7 +199,7 @@ std::vector<egr::EagerTensor> CastPyArg2VectorOfEagerTensor(PyObject* obj,
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "argument (position %d) must be "
-          "list of bool, but got %s at pos %d",
+          "list of Tensor, but got %s at pos %d",
           arg_pos + 1,
           reinterpret_cast<PyTypeObject*>(item->ob_type)->tp_name, i));
     }
@@ -216,7 +216,7 @@ std::vector<egr::EagerTensor> CastPyArg2VectorOfEagerTensor(PyObject* obj,
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "argument (position %d) must be "
-          "list of EagerTensor, but got %s at pos %d",
+          "list of Tensor, but got %s at pos %d",
           arg_pos + 1,
           reinterpret_cast<PyTypeObject*>(item->ob_type)->tp_name, i));
     }
@@ -478,10 +478,10 @@ PyObject* ToPyObject(
   return dict;
 }
-egr::EagerTensor GetEagerTensorFromArgs(const std::string& op_type,
-                                        const std::string& arg_name,
-                                        PyObject* args, ssize_t arg_idx,
-                                        bool dispensable) {
+egr::EagerTensor& GetEagerTensorFromArgs(const std::string& op_type,
+                                         const std::string& arg_name,
+                                         PyObject* args, ssize_t arg_idx,
+                                         bool dispensable) {
   PyObject* obj = PyTuple_GET_ITEM(args, arg_idx);
   if (PyTuple_Check(obj)) {
@@ -494,7 +494,7 @@ egr::EagerTensor GetEagerTensorFromArgs(const std::string& op_type,
          "%s(): argument '%s' (position %d) must be Tensor, but got None",
          op_type, arg_name, arg_idx));
     }
-    egr::EagerTensor emptytensor;
+    static egr::EagerTensor emptytensor;
     return emptytensor;
   }
@@ -555,5 +555,82 @@ std::vector<egr::EagerTensor> GetEagerTensorListFromArgs(
   return result;
 }
+egr::EagerTensor* GetEagerTensorPtrFromArgs(const std::string& op_type,
+                                            const std::string& arg_name,
+                                            PyObject* args, ssize_t arg_idx,
+                                            bool dispensable) {
+  PyObject* obj = PyTuple_GET_ITEM(args, arg_idx);
+  if (PyTuple_Check(obj)) {
+    obj = PyTuple_GET_ITEM(obj, 0);
+  }
+  if (obj == nullptr || obj == Py_None) {
+    if (!dispensable) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "%s(): argument '%s' (position %d) must be Tensor, but got None",
+          op_type, arg_name, arg_idx));
+    }
+    static egr::EagerTensor emptytensor;
+    return &emptytensor;
+  }
+  return &(reinterpret_cast<EagerTensorObject*>(obj)->eager_tensor);
+}
+std::vector<egr::EagerTensor*> GetEagerTensorPtrListFromArgs(
+    const std::string& op_type, const std::string& arg_name, PyObject* args,
+    ssize_t arg_idx, bool dispensable) {
+  PyObject* list = PyTuple_GET_ITEM(args, arg_idx);
+  if (list == nullptr) {
+    if (!dispensable) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "%s(): argument '%s' (position %d) must be list of Tensor, but got "
+          "None",
+          op_type, arg_name, arg_idx));
+    }
+    return {};
+  }
+  std::vector<egr::EagerTensor*> result;
+  if (PyList_Check(list)) {
+    Py_ssize_t len = PyList_Size(list);
+    if (len == 0) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "%s(): argument '%s' (position %d) must be list of Tensors, but got "
+          "empty list",
+          op_type, arg_name, arg_idx));
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+      result.emplace_back(
+          &(reinterpret_cast<EagerTensorObject*>(PyList_GetItem(list, i))
+                ->eager_tensor));
+    }
+  } else if (PyTuple_Check(list)) {
+    Py_ssize_t len = PyTuple_Size(list);
+    if (len == 0) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "%s(): argument '%s' (position %d) must be list of Tensors, but got "
+          "empty list",
+          op_type, arg_name, arg_idx));
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+      result.emplace_back(
+          &(reinterpret_cast<EagerTensorObject*>(PyTuple_GetItem(list, i))
+                ->eager_tensor));
+    }
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "%s(): argument '%s' (position %d) must be list of Tensors, but got "
+        "%s",
+        op_type, arg_name, arg_idx,
+        (reinterpret_cast<PyTypeObject*>(list->ob_type))->tp_name));
+  }
+  return result;
+}
 }  // namespace pybind
 }  // namespace paddle
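Note: emptytensor becomes static in the two helpers above because GetEagerTensorFromArgs now returns a reference and GetEagerTensorPtrFromArgs returns a pointer; handing back a function-local temporary would dangle, while a function-local static sentinel outlives the call. A standalone sketch of the idiom (not the Paddle code itself):

    #include <iostream>

    struct Tensor { bool defined = false; };

    // Returning a reference to a local temporary would dangle; a function-local
    // static sentinel has program lifetime and is safe to hand out.
    Tensor& GetOrEmpty(Tensor* maybe) {
      if (maybe != nullptr) return *maybe;
      static Tensor empty;  // shared "missing argument" placeholder
      return empty;
    }

    int main() {
      Tensor t{true};
      std::cout << GetOrEmpty(&t).defined << "\n";       // 1
      std::cout << GetOrEmpty(nullptr).defined << "\n";  // 0 (the sentinel)
    }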
@@ -83,13 +83,21 @@ PyObject* ToPyObject(const std::tuple<Args...>& out) {
   return result;
 }
-egr::EagerTensor GetEagerTensorFromArgs(const std::string& op_type,
-                                        const std::string& arg_name,
-                                        PyObject* args, ssize_t arg_idx,
-                                        bool dispensable = false);
+egr::EagerTensor& GetEagerTensorFromArgs(const std::string& op_type,
+                                         const std::string& arg_name,
+                                         PyObject* args, ssize_t arg_idx,
+                                         bool dispensable = false);
 std::vector<egr::EagerTensor> GetEagerTensorListFromArgs(
     const std::string& op_type, const std::string& arg_name, PyObject* args,
     ssize_t arg_idx, bool dispensable = false);
+egr::EagerTensor* GetEagerTensorPtrFromArgs(const std::string& op_type,
+                                            const std::string& arg_name,
+                                            PyObject* args, ssize_t arg_idx,
+                                            bool dispensable = false);
+std::vector<egr::EagerTensor*> GetEagerTensorPtrListFromArgs(
+    const std::string& op_type, const std::string& arg_name, PyObject* args,
+    ssize_t arg_idx, bool dispensable = false);
 }  // namespace pybind
 }  // namespace paddle
@@ -876,9 +876,8 @@ void BindImperative(py::module *m_ptr) {
         [](const std::shared_ptr<imperative::Tracer> &tracer) {
           if (egr::Controller::Instance().InEagerMode()) {
             egr::Controller::Instance().SetCurrentTracer(tracer);
-          } else {
-            imperative::SetCurrentTracer(tracer);
           }
+          imperative::SetCurrentTracer(tracer);
         });
   m.def("_enable_eager_mode",
         []() { egr::Controller::Instance().SetInEagerMode(true); });
@@ -2150,6 +2149,8 @@ void BindImperative(py::module *m_ptr) {
            if (py::isinstance<platform::CUDAPlace>(obj)) {
              auto p = obj.cast<platform::CUDAPlace *>();
              self.SetExpectedPlace(*p);
+             // TODO(jiabin): Support eager here when we need to make all
+             // dygraph in eager mode
              VLOG(4) << "Tracer(" << &self << ")"
                      << " set expected place " << *p;
            } else if (py::isinstance<platform::XPUPlace>(obj)) {
......
@@ -22,7 +22,7 @@ from six.moves import zip, range, xrange
 import multiprocessing
 import warnings
-from .framework import Variable, default_main_program, _current_expected_place, in_dygraph_mode
+from .framework import Variable, default_main_program, _current_expected_place, in_dygraph_mode, _in_eager_mode
 from .framework import _cpu_num, _cuda_ids
 __all__ = ['DataFeeder']
@@ -102,12 +102,20 @@ def check_type(input, input_name, expected_type, op_name, extra_message=''):
         if not isinstance(expected_type, tuple):
             expected_type = (expected_type, )
         expected_type += (core.VarBase, )
+        # TODO(jiabin): uncomment it when we support declarative mode in eager
+        # if _in_eager_mode():
+        #     expected_type += (core.eager.EagerTensor, )
     elif isinstance(input, core.VarBase):
         raise TypeError(
             "Please use `with fluid.dygraph.guard()` as context or `fluid.enable_dygraph()` to switch to imperative mode firstly. "
             "Because received '{}' in {} is a imperative Variable.".format(
                 input_name, op_name))
+    elif hasattr(core, "eager"):
+        if isinstance(input, core.eager.EagerTensor):
+            raise TypeError(
+                "Please use `with fluid.dygraph.guard()` as context or `fluid.enable_dygraph()` to switch to imperative mode firstly. "
+                "Because received '{}' in {} is a imperative Variable.".format(
+                    input_name, op_name))
     if not isinstance(input, expected_type):
         raise TypeError(
             "The type of '%s' in %s must be %s, but received %s. %s" %
......
@@ -25,7 +25,7 @@ from .tracer import Tracer
 import logging
 from ..data_feeder import convert_dtype
 import warnings
-from ..framework import _get_paddle_place
+from ..framework import _get_paddle_place, _in_eager_mode
 import paddle
 __all__ = [
@@ -720,10 +720,16 @@ def to_variable(value, name=None, zero_copy=None, dtype=None):
         if value.dtype != dtype:
             value = value.astype(dtype)
-        py_var = core.VarBase(
-            value=value,
-            place=framework._current_expected_place(),
-            persistable=False,
-            zero_copy=zero_copy,
-            name=name if name else '')
-        return py_var
+        if _in_eager_mode():
+            return core.eager.EagerTensor(value,
+                                          framework._current_expected_place(),
+                                          False, zero_copy, name
+                                          if name else None, True)
+        else:
+            py_var = core.VarBase(
+                value=value,
+                place=framework._current_expected_place(),
+                persistable=False,
+                zero_copy=zero_copy,
+                name=name if name else '')
+            return py_var
@@ -21,6 +21,7 @@ from ..param_attr import ParamAttr
 from .. import core
 from six.moves import zip
 from ..layer_helper_base import LayerHelperBase
+from ..dygraph_utils import _append_activation_in_dygraph
 class LayerObjectHelper(LayerHelperBase):
@@ -162,14 +163,18 @@ class LayerObjectHelper(LayerHelperBase):
         if (use_mkldnn is not None) and use_mkldnn:
             act['use_mkldnn'] = use_mkldnn
         act_type = act.pop('type')
-        tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
-        self.append_op(
-            type=act_type,
-            inputs={"X": [input_var]},
-            outputs={"Out": [tmp]},
-            attrs=act)
-        return tmp
+        if in_dygraph_mode():
+            res = _append_activation_in_dygraph(input_var, act_type, use_cudnn,
+                                                use_mkldnn)
+            return res
+        else:
+            tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
+            self.append_op(
+                type=act_type,
+                inputs={"X": [input_var]},
+                outputs={"Out": [tmp]},
+                attrs=act)
+            return tmp
     def is_instance(self, param, cls):
         """Check if the input parameter is instance of input class
......
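# A short sketch of the new dygraph branch in append_activation, assuming a dygraph-enabled
# runtime: the activation is computed through _append_activation_in_dygraph instead of
# appending an activation op, matching the TestDygraphUtils cases further down this diff.
# The helper name and tensor values are illustrative.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper

with fluid.dygraph.guard():
    helper = LayerObjectHelper(fluid.unique_name.generate("demo"))
    a = paddle.to_tensor(np.random.random((4, 8)).astype('float32'))
    out = helper.append_activation(a, act="sigmoid")   # dygraph fast path
    ref = fluid.layers.sigmoid(a)
    print(np.allclose(out.numpy(), ref.numpy()))        # expected: True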
...@@ -18,6 +18,7 @@ from .. import core ...@@ -18,6 +18,7 @@ from .. import core
from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator
from ..layers.layer_function_generator import OpProtoHolder from ..layers.layer_function_generator import OpProtoHolder
from . import no_grad from . import no_grad
from ..framework import _in_eager_mode
import numpy as np import numpy as np
import warnings import warnings
...@@ -332,6 +333,7 @@ def monkey_patch_math_varbase(): ...@@ -332,6 +333,7 @@ def monkey_patch_math_varbase():
] ]
global _already_patch_varbase global _already_patch_varbase
if not _already_patch_varbase: if not _already_patch_varbase:
for method in varbase_methods: for method in varbase_methods:
method_name = method[0] method_name = method[0]
......
...@@ -233,7 +233,7 @@ def monkey_patch_varbase(): ...@@ -233,7 +233,7 @@ def monkey_patch_varbase():
if grad_tensor is not None: if grad_tensor is not None:
assert isinstance( assert isinstance(
grad_tensor, paddle. grad_tensor, paddle.
Tensor), "The type of grad_tensot must be paddle.Tensor" Tensor), "The type of grad_tensor must be paddle.Tensor"
assert grad_tensor.shape == self.shape, \ assert grad_tensor.shape == self.shape, \
"Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format( "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
grad_tensor.name, grad_tensor.shape, self.name, self.shape) grad_tensor.name, grad_tensor.shape, self.name, self.shape)
......
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
# limitations under the License. # limitations under the License.
from .. import core as core from .. import core as core
from .. import framework as framework
from ..dygraph.parallel import scale_loss
import numpy as np
def monkey_patch_eagertensor(): def monkey_patch_eagertensor():
...@@ -20,5 +23,112 @@ def monkey_patch_eagertensor(): ...@@ -20,5 +23,112 @@ def monkey_patch_eagertensor():
from paddle.tensor.to_string import eager_tensor_to_string from paddle.tensor.to_string import eager_tensor_to_string
return eager_tensor_to_string(self) return eager_tensor_to_string(self)
@framework.dygraph_only
def backward(self, grad_tensor=None, retain_graph=False):
"""
        Run backward of the current graph, which starts from the current Tensor.
        The new gradient will be accumulated on the previous gradient.
        You can clear the gradient by ``Tensor.clear_grad()``.
Args:
grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None,
            the initial gradient values of the current Tensor would be a Tensor filled with 1.0;
            if `grad_tensor` is not None, it must have the same shape as the current Tensor.
            The default value is None.
retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
                :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
Defaults to False.
Returns:
NoneType: None
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor(5., stop_gradient=False)
for i in range(5):
y = paddle.pow(x, 4.0)
y.backward()
print("{}: {}".format(i, x.grad))
# 0: [500.]
# 1: [1000.]
# 2: [1500.]
# 3: [2000.]
# 4: [2500.]
x.clear_grad()
print("{}".format(x.grad))
# 0.
grad_tensor=paddle.to_tensor(2.)
for i in range(5):
y = paddle.pow(x, 4.0)
y.backward(grad_tensor)
print("{}: {}".format(i, x.grad))
# 0: [1000.]
# 1: [2000.]
# 2: [3000.]
# 3: [4000.]
# 4: [5000.]
"""
if framework.in_dygraph_mode():
if grad_tensor is not None:
assert isinstance(
grad_tensor, core.eager.EagerTensor
), "The type of grad_tensor must be paddle.Tensor"
assert grad_tensor.shape == self.shape, \
"Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
grad_tensor.name, grad_tensor.shape, self.name, self.shape)
grad_tensor = [grad_tensor]
else:
grad_tensor = []
if core.is_compiled_with_xpu() or core.is_compiled_with_npu():
# TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
scaled_loss = scale_loss(self)
core.eager.run_backward([scaled_loss], grad_tensor,
retain_graph)
else:
core.eager.run_backward([self], grad_tensor, retain_graph)
else:
raise ValueError(
"Variable.backward() is only available in DyGraph mode")
@framework.dygraph_only
def gradient(self):
"""
.. warning::
          This API will be deprecated in the future; it is recommended to use
          :code:`x.grad`, which returns the tensor value of the gradient.
        Get the gradient of the current Tensor.
Returns:
ndarray: Numpy value of the gradient of current Tensor
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor(5., stop_gradient=False)
y = paddle.pow(x, 4.0)
y.backward()
print("grad of x: {}".format(x.gradient()))
# [500.]
"""
if self.grad is None:
return None
# TODO(wanghuancoder) support SELECTED_ROWS
return self.grad.numpy()
if hasattr(core, "eager"): if hasattr(core, "eager"):
setattr(core.eager.EagerTensor, "__str__", __str__) setattr(core.eager.EagerTensor, "__str__", __str__)
setattr(core.eager.EagerTensor, "backward", backward)
setattr(core.eager.EagerTensor, "gradient", gradient)
...@@ -84,6 +84,7 @@ core._disable_eager_mode() ...@@ -84,6 +84,7 @@ core._disable_eager_mode()
def _test_eager_guard(): def _test_eager_guard():
core._enable_eager_mode() core._enable_eager_mode()
_C_ops.switch_to_eager_ops() _C_ops.switch_to_eager_ops()
core._switch_tracer(_dygraph_tracer_)
try: try:
yield yield
finally: finally:
...@@ -920,6 +921,14 @@ def _varbase_creator(type=core.VarDesc.VarType.LOD_TENSOR, ...@@ -920,6 +921,14 @@ def _varbase_creator(type=core.VarDesc.VarType.LOD_TENSOR,
if not isinstance(dtype, core.VarDesc.VarType): if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
if _in_eager_mode():
eager_tensor = core.eager.EagerTensor(
dtype if dtype else core.VarDesc.VarType.FP32,
list(shape) if shape else [], name, type
if type else core.VarDesc.VarType.LOD_TENSOR, True
if persistable else False)
eager_tensor.retain_grads()
return eager_tensor
return core.VarBase(dtype if dtype else core.VarDesc.VarType.FP32, return core.VarBase(dtype if dtype else core.VarDesc.VarType.FP32,
list(shape) if shape else [], name, type list(shape) if shape else [], name, type
if type else core.VarDesc.VarType.LOD_TENSOR, True if type else core.VarDesc.VarType.LOD_TENSOR, True
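# A hedged sketch of the eager branch added to the internal _varbase_creator helper above:
# under the eager test guard it is expected to hand back a core.eager.EagerTensor (with grads
# retained) rather than a core.VarBase. Argument values and the tensor name are illustrative.
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard, _varbase_creator

with _test_eager_guard():
    t = _varbase_creator(shape=[2, 3], dtype='float32', name='tmp_0', persistable=False)
    print(isinstance(t, core.eager.EagerTensor))  # True on an eager-enabled build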
...@@ -931,6 +940,8 @@ class VariableMetaClass(type): ...@@ -931,6 +940,8 @@ class VariableMetaClass(type):
def __instancecheck__(cls, instance): def __instancecheck__(cls, instance):
t = type(instance) t = type(instance)
if in_dygraph_mode(): if in_dygraph_mode():
if _in_eager_mode():
return issubclass(t, core.eager.EagerTensor)
return issubclass(t, core.VarBase) return issubclass(t, core.VarBase)
else: else:
return issubclass(t, Variable) return issubclass(t, Variable)
...@@ -941,6 +952,8 @@ class ParameterMetaClass(VariableMetaClass): ...@@ -941,6 +952,8 @@ class ParameterMetaClass(VariableMetaClass):
def __instancecheck__(cls, instance): def __instancecheck__(cls, instance):
t = type(instance) t = type(instance)
if in_dygraph_mode(): if in_dygraph_mode():
if _in_eager_mode():
return issubclass(t, EagerParamBase)
return issubclass(t, ParamBase) return issubclass(t, ParamBase)
else: else:
return issubclass(t, Parameter) return issubclass(t, Parameter)
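# A minimal sketch of the metaclass dispatch above, mirroring func_isinstance in the updated
# test_imperative_basic.py below: with eager mode on, isinstance checks against fluid.Variable
# resolve through core.eager.EagerTensor (and EagerParamBase for fluid.Parameter). The data
# values are illustrative.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    with fluid.dygraph.guard():
        t = paddle.to_tensor(np.array([3, 4, 5]))
        print(isinstance(t, fluid.core.eager.EagerTensor))  # True
        print(isinstance(t, fluid.Variable))                 # True via VariableMetaClass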
...@@ -3244,7 +3257,10 @@ class Block(object): ...@@ -3244,7 +3257,10 @@ class Block(object):
global_block = self.program.global_block() global_block = self.program.global_block()
param = None param = None
if in_dygraph_mode(): if in_dygraph_mode():
param = ParamBase(*args, **kwargs) if _in_eager_mode():
param = EagerParamBase(*args, **kwargs)
else:
param = ParamBase(*args, **kwargs)
else: else:
param = Parameter(global_block, *args, **kwargs) param = Parameter(global_block, *args, **kwargs)
...@@ -6243,6 +6259,153 @@ class ParamBase(core.VarBase): ...@@ -6243,6 +6259,153 @@ class ParamBase(core.VarBase):
__repr__ = __str__ __repr__ = __str__
if hasattr(core, "eager"):
_core_eager_eagertensor = core.eager.EagerTensor
else:
_core_eager_eagertensor = object
class EagerParamBase(_core_eager_eagertensor):
"""
    EagerParamBase is derived from Tensor (the Tensor concept in Eager-Dygraph mode).
    An EagerParamBase is a persistable Tensor, and will be updated by optimizers
    after each iteration.
    The training of a neural network is essentially the updating of
    its EagerParamBase.
    Relative to a general Tensor, an EagerParamBase has several member
    variables of its own:
Args:
        trainable(bool): True if the EagerParamBase needs to be updated after
            iterations.
optimize_attr(map): EagerParamBase attributes related with optimizing.
Currently, it only contains 'learning_rate'.
Default: {'learning_rate': 1.0}
regularizer(WeightDecayRegularizer): The Regularizer which will
be applied on the EagerParamBase. Default: None
do_model_average(bool): True if the model average strategy will
be applied on this EagerParamBase.
        need_clip (bool): Whether the parameter gradient needs to be clipped
            in the optimizer. Default is True.
"""
@dygraph_only
def __init__(self, shape, dtype, **kwargs):
if shape is None:
raise ValueError("The shape of Parameter should not be None")
if dtype is None:
raise ValueError("The dtype of Parameter should not be None")
if len(shape) == 0:
raise ValueError(
"The dimensions of shape for Parameter must be greater than 0")
for each in shape:
if each < 0:
raise ValueError(
"Each dimension of shape for Parameter must be greater than 0, but received %s"
% list(shape))
if dtype is not None:
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
name = kwargs.get('name', unique_name.generate('_eager_param_base'))
super(EagerParamBase, self).__init__(
dtype if dtype else core.VarDesc.VarType.FP32,
list(shape)
if shape else [], name, core.VarDesc.VarType.LOD_TENSOR, True)
self.retain_grads()
trainable = kwargs.get('trainable', True)
self.stop_gradient = not trainable
self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0})
self.regularizer = kwargs.get('regularizer', None)
self.do_model_average = kwargs.get('do_model_average', None)
self.need_clip = kwargs.get('need_clip', True)
self.is_distributed = kwargs.get('is_distributed', False)
# self.block = default_main_program().global_block()
@property
def trainable(self):
return not self.stop_gradient
@trainable.setter
def trainable(self, trainable):
if isinstance(trainable, bool):
self.stop_gradient = not trainable
else:
raise ValueError(
"The type of trainable MUST be bool, but the type is ",
type(trainable))
def __str__(self):
"""
        Convert an EagerParamBase object to a readable string.
Returns(str): A readable string.
Examples:
.. code-block:: python
import paddle
linear = paddle.nn.Linear(3, 3)
print(linear.weight)
# Parameter containing:
# Tensor(shape=[3, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=False,
# [[ 0.48948765, 0.05829060, -0.25524026],
# [-0.70368278, 0.52986908, -0.68742192],
# [-0.54217887, 0.48439729, 0.34082305]])
"""
return "Parameter containing:\n{tensor}".format(
tensor=super(EagerParamBase, self).__str__())
def __deepcopy__(self, memo):
"""
        Deep copy the parameter; it always performs a Tensor copy.
Examples:
.. code-block:: python
import paddle
import copy
linear = paddle.nn.Linear(1, 3)
linear_copy = copy.deepcopy(linear)
print(linear.weight)
# Parameter containing:
# Tensor(shape=[1, 3], dtype=float32, place=CPUPlace, stop_gradient=False,
# [[-0.30929261, -0.90929240, -1.07851017]])
print(linear_copy.weight)
# Parameter containing:
# Tensor(shape=[1, 3], dtype=float32, place=CPUPlace, stop_gradient=False,
# [[-0.30929261, -0.90929240, -1.07851017]])
"""
state = copy.deepcopy(self.__dict__, memo)
state["name"] = self.name + unique_name.generate("_deepcopy")
new_param = EagerParamBase(self.shape, self.dtype, **state)
memo[id(self)] = new_param
new_param.copy_(self, True)
return new_param
def _copy_to(self, device, blocking):
state = copy.deepcopy(self.__dict__)
new_param = EagerParamBase(self.shape, self.dtype, **state)
core.eager.tensor_copy(self, new_param, device, blocking)
return new_param
__repr__ = __str__
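# A hedged construction sketch for EagerParamBase, assuming paddle's default dynamic-graph
# mode plus the eager test guard (the setting the eager unit tests in this PR run in); it
# exercises the same shape/dtype and trainable checks covered by EagerTensorPropertiesTestCase.
from paddle.fluid.framework import _test_eager_guard, EagerParamBase

with _test_eager_guard():
    p = EagerParamBase(shape=[2, 3], dtype='float32', trainable=True)
    print(p.trainable)      # True, i.e. stop_gradient is False
    p.trainable = False     # the setter flips stop_gradient
    print(p.stop_gradient)  # True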
# program is a global instance. # program is a global instance.
_main_program_ = Program() _main_program_ = Program()
_startup_program_ = Program() _startup_program_ = Program()
......
This diff is collapsed.
...@@ -24,6 +24,7 @@ from .param_attr import ParamAttr ...@@ -24,6 +24,7 @@ from .param_attr import ParamAttr
from . import core from . import core
from six.moves import zip from six.moves import zip
from .layer_helper_base import LayerHelperBase from .layer_helper_base import LayerHelperBase
from .dygraph_utils import _append_activation_in_dygraph
class LayerHelper(LayerHelperBase): class LayerHelper(LayerHelperBase):
...@@ -145,21 +146,27 @@ class LayerHelper(LayerHelperBase): ...@@ -145,21 +146,27 @@ class LayerHelper(LayerHelperBase):
else: else:
raise TypeError(str(act) + " should be unicode or str") raise TypeError(str(act) + " should be unicode or str")
use_cudnn = None
if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'): if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'):
act['use_cudnn'] = self.kwargs.get('use_cudnn') use_cudnn = self.kwargs.get('use_cudnn')
act['use_cudnn'] = use_cudnn
use_mkldnn = self.kwargs.get( use_mkldnn = self.kwargs.get(
'use_mkldnn', _global_flags().get("FLAGS_use_mkldnn", False)) 'use_mkldnn', _global_flags().get("FLAGS_use_mkldnn", False))
if use_mkldnn: if use_mkldnn:
act['use_mkldnn'] = use_mkldnn act['use_mkldnn'] = use_mkldnn
act_type = act.pop('type') act_type = act.pop('type')
if in_dygraph_mode():
tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) res = _append_activation_in_dygraph(input_var, act_type, use_cudnn,
self.append_op( use_mkldnn)
type=act_type, return res
inputs={"X": [input_var]}, else:
outputs={"Out": [tmp]}, tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
attrs=act) self.append_op(
return tmp type=act_type,
inputs={"X": [input_var]},
outputs={"Out": [tmp]},
attrs=act)
return tmp
#TODO (jiabin): should we remove this since it has never be used #TODO (jiabin): should we remove this since it has never be used
def _get_default_initializer(self, dtype): def _get_default_initializer(self, dtype):
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import numpy as np import numpy as np
from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place, _in_eager_mode
from . import unique_name from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr from .param_attr import ParamAttr, WeightNormParamAttr
from . import core from . import core
...@@ -84,13 +84,19 @@ class LayerHelperBase(object): ...@@ -84,13 +84,19 @@ class LayerHelperBase(object):
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
assert in_dygraph_mode( assert in_dygraph_mode(
), "to_variable could only be called in dygraph mode" ), "to_variable could only be called in dygraph mode"
py_var = core.VarBase( if _in_eager_mode():
value=value, return core.eager.EagerTensor(value,
name=name if name else '', _current_expected_place(), False,
persistable=False, False, name
place=_current_expected_place(), if name else None, True)
zero_copy=False) else:
return py_var py_var = core.VarBase(
value=value,
name=name if name else '',
persistable=False,
place=_current_expected_place(),
zero_copy=False)
return py_var
elif isinstance(value, (core.VarBase, Variable)): elif isinstance(value, (core.VarBase, Variable)):
return value return value
else: else:
......
...@@ -16,9 +16,10 @@ import paddle.fluid.core as core ...@@ -16,9 +16,10 @@ import paddle.fluid.core as core
import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods
import paddle import paddle
import numpy as np import numpy as np
from paddle.fluid.framework import _test_eager_guard from paddle.fluid.framework import _test_eager_guard, EagerParamBase
from paddle.fluid.data_feeder import convert_dtype from paddle.fluid.data_feeder import convert_dtype
import unittest import unittest
import copy
class EagerScaleTestCase(unittest.TestCase): class EagerScaleTestCase(unittest.TestCase):
...@@ -46,14 +47,42 @@ class EagerScaleTestCase(unittest.TestCase): ...@@ -46,14 +47,42 @@ class EagerScaleTestCase(unittest.TestCase):
grad_data = np.ones([4, 16, 16, 32]).astype('float32') grad_data = np.ones([4, 16, 16, 32]).astype('float32')
grad_eager = paddle.to_tensor(grad_data, 'float32', core.CPUPlace()) grad_eager = paddle.to_tensor(grad_data, 'float32', core.CPUPlace())
core.eager.retain_grad_for_tensor(data_eager) data_eager.retain_grads()
out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True) out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True)
self.assertFalse(data_eager.grad._is_initialized()) self.assertFalse(data_eager.grad._is_initialized())
core.eager.run_backward([out_eager], [grad_eager], False) out_eager.backward(grad_eager, False)
self.assertTrue(data_eager.grad._is_initialized()) self.assertTrue(data_eager.grad._is_initialized())
self.assertTrue(np.array_equal(data_eager.grad.numpy(), input_data)) self.assertTrue(np.array_equal(data_eager.grad.numpy(), input_data))
def test_retain_grad_and_run_backward_raises(self):
with _test_eager_guard():
paddle.set_device("cpu")
input_data = np.ones([4, 16, 16, 32]).astype('float32')
data_eager = paddle.to_tensor(input_data, 'float32',
core.CPUPlace(), False)
grad_data = np.ones([4, 16, 16, 32]).astype('float32')
grad_data2 = np.ones([4, 16]).astype('float32')
grad_eager = paddle.to_tensor(grad_data, 'float32', core.CPUPlace())
grad_eager2 = paddle.to_tensor(grad_data2, 'float32',
core.CPUPlace())
data_eager.retain_grads()
out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True)
self.assertFalse(data_eager.grad._is_initialized())
with self.assertRaisesRegexp(
AssertionError,
"The type of grad_tensor must be paddle.Tensor"):
out_eager.backward(grad_data, False)
with self.assertRaisesRegexp(
AssertionError,
"Tensor shape not match, Tensor of grad_tensor /*"):
out_eager.backward(grad_eager2, False)
class EagerDtypeTestCase(unittest.TestCase): class EagerDtypeTestCase(unittest.TestCase):
def check_to_tesnsor_and_numpy(self, dtype, proto_dtype): def check_to_tesnsor_and_numpy(self, dtype, proto_dtype):
...@@ -192,6 +221,34 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): ...@@ -192,6 +221,34 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
self.assertTrue(egr_tensor9.place._equals(place)) self.assertTrue(egr_tensor9.place._equals(place))
self.assertTrue(np.array_equal(egr_tensor9.numpy(), arr4)) self.assertTrue(np.array_equal(egr_tensor9.numpy(), arr4))
with self.assertRaisesRegexp(
ValueError, "The shape of Parameter should not be None"):
eager_param = EagerParamBase(shape=None, dtype="float32")
with self.assertRaisesRegexp(
ValueError, "The dtype of Parameter should not be None"):
eager_param = EagerParamBase(shape=[1, 1], dtype=None)
with self.assertRaisesRegexp(
ValueError,
"The dimensions of shape for Parameter must be greater than 0"):
eager_param = EagerParamBase(shape=[], dtype="float32")
with self.assertRaisesRegexp(
ValueError,
"Each dimension of shape for Parameter must be greater than 0, but received /*"
):
eager_param = EagerParamBase(shape=[-1], dtype="float32")
eager_param = EagerParamBase(shape=[1, 1], dtype="float32")
self.assertTrue(eager_param.trainable)
eager_param.trainable = False
self.assertFalse(eager_param.trainable)
with self.assertRaisesRegexp(
ValueError,
"The type of trainable MUST be bool, but the type is /*"):
eager_param.trainable = "False"
def test_constructor(self): def test_constructor(self):
print("Test_constructor") print("Test_constructor")
paddle.set_device("cpu") paddle.set_device("cpu")
...@@ -291,5 +348,80 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): ...@@ -291,5 +348,80 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
core._disable_eager_mode() core._disable_eager_mode()
class EagerParamBaseUsageTestCase(unittest.TestCase):
def test_print(self):
with _test_eager_guard():
linear = paddle.nn.Linear(3, 3, bias_attr=False)
print(linear.weight)
def test_copy(self):
with _test_eager_guard():
linear = paddle.nn.Linear(1, 3)
linear_copy = copy.deepcopy(linear)
linear_copy2 = linear.weight._copy_to(core.CPUPlace(), True)
self.assertTrue(
np.array_equal(linear.weight.numpy(),
linear_copy.weight.numpy()))
self.assertTrue(
np.array_equal(linear.weight.numpy(), linear_copy2.numpy()))
def func_fp16_initilaizer(self):
paddle.set_default_dtype("float16")
linear1 = paddle.nn.Linear(1, 3, bias_attr=False)
linear2 = paddle.nn.Linear(
1,
3,
bias_attr=False,
weight_attr=paddle.fluid.initializer.Uniform())
linear3 = paddle.nn.Linear(
1,
3,
bias_attr=False,
weight_attr=paddle.fluid.initializer.TruncatedNormalInitializer())
linear4 = paddle.nn.Linear(
1,
3,
bias_attr=False,
weight_attr=paddle.fluid.initializer.MSRAInitializer())
res = [
linear1.weight.numpy(), linear2.weight.numpy(),
linear3.weight.numpy(), linear4.weight.numpy()
]
paddle.set_default_dtype("float32")
return res
def test_fp16_initializer(self):
res1 = list()
res2 = list()
paddle.seed(102)
paddle.framework.random._manual_program_seed(102)
with _test_eager_guard():
res1 = self.func_fp16_initilaizer()
res2 = self.func_fp16_initilaizer()
for i in range(len(res1)):
self.assertTrue(np.array_equal(res1[i], res2[i]))
def func_layer_helper_base(self, value):
base = paddle.fluid.layer_helper_base.LayerHelperBase("test_layer",
"test_layer")
return base.to_variable(value).numpy()
def func_base_to_variable(self, value):
paddle.fluid.dygraph.base.to_variable(value)
def test_to_variable(self):
value = np.random.rand(4, 16, 16, 32).astype('float32')
res1 = None
res3 = None
with _test_eager_guard():
res1 = self.func_layer_helper_base(value)
res3 = self.func_base_to_variable(value)
res2 = self.func_layer_helper_base(value)
res4 = self.func_base_to_variable(value)
self.assertTrue(np.array_equal(res1, res2))
self.assertTrue(np.array_equal(res3, res4))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -24,6 +24,7 @@ from test_imperative_base import new_program_scope ...@@ -24,6 +24,7 @@ from test_imperative_base import new_program_scope
import paddle.fluid.dygraph_utils as dygraph_utils import paddle.fluid.dygraph_utils as dygraph_utils
from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
import paddle import paddle
from paddle.fluid.framework import _test_eager_guard
class MyLayer(fluid.Layer): class MyLayer(fluid.Layer):
...@@ -180,12 +181,12 @@ class SimpleRNN(fluid.Layer): ...@@ -180,12 +181,12 @@ class SimpleRNN(fluid.Layer):
class TestImperative(unittest.TestCase): class TestImperative(unittest.TestCase):
def test_functional_dygraph_context(self): def functional_dygraph_context(self):
self.assertFalse(fluid.dygraph.enabled()) self.assertFalse(fluid.dygraph.enabled())
fluid.enable_dygraph() fluid.enable_dygraph()
self.assertTrue(fluid.dygraph.enabled()) self.assertTrue(fluid.dygraph.enabled())
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
mlp = MLP(input_size=2) mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out1 = out.numpy() dy_out1 = out.numpy()
...@@ -195,7 +196,7 @@ class TestImperative(unittest.TestCase): ...@@ -195,7 +196,7 @@ class TestImperative(unittest.TestCase):
self.assertFalse(fluid.dygraph.enabled()) self.assertFalse(fluid.dygraph.enabled())
with fluid.dygraph.guard(): with fluid.dygraph.guard():
self.assertTrue(fluid.dygraph.enabled()) self.assertTrue(fluid.dygraph.enabled())
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
mlp = MLP(input_size=2) mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out2 = out.numpy() dy_out2 = out.numpy()
...@@ -205,7 +206,12 @@ class TestImperative(unittest.TestCase): ...@@ -205,7 +206,12 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.array_equal(dy_out1, dy_out2)) self.assertTrue(np.array_equal(dy_out1, dy_out2))
self.assertTrue(np.array_equal(dy_grad1, dy_grad2)) self.assertTrue(np.array_equal(dy_grad1, dy_grad2))
def test_functional_paddle_imperative_dygraph_context(self): def test_functional_dygraph_context(self):
with _test_eager_guard():
self.functional_dygraph_context()
self.functional_dygraph_context()
def functional_paddle_imperative_dygraph_context(self):
self.assertFalse(paddle.in_dynamic_mode()) self.assertFalse(paddle.in_dynamic_mode())
paddle.disable_static() paddle.disable_static()
self.assertTrue(paddle.in_dynamic_mode()) self.assertTrue(paddle.in_dynamic_mode())
...@@ -231,13 +237,27 @@ class TestImperative(unittest.TestCase): ...@@ -231,13 +237,27 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.array_equal(dy_out1, dy_out2)) self.assertTrue(np.array_equal(dy_out1, dy_out2))
self.assertTrue(np.array_equal(dy_grad1, dy_grad2)) self.assertTrue(np.array_equal(dy_grad1, dy_grad2))
def test_isinstance(self): def test_functional_paddle_imperative_dygraph_context(self):
with _test_eager_guard():
self.functional_paddle_imperative_dygraph_context()
self.functional_paddle_imperative_dygraph_context()
def func_isinstance(self):
var = fluid.layers.data(shape=[1], name='x', dtype='float32') var = fluid.layers.data(shape=[1], name='x', dtype='float32')
self.assertTrue(isinstance(var, fluid.Variable)) self.assertTrue(isinstance(var, fluid.Variable))
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_base = fluid.dygraph.base.to_variable(np.array([3, 4, 5])) if fluid.framework._in_eager_mode():
self.assertTrue(isinstance(var_base, core.VarBase)) var_base = paddle.to_tensor(np.array([3, 4, 5]))
self.assertTrue(isinstance(var_base, fluid.Variable)) self.assertTrue(isinstance(var_base, core.eager.EagerTensor))
else:
var_base = paddle.to_tensor(np.array([3, 4, 5]))
self.assertTrue(isinstance(var_base, core.VarBase))
self.assertTrue(isinstance(var_base, fluid.Variable))
def test_isinstance(self):
with _test_eager_guard():
self.func_isinstance()
self.func_isinstance()
def test_create_VarBase(self): def test_create_VarBase(self):
x = np.ones([2, 2], np.float32) x = np.ones([2, 2], np.float32)
...@@ -247,7 +267,7 @@ class TestImperative(unittest.TestCase): ...@@ -247,7 +267,7 @@ class TestImperative(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace()) tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace())
tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace()) tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace())
tmp3 = fluid.dygraph.base.to_variable(x) tmp3 = paddle.to_tensor(x)
tmp4 = fluid.core.VarBase(y) tmp4 = fluid.core.VarBase(y)
tmp5 = fluid.core.VarBase(value=x) tmp5 = fluid.core.VarBase(value=x)
tmp6 = fluid.core.VarBase(t) tmp6 = fluid.core.VarBase(t)
...@@ -269,7 +289,7 @@ class TestImperative(unittest.TestCase): ...@@ -269,7 +289,7 @@ class TestImperative(unittest.TestCase):
self.assertTrue(l1.weight.stop_gradient is False) self.assertTrue(l1.weight.stop_gradient is False)
tmp = l1.weight * 2 tmp = l1.weight * 2
self.assertTrue(tmp.stop_gradient) self.assertTrue(tmp.stop_gradient)
x = fluid.dygraph.to_variable(data) x = paddle.to_tensor(data)
y = l0(x) + tmp y = l0(x) + tmp
o = l1(y) o = l1(y)
o.backward() o.backward()
...@@ -287,7 +307,7 @@ class TestImperative(unittest.TestCase): ...@@ -287,7 +307,7 @@ class TestImperative(unittest.TestCase):
self.assertTrue(l1.weight.stop_gradient is False) self.assertTrue(l1.weight.stop_gradient is False)
tmp = l1.weight * 2 tmp = l1.weight * 2
self.assertTrue(tmp.stop_gradient) self.assertTrue(tmp.stop_gradient)
x = fluid.dygraph.to_variable(data) x = paddle.to_tensor(data)
y = l0(x) + tmp y = l0(x) + tmp
o = l1(y) o = l1(y)
o.backward() o.backward()
...@@ -308,7 +328,7 @@ class TestImperative(unittest.TestCase): ...@@ -308,7 +328,7 @@ class TestImperative(unittest.TestCase):
tmp2 = l1.weight * 2 tmp2 = l1.weight * 2
self.assertTrue(tmp.stop_gradient) self.assertTrue(tmp.stop_gradient)
self.assertTrue(tmp2.stop_gradient is False) self.assertTrue(tmp2.stop_gradient is False)
x = fluid.dygraph.to_variable(data) x = paddle.to_tensor(data)
y = l0(x) + tmp2 y = l0(x) + tmp2
o = l1(y) o = l1(y)
o.backward() o.backward()
...@@ -329,7 +349,7 @@ class TestImperative(unittest.TestCase): ...@@ -329,7 +349,7 @@ class TestImperative(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inputs = [] inputs = []
for _ in range(10): for _ in range(10):
tmp = fluid.dygraph.base.to_variable(x) tmp = paddle.to_tensor(x)
tmp.stop_gradient = False tmp.stop_gradient = False
inputs.append(tmp) inputs.append(tmp)
ret = fluid.layers.sums(inputs) ret = fluid.layers.sums(inputs)
...@@ -338,7 +358,7 @@ class TestImperative(unittest.TestCase): ...@@ -338,7 +358,7 @@ class TestImperative(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inputs2 = [] inputs2 = []
for _ in range(10): for _ in range(10):
tmp = fluid.dygraph.base.to_variable(x) tmp = paddle.to_tensor(x)
tmp.stop_gradient = False tmp.stop_gradient = False
inputs2.append(tmp) inputs2.append(tmp)
ret2 = fluid.layers.sums(inputs2) ret2 = fluid.layers.sums(inputs2)
...@@ -376,7 +396,7 @@ class TestImperative(unittest.TestCase): ...@@ -376,7 +396,7 @@ class TestImperative(unittest.TestCase):
def test_empty_grad(self): def test_empty_grad(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
x = np.ones([2, 2], np.float32) x = np.ones([2, 2], np.float32)
new_var = fluid.dygraph.base.to_variable(x) new_var = paddle.to_tensor(x)
try: try:
new_var.gradient() new_var.gradient()
except Exception as e: except Exception as e:
...@@ -400,7 +420,7 @@ class TestImperative(unittest.TestCase): ...@@ -400,7 +420,7 @@ class TestImperative(unittest.TestCase):
def test_set_persistable(self): def test_set_persistable(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
x = np.ones([2, 2], np.float32) x = np.ones([2, 2], np.float32)
new_var = fluid.dygraph.base.to_variable(x) new_var = paddle.to_tensor(x)
self.assertFalse(new_var.persistable) self.assertFalse(new_var.persistable)
new_var.persistable = True new_var.persistable = True
self.assertTrue(new_var.persistable) self.assertTrue(new_var.persistable)
...@@ -413,7 +433,7 @@ class TestImperative(unittest.TestCase): ...@@ -413,7 +433,7 @@ class TestImperative(unittest.TestCase):
def test_layer_in_out(self): def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
var_inp.stop_gradient = False var_inp.stop_gradient = False
l = MyLayer() l = MyLayer()
x = l(var_inp)[0] x = l(var_inp)[0]
...@@ -423,7 +443,7 @@ class TestImperative(unittest.TestCase): ...@@ -423,7 +443,7 @@ class TestImperative(unittest.TestCase):
dy_grad = l._x_for_debug.gradient() dy_grad = l._x_for_debug.gradient()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp2 = fluid.dygraph.base.to_variable(np_inp) var_inp2 = paddle.to_tensor(np_inp)
var_inp2.stop_gradient = False var_inp2.stop_gradient = False
l2 = MyLayer() l2 = MyLayer()
x2 = l2(var_inp2)[0] x2 = l2(var_inp2)[0]
...@@ -455,7 +475,7 @@ class TestImperative(unittest.TestCase): ...@@ -455,7 +475,7 @@ class TestImperative(unittest.TestCase):
def test_mlp(self): def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
mlp = MLP(input_size=2) mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out = out.numpy() dy_out = out.numpy()
...@@ -463,7 +483,7 @@ class TestImperative(unittest.TestCase): ...@@ -463,7 +483,7 @@ class TestImperative(unittest.TestCase):
dy_grad = mlp._linear1.weight.gradient() dy_grad = mlp._linear1.weight.gradient()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp2 = fluid.dygraph.base.to_variable(np_inp) var_inp2 = paddle.to_tensor(np_inp)
mlp2 = MLP(input_size=2) mlp2 = MLP(input_size=2)
out2 = mlp2(var_inp2) out2 = mlp2(var_inp2)
dy_out2 = out2.numpy() dy_out2 = out2.numpy()
...@@ -641,8 +661,8 @@ class TestImperative(unittest.TestCase): ...@@ -641,8 +661,8 @@ class TestImperative(unittest.TestCase):
# dynamic graph # dynamic graph
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inp1 = fluid.dygraph.to_variable(np_inp1) inp1 = paddle.to_tensor(np_inp1)
inp2 = fluid.dygraph.to_variable(np_inp2) inp2 = paddle.to_tensor(np_inp2)
if np.sum(np_inp1) < np.sum(np_inp2): if np.sum(np_inp1) < np.sum(np_inp2):
x = fluid.layers.elementwise_add(inp1, inp2) x = fluid.layers.elementwise_add(inp1, inp2)
else: else:
...@@ -692,7 +712,7 @@ class TestImperative(unittest.TestCase): ...@@ -692,7 +712,7 @@ class TestImperative(unittest.TestCase):
np_inp = np_inp.reshape((1, 4, 3)) np_inp = np_inp.reshape((1, 4, 3))
np_inp = np_inp.astype(np.float32) np_inp = np_inp.astype(np.float32)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
simple_rnn = SimpleRNN() simple_rnn = SimpleRNN()
outs, pre_hiddens = simple_rnn.forward(var_inp) outs, pre_hiddens = simple_rnn.forward(var_inp)
...@@ -703,7 +723,7 @@ class TestImperative(unittest.TestCase): ...@@ -703,7 +723,7 @@ class TestImperative(unittest.TestCase):
dy_grad_i2h = simple_rnn._cell._i2h_w.gradient() dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp2 = fluid.dygraph.base.to_variable(np_inp) var_inp2 = paddle.to_tensor(np_inp)
var_inp2 = fluid.layers.reshape(var_inp2, shape=[1, 4, 3]) var_inp2 = fluid.layers.reshape(var_inp2, shape=[1, 4, 3])
simple_rnn2 = SimpleRNN() simple_rnn2 = SimpleRNN()
outs2, pre_hiddens2 = simple_rnn2.forward(var_inp2) outs2, pre_hiddens2 = simple_rnn2.forward(var_inp2)
...@@ -760,58 +780,83 @@ class TestImperative(unittest.TestCase): ...@@ -760,58 +780,83 @@ class TestImperative(unittest.TestCase):
class TestDygraphUtils(unittest.TestCase): class TestDygraphUtils(unittest.TestCase):
def test_append_activation_in_dygraph_exception(self): def func_append_activation_in_dygraph_exception(self):
with new_program_scope(): with new_program_scope():
np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32) np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32)
a = fluid.layers.data("a", [10, 20]) a = fluid.layers.data("a", [10, 20])
func = dygraph_utils._append_activation_in_dygraph func = dygraph_utils._append_activation_in_dygraph
self.assertRaises(AssertionError, func, a, act="sigmoid") self.assertRaises(AssertionError, func, a, act="sigmoid")
def test_append_activation_in_dygraph1(self): def test_append_activation_in_dygraph_exception(self):
with _test_eager_guard():
self.func_append_activation_in_dygraph_exception()
self.func_append_activation_in_dygraph_exception()
def func_append_activation_in_dygraph1(self):
a_np = np.random.random(size=(10, 20, 30)).astype(np.float32) a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
func = dygraph_utils._append_activation_in_dygraph func = dygraph_utils._append_activation_in_dygraph
with fluid.dygraph.guard(): with fluid.dygraph.guard():
a = fluid.dygraph.to_variable(a_np) a = paddle.to_tensor(a_np)
res1 = func(a, act="hard_sigmoid") res1 = func(a, act="hard_sigmoid")
res2 = fluid.layers.hard_sigmoid(a) res2 = fluid.layers.hard_sigmoid(a)
self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))
def test_append_activation_in_dygraph2(self): def test_append_activation_in_dygraph1(self):
with _test_eager_guard():
self.func_append_activation_in_dygraph1()
self.func_append_activation_in_dygraph1()
def func_append_activation_in_dygraph2(self):
a_np = np.random.random(size=(10, 20, 30)).astype(np.float32) a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
func = dygraph_utils._append_activation_in_dygraph func = dygraph_utils._append_activation_in_dygraph
with fluid.dygraph.guard(): with fluid.dygraph.guard():
a = fluid.dygraph.to_variable(a_np) a = paddle.to_tensor(a_np)
res1 = func(a, act="sigmoid", use_mkldnn=True, use_cudnn=True) res1 = func(a, act="sigmoid", use_mkldnn=True, use_cudnn=True)
res2 = fluid.layers.sigmoid(a) res2 = fluid.layers.sigmoid(a)
self.assertTrue(np.allclose(res1.numpy(), res2.numpy())) self.assertTrue(np.allclose(res1.numpy(), res2.numpy()))
def test_append_activation_in_dygraph3(self): def test_append_activation_in_dygraph2(self):
with _test_eager_guard():
self.func_append_activation_in_dygraph2()
self.func_append_activation_in_dygraph2()
def func_append_activation_in_dygraph3(self):
a_np = np.random.random(size=(10, 20, 30)).astype(np.float32) a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
helper = LayerObjectHelper(fluid.unique_name.generate("test")) helper = LayerObjectHelper(fluid.unique_name.generate("test"))
func = helper.append_activation func = helper.append_activation
with fluid.dygraph.guard(): with fluid.dygraph.guard():
a = fluid.dygraph.to_variable(a_np) a = paddle.to_tensor(a_np)
res1 = func(a, act="sigmoid", use_cudnn=True) res1 = func(a, act="sigmoid", use_cudnn=True)
res2 = fluid.layers.sigmoid(a) res2 = fluid.layers.sigmoid(a)
self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))
def test_append_activation_in_dygraph_use_mkldnn(self): def test_append_activation_in_dygraph3(self):
with _test_eager_guard():
self.func_append_activation_in_dygraph3()
self.func_append_activation_in_dygraph3()
def func_append_activation_in_dygraph_use_mkldnn(self):
a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32) a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
helper = LayerHelper( helper = LayerHelper(
fluid.unique_name.generate("test"), act="relu", use_mkldnn=True) fluid.unique_name.generate("test"), act="relu", use_mkldnn=True)
func = helper.append_activation func = helper.append_activation
with fluid.dygraph.guard(): with fluid.dygraph.guard():
a = fluid.dygraph.to_variable(a_np) a = paddle.to_tensor(a_np)
res1 = func(a) res1 = func(a)
res2 = fluid.layers.relu(a) res2 = fluid.layers.relu(a)
self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))
def test_append_activation_in_dygraph_global_use_mkldnn(self): def test_append_activation_in_dygraph_use_mkldnn(self):
with _test_eager_guard():
self.func_append_activation_in_dygraph_use_mkldnn()
self.func_append_activation_in_dygraph_use_mkldnn()
def func_append_activation_in_dygraph_global_use_mkldnn(self):
a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32) a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
helper = LayerHelper(fluid.unique_name.generate("test"), act="relu") helper = LayerHelper(fluid.unique_name.generate("test"), act="relu")
func = helper.append_activation func = helper.append_activation
with fluid.dygraph.guard(fluid.core.CPUPlace()): with fluid.dygraph.guard(fluid.core.CPUPlace()):
a = fluid.dygraph.to_variable(a_np) a = paddle.to_tensor(a_np)
fluid.set_flags({'FLAGS_use_mkldnn': True}) fluid.set_flags({'FLAGS_use_mkldnn': True})
try: try:
res1 = func(a) res1 = func(a)
...@@ -820,38 +865,67 @@ class TestDygraphUtils(unittest.TestCase): ...@@ -820,38 +865,67 @@ class TestDygraphUtils(unittest.TestCase):
res2 = fluid.layers.relu(a) res2 = fluid.layers.relu(a)
self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))
def test_append_bias_in_dygraph_exception(self): def test_append_activation_in_dygraph_global_use_mkldnn(self):
with _test_eager_guard():
self.func_append_activation_in_dygraph_global_use_mkldnn()
self.func_append_activation_in_dygraph_global_use_mkldnn()
def func_append_bias_in_dygraph_exception(self):
with new_program_scope(): with new_program_scope():
np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32) np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32)
a = fluid.layers.data("a", [10, 20]) a = fluid.layers.data("a", [10, 20])
func = dygraph_utils._append_bias_in_dygraph func = dygraph_utils._append_bias_in_dygraph
self.assertRaises(AssertionError, func, a) self.assertRaises(AssertionError, func, a)
def test_append_bias_in_dygraph(self): def test_append_bias_in_dygraph_exception(self):
with _test_eager_guard():
self.func_append_bias_in_dygraph_exception()
self.func_append_bias_in_dygraph_exception()
def func_append_bias_in_dygraph(self):
a_np = np.random.random(size=(10, 20, 30)).astype(np.float32) a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
func = dygraph_utils._append_bias_in_dygraph func = dygraph_utils._append_bias_in_dygraph
with fluid.dygraph.guard(): with fluid.dygraph.guard():
a = fluid.dygraph.to_variable(a_np) a = paddle.to_tensor(a_np)
res1 = func(a, bias=a) res1 = func(a, bias=a)
res2 = a + a res2 = paddle.add(a, a)
self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))
def test_append_bias_in_dygraph(self):
with _test_eager_guard():
self.func_append_bias_in_dygraph()
self.func_append_bias_in_dygraph()
class TestDygraphGuardWithError(unittest.TestCase): class TestDygraphGuardWithError(unittest.TestCase):
def test_without_guard(self): def func_without_guard(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
x = fluid.dygraph.to_variable(np.zeros([10, 10])) x = paddle.to_tensor(np.zeros([10, 10]))
with self.assertRaisesRegexp(TypeError, with self.assertRaisesRegexp(TypeError,
"Please use `with fluid.dygraph.guard()"): "Please use `with fluid.dygraph.guard()"):
y = fluid.layers.matmul(x, x) y = fluid.layers.matmul(x, x)
def test_without_guard(self):
with _test_eager_guard():
self.func_without_guard()
self.func_without_guard()
class TestMetaclass(unittest.TestCase): class TestMetaclass(unittest.TestCase):
def test_metaclass(self): def func_metaclass(self):
self.assertEqual(type(MyLayer).__name__, 'type') self.assertEqual(type(MyLayer).__name__, 'type')
self.assertNotEqual(type(MyLayer).__name__, 'pybind11_type') self.assertNotEqual(type(MyLayer).__name__, 'pybind11_type')
self.assertEqual( if core._in_eager_mode():
type(paddle.fluid.core.VarBase).__name__, 'pybind11_type') self.assertEqual(
type(paddle.fluid.core.eager.EagerTensor).__name__, 'type')
else:
self.assertEqual(
type(paddle.fluid.core.VarBase).__name__, 'pybind11_type')
def test_metaclass(self):
with _test_eager_guard():
self.func_metaclass()
self.func_metaclass()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -117,12 +117,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): ...@@ -117,12 +117,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
) != _current_expected_place()._get_device_id(): ) != _current_expected_place()._get_device_id():
place = _current_expected_place() place = _current_expected_place()
if _in_eager_mode():
if dtype is None:
dtype = paddle.get_default_dtype()
return core.eager.to_tensor(data,
convert_dtype(dtype), place, stop_gradient)
if not isinstance(data, np.ndarray): if not isinstance(data, np.ndarray):
def _handle_dtype(data, dtype): def _handle_dtype(data, dtype):
...@@ -172,12 +166,17 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): ...@@ -172,12 +166,17 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
if dtype and convert_dtype(dtype) != data.dtype: if dtype and convert_dtype(dtype) != data.dtype:
data = data.astype(convert_dtype(dtype)) data = data.astype(convert_dtype(dtype))
    return paddle.Tensor( # TODO(jiabin): Support kwargs in eager tensor constructor
value=data, if _in_eager_mode() and isinstance(data, np.ndarray):
place=place, return core.eager.EagerTensor(data, place, False, False, None,
persistable=False, stop_gradient)
zero_copy=False, else:
stop_gradient=stop_gradient) return paddle.Tensor(
value=data,
place=place,
persistable=False,
zero_copy=False,
stop_gradient=stop_gradient)
def full_like(x, fill_value, dtype=None, name=None): def full_like(x, fill_value, dtype=None, name=None):
......
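# A brief sketch of the dispatch above, assuming an eager-enabled build and paddle's default
# dynamic-graph mode: paddle.to_tensor returns a core.eager.EagerTensor from numpy input under
# the eager guard, and keeps returning paddle.Tensor (core.VarBase) otherwise. Data values are
# illustrative.
import numpy as np
import paddle
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard

data = np.ones([2, 2], dtype='float32')
with _test_eager_guard():
    t = paddle.to_tensor(data, dtype='float32', place=core.CPUPlace())
    print(isinstance(t, core.eager.EagerTensor))  # True
t2 = paddle.to_tensor(data)
print(isinstance(t2, paddle.Tensor))              # True (core.VarBase path)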