Unverified commit abe232d8, authored by wanghuancoder and committed by GitHub

[Eager] Support Eager mode for some model testcase (#39248)

* eager, test=develop

* fix bug, test=develop

* eager, test=develop

* merge legacy to fluid

* eager, test=develop

* eager, test=develop

* Refactor TensorAdd func by template and remove gradient_accumulation in eager

* Remove needless target name

* eager, test=develop

* eager, test=develop

* Use overload instead of template

* Remove legacy code

* Remove legacy code

* selectedrows, test=develop

* Remove DataType test

* eager, test=develop

* eager, test=develop

* support gan, test=develop

* Using Tensor directly instead of using EagerTensor

* support gradient_accumulation

* make test_imperative_lod_tensor_to_selected_rows longer

* make test_imperative_lod_tensor_to_selected_rows longer

* refine code

* ptb, test=develop

* Rename all EagerTensor to Tensor

* Rename some EagerTensor to Tensor

* rename EagerTensor to EagerVariable

* eager, test=develop

* eager, test=develop

* eager, test=develop

* eager, test=develop

* add more test

* eager, test=develop

* Support copiable selected rows and merge develop

* refine, test=develop

* refine, test=develop

* refine, test=develop

* refine, test=develop

* clear grad, test=develop

* merge, develop

* merge, develop
Co-authored-by: JiabinYang <360788950@qq.com>
Co-authored-by: Weilong Wu <veyron_wu@163.com>
Parent ca4df333
......@@ -47,6 +47,7 @@ void GradNodeAccumulation::RetainGrad(
std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
operator()(
const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
PADDLE_ENFORCE(grads.size() == 1,
paddle::platform::errors::Fatal(
"GradNodeAccumulation should take exactly 1 grad tensor"
......
......@@ -35,6 +35,8 @@ class GradNodeAccumulation : public GradNodeBase {
paddle::experimental::Tensor* Grad() { return &accumulated_grad; }
std::string name() { return "GradNodeAccumulation"; }
/**
* Register ReduceHook
* **/
......
......@@ -70,12 +70,8 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
grad_tensor->set_impl(t.impl());
return *grad_tensor.get();
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Detected uninitialized variable, causing segmentation "
"fault "
"inside the hook."
"Tensor has to be initialized while we need to set it."
"please check tensor initialization status."));
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
......
......@@ -554,6 +554,21 @@ static bool CheckOpProto(proto::OpProto* op_proto) {
return true;
}
static bool BeSameAsInput(const std::string& output_name,
const std::set<std::string>& input_names) {
if (output_name.size() < 4) {
return false;
}
if (output_name.substr(output_name.size() - 3, 3) == "Out") {
if (input_names.count(output_name.substr(0, output_name.size() - 3))) {
return true;
}
}
return false;
}
/* --------------------------------------- */
/* --------- Preprocess Ins/Outs --------- */
/* --------------------------------------- */
......@@ -1022,27 +1037,12 @@ static std::string GenerateGradNodeCreationContent(
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
if (op_passing_outs_map[op_type].count(output_name)) {
const std::string output_var_args_name = output_name + "Var";
const char* FWD_OUT_SYNC_BACK_TEMPLATE =
" egr::EagerUtils::OverwriteOutputs(%s, %s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name);
}
} else {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
if (op_passing_outs_map[op_type].count(output_name)) {
const std::string output_var_args_name = output_name + "Var";
const char* FWD_OUT_SYNC_BACK_TEMPLATE =
" egr::EagerUtils::OverwriteOutputs(%s, %s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name);
}
}
}
VLOG(6) << "Generated outputs autograd_meta";
......@@ -1180,11 +1180,13 @@ static std::string GenerateGradNodeCreationContent(
SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
}
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name);
if (!output.intermediate()) {
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name);
}
}
VLOG(6) << "Generated SetGradIn/OutMeta";
......@@ -1324,19 +1326,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
generated_function_body += "\n";
// Handle Dispensable Inputs
std::set<std::string> input_names;
for (const proto::OpProto::Var& input : in_vars) {
const std::string& input_name = input.name();
input_names.insert(input_name);
if (input.dispensable()) {
if (input.duplicable()) {
const char* FWD_INS_CONTENT_TEMPLATE =
" if(%s.size() > 0) "
"ins[\"%s\"] = egr::EagerUtils::TrySyncToVars(%s)\n;";
"ins[\"%s\"] = egr::EagerUtils::TrySyncToVars(%s);\n";
generated_function_body += paddle::string::Sprintf(
FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name);
} else {
const char* FWD_INS_CONTENT_TEMPLATE =
" if(%s.initialized()) "
"ins[\"%s\"] = egr::EagerUtils::TrySyncToVars(%s)\n;";
"ins[\"%s\"] = egr::EagerUtils::TrySyncToVars(%s);\n";
generated_function_body += paddle::string::Sprintf(
FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name);
}
......@@ -1372,11 +1376,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
core_ops_args_type_info[op_type].push_back("tensor");
}
const char* FWD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::TrySyncToVars(%s) },";
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
if (BeSameAsInput(output_name, input_names)) {
if (!output.dispensable()) {
std::string input_name =
output_name.substr(0, output_name.size() - 3);
const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", ins[\"%s\"] },";
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, input_name);
}
} else {
const char* FWD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::TrySyncToVars(%s) },";
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
}
core_ops_args_info[op_type].push_back(output_var_name);
} else {
......@@ -1415,6 +1429,23 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
generated_function_body += outs_map_str;
generated_function_body += "\n";
for (const proto::OpProto::Var& output : out_vars) {
const std::string& output_name = output.name();
if (op_passing_outs_map[op_type].count(output_name)) {
if (BeSameAsInput(output_name, input_names)) {
if (output.dispensable()) {
std::string input_name =
output_name.substr(0, output_name.size() - 3);
const char* FWD_OUTS_CONTENT_TEMPLATE =
" if (ins.count(\"%s\")) outs[\"%s\"] = ins[\"%s\"];\n";
generated_function_body += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, input_name, output_name, input_name);
}
}
}
}
generated_function_body += "\n";
VLOG(6) << "Generated Outs Map";
// [Generation] Get Attrs
......@@ -1448,33 +1479,61 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
std::string output_varname = LegalizeVariableName(output_name);
if (output.duplicable()) {
const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s = "
"egr::EagerUtils::GetOutputs(outs[\"%s\"]);\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE,
output_varname, output_name);
if (op_passing_outs_map[op_type].count(output_name)) {
if (output.dispensable()) {
const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s;\n"
" if (outs.count(\"%s\")) "
"egr::EagerUtils::GetOutputs(outs[\"%s\"], %s);\n"
" egr::EagerUtils::Output2Result(%s, &%s);\n";
out_tensor_str = paddle::string::Sprintf(
FWD_OUT_TENSORS_TEMPLATE, output_varname, output_name,
output_name, output_var_args_name, output_var_args_name,
output_varname);
} else {
const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s;\n"
" egr::EagerUtils::GetOutputs(outs[\"%s\"], %s);\n"
" egr::EagerUtils::Output2Result(%s, &%s);\n";
out_tensor_str = paddle::string::Sprintf(
FWD_OUT_TENSORS_TEMPLATE, output_varname, output_name,
output_var_args_name, output_var_args_name, output_varname);
}
} else {
const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s;\n"
" egr::EagerUtils::GetOutputs(outs[\"%s\"], &%s);\n";
out_tensor_str =
paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE, output_varname,
output_name, output_varname);
}
return_types[return_position] =
"std::vector<paddle::experimental::Tensor>";
if (op_passing_outs_map[op_type].count(output_name) &&
bwd_info.GenerateForwardOnly()) {
const char* FWD_OUT_SYNC_BACK_TEMPLATE =
" egr::EagerUtils::OverwriteOutputs(outs[\"%s\"], %s);\n";
out_tensor_str += paddle::string::Sprintf(
FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name);
}
} else {
const char* FWD_OUT_TENSOR_TEMPLATE =
" paddle::experimental::Tensor %s = "
"egr::EagerUtils::GetOutput(outs[\"%s\"][0]);\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE,
output_varname, output_name);
if (op_passing_outs_map[op_type].count(output_name) &&
bwd_info.GenerateForwardOnly()) {
const char* FWD_OUT_SYNC_BACK_TEMPLATE =
" egr::EagerUtils::OverwriteOutputs(outs[\"%s\"][0], %s);\n";
out_tensor_str += paddle::string::Sprintf(
FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name);
if (op_passing_outs_map[op_type].count(output_name)) {
if (output.dispensable()) {
const char* FWD_OUT_TENSOR_TEMPLATE =
" if (outs.count(\"%s\")) "
"egr::EagerUtils::GetOutput(outs[\"%s\"][0], %s);\n"
" paddle::experimental::Tensor& %s = *%s;\n";
out_tensor_str = paddle::string::Sprintf(
FWD_OUT_TENSOR_TEMPLATE, output_name, output_name,
output_var_args_name, output_varname, output_var_args_name);
} else {
const char* FWD_OUT_TENSOR_TEMPLATE =
" egr::EagerUtils::GetOutput(outs[\"%s\"][0], %s);\n"
" paddle::experimental::Tensor& %s = *%s;\n";
out_tensor_str = paddle::string::Sprintf(
FWD_OUT_TENSOR_TEMPLATE, output_name, output_var_args_name,
output_varname, output_var_args_name);
}
} else {
const char* FWD_OUT_TENSOR_TEMPLATE =
" paddle::experimental::Tensor %s;\n"
" egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n";
out_tensor_str =
paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, output_varname,
output_name, output_varname);
}
return_types[return_position] = "paddle::experimental::Tensor";
}
......@@ -1494,6 +1553,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
GenerateGradNodeCreationContent(fwd_info, bwd_info);
generated_function_body += grad_node_creation_body_str;
generated_function_body += "\n";
// [Generation] Call RetainGradForTensor
VLOG(6) << "Generated GradNode Creation codes";
}
......@@ -1588,12 +1648,25 @@ static std::string GenerateSingleOpBase(
const std::string& attrs_name = "attrs_map" + std::to_string(*outs_size);
// [Generation] Get Ins Map
std::unordered_set<std::string> dispensable_input_name_set;
for (const auto& in : in_vars) {
if (in.dispensable()) dispensable_input_name_set.insert(in.name());
}
std::unordered_set<std::string> duplicable_input_name_set;
for (const auto& in : in_vars) {
if (in.duplicable()) duplicable_input_name_set.insert(in.name());
}
std::string ins_contents_str = "";
for (auto iter : grad_ins) {
const std::string& grad_input_name = iter.first;
if (grad_ins_fwd_slotname_map.count(grad_input_name)) {
// Fwd Tensor
const std::string& fwd_name =
grad_ins_fwd_slotname_map.at(grad_input_name);
if (dispensable_input_name_set.count(fwd_name)) {
continue;
}
std::string struct_fwd_input_name =
grad_ins_fwd_slotname_map.at(grad_input_name) + "_";
const char* GRAD_INS_FWD_CONTENT_TEMPLATE =
......@@ -1634,14 +1707,41 @@ static std::string GenerateSingleOpBase(
paddle::string::Sprintf(BWD_INS_MAP_TEMPLATE, ins_name, ins_contents_str);
generated_grad_function_body += ins_map_str;
VLOG(6) << "Generated Ins Map";
for (auto iter : grad_ins) {
const std::string& grad_input_name = iter.first;
// [Generation] Get Outs Map
std::unordered_set<std::string> duplicable_input_name_set;
for (const auto& in : in_vars) {
if (in.duplicable()) duplicable_input_name_set.insert(in.name());
if (grad_ins_fwd_slotname_map.count(grad_input_name)) {
// Fwd Tensor
const std::string& fwd_name =
grad_ins_fwd_slotname_map.at(grad_input_name);
if (dispensable_input_name_set.count(fwd_name)) {
std::string struct_fwd_input_name =
grad_ins_fwd_slotname_map.at(grad_input_name) + "_";
if (duplicable_input_name_set.count(fwd_name)) {
const char* DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE =
" if(this->%s.size() > 0) %s[\"%s\"] = "
"egr::EagerUtils::TrySyncToVars(egr::EagerUtils::"
"RecoverTensorWrapper(&this->%s, nullptr));\n";
generated_grad_function_body += paddle::string::Sprintf(
DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE, struct_fwd_input_name,
ins_name, grad_input_name, struct_fwd_input_name);
} else {
const char* DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE =
" auto %s = egr::EagerUtils::RecoverTensorWrapper(&this->%s, "
"nullptr);\n if(%s.initialized()) %s[\"%s\"] = "
"egr::EagerUtils::TrySyncToVars(%s);\n";
generated_grad_function_body += paddle::string::Sprintf(
DISPENSABLE_GRAD_INS_FWD_CONTENT_TEMPLATE, grad_input_name,
struct_fwd_input_name, grad_input_name, ins_name, grad_input_name,
grad_input_name);
}
}
}
}
VLOG(6) << "Generated Ins Map";
// [Generation] Get Outs Map
std::string outs_contents_str = "";
for (auto iter : grad_outs) {
const std::string& grad_output_name = iter.first;
......@@ -1987,6 +2087,7 @@ static std::string GenerateGradNodeHeaderContents(
"%s\n"
" // SetAttrMap\n"
"%s\n"
" std::string name() { return \"GradNode%s\"; }\n"
"\n"
" private:\n"
" // TensorWrappers\n"
......@@ -2085,8 +2186,8 @@ static std::string GenerateGradNodeHeaderContents(
std::string grad_node_str = paddle::string::Sprintf(
GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type,
set_tensor_wrappers_str, set_attr_map_str, tensor_wrapper_members_str,
attr_members_str);
set_tensor_wrappers_str, set_attr_map_str, op_type,
tensor_wrapper_members_str, attr_members_str);
return grad_node_str;
}
......
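The generator changes above introduce a BeSameAsInput helper so that an output such as "XOut" can reuse the ins slot of the matching input "X" instead of allocating a fresh EagerVariable. A minimal Python sketch of that name-matching rule (hypothetical helper, mirroring the C++ shown in the diff):

```python
def be_same_as_input(output_name, input_names):
    # An output reuses its input only when it is long enough to carry a
    # prefix and ends with the literal suffix "Out".
    if len(output_name) < 4:
        return False
    if output_name.endswith("Out"):
        return output_name[:-3] in input_names
    return False

assert be_same_as_input("XOut", {"X"})      # "XOut" shares the slot of input "X"
assert not be_same_as_input("Out", {"X"})   # too short to carry an input prefix
assert not be_same_as_input("YOut", {"X"})  # no matching input declared
```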
......@@ -127,6 +127,12 @@ class AutogradMeta : public AbstractAutogradMeta {
stop_gradient_ = static_cast<int>(stop_gradient);
}
void WeakSetStopGradient(bool stop_gradient) {
if (stop_gradient_ == -1) {
stop_gradient_ = static_cast<int>(stop_gradient);
}
}
bool Persistable() const { return persistable_; }
void SetPersistable(bool persistable) { persistable_ = persistable; }
......
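WeakSetStopGradient relies on stop_gradient_ being stored as an int with -1 meaning "not yet decided", so a pass can suggest a value without overriding an explicit user setting (this is also why the PassStopGradient unit test below now expects auto_grad0 to keep its earlier value). A minimal Python sketch of the tri-state behaviour, assuming the -1/0/1 encoding used in the diff:

```python
class AutogradMetaSketch:
    """Sketch only: -1 = unset, 0 = False, 1 = True."""

    def __init__(self):
        self._stop_gradient = -1          # unset until someone decides

    def set_stop_gradient(self, flag):    # hard set, mirrors SetStopGradient
        self._stop_gradient = int(flag)

    def weak_set_stop_gradient(self, flag):
        # Only applies when nothing has set the flag yet (mirrors WeakSetStopGradient).
        if self._stop_gradient == -1:
            self._stop_gradient = int(flag)

meta = AutogradMetaSketch()
meta.set_stop_gradient(False)        # explicit choice
meta.weak_set_stop_gradient(True)    # ignored: already set explicitly
assert meta._stop_gradient == 0
```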
......@@ -69,13 +69,16 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
if (meta && !meta->StopGradient()) {
VLOG(6) << "Add Edges for slot: " << slot_id;
auto node = meta->GetMutableGradNode();
if (node) {
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
......
......@@ -147,6 +147,8 @@ class GradNodeBase {
std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
virtual std::string name() { return "GradNodeBase"; }
private:
// TODO(jiabin): Use SmallVector instead after merge PR from develop
......
......@@ -159,7 +159,7 @@ TEST(EagerUtils, PassStopGradient) {
CHECK(auto_grad0->StopGradient() == false);
egr::EagerUtils::PassStopGradient(true, auto_grad0.get(), auto_grad1.get(),
auto_grad2.get(), auto_grad3.get());
CHECK(auto_grad0->StopGradient() == true);
CHECK(auto_grad0->StopGradient() == false);
CHECK(auto_grad1->StopGradient() == true);
CHECK(auto_grad2->StopGradient() == true);
CHECK(auto_grad3->StopGradient() == true);
......
......@@ -220,53 +220,62 @@ paddle::experimental::Tensor EagerUtils::GetOutput(
return paddle::experimental::Tensor(out->GetTensorBase(), out->name());
}
void EagerUtils::OverwriteOutputs(const std::shared_ptr<EagerVariable>& out,
paddle::experimental::Tensor* tensor) {
void EagerUtils::GetOutput(const std::shared_ptr<EagerVariable>& out,
paddle::experimental::Tensor* out_var) {
PADDLE_ENFORCE_NOT_NULL(
tensor, paddle::platform::errors::Fatal(
"Tensor is null and cannot be copied. "
"We are tring to OverwriteOutput from its "
"shared_ptr, this error may indicate some outputs "
"are nullptr"));
tensor->set_impl(out->GetTensorBase());
out_var, paddle::platform::errors::Fatal(
"Tensor is null and cannot be copied. "
"We are tring to OverwriteOutput from its "
"shared_ptr, this error may indicate some outputs "
"are nullptr"));
out_var->set_impl(out->GetTensorBase());
}
void EagerUtils::OverwriteOutputs(
void EagerUtils::GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs,
const std::vector<paddle::experimental::Tensor*>& tensors) {
PADDLE_ENFORCE_EQ(
outs.size(), tensors.size(),
paddle::platform::errors::Fatal(
"We are tring to OverwriteOutputs which passed in and it expected "
"elements num of outs and origin outputs are equal, but we got outs "
"size of: %d, and tensors passed in size is: %d",
outs.size(), tensors.size()));
std::vector<paddle::experimental::Tensor>* result) {
for (size_t i = 0; i < outs.size(); i++) {
OverwriteOutputs(outs[i], tensors[i]);
result->emplace_back(outs[i]->GetTensorBase());
}
}
void EagerUtils::OverwriteOutputs(const paddle::experimental::Tensor& out,
paddle::experimental::Tensor* tensor) {
PADDLE_ENFORCE_NOT_NULL(
tensor, paddle::platform::errors::Fatal(
"Tensor is null and cannot be copied. "
"We are tring to OverwriteOutput from its "
"shared_ptr, this error may indicate some outputs "
"are nullptr"));
*tensor = out;
}
void EagerUtils::OverwriteOutputs(
const std::vector<paddle::experimental::Tensor>& outs,
const std::vector<paddle::experimental::Tensor*>& tensors) {
void EagerUtils::GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs,
const std::vector<paddle::experimental::Tensor*>& out_var) {
for (size_t i = 0; i < outs.size(); i++) {
PADDLE_ENFORCE_NOT_NULL(
tensors[i], paddle::platform::errors::Fatal(
out_var[i], paddle::platform::errors::Fatal(
"Tensor is null and cannot be copied. "
"We are tring to OverwriteOutput from its "
"shared_ptr, this error may indicate some outputs "
"are nullptr"));
*tensors[i] = outs[i];
out_var[i]->set_impl(outs[i]->GetTensorBase());
}
}
void EagerUtils::GetOutputs(const std::shared_ptr<EagerVariable>& out,
std::vector<paddle::experimental::Tensor>* result) {
result->emplace_back(out->GetTensorBase());
}
void EagerUtils::GetOutputs(
const std::shared_ptr<EagerVariable>& out,
const std::vector<paddle::experimental::Tensor*>& out_var) {
PADDLE_ENFORCE_NOT_NULL(
out_var[0], paddle::platform::errors::Fatal(
"Tensor is null and cannot be copied. "
"We are tring to OverwriteOutput from its "
"shared_ptr, this error may indicate some outputs "
"are nullptr"));
out_var[0]->set_impl(out->GetTensorBase());
}
void EagerUtils::Output2Result(
const std::vector<paddle::experimental::Tensor*>& out_var,
std::vector<paddle::experimental::Tensor>* result) {
result->reserve(out_var.size());
for (size_t i = 0; i < out_var.size(); i++) {
result->emplace_back(*out_var[i]);
}
}
......
......@@ -77,7 +77,7 @@ class PassStopGradientIter : public IterHelper<AutogradMeta*> {
VLOG(2) << "Tensor is NULL";
return;
}
element->SetStopGradient(stop_gradient_);
element->WeakSetStopGradient(stop_gradient_);
}
bool stop_gradient_ = true;
......@@ -173,17 +173,24 @@ class EagerUtils {
const std::vector<std::shared_ptr<EagerVariable>>& outs);
static paddle::experimental::Tensor GetOutput(
const std::shared_ptr<EagerVariable>& out);
// Sync Back to origin output Tensor
static void OverwriteOutputs(const std::shared_ptr<EagerVariable>& out,
paddle::experimental::Tensor* tensor);
static void OverwriteOutputs(const paddle::experimental::Tensor& out,
paddle::experimental::Tensor* tensor);
static void OverwriteOutputs(
static void GetOutput(const std::shared_ptr<EagerVariable>& out,
paddle::experimental::Tensor* out_var);
static void GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs,
const std::vector<paddle::experimental::Tensor*>& tensors);
static void OverwriteOutputs(
const std::vector<paddle::experimental::Tensor>& outs,
const std::vector<paddle::experimental::Tensor*>& tensors);
std::vector<paddle::experimental::Tensor>* result);
static void GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs,
const std::vector<paddle::experimental::Tensor*>& out_var);
static void GetOutputs(const std::shared_ptr<EagerVariable>& out,
std::vector<paddle::experimental::Tensor>* result);
static void GetOutputs(
const std::shared_ptr<EagerVariable>& out,
const std::vector<paddle::experimental::Tensor*>& out_var);
static void Output2Result(
const std::vector<paddle::experimental::Tensor*>& out_var,
std::vector<paddle::experimental::Tensor>* result);
// end Intermidate needed
static void CheckAndRetainGrad(const paddle::experimental::Tensor& tensor);
......
......@@ -50,7 +50,6 @@ PyObject* TensorNew(PyTypeObject* type, PyObject* args, PyObject* kwargs) {
if (obj) {
auto v = reinterpret_cast<TensorObject*>(obj);
new (&(v->tensor)) paddle::experimental::Tensor();
Py_INCREF(obj);
}
return obj;
}
......@@ -58,7 +57,7 @@ PyObject* TensorNew(PyTypeObject* type, PyObject* args, PyObject* kwargs) {
// TODO(jiabin): Overload this once we need more constructor in Python
void EmptyTensorInitializer(TensorObject* self, const std::string& name,
const paddle::platform::Place& place,
bool persistable = false, bool stop_gradient = true,
bool persistable = false, int stop_gradient = -1,
framework::proto::VarType::Type dtype =
paddle::framework::proto::VarType::FP32,
const std::vector<int>& dims = {},
......@@ -74,7 +73,9 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
self->tensor.set_name(name);
auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor));
autograd_meta->SetPersistable(persistable);
autograd_meta->SetStopGradient(stop_gradient);
if (stop_gradient != -1) {
autograd_meta->SetStopGradient(static_cast<bool>(stop_gradient));
}
if (var_type == paddle::framework::proto::VarType::LOD_TENSOR) {
// TODO(jiabin): Maybe support LOD later
std::shared_ptr<phi::DenseTensor> dense_tensor =
......@@ -143,13 +144,12 @@ void InitTensorWithTensor(TensorObject* self,
src.copy_to(phi::TransToPtenBackend(place), true).impl());
VLOG(4) << "Different place, do TensorCopy";
}
egr::EagerUtils::autograd_meta(&(self->tensor))->SetStopGradient(true);
if (src.get_autograd_meta()) {
egr::EagerUtils::unsafe_autograd_meta(self->tensor)
egr::EagerUtils::autograd_meta(&(self->tensor))
->SetPersistable(
egr::EagerUtils::unsafe_autograd_meta(src)->Persistable());
} else {
egr::EagerUtils::unsafe_autograd_meta(self->tensor)->SetPersistable(false);
egr::EagerUtils::autograd_meta(&(self->tensor))->SetPersistable(false);
}
}
......@@ -168,8 +168,7 @@ void InitTensorWithFrameworkTensor(TensorObject* self,
temp.copy_to(phi::TransToPtenBackend(place), true).impl());
VLOG(4) << "Different place, do TensorCopy";
}
egr::EagerUtils::autograd_meta(&(self->tensor))->SetStopGradient(true);
egr::EagerUtils::unsafe_autograd_meta(self->tensor)->SetPersistable(false);
egr::EagerUtils::autograd_meta(&(self->tensor))->SetPersistable(false);
}
py::object ParsePyArray(
......@@ -218,21 +217,18 @@ paddle::platform::Place ParsePlace(
}
// boolean arguments: zero_copy, stop_gradient, persistable
bool ParseBooleanArgs(std::string key,
std::unordered_map<std::string, PyObject*> kws_map,
std::unordered_map<std::string, Py_ssize_t> kw_order_map,
PyObject* args, bool flag_kwargs, Py_ssize_t args_num) {
bool res = false;
if (key == "stop_gradient") res = true;
int ParseBooleanArgs(std::string key,
std::unordered_map<std::string, PyObject*> kws_map,
std::unordered_map<std::string, Py_ssize_t> kw_order_map,
PyObject* args, bool flag_kwargs, Py_ssize_t args_num) {
int res = -1;
if (kw_order_map[key] <= args_num) {
res = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, kw_order_map[key] - 1),
kw_order_map[key] - 1);
res = static_cast<int>(CastPyArg2AttrBoolean(
PyTuple_GET_ITEM(args, kw_order_map[key] - 1), kw_order_map[key] - 1));
} else {
if (flag_kwargs && kws_map[key] != NULL) {
res = CastPyArg2AttrBoolean(kws_map[key], 0);
} else {
return res;
res = static_cast<int>(CastPyArg2AttrBoolean(kws_map[key], 0));
}
}
return res;
......@@ -288,15 +284,15 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr,
bool persistable = false;
bool zero_copy = false;
std::string act_name = "";
bool stop_gradient = true;
int stop_gradient = -1;
numpy_value =
ParsePyArray(kws_map, kw_order_map, args, flag_kwargs, args_num);
place = ParsePlace(kws_map, kw_order_map, args, flag_kwargs, args_num);
persistable = ParseBooleanArgs("persistable", kws_map, kw_order_map, args,
flag_kwargs, args_num);
zero_copy = ParseBooleanArgs("zero_copy", kws_map, kw_order_map, args,
flag_kwargs, args_num);
persistable = (1 == ParseBooleanArgs("persistable", kws_map, kw_order_map,
args, flag_kwargs, args_num));
zero_copy = (1 == ParseBooleanArgs("zero_copy", kws_map, kw_order_map, args,
flag_kwargs, args_num));
act_name = ParseName(kws_map, kw_order_map, args, flag_kwargs, args_num);
stop_gradient = ParseBooleanArgs("stop_gradient", kws_map, kw_order_map, args,
flag_kwargs, args_num);
......@@ -571,7 +567,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) {
EmptyTensorInitializer(py_tensor_ptr, act_name,
egr::Controller::Instance().GetExpectedPlace(),
persistable,
/* stop_gradient */ true, dtype, dims, var_type);
/* stop_gradient */ -1, dtype, dims, var_type);
return 0;
} else {
......@@ -655,7 +651,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) {
bool persistable = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4);
EmptyTensorInitializer(py_tensor_ptr, act_name,
egr::Controller::Instance().GetExpectedPlace(),
persistable, true, dtype, dims, var_type);
persistable, -1, dtype, dims, var_type);
return 0;
} else if (pybind11::detail::npy_api::get().PyArray_Check_(arg0_ptr)) {
VLOG(6) << "Calling case3's initializer.";
......@@ -726,9 +722,8 @@ PyMappingMethods mapping_methods;
void BindEager(pybind11::module* module) {
auto m = module->def_submodule("eager");
auto& internals = pybind11::detail::get_internals();
auto heap_type = reinterpret_cast<PyHeapTypeObject*>(
internals.default_metaclass->tp_alloc(internals.default_metaclass, 0));
PyType_Type.tp_alloc(&PyType_Type, 0));
heap_type->ht_name = ToPyObject("Tensor");
heap_type->ht_qualname = ToPyObject("Tensor");
auto type = &heap_type->ht_type;
......@@ -742,8 +737,8 @@ void BindEager(pybind11::module* module) {
type->tp_getset = variable_properties;
type->tp_init = TensorInit;
type->tp_new = TensorNew;
Py_INCREF(internals.instance_base);
type->tp_base = reinterpret_cast<PyTypeObject*>(internals.instance_base);
Py_INCREF(&PyBaseObject_Type);
type->tp_base = reinterpret_cast<PyTypeObject*>(&PyBaseObject_Type);
type->tp_flags |=
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
#if PY_VERSION_HEX >= 0x03050000
......
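ParseBooleanArgs now returns an int so that "argument not supplied" (-1) can be told apart from an explicit False (0) or True (1), and EmptyTensorInitializer only touches stop_gradient when the value is not -1. A hypothetical Python mirror of that convention, for illustration only:

```python
def parse_boolean_arg(value):
    # -1 = not supplied, 0 = explicit False, 1 = explicit True.
    if value is None:
        return -1
    return 1 if bool(value) else 0

stop_gradient = parse_boolean_arg(None)
if stop_gradient != -1:                  # mirrors the check in EmptyTensorInitializer
    print("caller chose", bool(stop_gradient))
else:
    print("keep the framework default")  # the leaf default is decided elsewhere
```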
......@@ -186,11 +186,17 @@ static PyObject* tensor_retain_grads(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor__clear_gradient(TensorObject* self, PyObject* args,
PyObject* kwargs) {
static PyObject* tensor_clear_gradient(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
VLOG(4) << "ClearGradient " << self->tensor.name();
Py_ssize_t args_num = PyTuple_Size(args);
bool set_to_zero = true;
if (args_num == (Py_ssize_t)1) {
set_to_zero = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 0), 0);
}
paddle::experimental::Tensor* grad;
if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
// Add RetainGrad as PostHook to AccumulationNode
......@@ -209,13 +215,27 @@ static PyObject* tensor__clear_gradient(TensorObject* self, PyObject* args,
grad = meta->MutableGrad();
}
if (grad->initialized()) {
VLOG(4) << "Gradient of " << self->tensor.name()
<< " is initialized, will be released.";
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl());
dense_tensor->MoveMemoryHolder();
if (grad->is_selected_rows()) {
auto selected_rows =
std::dynamic_pointer_cast<phi::SelectedRows>(grad->impl());
if (selected_rows->mutable_value()->IsInitialized()) {
selected_rows->mutable_rows()->clear();
selected_rows->mutable_value()->clear();
}
} else if (grad->is_dense_tensor()) {
if (grad->initialized()) {
if (set_to_zero) {
grad->set_impl(paddle::experimental::zeros_like(*grad).impl());
} else {
VLOG(4) << "Gradient of " << self->tensor.name()
<< " is initialized, will be released.";
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl());
dense_tensor->MoveMemoryHolder();
}
}
}
Py_INCREF(Py_None);
return Py_None;
EAGER_CATCH_AND_THROW_RETURN_NULL
......@@ -407,7 +427,7 @@ PyMethodDef variable_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"retain_grads", (PyCFunction)(void (*)(void))tensor_retain_grads,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_clear_gradient", (PyCFunction)(void (*)(void))tensor__clear_gradient,
{"clear_gradient", (PyCFunction)(void (*)(void))tensor_clear_gradient,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_zero_grads", (PyCFunction)(void (*)(void))tensor__zero_grads,
METH_VARARGS | METH_KEYWORDS, NULL},
......
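The new tensor_clear_gradient binding accepts an optional set_to_zero flag: when true the gradient buffer is kept but replaced with zeros_like, when false the dense tensor's memory holder is released (selected-rows gradients have their rows and values cleared). A usage sketch of the Python-facing behaviour, assuming eager mode is enabled:

```python
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
loss = (x * x).sum()
loss.backward()

x.clear_gradient()       # default set_to_zero=True: keep the buffer, fill it with zeros
x.clear_gradient(False)  # release the gradient's memory holder instead
```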
......@@ -240,7 +240,8 @@ class Conv2D(layers.Layer):
is_bias=True)
def forward(self, input):
if in_dygraph_mode() and self._l_type == 'conv2d':
if in_dygraph_mode() and (self._l_type == 'conv2d' or
self._l_type == 'depthwise_conv2d'):
attrs = ('strides', self._stride, 'paddings', self._padding,
'dilations', self._dilation, 'groups', self._groups
if self._groups else 1, 'use_cudnn', self._use_cudnn,
......
......@@ -779,13 +779,6 @@ def monkey_patch_varbase():
raise TypeError(
"_set_grad_ivar is only supported for Parameter Tensor")
@framework.dygraph_only
def clear_gradient(self, set_to_zero=True):
if set_to_zero:
self._zero_grads()
else:
self._clear_gradient()
@framework.dygraph_only
def clone(self):
return _C_ops_.assign(self)
......@@ -815,7 +808,6 @@ def monkey_patch_varbase():
if core._in_eager_mode():
setattr(core.eager.Tensor, "_grad_ivar", _grad_ivar)
setattr(core.eager.Tensor, "_set_grad_ivar", _set_grad_ivar)
setattr(core.eager.Tensor, "clear_gradient", clear_gradient)
setattr(core.eager.Tensor, "clone", clone)
setattr(core.eager.Tensor, "value", value)
else:
......
......@@ -138,11 +138,11 @@ class ConstantInitializer(Initializer):
assert isinstance(block, framework.Block)
if framework.in_dygraph_mode():
var = _C_ops.fill_constant(
var, 'value',
float(self._value), 'force_cpu', self._force_cpu, 'dtype',
int(var.dtype), 'str_value',
str(float(self._value)), 'shape', var.shape)
_C_ops.fill_constant(var, 'value',
float(self._value), 'force_cpu',
self._force_cpu, 'dtype',
int(var.dtype), 'str_value',
str(float(self._value)), 'shape', var.shape)
return None
else:
# fill constant should set the "str_value" to preserve precision
......
......@@ -26,7 +26,7 @@ import six
import paddle
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer
from ..framework import Variable, OpProtoHolder, in_dygraph_mode, dygraph_only, _dygraph_tracer, default_main_program, _varbase_creator, static_only, _global_flags
from ..framework import Variable, OpProtoHolder, in_dygraph_mode, dygraph_only, _dygraph_tracer, default_main_program, _varbase_creator, static_only, _global_flags, _in_eager_mode
from .. import dygraph_utils
from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
......@@ -6254,6 +6254,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
# the shape of reshaped_3 is [6,8].
"""
if in_dygraph_mode():
if _in_eager_mode():
tmp_tensor_type = core.eager.Tensor
else:
tmp_tensor_type = Variable
#TODO(zhiqiu): enable inplace in dygraph mode.
if inplace:
warnings.warn(
......@@ -6265,7 +6269,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
for item in shape
]
out, _ = _C_ops.reshape2(x, None, 'shape', shape)
elif isinstance(shape, Variable):
elif isinstance(shape, tmp_tensor_type):
shape.stop_gradient = True
out, _ = _C_ops.reshape2(x, shape)
else:
......@@ -11132,24 +11136,30 @@ def slice(input, axes, starts, ends):
infer_flags = list(1 for i in range(len(axes)))
if _in_eager_mode():
tmp_tensor_type = core.eager.Tensor
else:
tmp_tensor_type = Variable
if isinstance(starts, (list, tuple)):
starts = [
item.numpy().item(0) if isinstance(item, Variable) else item
item.numpy().item(0)
if isinstance(item, tmp_tensor_type) else item
for item in starts
]
attrs += ('starts', starts)
elif isinstance(starts, Variable):
elif isinstance(starts, tmp_tensor_type):
starts_tensor = starts
starts.stop_gradient = True
infer_flags = list(-1 for i in range(len(axes)))
if isinstance(ends, (list, tuple)):
ends = [
item.numpy().item(0) if isinstance(item, Variable) else item
for item in ends
item.numpy().item(0)
if isinstance(item, tmp_tensor_type) else item for item in ends
]
attrs += ('ends', ends)
elif isinstance(ends, Variable):
elif isinstance(ends, tmp_tensor_type):
ends_tensor = ends
ends_tensor.stop_gradient = True
infer_flags = list(-1 for i in range(len(axes)))
......
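In eager mode the shape/starts/ends arguments of reshape and slice arrive as core.eager.Tensor rather than Variable, which is why the isinstance checks above switch to tmp_tensor_type. A small sketch of the affected code path, assuming dygraph mode:

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.arange(6).astype('float32'))
    shape = fluid.dygraph.to_variable(np.array([2, 3]).astype('int32'))
    # Passing a tensor as `shape` hits the isinstance(shape, tmp_tensor_type) branch.
    out = fluid.layers.reshape(x, shape)
    print(out.shape)  # [2, 3]
```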
......@@ -953,7 +953,7 @@ class TestMetaclass(unittest.TestCase):
self.assertNotEqual(type(MyLayer).__name__, 'pybind11_type')
if core._in_eager_mode():
self.assertEqual(
type(paddle.fluid.core.eager.Tensor).__name__, 'pybind11_type')
type(paddle.fluid.core.eager.Tensor).__name__, 'type')
else:
self.assertEqual(
type(paddle.fluid.core.VarBase).__name__, 'pybind11_type')
......
......@@ -18,6 +18,7 @@ import unittest
import paddle.fluid as fluid
import numpy as np
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard
class MLP(fluid.Layer):
......@@ -46,7 +47,7 @@ class MLP(fluid.Layer):
class TestDygraphFramework(unittest.TestCase):
def test_dygraph_backward(self):
def func_test_dygraph_backward(self):
with new_program_scope():
mlp = MLP(input_size=2)
var_inp = fluid.layers.data(
......@@ -59,8 +60,18 @@ class TestDygraphFramework(unittest.TestCase):
except AssertionError as e:
self.assertTrue((e is not None))
def test_dygraph_to_string(self):
def test_dygraph_backward(self):
with _test_eager_guard():
self.func_test_dygraph_backward()
self.func_test_dygraph_backward()
def func_test_dygraph_to_string(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.dygraph.guard():
var_inp = fluid.dygraph.to_variable(np_inp)
print(str(var_inp))
def test_dygraph_to_string(self):
with _test_eager_guard():
self.func_test_dygraph_to_string()
self.func_test_dygraph_to_string()
......@@ -25,6 +25,7 @@ from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid import Conv2D, Pool2D, Linear
from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.framework import _test_eager_guard
class Discriminator(fluid.Layer):
......@@ -54,7 +55,7 @@ class Generator(fluid.Layer):
class TestDygraphGAN(unittest.TestCase):
def test_gan_float32(self):
def func_test_gan_float32(self):
seed = 90
paddle.seed(1)
paddle.framework.random._manual_program_seed(1)
......@@ -227,6 +228,11 @@ class TestDygraphGAN(unittest.TestCase):
for k, v in six.iteritems(dy_params2):
self.assertTrue(np.allclose(v, static_params[k]))
def test_gan_float32(self):
with _test_eager_guard():
self.func_test_gan_float32()
self.func_test_gan_float32()
if __name__ == '__main__':
unittest.main()
......@@ -27,6 +27,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from utils import DyGraphProgramDescTracerTestHelper, is_equal_program
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
class SimpleImgConvPool(fluid.dygraph.Layer):
......@@ -114,7 +115,7 @@ class TestImperativeMnist(unittest.TestCase):
return _reader_imple
def test_mnist_float32(self):
def func_test_mnist_float32(self):
seed = 90
epoch_num = 1
batch_size = 128
......@@ -152,7 +153,7 @@ class TestImperativeMnist(unittest.TestCase):
label = data[1]
label.stop_gradient = True
if batch_id % 10 == 0:
if batch_id % 10 == 0 and not _in_eager_mode():
cost, traced_layer = paddle.jit.TracedLayer.trace(
mnist, inputs=img)
if program is not None:
......@@ -257,6 +258,11 @@ class TestImperativeMnist(unittest.TestCase):
for key, value in six.iteritems(static_param_value):
self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
def test_mnist_float32(self):
with _test_eager_guard():
self.func_test_mnist_float32()
self.func_test_mnist_float32()
if __name__ == '__main__':
unittest.main()
......@@ -26,10 +26,11 @@ from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from test_imperative_mnist import MNIST
from paddle.fluid.framework import _test_eager_guard
class TestImperativeMnistSortGradient(unittest.TestCase):
def test_mnist_sort_gradient_float32(self):
def func_test_mnist_sort_gradient_float32(self):
seed = 90
epoch_num = 1
......@@ -144,6 +145,11 @@ class TestImperativeMnistSortGradient(unittest.TestCase):
for key, value in six.iteritems(static_param_value):
self.assertTrue(np.allclose(value, dy_param_value2[key], atol=1e-5))
def test_mnist_sort_gradient_float32(self):
with _test_eager_guard():
self.func_test_mnist_sort_gradient_float32()
self.func_test_mnist_sort_gradient_float32()
if __name__ == '__main__':
unittest.main()
......@@ -27,6 +27,7 @@ from test_imperative_base import new_program_scope
import numpy as np
import six
from utils import DyGraphProgramDescTracerTestHelper, is_equal_program
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
class SimpleLSTMRNN(fluid.Layer):
......@@ -210,10 +211,15 @@ class PtbModel(fluid.Layer):
class TestDygraphPtbRnn(unittest.TestCase):
def test_ptb_rnn(self):
def func_test_ptb_rnn(self):
for is_sparse in [True, False]:
self.ptb_rnn_cpu_float32(is_sparse)
def test_ptb_rnn(self):
with _test_eager_guard():
self.func_test_ptb_rnn()
self.func_test_ptb_rnn()
def ptb_rnn_cpu_float32(self, is_sparse):
seed = 90
hidden_size = 10
......@@ -260,7 +266,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
y = to_variable(y_data)
init_hidden = to_variable(init_hidden_data)
init_cell = to_variable(init_cell_data)
if i % 5 == 0:
if i % 5 == 0 and (not _in_eager_mode()):
outs, traced_layer = TracedLayer.trace(
ptb_model, [x, y, init_hidden, init_cell])
outs_static = traced_layer([x, y, init_hidden, init_cell])
......
......@@ -26,6 +26,7 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from utils import DyGraphProgramDescTracerTestHelper, is_equal_program
from paddle.fluid.dygraph import TracedLayer
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
#NOTE(zhiqiu): run with FLAGS_cudnn_deterministic=1
......@@ -242,7 +243,7 @@ class TestDygraphResnet(unittest.TestCase):
return _reader_imple
def test_resnet_float32(self):
def func_test_resnet_float32(self):
seed = 90
batch_size = train_parameters["batch_size"]
......@@ -259,14 +260,9 @@ class TestDygraphResnet(unittest.TestCase):
train_parameters, parameter_list=resnet.parameters())
np.random.seed(seed)
batch_py_reader = fluid.io.PyReader(capacity=1)
batch_py_reader.decorate_sample_list_generator(
paddle.batch(
self.reader_decorator(
paddle.dataset.flowers.train(use_xmap=False)),
batch_size=batch_size,
drop_last=True),
places=fluid.CPUPlace())
train_reader = paddle.batch(
paddle.dataset.flowers.train(use_xmap=False),
batch_size=batch_size)
dy_param_init_value = {}
for param in resnet.parameters():
......@@ -275,16 +271,21 @@ class TestDygraphResnet(unittest.TestCase):
helper = DyGraphProgramDescTracerTestHelper(self)
program = None
for batch_id, data in enumerate(batch_py_reader()):
for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num:
break
img = data[0]
label = data[1]
dy_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
batch_size, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label.stop_gradient = True
out = None
if batch_id % 5 == 0:
if batch_id % 5 == 0 and not _in_eager_mode():
out, traced_layer = TracedLayer.trace(resnet, img)
if program is not None:
self.assertTrue(
......@@ -430,6 +431,11 @@ class TestDygraphResnet(unittest.TestCase):
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
def test_resnet_float32(self):
with _test_eager_guard():
self.func_test_resnet_float32()
self.func_test_resnet_float32()
if __name__ == '__main__':
unittest.main()
......@@ -22,6 +22,7 @@ from paddle.fluid import core
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from test_imperative_resnet import ResNet
from paddle.fluid.framework import _test_eager_guard
batch_size = 8
train_parameters = {
......@@ -71,7 +72,7 @@ def optimizer_setting(params, parameter_list=None):
class TestDygraphResnetSortGradient(unittest.TestCase):
def test_resnet_sort_gradient_float32(self):
def func_test_resnet_sort_gradient_float32(self):
seed = 90
batch_size = train_parameters["batch_size"]
......@@ -230,6 +231,11 @@ class TestDygraphResnetSortGradient(unittest.TestCase):
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
def test_resnet_sort_gradient_float32(self):
with _test_eager_guard():
self.func_test_resnet_sort_gradient_float32()
self.func_test_resnet_sort_gradient_float32()
if __name__ == '__main__':
unittest.main()