Unverified commit 45425411, authored by Jiabin Yang, committed by GitHub

Feature/auto prune in dygraph (#19757)

* refactor dygraph,test=develop

* fix failed unittest,test=develop

* polish code,test=develop

* check windows ci error,test=develop
try to fix windows ci error by np.allclose,test=develop

* polish vlog and profiler, test=develop

* try to fix preceding ops order,test=develop

* test transformer in windows ci, test=develop

* use python c-api to speed up tracer.trace,test=develop

* test=develop, fix docker with paddle nccl problem

* test=develop, add ut for debug string and gradient_accumulator

* test=develop, add tests for layer/gradient_accumulator/prepared_op

* test=develop, fix compile error for test_prepared_op

* test=develop, add more ut for dygraph

* test=develop, create API.spec for dygraph api change

* test=develop, refactor name to make it easier to understand

* test=develop, refactor name to make it easier to understand

* test=develop, fix multi-gpu failed problem, add Tracer tests, change PADDLE_ENFORCE to PADDLE_ENFORCE_EQ

* test=develop, fix ut failed on parallel se-resnext

* test=develop, change one more PADDLE_ENFORCE

* support auto prune in dygraph mode

* test=develop, support auto prune

* test=develop, merge develop conflict

* test=develop, fix test_layer and test_tracer ut

* test=develop, fix bug which may cause stop_gradient disabled with a list of backward inputs
Parent 418a0967
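At a glance, this change makes backward() skip any branch whose inputs are all marked stop_gradient=True. Below is a minimal usage sketch of the user-visible behavior, assuming the fluid 1.x dygraph API exercised by the new unit tests in this PR (the FC names and shapes here are made up for illustration):

import numpy as np
import paddle.fluid as fluid

# Auto-prune in dygraph: once every input of an op has stop_gradient=True,
# the branch feeding that op is skipped during backward.
with fluid.dygraph.guard():
    fc1 = fluid.dygraph.FC("FC_1", 5, bias_attr=False)
    fc2 = fluid.dygraph.FC("FC_2", 5, bias_attr=False)
    x = fluid.dygraph.to_variable(np.ones((5, 5), dtype="float32"))
    y = fluid.dygraph.to_variable(np.ones((5, 5), dtype="float32"))
    a = fc1(x)
    b = fc2(y)
    b.stop_gradient = True  # cut gradient flow through the fc2 branch
    loss = fluid.layers.reduce_mean(fluid.layers.mul(a, b))
    loss.backward()
    assert fc1._w._ivar._grad_ivar() is not None  # fc1 still receives a gradient
    assert fc2._w._ivar._grad_ivar() is None      # the fc2 branch is pruned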
@@ -70,23 +70,48 @@ void BasicEngine::Init(VarBase* var, const detail::BackwardStrategy& strategy) {
   auto& fwd_var = var->Var().Get<framework::LoDTensor>();
   auto* grad_var =
       var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
+  VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
+          << " as stop_gradient false";
+  var->GradVarBase()->InnerSetOverridedStopGradient(false);
+  var->GradVarBase()->SetGradGenerated(true);
   auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
   grad_var->Resize(fwd_var.dims());
   grad_var->mutable_data(fwd_var.place(), fwd_var.type());
   operators::math::set_constant(*dev_ctx, grad_var, 1.0);
 }
 
-bool BasicEngine::CheckBackwardInputs(OpBase* op) {
+void BasicEngine::CheckBackwardInputs(OpBase* op) {
   for (auto& pair : op->GetInsMap()) {
     for (auto& var : pair.second) {
-      if (var && !var->StopGradient()) {
-        return true;
+      if (var && IsGrad(var.get())) {
+        // if grad var has OverridedStopGradient skip this Op
+        if (!var->GradGenerated()) {
+          VLOG(6) << "Set ungenerated Grad: " << var->Name() << " as zero";
+          auto* dev_ctx =
+              platform::DeviceContextPool::Instance().Get(op->place());
+          auto* tensor = var->MutableVar()->GetMutable<framework::LoDTensor>();
+          tensor->mutable_data(op->place(), var->DataType());
+          operators::math::set_constant(*dev_ctx, tensor, 0.0);
+        } else {
+          continue;
+        }
       }
     }
   }
-  return false;
 }
 
+void BasicEngine::SetBackwardOutputs(paddle::imperative::OpBase* op) {
+  for (auto& pair : op->GetOutsMap()) {
+    for (auto& var : pair.second) {
+      if (var) {
+        // Set Backward outputs's generate_grad as true
+        var->SetGradGenerated(true);
+        VLOG(6) << "Set backward output: " << var->Name()
+                << "'s SetGeneratedGrad as True";
+      }
+    }
+  }
+}
+
 void BasicEngine::PrepareGradAccumulators(OpBase* op) {
   for (const auto& pair : op->GetOutsMap()) {
     for (const auto& var : pair.second) {
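The new CheckBackwardInputs no longer skips an op outright; instead, any gradient input that was never produced upstream (because its branch was pruned) is filled with zeros so the grad op can still run. A rough Python sketch of that idea, using a hypothetical helper name (fill_ungenerated_grads is not a Paddle API):

import numpy as np

def fill_ungenerated_grads(grad_inputs, fwd_shapes):
    # Hypothetical helper: before a grad op runs, any gradient input that no
    # upstream grad op produced is materialized as zeros of the recorded
    # forward shape, mirroring set_constant(..., 0.0) in CheckBackwardInputs.
    for name, grad in grad_inputs.items():
        if grad is None:
            grad_inputs[name] = np.zeros(fwd_shapes[name], dtype="float32")
    return grad_inputs

grads = fill_ungenerated_grads(
    {"a@GRAD": None, "b@GRAD": np.ones((2, 2), dtype="float32")},
    {"a@GRAD": (2, 2), "b@GRAD": (2, 2)})
assert (grads["a@GRAD"] == 0).all() and (grads["b@GRAD"] == 1).all()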
@@ -126,22 +151,19 @@ void BasicEngine::PrepareDeps() {
     q.pop();
     VLOG(3) << "Checking grads of op " << cur_op->Type();
 
-    if (!CheckBackwardInputs(cur_op)) {
-      // TODO(zjl): clear ops that do not need grad before running autograd
-      VLOG(3) << "Stop checking preceding ops of " << cur_op->Type()
-              << " because all of its backward inputs is stop_gradient=True";
-      continue;
-    }
+    CheckBackwardInputs(cur_op);
+
+    SetBackwardOutputs(cur_op);
 
     PrepareGradAccumulators(cur_op);
 
-    auto& preceding_ops = cur_op->GradPendingOps();
-    for (auto* preceding_op : preceding_ops) {
-      PADDLE_ENFORCE_NOT_NULL(preceding_op);
-      ++op_deps_[preceding_op];
-      if (visited.count(preceding_op) == 0) {
-        visited.insert(preceding_op);
-        q.push(preceding_op);
+    auto& grad_pending_ops = cur_op->GradPendingOps();
+    for (auto* grad_pending_op : grad_pending_ops) {
+      PADDLE_ENFORCE_NOT_NULL(grad_pending_op);
+      ++op_deps_[grad_pending_op];
+      if (visited.count(grad_pending_op) == 0) {
+        visited.insert(grad_pending_op);
+        q.push(grad_pending_op);
       }
     }
   }
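PrepareDeps is a breadth-first pass over the grad-op graph that counts how many predecessors each grad_pending op still waits on; Execute later decrements those counters and runs an op once its count reaches zero. A small Python sketch of the counting step, with made-up op names:

from collections import deque

def prepare_deps(init_ops, grad_pending_ops):
    # Hypothetical sketch of BasicEngine::PrepareDeps: BFS over the grad-op
    # graph, counting for each op how many predecessors must finish first.
    op_deps = {}
    visited = set(init_ops)
    queue = deque(init_ops)
    while queue:
        cur = queue.popleft()
        for nxt in grad_pending_ops.get(cur, []):
            op_deps[nxt] = op_deps.get(nxt, 0) + 1
            if nxt not in visited:
                visited.add(nxt)
                queue.append(nxt)
    return op_deps

# "mul_grad" hands gradients to two pending grad ops (names are illustrative).
assert prepare_deps(["mul_grad"], {"mul_grad": ["fc1_grad", "fc2_grad"]}) == \
    {"fc1_grad": 1, "fc2_grad": 1}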
@@ -204,19 +226,19 @@ void BasicEngine::Execute() {
     }
 
     // Step 3: Collect ready ops
-    for (auto* preceding_op : cur_op->GradPendingOps()) {
-      PADDLE_ENFORCE_NOT_NULL(preceding_op);
-      auto iter = op_deps_.find(preceding_op);
+    for (auto* grad_pending_op : cur_op->GradPendingOps()) {
+      PADDLE_ENFORCE_NOT_NULL(grad_pending_op);
+      auto iter = op_deps_.find(grad_pending_op);
       if (iter == op_deps_.end()) {
         continue;
       }
 
-      VLOG(3) << "Found preceding op of " << cur_op->Type();
+      VLOG(3) << "Found grad_pending op of " << cur_op->Type();
 
       // An Op is ready to go while its deps comes to zero
       if (--(iter->second) == 0) {
-        q.push(preceding_op);
-        VLOG(3) << "Push preceding op " << preceding_op->Type()
+        q.push(grad_pending_op);
+        VLOG(3) << "Push grad_pending op " << grad_pending_op->Type()
                 << " into queue";
       }
     }
......
@@ -18,6 +18,7 @@
 #include <cstdint>
 #include <memory>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/imperative/backward_strategy.h"
@@ -49,11 +50,20 @@ class Engine {
   void InsertOp(OpBase* op, std::shared_ptr<OpBase> op_shared) {
     grad_ops_[op] = std::move(op_shared);
   }
 
-  void Clear() { grad_ops_.clear(); }
+  void InsertGradVar(VarBase* grad) { grad_vars_.emplace(grad); }
+
+  bool IsGrad(VarBase* var) { return grad_vars_.count(var) > 0; }
+
+  void Clear() {
+    grad_ops_.clear();
+    grad_vars_.clear();
+  }
 
  private:
   std::unordered_map<OpBase*, std::shared_ptr<OpBase>>
       grad_ops_;  // opBase for remove - grad_op
+  std::unordered_set<VarBase*> grad_vars_;
 };
 
 class BasicEngine : public Engine {
@@ -69,7 +79,9 @@ class BasicEngine : public Engine {
  private:
   void PrepareDeps();
 
-  bool CheckBackwardInputs(OpBase* op);
+  void CheckBackwardInputs(OpBase* op);
+
+  void SetBackwardOutputs(OpBase* op);
 
   void PrepareGradAccumulators(OpBase* op);
......
@@ -105,10 +105,23 @@ void TensorAdd(const framework::Variable& src, framework::Variable* dst) {
 void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var,
                                    size_t trace_id) {
   auto* dst_var = var_->MutableVar();
-  if (cur_cnt_ == 0) {
-    *dst_var = std::move(*(var->MutableVar()));
+  auto place = var->Var().Get<framework::LoDTensor>().place();
+  if (!var_->OverridedStopGradient()) {
+    VLOG(3) << "Sum Gradient for: " << var_->Name();
+    if (cur_cnt_ == 0) {
+      *dst_var = std::move(*(var->MutableVar()));
+    } else {
+      TensorAdd(var->Var(), dst_var);
+    }
   } else {
-    TensorAdd(var->Var(), dst_var);
+    if (!var_->Var().IsInitialized() ||
+        !var_->Var().Get<framework::LoDTensor>().IsInitialized()) {
+      VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero";
+      auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
+      tensor->mutable_data(place, var->DataType());
+      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+    }
   }
   ++cur_cnt_;
 }
@@ -116,30 +129,44 @@ void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var,
 void SortedGradientAccumulator::Add(std::shared_ptr<VarBase> var,
                                     size_t trace_id) {
   auto* dst_var = var_->MutableVar();
-  if (ref_cnt_ == 1) {
-    *dst_var = std::move(*(var->MutableVar()));
-  } else {
-    if (tmp_grad_vars_.empty()) {
-      tmp_grad_vars_.reserve(ref_cnt_);
-    }
+  auto place = var->Var().Get<framework::LoDTensor>().place();
+  if (!var_->OverridedStopGradient()) {
+    if (ref_cnt_ == 1) {
+      *dst_var = std::move(*(var->MutableVar()));
+    } else {
+      if (tmp_grad_vars_.empty()) {
+        tmp_grad_vars_.reserve(ref_cnt_);
+      }
 
-    tmp_grad_vars_.emplace_back(std::move(var), trace_id);
+      tmp_grad_vars_.emplace_back(std::move(var), trace_id);
 
-    if (tmp_grad_vars_.size() != ref_cnt_) {
-      return;
-    }
+      if (tmp_grad_vars_.size() != ref_cnt_) {
+        return;
+      }
 
-    std::sort(tmp_grad_vars_.begin(), tmp_grad_vars_.end(),
-              [](const std::pair<std::shared_ptr<VarBase>, size_t>& p1,
-                 const std::pair<std::shared_ptr<VarBase>, size_t>& p2) {
-                return p1.second > p2.second;
-              });
+      std::sort(tmp_grad_vars_.begin(), tmp_grad_vars_.end(),
+                [](const std::pair<std::shared_ptr<VarBase>, size_t>& p1,
+                   const std::pair<std::shared_ptr<VarBase>, size_t>& p2) {
+                  return p1.second > p2.second;
+                });
 
-    *dst_var = std::move(*(tmp_grad_vars_[0].first->MutableVar()));
-    for (size_t i = 1; i < tmp_grad_vars_.size(); ++i) {
-      TensorAdd(tmp_grad_vars_[i].first->Var(), dst_var);
+      *dst_var = std::move(*(tmp_grad_vars_[0].first->MutableVar()));
+      for (size_t i = 1; i < tmp_grad_vars_.size(); ++i) {
+        TensorAdd(tmp_grad_vars_[i].first->Var(), dst_var);
+      }
+
+      tmp_grad_vars_.clear();
     }
+  } else {
+    if (!var_->Var().IsInitialized() ||
+        !var_->Var().Get<framework::LoDTensor>().IsInitialized()) {
+      VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero";
+      auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
+      tensor->mutable_data(place, var->DataType());
+      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+    }
+    // looks like tmp_grad_vars will not have any member but just in case
     tmp_grad_vars_.clear();
   }
 }
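SortedGradientAccumulator buffers every incoming gradient together with its trace id and only sums once all ref_cnt_ contributions have arrived, in descending trace-id order, so accumulation is deterministic; stop_gradient targets are zero-filled instead. A NumPy sketch of the sorted-sum part, with made-up inputs:

import numpy as np

def sorted_accumulate(grads_with_trace_id):
    # Hypothetical sketch of SortedGradientAccumulator: buffer (grad, trace_id)
    # pairs, then sum in descending trace_id order so the result does not
    # depend on arrival order.
    ordered = sorted(grads_with_trace_id, key=lambda p: p[1], reverse=True)
    total = ordered[0][0].copy()
    for grad, _ in ordered[1:]:
        total += grad
    return total

g = sorted_accumulate([(np.full((2, 2), 1.0), 3), (np.full((2, 2), 2.0), 7)])
assert (g == 3.0).all()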
......
@@ -93,14 +93,44 @@ class VarBase {
     return &(grad_var_->var_);
   }
 
-  void SetStopGradient(bool stop_gradient) {
-    stop_gradient_ = stop_gradient;
+  // This is used for python api
+  void SetOverridedStopGradient(bool stop_gradient) {
+    if (stop_gradient) {
+      overrided_stop_gradient_ = 1;
+    } else {
+      overrided_stop_gradient_ = 0;
+    }
     if (grad_var_) {
-      grad_var_->stop_gradient_ = stop_gradient;
+      grad_var_->SetOverridedStopGradient(stop_gradient);
     }
   }
 
-  bool StopGradient() const { return stop_gradient_; }
+  // This is used for python api
+  bool OverridedStopGradient() const {
+    if (overrided_stop_gradient_ == 0) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  // This is used inside C++
+  int InnerOverridedStopGradient() const { return overrided_stop_gradient_; }
+
+  bool GradGenerated() const { return grad_generated_; }
+
+  void SetGradGenerated(bool generated) { grad_generated_ = generated; }
+
+  // This is used inside C++
+  void InnerSetOverridedStopGradient(bool stop_gradient) {
+    if (overrided_stop_gradient_ == -1) {
+      overrided_stop_gradient_ = static_cast<int>(stop_gradient);
+      if (grad_var_) {
+        grad_var_->InnerSetOverridedStopGradient(stop_gradient);
+      }
+    } else {
+      VLOG(6) << "Ignore Stop gradient conversion for Var: " << Name()
+              << "Set value is: " << overrided_stop_gradient_;
+    }
+  }
 
   void SetPersistable(bool persistable) { persistable_ = persistable; }
@@ -156,8 +186,11 @@ class VarBase {
   // grad_op indicates which grad_op will this var be used as input
   std::vector<std::weak_ptr<OpBase>> grad_ops_;
 
-  bool stop_gradient_{false};
+  // add this property for users may set stop_gradient themselves and this
+  // should override the frameworks setting (-1) unset, (1) true, (0) false
+  int overrided_stop_gradient_{-1};
+  bool grad_generated_{false};
   bool persistable_{false};
 
   framework::proto::VarType::Type type_{framework::proto::VarType::LOD_TENSOR};
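The single stop_gradient_ bool becomes a tri-state flag: -1 means unset, 0 means false, 1 means true. A user's explicit setting always wins, while the tracer may only fill in a flag that is still unset. A small Python sketch of these rules (the class below is illustrative only, not Paddle code):

class StopGradientState(object):
    # Illustrative model of VarBase's new flag: -1 unset, 0 false, 1 true.

    def __init__(self):
        self.overrided_stop_gradient = -1  # unset until someone decides

    def user_set(self, stop_gradient):
        # SetOverridedStopGradient: the Python user's choice always wins.
        self.overrided_stop_gradient = 1 if stop_gradient else 0

    def framework_set(self, stop_gradient):
        # InnerSetOverridedStopGradient: the tracer may only fill in an
        # unset flag, never override an explicit user choice.
        if self.overrided_stop_gradient == -1:
            self.overrided_stop_gradient = int(stop_gradient)

    def stop_gradient(self):
        # OverridedStopGradient(): only an explicit 0 means "needs grad".
        return self.overrided_stop_gradient != 0

s = StopGradientState()
s.user_set(True)        # user explicitly disables gradients
s.framework_set(False)  # ignored: the flag is no longer unset
assert s.stop_gradient() is True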
......
@@ -139,10 +139,10 @@ TEST(test_layer, test_varbase_basic) {
                   vin_with_grad->MutableGradVar()) != 0));
   ASSERT_TRUE(
       dynamic_cast<framework::Variable*>(vin_with_grad->MutableGradVar()) != 0);
-  vin_with_grad->SetStopGradient(true);
-  ASSERT_TRUE(vin_with_grad->StopGradient());
+  vin_with_grad->SetOverridedStopGradient(false);
+  ASSERT_FALSE(vin_with_grad->OverridedStopGradient());
   ASSERT_NO_FATAL_FAILURE(vin_with_grad->SetPersistable(true));
-  ASSERT_TRUE(vin_with_grad->StopGradient());
+  ASSERT_FALSE(vin_with_grad->OverridedStopGradient());
   ASSERT_NO_FATAL_FAILURE(vin_with_grad->SetName("new_name"));
   ASSERT_EQ(vin_with_grad->Name(), "new_name");
 }
......
@@ -81,6 +81,7 @@ TEST(test_tracer, test_track_backward_output) {
       new imperative::VarBase(true, "x_in"));
   std::shared_ptr<imperative::VarBase> y_in(
       new imperative::VarBase(false, "y_in"));
+  x_in->SetOverridedStopGradient(false);
   std::shared_ptr<imperative::VarBase> vout(
       new imperative::VarBase(true, "vout"));
   platform::CPUPlace place;
@@ -119,6 +120,7 @@ TEST(test_tracer, test_track_backward_input) {
   std::shared_ptr<imperative::VarBase> vout(
       new imperative::VarBase(false, "vout"));
   platform::CPUPlace place;
+  x_in->SetOverridedStopGradient(false);
   std::vector<float> src_data(10, 2.0);
   std::vector<int64_t> dims1 = {2, 5};
   std::vector<int64_t> dims2 = {5, 2};
......
@@ -32,6 +32,16 @@ static std::vector<std::unique_ptr<framework::OpDesc>> CreateGradOpDescs(
   }
 }
 
+static void PassStopGradient(const NameVarBaseMap& outs, bool generate_grad) {
+  for (const auto& name_pair : outs) {
+    for (const auto& vb : name_pair.second) {
+      VLOG(6) << "Set output: " << vb->Name() << "'s OverridedStopGradient as "
+              << generate_grad;
+      vb->InnerSetOverridedStopGradient(generate_grad);
+    }
+  }
+}
+
 void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
                      const NameVarBaseMap& outs, framework::AttributeMap attrs,
                      const platform::Place& place, bool trace_backward) {
@@ -45,16 +55,27 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
     TraceBackward(op, framework::OpDesc(op->Type(), op->InputNameMap(),
                                         op->OutputNameMap(), op->Attrs()),
                   ins, outs);
-    VLOG(6) << "Finish tracking Backward of op: " << type;
+  } else {
+    VLOG(3) << "No Grad to track for Op: " << type;
   }
+  VLOG(6) << "Finish tracing fwd op: " << type;
 }
 
 bool Tracer::ComputeRequiredGrad(const NameVarBaseMap& ins,
                                  const NameVarBaseMap& outs,
                                  bool trace_backward) {
-  // TODO(jiabin): Implement auto prune here
-  return trace_backward;
+  if (!trace_backward) return false;
+
+  for (const auto& name_pair : ins) {
+    for (const auto& var_base : name_pair.second) {
+      if (!var_base->OverridedStopGradient()) {
+        VLOG(6) << "Find out input: " << var_base->Name()
+                << "'s GeneratedGrad is True";
+        PassStopGradient(outs, var_base->OverridedStopGradient());
+        return true;
+      }
+    }
+  }
+  return false;
 }
 
 void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
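ComputeRequiredGrad now prunes at trace time: an op keeps a backward pass only if at least one input has stop_gradient=False, and in that case PassStopGradient marks the outputs as requiring gradients too. A condensed Python sketch of that rule (a hypothetical function, not the Paddle API):

def compute_required_grad(input_stop_gradients):
    # Hypothetical condensation of ComputeRequiredGrad + PassStopGradient:
    # keep the op in the backward graph iff some input needs a gradient, and
    # in that case its outputs are marked as needing gradients as well.
    requires_grad = any(not sg for sg in input_stop_gradients)
    return requires_grad, not requires_grad  # (trace backward?, outputs' stop_gradient)

# One trainable input keeps the op; all-pruned inputs drop it and its whole branch.
assert compute_required_grad([True, False]) == (True, False)
assert compute_required_grad([True, True]) == (False, True)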
@@ -133,14 +154,25 @@ void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
         PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
                           "Cannot find forward variable named %s",
                           fwd_var_name);
+        const auto& tmp = (*(fwd_var_iter->second))->GradVarBase();
         PADDLE_ENFORCE_NOT_NULL(
-            (*(fwd_var_iter->second))->GradVarBase(),
+            tmp.get(),
             "Grad of %s should "
             "not be NULL when we Track_Backward Input of %s",
             (*(fwd_var_iter->second))->Name(), grad_op->Type());
-        (*(fwd_var_iter->second))->GradVarBase()->AddGradOps(grad_op);
+        // Create grad_in's dim in tensor for Grad Dependency compute
+        auto* tensor = tmp->MutableVar()->GetMutable<framework::LoDTensor>();
+        tensor->Resize((*(fwd_var_iter->second))
+                           ->Var()
+                           .Get<framework::LoDTensor>()
+                           .dims());
+        // Add Grad Op for grad_in
+        tmp->AddGradOps(grad_op);
         VLOG(3) << "Add Grad Op " << grad_op->Type() << " for :"
                 << (*(fwd_var_iter->second))->GradVarBase()->Name();
+        // Add Grad var input to engine set
+        engine_->InsertGradVar(tmp.get());
+        VLOG(3) << "Add Grad: " << tmp->Name() << " in to Engine";
         bwd_in.emplace_back((*(fwd_var_iter->second))->GradVarBase());
       } else {
         // If it is a forward var, just add it
@@ -150,8 +182,7 @@ void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
                           grad_in_var_name);
         bwd_in.emplace_back(*(fwd_var_iter->second));
       }
-
-      VLOG(3) << "Set backward input " << grad_ins.first << " of "
+      VLOG(3) << "Set backward input from fwd var" << grad_ins.first << " of "
               << grad_op->Type() << " to be "
               << (bwd_in.back() ? bwd_in.back()->Name() : "nullptr");
     }
@@ -173,40 +204,44 @@ void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
         PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
                           "Cannot find forward variable named %s",
                           iter->second);
-        PADDLE_ENFORCE_NOT_NULL(
-            (*(fwd_var_iter->second))->GradVarBase(),
-            "Grad of %s should "
-            "not be NULL when we Track_Backward Output of %s",
-            (*(fwd_var_iter->second))->Name(), grad_op->Type());
-        bwd_out.emplace_back((*(fwd_var_iter->second))->GradVarBase());
-        VLOG(3) << "Set backward output " << grad_outs.first << " of "
-                << grad_op->Type() << " to be "
-                << (bwd_out.back() ? bwd_out.back()->Name() : "nullptr");
-
-        auto preceding_ops =
-            (*(fwd_var_iter->second))->GradVarBase()->GradOps();
-
-        if (VLOG_IS_ON(3) && !preceding_ops.empty()) {
-          VLOG(3) << "Add preceding Op of :"
-                  << (*(fwd_var_iter->second))->GradVarBase()->Name()
-                  << " It's preceding Op are: ";
-          for (const auto& op : preceding_ops) {
-            VLOG(3) << op->Type();
-          }
-        }
-
-        if (!preceding_ops.empty()) {
-          for (const auto& op : preceding_ops) {
-            PADDLE_ENFORCE_NOT_NULL(op, "No nullptr should be preceding_op");
-            if (visited_preceding_ops.count(op) == 0) {
-              visited_preceding_ops.insert(op);
-              grad_op->InsertGradPendingOps(op);
+        const auto& tmp = (*(fwd_var_iter->second))->GradVarBase();
+        PADDLE_ENFORCE_NOT_NULL(tmp.get(),
+                                "Grad output: %s of op: %s should not be NULL",
+                                (tmp->Name(), grad_op->Type()));
+
+        if ((!tmp->OverridedStopGradient()) || (grad_outs.second.size() > 1)) {
+          VLOG(3) << "Set backward output " << grad_outs.first << " of "
+                  << grad_op->Type() << " to be " << tmp->Name()
+                  << ". Its Overrided Stop_Gradient is: False";
+          bwd_out.emplace_back(tmp);
+          auto grad_pending_ops =
+              (*(fwd_var_iter->second))->GradVarBase()->GradOps();
+          if (VLOG_IS_ON(3) && !grad_pending_ops.empty()) {
+            VLOG(3) << "Add grad_pending Op of :"
+                    << (*(fwd_var_iter->second))->GradVarBase()->Name()
+                    << " It's grad_pending Op are: ";
+            for (const auto& op : grad_pending_ops) {
+              VLOG(3) << op->Type();
             }
           }
+          if (!grad_pending_ops.empty()) {
+            for (const auto& op : grad_pending_ops) {
+              PADDLE_ENFORCE_NOT_NULL(op,
+                                      "No nullptr should be grad_pending op");
+              if (visited_preceding_ops.count(op) == 0) {
+                visited_preceding_ops.insert(op);
+                grad_op->InsertGradPendingOps(op);
+              }
+            }
+          } else {
+            VLOG(5) << "Hit leaf VarBase"
+                    << (*(fwd_var_iter->second))->GradVarBase()->Name();
+          }
         } else {
-          VLOG(5) << "Hit leaf VarBase";
+          VLOG(3) << "Skip backward output " << grad_outs.first << " of "
+                  << grad_op->Type() << " Named: " << tmp->Name()
+                  << ", since its Overrided Stop_Gradient is: True";
         }
       }
     }
......
@@ -230,13 +230,11 @@ void BindImperative(py::module *m_ptr) {
           [](imperative::VarBase &self, const std::string &name,
              framework::proto::VarType::Type type,
              framework::proto::VarType::Type dtype,
-             const std::vector<int> &dims, bool stop_gradient,
-             bool persistable) {
+             const std::vector<int> &dims, bool persistable) {
             new (&self) imperative::VarBase(name);
             self.SetPersistable(persistable);
             self.SetType(type);
             self.SetDataType(dtype);
-            self.SetStopGradient(stop_gradient);
             if (type == framework::proto::VarType::LOD_TENSOR) {
               auto *tensor =
                   self.MutableVar()->GetMutable<framework::LoDTensor>();
@@ -302,8 +300,9 @@ void BindImperative(py::module *m_ptr) {
       .def_property_readonly("dtype", &imperative::VarBase::DataType)
       .def_property("persistable", &imperative::VarBase::Persistable,
                     &imperative::VarBase::SetPersistable)
-      .def_property("stop_gradient", &imperative::VarBase::StopGradient,
-                    &imperative::VarBase::SetStopGradient);
+      .def_property("stop_gradient",
+                    &imperative::VarBase::OverridedStopGradient,
+                    &imperative::VarBase::SetOverridedStopGradient);
 
   py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
   layer.def(py::init<>())
......
@@ -456,12 +456,13 @@ class Variable(object):
         if in_dygraph_mode():
             # record vars in tracer rather than blocks
             self._ivar = kwargs.get("ivar", None)
+            self.stop_gradient_ = kwargs.get("stop_gradient", True)
             if not self._ivar:
                 self._ivar = core.VarBase(
                     name, type
                     if type else core.VarDesc.VarType.LOD_TENSOR, dtype
                     if dtype else core.VarDesc.VarType.FP32,
-                    list(shape) if shape else [], stop_gradient, True
+                    list(shape) if shape else [], True
                     if persistable else False)
             if persistable:
                 _dygraph_tracer().trace_var(name, self)
@@ -1847,6 +1848,7 @@ class Block(object):
                 pass
             else:
                 initializer(param, self)
+        param.stop_gradient = False
         return param
 
     def append_op(self, *args, **kwargs):
......
@@ -266,7 +266,8 @@ class LayerHelperBase(object):
                          shape,
                          dtype,
                          is_bias=False,
-                         default_initializer=None):
+                         default_initializer=None,
+                         stop_gradient=False):
         """Create parameters for this layers.
 
            Args:
@@ -320,6 +321,7 @@ class LayerHelperBase(object):
             return self.main_program.global_block().create_parameter(
                 dtype=dtype,
                 shape=shape,
+                stop_gradient=stop_gradient,
                 **attr._to_kwargs(with_initializer=True))
         else:
             self.startup_program.global_block().create_parameter(
......
@@ -6980,8 +6980,8 @@ def one_hot(input, depth, allow_out_of_range=False):
         type="one_hot",
         inputs=inputs,
         attrs=attrs,
-        outputs={'Out': one_hot_out},
-        stop_gradient=True)
+        outputs={'Out': one_hot_out})
+    one_hot_out.stop_gradient = True
     return one_hot_out
@@ -7019,8 +7019,7 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
             type='increment',
             inputs={'X': [counter]},
             outputs={'Out': [counter]},
-            attrs={'step': float(step)},
-            stop_gradient=True)
+            attrs={'step': float(step)})
         counter.stop_gradient = True
 
     return counter
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import numpy as np
class AutoPruneLayer0(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer0, self).__init__(name_scope)
self.fc1 = fluid.dygraph.FC(
"FC_1",
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
self.fc2 = fluid.dygraph.FC(
"FC_2",
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
def forward(self, x, y):
a = self.fc1(x)
b = self.fc2(y)
c = fluid.layers.mul(a, b)
d = fluid.layers.reduce_mean(c)
return d
class AutoPruneLayer1(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer1, self).__init__(name_scope)
self.fc1 = fluid.dygraph.FC(
"FC_1",
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
self.fc2 = fluid.dygraph.FC(
"FC_2",
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
def forward(self, x, y):
a = self.fc1(x)
b = self.fc2(y)
b.stop_gradient = True
c = fluid.layers.mul(a, b)
d = fluid.layers.reduce_mean(c)
return d
class AutoPruneLayer2(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer2, self).__init__(name_scope)
self.fc = fluid.dygraph.FC("FC1", size=10, act=None)
self.fc2 = fluid.dygraph.FC("FC2", size=1, act=None)
def forward(self, x, label):
feature = self.fc(x)
label = self.fc2(label)
label = fluid.layers.cast(label, dtype="float32")
label = fluid.layers.cast(label, dtype='int64')
# Note that the label is not persistable in fluid.layers.cross_entropy.
loss = fluid.layers.cross_entropy(input=feature, label=label)
loss = fluid.layers.mean(loss)
return loss
class AutoPruneLayer3(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer3, self).__init__(name_scope)
self.fc = fluid.dygraph.FC("FC1", size=20, act=None)
def forward(self, x, label, test_num):
feature = self.fc(x)
part1, part2 = fluid.layers.split(
feature, num_or_sections=[10, 10], dim=1)
# Note that: part2 is not used.
loss = fluid.layers.cross_entropy(input=part1, label=label)
loss = fluid.layers.mean(loss)
if test_num == 1:
return loss, part2
else:
return loss, part1, part2
class MyLayer(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer, self).__init__(name_scope, dtype)
self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
def forward(self, x):
# this method involves only the fc layers
loss = fluid.layers.reduce_mean(self.fc0(x) + self.fc1(x))
return loss
def linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(x))
return loss
def embed_linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x)))
return loss
class MyLayer2(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer2, self).__init__(name_scope, dtype)
self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
def forward(self, indices):
# mind the difference with MyLayer
        # In this example, the forward method involves all params
loss = fluid.layers.reduce_mean(
self.fc0(self.embed0(indices)) + self.fc1(self.embed1(indices)))
return loss
def linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(x))
return loss
def embed_linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x)))
return loss
class TestImperativeAutoPrune(unittest.TestCase):
def test_auto_prune(self):
with fluid.dygraph.guard():
case1 = AutoPruneLayer0("l1")
value1 = np.arange(25).reshape(5, 5).astype("float32")
value2 = np.arange(25).reshape(5, 5).astype("float32")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss = case1(v1, v2)
loss.backward()
self.assertTrue(case1.fc2._w._ivar._grad_ivar() is not None)
self.assertTrue(case1.fc1._w._ivar._grad_ivar() is not None)
def test_auto_prune2(self):
with fluid.dygraph.guard():
case2 = AutoPruneLayer1("l1")
value1 = np.arange(25).reshape(5, 5).astype("float32")
value2 = np.arange(25).reshape(5, 5).astype("float32")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss = case2(v1, v2)
loss.backward()
self.assertTrue(case2.fc2._w._ivar._grad_ivar() is None)
self.assertTrue(case2.fc1._w._ivar._grad_ivar() is not None)
def test_auto_prune3(self):
with fluid.dygraph.guard():
case3 = AutoPruneLayer3("l3")
value1 = np.arange(784).reshape(1, 784).astype("float32")
value2 = np.arange(1).reshape(1, 1).astype("int64")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss, part2 = case3(v1, v2, 1)
loss.backward()
self.assertTrue(case3.fc._w._ivar._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 0).all())
def test_auto_prune4(self):
with fluid.dygraph.guard():
case4 = AutoPruneLayer3("l3")
value1 = np.arange(784).reshape(1, 784).astype("float32")
value2 = np.arange(1).reshape(1, 1).astype("int64")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss, part2 = case4(v1, v2, 1)
part2.backward()
self.assertTrue(case4.fc._w._ivar._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 1).all())
def test_auto_prune5(self):
with fluid.dygraph.guard():
case4 = AutoPruneLayer3("l3")
value1 = np.arange(784).reshape(1, 784).astype("float32")
value2 = np.arange(1).reshape(1, 1).astype("int64")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss, part1, part2 = case4(v1, v2, 2)
part1.backward()
self.assertTrue(case4.fc._w._ivar._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 0).all())
def test_auto_prune6(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
out2 = fc2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
out.backward()
self.assertTrue((fc._w.gradient() == 0).all())
self.assertTrue((out1.gradient() == 0).all())
def test_auto_prune7(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
out2 = fc2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True
            out.backward(backward_strategy)
self.assertTrue((fc._w.gradient() == 0).all())
self.assertTrue((out1.gradient() == 0).all())
def test_auto_prune_with_optimizer(self):
vocab_size = 100
size = 20
batch_size = 16
indices = np.random.randint(
low=0, high=100, size=(batch_size, 1)).astype("int64")
embed = np.random.randn(batch_size, size).astype("float32")
place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
model = MyLayer("mylayer", vocab_size, size)
optimizer = fluid.optimizer.AdamOptimizer(0.001)
grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001)
            indices_var = fluid.dygraph.to_variable(indices)
            embed_var = fluid.dygraph.to_variable(embed)
            dummy_loss = model(embed_var)
            loss = model.embed_linear0(indices_var)
            loss.backward()
            _, params_grads = optimizer.minimize(loss, grad_clip=grad_clip)
for items in params_grads:
assert items[0].name is not model.embed1._w.name
assert items[0].name is not model.fc1._w.name
assert model.embed1._w._ivar._grad_ivar() is None
assert model.fc1._w._ivar._grad_ivar() is None
with fluid.dygraph.guard(place):
model = MyLayer2("mylayer", vocab_size, size)
optimizer = fluid.optimizer.AdamOptimizer(0.001)
grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001)
            indices_var = fluid.dygraph.to_variable(indices)
            embed_var = fluid.dygraph.to_variable(embed)
            dummy_loss = model(indices_var)
            loss = model.embed_linear0(indices_var)
            loss.backward()
            _, params_grads = optimizer.minimize(loss, grad_clip=grad_clip)
for items in params_grads:
assert items[0].name is not model.embed1._w.name
assert items[0].name is not model.fc1._w.name
assert model.embed1._w._ivar._grad_ivar() is None
assert model.fc1._w._ivar._grad_ivar() is None
def test_case2_prune_no_grad_branch(self):
with fluid.dygraph.guard():
value1 = np.arange(784).reshape(1, 784)
value2 = np.arange(1).reshape(1, 1)
v1 = fluid.dygraph.to_variable(value1).astype("float32")
v2 = fluid.dygraph.to_variable(value2).astype("float32")
case3 = AutoPruneLayer2("l2")
loss = case3(v1, v2)
loss.backward()
self.assertTrue(case3.fc2._w._ivar._grad_ivar() is None)
self.assertTrue(case3.fc._w._ivar._grad_ivar() is not None)
def test_case3_prune_no_grad_branch2(self):
with fluid.dygraph.guard():
value1 = np.arange(1).reshape(1, 1)
fc = fluid.dygraph.FC("FC1", size=1, act=None)
label = fluid.dygraph.to_variable(value1).astype("float32")
label = fc(label)
label = fluid.layers.cast(label, dtype="float32")
label = fluid.layers.cast(label, dtype='int64')
out = fluid.layers.one_hot(input=label, depth=100)
loss = fluid.layers.mean(out)
loss.backward()
self.assertTrue(fc._w._ivar._grad_ivar() is None)
def test_case4_with_no_grad_op_maker(self):
with fluid.dygraph.guard():
out = fluid.layers.gaussian_random(shape=[20, 30])
loss = fluid.layers.mean(out)
loss.backward()
self.assertTrue(out._ivar._grad_ivar() is None)
if __name__ == '__main__':
unittest.main()
@@ -183,14 +183,18 @@ class TestImperative(unittest.TestCase):
         with fluid.dygraph.guard():
             inputs = []
             for _ in range(10):
-                inputs.append(fluid.dygraph.base.to_variable(x))
+                tmp = fluid.dygraph.base.to_variable(x)
+                tmp.stop_gradient = False
+                inputs.append(tmp)
             ret = fluid.layers.sums(inputs)
             loss = fluid.layers.reduce_sum(ret)
             loss.backward()
         with fluid.dygraph.guard():
             inputs2 = []
             for _ in range(10):
-                inputs2.append(fluid.dygraph.base.to_variable(x))
+                tmp = fluid.dygraph.base.to_variable(x)
+                tmp.stop_gradient = False
+                inputs2.append(tmp)
             ret2 = fluid.layers.sums(inputs2)
             loss2 = fluid.layers.reduce_sum(ret2)
             backward_strategy = fluid.dygraph.BackwardStrategy()
@@ -214,6 +218,7 @@ class TestImperative(unittest.TestCase):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.dygraph.guard():
             var_inp = fluid.dygraph.base.to_variable(np_inp)
+            var_inp.stop_gradient = False
             l = MyLayer("my_layer")
             x = l(var_inp)[0]
             self.assertIsNotNone(x)
@@ -223,6 +228,7 @@ class TestImperative(unittest.TestCase):
         with fluid.dygraph.guard():
             var_inp2 = fluid.dygraph.base.to_variable(np_inp)
+            var_inp2.stop_gradient = False
             l2 = MyLayer("my_layer")
             x2 = l2(var_inp2)[0]
             self.assertIsNotNone(x2)
......
@@ -47,6 +47,8 @@ class TestRecurrentFeed(unittest.TestCase):
             fluid.default_main_program().random_seed = seed
             original_in1 = to_variable(original_np1)
             original_in2 = to_variable(original_np2)
+            original_in1.stop_gradient = False
+            original_in2.stop_gradient = False
             rt = RecurrentTest("RecurrentTest")
 
             for i in range(3):
......