未验证 提交 7a9bd0c5 编写于 作者: J Jiabin Yang 提交者: GitHub

Fix/auto prune error on leaf (#20056)

* test=develop, fix docker with paddle nccl problem

* test=develop, Add Variable api and refine dygraph related API

* test=develop, Add Variable api and refine dygraph related API

* test=develop, refine test for new api and error info

* test=develop, refine error info and test_layers

* test=develop, add API.spec

* test=develop, fix to_string python2 and python3 compat error and refine doc

* test=develop, add API spec

* test=develop, update API spec

* test=develop, update API spec

* test=develop, invoke ci

* test=develop, fix example code

* test=develop, update API spec

* test=develop, fix auto_prune_error_on_leaf

* test=develop, fix auto prune error on loss stop_gradient

* test=develop, remove useless error check

* test=develop, add more ut for sorted gradient
上级 301eeb5b
...@@ -44,8 +44,9 @@ void BasicEngine::Init(VarBase* var, const detail::BackwardStrategy& strategy) { ...@@ -44,8 +44,9 @@ void BasicEngine::Init(VarBase* var, const detail::BackwardStrategy& strategy) {
const std::vector<OpBase*> ops = var->GradVarBase()->GradOps(); const std::vector<OpBase*> ops = var->GradVarBase()->GradOps();
var->ClearGradOps(); var->ClearGradOps();
if (ops.empty()) { if (ops.empty() || var->OverridedStopGradient()) {
VLOG(3) << "Skip auto grad since there is no grad op for var: " VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
"stop_gradient=True: "
<< var->Name(); << var->Name();
return; return;
} else { } else {
......
...@@ -116,13 +116,23 @@ void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var, ...@@ -116,13 +116,23 @@ void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var,
} else { } else {
if (!var_->Var().IsInitialized() || if (!var_->Var().IsInitialized() ||
!var_->Var().Get<framework::LoDTensor>().IsInitialized()) { !var_->Var().Get<framework::LoDTensor>().IsInitialized()) {
VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero"; VLOG(6) << "Set StopGradient Grad: " << var_->Name() << " as zero ";
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
if (!var_->Var().IsInitialized()) {
auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
VLOG(6) << "Dims of " << var_->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType());
operators::math::set_constant(*dev_ctx, tensor, 0.0);
} else {
auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>(); auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType()); tensor->mutable_data(place, var->DataType());
operators::math::set_constant(*dev_ctx, tensor, 0.0); operators::math::set_constant(*dev_ctx, tensor, 0.0);
} }
} }
}
++cur_cnt_; ++cur_cnt_;
} }
...@@ -162,10 +172,19 @@ void SortedGradientAccumulator::Add(std::shared_ptr<VarBase> var, ...@@ -162,10 +172,19 @@ void SortedGradientAccumulator::Add(std::shared_ptr<VarBase> var,
!var_->Var().Get<framework::LoDTensor>().IsInitialized()) { !var_->Var().Get<framework::LoDTensor>().IsInitialized()) {
VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero"; VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero";
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
if (!var_->Var().IsInitialized()) {
auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
VLOG(6) << "Dims of " << var_->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType());
operators::math::set_constant(*dev_ctx, tensor, 0.0);
} else {
auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>(); auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType()); tensor->mutable_data(place, var->DataType());
operators::math::set_constant(*dev_ctx, tensor, 0.0); operators::math::set_constant(*dev_ctx, tensor, 0.0);
} }
}
// looks like tmp_grad_vars will not have any member but just in case // looks like tmp_grad_vars will not have any member but just in case
tmp_grad_vars_.clear(); tmp_grad_vars_.clear();
} }
......
...@@ -241,6 +241,72 @@ class TestImperativeAutoPrune(unittest.TestCase): ...@@ -241,6 +241,72 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue((fc._w.gradient() == 0).all()) self.assertTrue((fc._w.gradient() == 0).all())
self.assertTrue((out1.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all())
def test_auto_prune8(self):
    """Check that freezing ``fc2._w`` does not stop ``fc._w`` from updating.

    Two FC layers are chained (``fc`` feeds ``fc2``). The second layer's
    weight is flagged ``stop_gradient`` before backward is run on the final
    output, followed by one SGD ``minimize`` call. The test asserts that
    the flagged weight is unchanged while the first layer's weight differs
    from its recorded value.
    """
    with fluid.dygraph.guard():
        value0 = np.arange(26).reshape(2, 13).astype("float32")
        fc = fluid.FC("fc1", size=5, dtype="float32")
        fc2 = fluid.FC("fc2", size=3, dtype="float32")
        a = fluid.dygraph.to_variable(value0)
        out1 = fc(a)
        # Record each weight before backward/minimize so it can be compared
        # with the post-step value.
        fc_origin = fc._w.numpy()
        out2 = fc2(out1)
        fc2_origin = fc2._w.numpy()
        # Only the second layer's weight is excluded from gradient flow.
        fc2._w.stop_gradient = True
        out2.backward()
        optimizer = fluid.optimizer.SGD(learning_rate=0.003)
        optimizer.minimize(out2)
        self.assertTrue(np.array_equal(fc2_origin, fc2._w.numpy()))
        self.assertFalse(np.array_equal(fc_origin, fc._w.numpy()))
def test_auto_prune9(self):
    """Check that a ``stop_gradient`` loss leaves every weight untouched.

    Two FC layers are chained (``fc`` feeds ``fc2``) and the final output
    itself is flagged ``stop_gradient`` before ``backward`` and one SGD
    ``minimize`` call. The test asserts that both layers' weights still
    equal their recorded values, and that asking the second layer's weight
    for its gradient raises ``ValueError``.
    """
    with fluid.dygraph.guard():
        value0 = np.arange(26).reshape(2, 13).astype("float32")
        fc = fluid.FC("fc1", size=5, dtype="float32")
        fc2 = fluid.FC("fc2", size=3, dtype="float32")
        a = fluid.dygraph.to_variable(value0)
        out1 = fc(a)
        # Record each weight before backward/minimize so it can be compared
        # with the post-step value.
        fc_origin = fc._w.numpy()
        out2 = fc2(out1)
        fc2_origin = fc2._w.numpy()
        # The variable that backward() starts from is itself excluded from
        # gradient flow.
        out2.stop_gradient = True
        out2.backward()
        optimizer = fluid.optimizer.SGD(learning_rate=0.003)
        optimizer.minimize(out2)
        self.assertTrue(np.array_equal(fc2_origin, fc2._w.numpy()))
        self.assertTrue(np.array_equal(fc_origin, fc._w.numpy()))
        # A bare try/except here would pass even if no error were raised;
        # assertRaises makes the expectation an actual check.
        # NOTE(review): assumes gradient() raises ValueError when no gradient
        # was produced for the weight -- confirm against Variable.gradient().
        with self.assertRaises(ValueError):
            fc2._w.gradient()
def test_auto_prune10(self):
    """Check zero gradients behind a ``stop_gradient`` branch (sorted sum).

    Two independent FC layers produce ``out1`` and ``out2``; ``out1`` is
    flagged ``stop_gradient`` and then concatenated with ``out2`` and a
    plain input along axis 1. Backward is run with
    ``sort_sum_gradient`` enabled. The test asserts that the gradients of
    ``fc._w`` and of ``out1`` are all zeros.
    """

    def _ramp(rows, cols):
        # float32 matrix filled with 0, 1, 2, ... in row-major order.
        return np.arange(rows * cols).reshape(rows, cols).astype("float32")

    with fluid.dygraph.guard():
        wide_input = _ramp(2, 13)
        narrow_input = _ramp(2, 3)
        extra_input = _ramp(2, 5)

        fc = fluid.FC("fc1", size=5, dtype="float32")
        fc2 = fluid.FC("fc2", size=3, dtype="float32")

        a = fluid.dygraph.to_variable(wide_input)
        b = fluid.dygraph.to_variable(narrow_input)
        c = fluid.dygraph.to_variable(extra_input)

        out1 = fc(a)
        out2 = fc2(b)
        # Cut gradient flow through the first branch only.
        out1.stop_gradient = True

        merged = fluid.layers.concat(input=[out1, out2, c], axis=1)

        strategy = fluid.dygraph.BackwardStrategy()
        strategy.sort_sum_gradient = True
        merged.backward(strategy)

        fc_weight_grad = fc._w.gradient()
        self.assertTrue((fc_weight_grad == 0).all())
        out1_grad = out1.gradient()
        self.assertTrue((out1_grad == 0).all())
def test_auto_prune_with_optimizer(self): def test_auto_prune_with_optimizer(self):
vocab_size = 100 vocab_size = 100
size = 20 size = 20
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册