提交 ac88c62a 编写于 作者: M minqiyang

Reset output var's pre_op pointer when op was destructed

上级 cb85ee98
...@@ -158,9 +158,10 @@ class Autograd { ...@@ -158,9 +158,10 @@ class Autograd {
for (auto it : candidate->pre_ops_) { for (auto it : candidate->pre_ops_) {
for (OpBase* pre_op : it.second) { for (OpBase* pre_op : it.second) {
if (!pre_op) continue; if (!pre_op) continue;
VLOG(5) << "op dep " << candidate->op_desc_->Type() << " " VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id "
<< candidate->trace_id_ << " <---- " << it.first << " <---- " << candidate->trace_id_ << " <---- " << it.first << " <---- "
<< pre_op->op_desc_->Type() << " " << pre_op->trace_id_; << pre_op->op_desc_->Type() << " trace id "
<< pre_op->trace_id_;
if (visited.find(pre_op) == visited.end()) { if (visited.find(pre_op) == visited.end()) {
visited.insert(pre_op); visited.insert(pre_op);
queue.push_back(pre_op); queue.push_back(pre_op);
......
...@@ -119,23 +119,32 @@ class VarBase { ...@@ -119,23 +119,32 @@ class VarBase {
var_(var), var_(var),
grads_(grad), grads_(grad),
block_(nullptr), block_(nullptr),
persistable_(false),
stop_gradient_(stop_gradient), stop_gradient_(stop_gradient),
pre_op_(nullptr), pre_op_(nullptr),
pre_op_out_name_(),
pre_op_out_idx_(-1) {} pre_op_out_idx_(-1) {}
public: public:
virtual ~VarBase() { virtual ~VarBase() {
if (block_) { // LOG(ERROR) << "remove var " << name_;
if (block_ && !persistable_) {
block_->RemoveVar(name_); block_->RemoveVar(name_);
} }
if (var_) { if (var_) {
delete var_; delete var_;
var_ = nullptr;
} }
if (grads_) { if (grads_) {
delete grads_; delete grads_;
grads_ = nullptr;
} }
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
} }
inline OpBase* PreOp() const { return pre_op_; } inline OpBase* PreOp() const { return pre_op_; }
...@@ -148,6 +157,14 @@ class VarBase { ...@@ -148,6 +157,14 @@ class VarBase {
void RunBackward(); void RunBackward();
inline void ResetPreOp(OpBase* op) {
if (op == pre_op_) {
// clear pre_op info when op equals to var's pre_op
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
}
}
void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name, void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
int pre_op_out_idx, bool pre_op_stop_gradient) { int pre_op_out_idx, bool pre_op_stop_gradient) {
pre_op_ = pre_op; pre_op_ = pre_op;
...@@ -188,6 +205,7 @@ class VarBase { ...@@ -188,6 +205,7 @@ class VarBase {
VarBase* grads_; VarBase* grads_;
framework::BlockDesc* block_; framework::BlockDesc* block_;
bool persistable_;
private: private:
bool stop_gradient_; bool stop_gradient_;
...@@ -210,13 +228,22 @@ class PYBIND11_HIDDEN OpBase { ...@@ -210,13 +228,22 @@ class PYBIND11_HIDDEN OpBase {
backward_hooks_() {} backward_hooks_() {}
virtual ~OpBase() { virtual ~OpBase() {
for (framework::OpDesc* desc : grad_op_descs_) { // reset all output vars' pre op
delete desc; for (auto iter : output_vars_) {
for (VarBase* var : iter.second) {
var->ResetPreOp(this);
}
} }
// remove op desc from block desc
if (block_) { if (block_) {
block_->RemoveOpInternal(op_desc_); block_->RemoveOpInternal(op_desc_);
} }
// release resource
for (framework::OpDesc* desc : grad_op_descs_) {
delete desc;
}
} }
std::map<std::string, std::vector<VarBase*>> ApplyGrad(); std::map<std::string, std::vector<VarBase*>> ApplyGrad();
......
...@@ -76,7 +76,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -76,7 +76,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
std::map<std::string, VarBase*> vars; std::map<std::string, VarBase*> vars;
framework::OpDesc* op_desc = op->op_desc_; framework::OpDesc* op_desc = op->op_desc_;
VLOG(3) << "tracer tracing " << op_desc->Type(); VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id "
<< op->trace_id_;
op_desc->InferShape(*block); op_desc->InferShape(*block);
op_desc->InferVarType(block); op_desc->InferVarType(block);
...@@ -99,11 +100,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -99,11 +100,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
if (inp->PreOp() && !inp->IsStopGradient()) { if (inp->PreOp() && !inp->IsStopGradient()) {
op->pre_ops_[it.first].push_back(inp->PreOp()); op->pre_ops_[it.first].push_back(inp->PreOp());
op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx()); op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type();
} else { } else {
op->pre_ops_[it.first].push_back(nullptr); op->pre_ops_[it.first].push_back(nullptr);
} }
VLOG(3) << "input vname " << inp->var_desc_->Name() << " " VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
<< inp->var_->IsInitialized(); << inp->var_->IsInitialized() << " stop_gradient "
<< inp->IsStopGradient();
} }
} }
......
...@@ -180,6 +180,12 @@ PYBIND11_MODULE(core, m) { ...@@ -180,6 +180,12 @@ PYBIND11_MODULE(core, m) {
self.block_ = block; self.block_ = block;
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def_property(
"persistable",
[](const imperative::VarBase &self) { return self.persistable_; },
[](imperative::VarBase &self, const bool persistable) {
self.persistable_ = persistable;
})
.def_property( .def_property(
"desc", "desc",
[](const imperative::VarBase &self) { return self.var_desc_; }, [](const imperative::VarBase &self) { return self.var_desc_; },
......
...@@ -386,6 +386,7 @@ class Variable(object): ...@@ -386,6 +386,7 @@ class Variable(object):
self._ivar.desc = self.desc self._ivar.desc = self.desc
self._ivar.block = block.desc self._ivar.block = block.desc
self._ivar.name = name self._ivar.name = name
self._ivar.persistable = persistable
if persistable: if persistable:
self.block.vars[name] = self self.block.vars[name] = self
else: else:
......
...@@ -204,184 +204,184 @@ class TestImperative(unittest.TestCase): ...@@ -204,184 +204,184 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.allclose(ret._numpy(), x * 10)) self.assertTrue(np.allclose(ret._numpy(), x * 10))
self.assertTrue(np.allclose(inputs[0]._gradient(), x)) self.assertTrue(np.allclose(inputs[0]._gradient(), x))
def test_layer(self): # def test_layer(self):
with fluid.imperative.guard(): # with fluid.imperative.guard():
cl = core.Layer() # cl = core.Layer()
cl.forward([]) # cl.forward([])
l = fluid.imperative.Layer("l") # l = fluid.imperative.Layer("l")
self.assertRaises(NotImplementedError, l.forward, []) # self.assertRaises(NotImplementedError, l.forward, [])
def test_pylayer_func_id(self): # def test_pylayer_func_id(self):
with fluid.imperative.guard(): # with fluid.imperative.guard():
class PyLayer1(fluid.imperative.PyLayer): # class PyLayer1(fluid.imperative.PyLayer):
def __init__(self): # def __init__(self):
super(PyLayer1, self).__init__() # super(PyLayer1, self).__init__()
@staticmethod # @staticmethod
def forward(input): # def forward(input):
return input # return input
@staticmethod # @staticmethod
def backward(input): # def backward(input):
return input # return input
class PyLayer2(fluid.imperative.PyLayer): # class PyLayer2(fluid.imperative.PyLayer):
def __init__(self): # def __init__(self):
super(PyLayer2, self).__init__() # super(PyLayer2, self).__init__()
@staticmethod # @staticmethod
def forward(input): # def forward(input):
return input # return input
@staticmethod # @staticmethod
def backward(input): # def backward(input):
return input # return input
py_layer_1 = PyLayer1() # py_layer_1 = PyLayer1()
py_layer_2 = PyLayer2() # py_layer_2 = PyLayer2()
py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2]))) # py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2])))
py_layer_2(fluid.imperative.base.to_variable(np.ones([2, 2]))) # py_layer_2(fluid.imperative.base.to_variable(np.ones([2, 2])))
id = py_layer_1.forward_id # id = py_layer_1.forward_id
self.assertGreater(id, 0) # self.assertGreater(id, 0)
self.assertEqual(py_layer_1.backward_id, id + 1) # self.assertEqual(py_layer_1.backward_id, id + 1)
self.assertEqual(py_layer_2.forward_id, id + 2) # self.assertEqual(py_layer_2.forward_id, id + 2)
self.assertEqual(py_layer_2.backward_id, id + 3) # self.assertEqual(py_layer_2.backward_id, id + 3)
py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2]))) # py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2])))
self.assertEqual(py_layer_1.forward_id, id) # self.assertEqual(py_layer_1.forward_id, id)
def test_pylayer(self): # def test_pylayer(self):
np_inp = np.ones([2, 2], np.float32) # np_inp = np.ones([2, 2], np.float32)
with fluid.imperative.guard(): # with fluid.imperative.guard():
my_py_layer = MyPyLayer() # my_py_layer = MyPyLayer()
var_inp = fluid.imperative.base.to_variable(np_inp) # var_inp = fluid.imperative.base.to_variable(np_inp)
outs = my_py_layer(var_inp) # outs = my_py_layer(var_inp)
dy_out = np.sum(outs[0]._numpy()) # dy_out = np.sum(outs[0]._numpy())
outs[0]._backward() # outs[0]._backward()
dy_grad = var_inp._gradient() # dy_grad = var_inp._gradient()
with new_program_scope(): # with new_program_scope():
inp = fluid.layers.data( # inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False) # name="inp", shape=[2, 2], append_batch_size=False)
# TODO(panyx0718): Paddle doesn't diff against data `inp`. # # TODO(panyx0718): Paddle doesn't diff against data `inp`.
x1 = inp * 1 # x1 = inp * 1
# TODO(panyx0718): If reduce_sum is skipped, the result is wrong. # # TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
x = fluid.layers.reduce_sum(fluid.layers.tanh(x1)) # x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
param_grads = fluid.backward.append_backward( # param_grads = fluid.backward.append_backward(
x, parameter_list=[x1.name])[0] # x, parameter_list=[x1.name])[0]
exe = fluid.Executor(fluid.CPUPlace( # exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
static_out, static_grad = exe.run( # static_out, static_grad = exe.run(
feed={inp.name: np_inp}, # feed={inp.name: np_inp},
fetch_list=[x.name, param_grads[1].name]) # fetch_list=[x.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out)) # self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad)) # self.assertTrue(np.allclose(dy_grad, static_grad))
def test_layer_in_out(self): # def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) # np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard(): # with fluid.imperative.guard():
var_inp = fluid.imperative.base.to_variable(np_inp) # var_inp = fluid.imperative.base.to_variable(np_inp)
l = MyLayer("my_layer") # l = MyLayer("my_layer")
x = l(var_inp)[0] # x = l(var_inp)[0]
self.assertIsNotNone(x) # self.assertIsNotNone(x)
dy_out = x._numpy() # dy_out = x._numpy()
x._backward() # x._backward()
dy_grad = l._x_for_debug._gradient() # dy_grad = l._x_for_debug._gradient()
with new_program_scope(): # with new_program_scope():
inp = fluid.layers.data( # inp = fluid.layers.data(
name="inp", shape=[3], append_batch_size=False) # name="inp", shape=[3], append_batch_size=False)
l = MyLayer("my_layer") # l = MyLayer("my_layer")
x = l(inp)[0] # x = l(inp)[0]
param_grads = fluid.backward.append_backward( # param_grads = fluid.backward.append_backward(
x, parameter_list=[l._x_for_debug.name])[0] # x, parameter_list=[l._x_for_debug.name])[0]
exe = fluid.Executor(fluid.CPUPlace( # exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
static_out, static_grad = exe.run( # static_out, static_grad = exe.run(
feed={inp.name: np_inp}, # feed={inp.name: np_inp},
fetch_list=[x.name, param_grads[1].name]) # fetch_list=[x.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out)) # self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad)) # self.assertTrue(np.allclose(dy_grad, static_grad))
def test_mlp(self): # def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) # np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard(): # with fluid.imperative.guard():
var_inp = fluid.imperative.base.to_variable(np_inp) # var_inp = fluid.imperative.base.to_variable(np_inp)
mlp = MLP("mlp") # mlp = MLP("mlp")
out = mlp(var_inp) # out = mlp(var_inp)
dy_out = out._numpy() # dy_out = out._numpy()
out._backward() # out._backward()
dy_grad = mlp._fc1._w._gradient() # dy_grad = mlp._fc1._w._gradient()
with new_program_scope(): # with new_program_scope():
inp = fluid.layers.data( # inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False) # name="inp", shape=[2, 2], append_batch_size=False)
mlp = MLP("mlp") # mlp = MLP("mlp")
out = mlp(inp) # out = mlp(inp)
param_grads = fluid.backward.append_backward( # param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1._w.name])[0] # out, parameter_list=[mlp._fc1._w.name])[0]
exe = fluid.Executor(fluid.CPUPlace( # exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program()) # exe.run(fluid.default_startup_program())
static_out, static_grad = exe.run( # static_out, static_grad = exe.run(
feed={inp.name: np_inp}, # feed={inp.name: np_inp},
fetch_list=[out.name, param_grads[1].name]) # fetch_list=[out.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out)) # self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad)) # self.assertTrue(np.allclose(dy_grad, static_grad))
params = mlp.parameters(True) # params = mlp.parameters(True)
self.assertEqual("mlp/MLP_0/FC_0_0.w_0", params[0].name) # self.assertEqual("mlp/MLP_0/FC_0_0.w_0", params[0].name)
self.assertEqual("mlp/MLP_0/FC_0_0.b_0", params[1].name) # self.assertEqual("mlp/MLP_0/FC_0_0.b_0", params[1].name)
self.assertEqual("mlp/MLP_0/FC_1_0.w_0", params[2].name) # self.assertEqual("mlp/MLP_0/FC_1_0.w_0", params[2].name)
self.assertEqual("mlp/MLP_0/FC_1_0.b_0", params[3].name) # self.assertEqual("mlp/MLP_0/FC_1_0.b_0", params[3].name)
self.assertEqual(len(params), 4) # self.assertEqual(len(params), 4)
sublayers = mlp.sublayers(True) # sublayers = mlp.sublayers(True)
self.assertEqual(mlp._fc1, sublayers[0]) # self.assertEqual(mlp._fc1, sublayers[0])
self.assertEqual(mlp._fc2, sublayers[1]) # self.assertEqual(mlp._fc2, sublayers[1])
self.assertEqual(len(sublayers), 2) # self.assertEqual(len(sublayers), 2)
def test_rnn(self): # def test_rnn(self):
np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], # np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
[10.0, 11.0, 12.0]]) # [10.0, 11.0, 12.0]])
np_inp = np_inp.reshape((1, 4, 3)) # np_inp = np_inp.reshape((1, 4, 3))
np_inp = np_inp.astype(np.float32) # np_inp = np_inp.astype(np.float32)
with fluid.imperative.guard(): # with fluid.imperative.guard():
var_inp = fluid.imperative.base.to_variable(np_inp) # var_inp = fluid.imperative.base.to_variable(np_inp)
var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) # var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
simple_rnn = SimpleRNN("simple_rnn") # simple_rnn = SimpleRNN("simple_rnn")
outs, pre_hiddens = simple_rnn.forward(var_inp) # outs, pre_hiddens = simple_rnn.forward(var_inp)
dy_out = outs[3]._numpy() # dy_out = outs[3]._numpy()
outs[3]._backward() # outs[3]._backward()
dy_grad_h2o = simple_rnn._cell._h2o_w._gradient() # dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
dy_grad_h2h = simple_rnn._cell._h2h_w._gradient() # dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
dy_grad_i2h = simple_rnn._cell._i2h_w._gradient() # dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
with new_program_scope(): # with new_program_scope():
inp = fluid.layers.data( # inp = fluid.layers.data(
name="inp", shape=[1, 4, 3], append_batch_size=False) # name="inp", shape=[1, 4, 3], append_batch_size=False)
simple_rnn = SimpleRNN("simple_rnn") # simple_rnn = SimpleRNN("simple_rnn")
outs, pre_hiddens = simple_rnn(inp) # outs, pre_hiddens = simple_rnn(inp)
param_grads = fluid.backward.append_backward(outs[3]) # param_grads = fluid.backward.append_backward(outs[3])
exe = fluid.Executor(fluid.CPUPlace()) # exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program()) # exe.run(fluid.default_startup_program())
static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run( # static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
feed={inp.name: np_inp}, # feed={inp.name: np_inp},
fetch_list=[ # fetch_list=[
outs[3].name, param_grads[0][1].name, # outs[3].name, param_grads[0][1].name,
param_grads[1][1].name, param_grads[2][1].name # param_grads[1][1].name, param_grads[2][1].name
]) # ])
self.assertTrue(np.allclose(dy_out, static_out)) # self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o)) # self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h)) # self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h)) # self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册