diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
index f537e4b9e569dd4c513ac0efde7240833bcf04b6..174c77a69b9629d9546111e27dd3f63cd3c0f83e 100644
--- a/paddle/fluid/framework/block_desc.cc
+++ b/paddle/fluid/framework/block_desc.cc
@@ -163,6 +163,20 @@ std::vector<OpDesc *> BlockDesc::AllOps() const {
   return res;
 }
 
+void BlockDesc::ClearBlock() {
+  // clear all ops
+  ops_.clear();
+
+  // clear all vars which are not persistable
+  for (auto it = vars_.begin(); it != vars_.end();) {
+    if (it->second->Persistable()) {
+      ++it;
+    } else {
+      vars_.erase(it++);
+    }
+  }
+}
+
 void BlockDesc::Flush() {
   for (auto &op_desc : ops_) {
     op_desc->Flush();
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
index 960ca39e1eadd3c064beb0e2c1342a406c4f0b6a..651841daea4ec8d7890a102e24f955fdd67bc26b 100644
--- a/paddle/fluid/framework/block_desc.h
+++ b/paddle/fluid/framework/block_desc.h
@@ -97,6 +97,8 @@ class BlockDesc {
 
   std::vector<OpDesc *> AllOps() const;
 
+  void ClearBlock();
+
   size_t OpSize() const { return ops_.size(); }
 
   OpDesc *Op(int idx) const { return ops_.at(idx).get(); }
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 5d38c339953922565f7bb880eaea9762ae868fe2..f42ceb50275875ca39376b7228fe730891b101c4 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -103,7 +103,9 @@ class OpBase;
  */
 class VarBase {
  public:
-  VarBase(std::string name) : VarBase(new framework::Variable(), new VarBase(name + "XGRAD", true), name) {}
+  explicit VarBase(std::string name)
+      : VarBase(new framework::Variable(), new VarBase(name + "XGRAD", true),
+                name) {}
 
   // Owns `var` and `grad`
   VarBase(framework::Variable* var, VarBase* grad, std::string name)
@@ -113,7 +115,7 @@ class VarBase {
         stop_gradient_(false),
         pre_op_(nullptr),
         pre_op_out_idx_(-1),
-        name_(name) { LOG(ERROR) << "create " << name; }
+        name_(name) {}
 
   explicit VarBase(std::string name, bool stop_gradient)
       : var_desc_(nullptr),
@@ -122,11 +124,9 @@ class VarBase {
         stop_gradient_(stop_gradient),
         pre_op_(nullptr),
         pre_op_out_idx_(-1),
-        name_(name) { LOG(ERROR) << "create " << name; }
+        name_(name) {}
 
   virtual ~VarBase() {
-    LOG(ERROR) << "delete " << name_;
-
     if (var_) {
       delete var_;
     }
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index bc39d11ba00a6a7c386162a1f9201c6f992c8692..c8244e22fd0f68fa43b291598747453ef09f3a9d 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -66,16 +66,38 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
   return result;
 }
 
+// framework::BlockDesc* InferShapeAndVarType(OpBase* op, const VarBasePtrMap&
+// inputs, const VarBasePtrMap& outputs) {
+//   std::unique_ptr<framework::BlockDesc> block(new BlockDesc());
+
+//   // construct op desc
+//   op->op_desc_ = block.AppendOp();
+
+//   // construct op inputs and outputs
+//   // for
+//   //
+//   for (auto it = )
+//   op->op_desc_->SetInput()
+
+//   op->op_desc_->InferShape(*block);
+//   op->op_desc_->InferVarType(block.get());
+
+//   return block.release();
+// }
+
 void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
                    const VarBasePtrMap& outputs, framework::BlockDesc* block,
                    const platform::Place expected_place,
                    const bool stop_gradient) {
   std::map<std::string, VarBase*> vars;
 
+  // framework::BlockDesc* block = InferShapeAndVarType(op, inputs, outputs);
+
   framework::OpDesc* op_desc = op->op_desc_;
   VLOG(3) << "tracer tracing " << op_desc->Type();
   op_desc->InferShape(*block);
   op_desc->InferVarType(block);
+
   std::unique_ptr<framework::OperatorBase> op_base =
       framework::OpRegistry::CreateOp(*op_desc);
@@ -92,7 +114,7 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       invars.emplace_back(inp->var_);
       vars[inp->var_desc_->Name()] = inp;
 
-      if (inp->PreOp()) {
+      if (inp->PreOp() && !inp->IsStopGradient()) {
         op->pre_ops_[it.first].push_back(inp->PreOp());
         op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
       } else {
@@ -202,7 +224,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
   op->input_vars_[PyLayer::kFwdInp] = inputs;
   op->output_vars_[PyLayer::kFwdOut] = PyLayer::Apply(op->forward_id_, inputs);
   for (VarBase* inp : inputs) {
-    if (inp->PreOp()) {
+    if (inp->PreOp() && !inp->IsStopGradient()) {
      op->pre_ops_[PyLayer::kFwdInp].push_back(inp->PreOp());
      op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->PreOpOutIdx());
    } else {
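
Review note: with the `IsStopGradient()` check above, the tracer no longer records a `PreOp` edge for an input whose stop-gradient flag is set, so backward never walks through that input. On the Python side the flag is `_stop_gradient`, which the updated test below sets on the label. A rough sketch of the effect, assuming the imperative API on this branch (the snippet is illustrative, not code from this PR):

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    x = to_variable(np.random.rand(4, 10).astype('float32'))
    label = to_variable(np.random.randint(0, 10, size=(4, 1)).astype('int64'))
    label._stop_gradient = True  # label is plain data: keep it out of the backward graph

    loss = fluid.layers.mean(
        fluid.layers.cross_entropy(fluid.layers.softmax(x), label))
    loss._backward()  # backward stops at `label`; only `x` and the traced ops get gradients
```
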
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index e729be4a95a58510f1e0162af4216feaa400d971..6bfee48af83661405580e62a9d4d70c1c024a53d 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -189,6 +189,9 @@ void BindBlockDesc(pybind11::module *m) {
            return self.HasVar(name);
          },
          pybind11::return_value_policy::reference)
+      .def("_clear_block",
+           [](pd::BlockDesc &self) { return self.ClearBlock(); },
+           pybind11::return_value_policy::reference)
       .def("_rename_var",
            [](pd::BlockDesc &self, const pybind11::bytes &byte_name,
               const pybind11::bytes &byte_name_new) {
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 6ffb185d44df157bc3b894c2c6d7a78aed71925b..14b8339df05159d9914536b0efda73647df576b0 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1188,6 +1188,15 @@ class Block(object):
         else:
             raise ValueError("Var {0} is not found recursively".format(name))
 
+    def _clear_block(self):
+        self.desc._clear_block()
+
+        for name in list(self.vars.keys()):
+            if not self.vars[name].persistable:
+                del self.vars[name]
+
+        del self.ops[:]
+
     def all_parameters(self):
         return list(self.iter_parameters())
 
@@ -1273,8 +1282,7 @@ class Block(object):
         return var
 
     def _remove_var(self, name):
-        if not _in_imperative_mode():
-            self._sync_with_cpp()
+        self._sync_with_cpp()
         self.desc._remove_var(cpt.to_bytes(name))
         del self.vars[name]
 
@@ -1358,8 +1366,7 @@ class Block(object):
         Returns:
             None
         """
-        if not _in_imperative_mode():
-            self._sync_with_cpp()
+        self._sync_with_cpp()
         self.desc._remove_op(index, index + 1)
         del self.ops[index]
 
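
Review note: `Block._clear_block()` drops every op and every non-persistable variable from the Python-side block and, through the new `_clear_block` binding, from the underlying C++ `BlockDesc`; persistable variables such as parameters survive. A minimal sketch of that contract on a statically built program (the variable and op names are illustrative, not taken from this PR):

```python
import paddle.fluid as fluid

prog = fluid.Program()
block = prog.global_block()

w = block.create_var(name="w", shape=[4, 4], dtype="float32", persistable=True)
tmp = block.create_var(name="tmp", shape=[4, 4], dtype="float32")
block.append_op(
    type="scale", inputs={"X": [w]}, outputs={"Out": [tmp]}, attrs={"scale": 1.0})

block._clear_block()              # forwards to BlockDesc::ClearBlock() via pybind
assert len(block.ops) == 0        # every op is removed
assert "w" in block.vars          # persistable vars (e.g. parameters) are kept
assert "tmp" not in block.vars    # temporaries are dropped
```
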
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index 3823b4f81e2c7422cf079aa95d8b3dd20d1662ef..3bcfdac6ce49ab92f7987aac6cf1656071467a8b 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -101,7 +101,8 @@ class MNIST(fluid.imperative.Layer):
 class TestImperativeMnist(unittest.TestCase):
     def test_mnist_float32(self):
         seed = 90
-        batch_num = 100000
+        epoch_num = 1
+        batch_num = 200
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
@@ -109,125 +110,112 @@ class TestImperativeMnist(unittest.TestCase):
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
-                paddle.dataset.mnist.train(), batch_size=128)
+                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
 
             dy_param_init_value = {}
-            for batch_id, data in enumerate(train_reader()):
-                if batch_id >= batch_num:
-                    break
-
-                dy_x_data = np.array(
-                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    128, 1)
-
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
-                label._stop_gradient = True
-
-                print("forward start")
-
-                cost = mnist(img)
-                loss = fluid.layers.cross_entropy(cost, label)
-                avg_loss = fluid.layers.mean(loss)
-                # dy_out = avg_loss._numpy()
-                print("forward end")
-
-                # if batch_id == 0:
-                #     for param in fluid.default_main_program().global_block(
-                #     ).all_parameters():
-                #         dy_param_init_value[param.name] = param._numpy()
-
-                avg_loss._backward()
-
-                print("backward end")
-
-                sgd.minimize(avg_loss)
-
-                print("sgd end")
-
-                mnist.clear_gradients()
-
-                import gc
-                for name, var in fluid.default_main_program().global_block().vars.items():
-                    if not var.persistable:
-                        fluid.default_main_program().global_block()._remove_var(name)
-                    # var._ivar._clear_values()
-                for op in fluid.default_main_program().global_block().ops:
-                    fluid.default_main_program().global_block()._remove_op(op.idx)
+            for epoch in range(epoch_num):
+                print("epoch", epoch)
+                for batch_id, data in enumerate(train_reader()):
+                    # if batch_id >= batch_num:
+                    #     break
 
-                assert len(gc.get_referrers(avg_loss)) == 1
+                    dy_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape(128, 1)
 
-                print("clear end")
-                print("ivar ref ", gc.get_referrers(gc.get_referrers(avg_loss._ivar)[0])[0].__class__.__name__)
-                print("ivar ref ", gc.get_referrers(gc.get_referrers(avg_loss._ivar)[1])[0].__class__.__name__)
+                    img = to_variable(dy_x_data)
+                    label = to_variable(y_data)
+                    label._stop_gradient = True
 
-            # dy_param_value = {}
-            # for param in fluid.default_main_program().global_block(
-            # ).all_parameters():
-            #     dy_param_value[param.name] = param._numpy()
+                    cost = mnist(img)
+                    loss = fluid.layers.cross_entropy(cost, label)
+                    avg_loss = fluid.layers.mean(loss)
 
-        # with new_program_scope():
-        #     fluid.default_startup_program().random_seed = seed
-        #     fluid.default_main_program().random_seed = seed
+                    dy_out = avg_loss._numpy()
 
-        #     exe = fluid.Executor(fluid.CPUPlace(
-        #     ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+                    if epoch == 0 and batch_id == 0:
+                        for param in fluid.default_main_program().global_block(
+                        ).all_parameters():
+                            dy_param_init_value[param.name] = param._numpy()
 
-        #     mnist = MNIST()
-        #     sgd = SGDOptimizer(learning_rate=1e-3)
-        #     train_reader = paddle.batch(
-        #         paddle.dataset.mnist.train(), batch_size=128)
+                    avg_loss._backward()
+                    sgd.minimize(avg_loss)
+                    mnist.clear_gradients()
 
-        #     img = fluid.layers.data(
-        #         name='pixel', shape=[1, 28, 28], dtype='float32')
-        #     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        #     cost = mnist(img)
-        #     loss = fluid.layers.cross_entropy(cost, label)
-        #     avg_loss = fluid.layers.mean(loss)
-        #     sgd.minimize(avg_loss)
+                    fluid.default_main_program().global_block()._clear_block()
 
-        #     # initialize params and fetch them
-        #     static_param_init_value = {}
-        #     static_param_name_list = []
-        #     for param in fluid.default_startup_program().global_block(
-        #     ).all_parameters():
-        #         static_param_name_list.append(param.name)
+                    dy_param_value = {}
+                    for param in fluid.default_main_program().global_block(
+                    ).all_parameters():
+                        dy_param_value[param.name] = param._numpy()
 
-        #     out = exe.run(fluid.default_startup_program(),
-        #                   fetch_list=static_param_name_list)
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
 
-        #     for i in range(len(static_param_name_list)):
-        #         static_param_init_value[static_param_name_list[i]] = out[i]
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
-        #     for batch_id, data in enumerate(train_reader()):
-        #         if batch_id >= batch_num:
-        #             break
+            mnist = MNIST()
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            train_reader = paddle.batch(
+                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
+
+            img = fluid.layers.data(
+                name='pixel', shape=[1, 28, 28], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            cost = mnist(img)
+            loss = fluid.layers.cross_entropy(cost, label)
+            avg_loss = fluid.layers.mean(loss)
+            sgd.minimize(avg_loss)
+
+            # initialize params and fetch them
+            static_param_init_value = {}
+            static_param_name_list = []
+            for param in fluid.default_startup_program().global_block(
+            ).all_parameters():
+                static_param_name_list.append(param.name)
+
+            out = exe.run(fluid.default_startup_program(),
+                          fetch_list=static_param_name_list)
+
+            for i in range(len(static_param_name_list)):
+                static_param_init_value[static_param_name_list[i]] = out[i]
+
+            for epoch in range(epoch_num):
+                for batch_id, data in enumerate(train_reader()):
+                    # if batch_id >= batch_num:
+                    #     break
 
-        #             static_x_data = np.array(
-        #                 [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-        #             y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-        #                 [128, 1])
-
-        #             fetch_list = [avg_loss.name]
-        #             fetch_list.extend(static_param_name_list)
-        #             out = exe.run(fluid.default_main_program(),
-        #                           feed={"pixel": static_x_data,
-        #                                 "label": y_data},
-        #                           fetch_list=fetch_list)
-
-        #             static_param_value = {}
-        #             static_out = out[0]
-        #             for i in range(1, len(out)):
-        #                 static_param_value[static_param_name_list[i - 1]] = out[i]
-
-        #     for key, value in six.iteritems(static_param_init_value):
-        #         self.assertTrue(np.allclose(value, dy_param_init_value[key]))
-
-        #     self.assertTrue(np.allclose(static_out, dy_out))
-
-        #     for key, value in six.iteritems(static_param_value):
-        #         self.assertTrue(np.allclose(value, dy_param_value[key]))
+                    static_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape([128, 1])
+
+                    fetch_list = [avg_loss.name]
+                    fetch_list.extend(static_param_name_list)
+                    out = exe.run(
+                        fluid.default_main_program(),
+                        feed={"pixel": static_x_data,
+                              "label": y_data},
+                        fetch_list=fetch_list)
+
+                    static_param_value = {}
+                    static_out = out[0]
+                    for i in range(1, len(out)):
+                        static_param_value[static_param_name_list[i - 1]] = out[
+                            i]
+
+        for key, value in six.iteritems(static_param_init_value):
+            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
+
+        self.assertTrue(np.allclose(static_out, dy_out))
+
+        for key, value in six.iteritems(static_param_value):
+            self.assertTrue(np.allclose(value, dy_param_value[key]))
 
 
 if __name__ == '__main__':
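
Review note: condensed from the updated imperative section of the test above, the per-batch training step now follows the sketch below; it assumes `mnist`, `sgd` and `train_reader` are set up exactly as in the test. `_clear_block()` releases the ops and temporary variables traced for the batch, while the persistable parameters stay alive across iterations.

```python
for batch_id, data in enumerate(train_reader()):
    dy_x_data = np.array(
        [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
    y_data = np.array([x[1] for x in data]).astype('int64').reshape(128, 1)

    img = to_variable(dy_x_data)
    label = to_variable(y_data)
    label._stop_gradient = True        # keep the label out of the backward graph

    cost = mnist(img)
    avg_loss = fluid.layers.mean(fluid.layers.cross_entropy(cost, label))

    avg_loss._backward()
    sgd.minimize(avg_loss)
    mnist.clear_gradients()

    # drop the ops and temporaries recorded for this batch; parameters persist
    fluid.default_main_program().global_block()._clear_block()
```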