diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 47488d4dea79f285769f29c93f7888a7f783f070..827473ec82149df24d9bf82db7144f2fa5683b2c 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -175,7 +175,7 @@ std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
   PADDLE_ENFORCE(var_->IsInitialized(),
                  "Variable must be initialized when getting numpy tensor");
 
-  std::unique_ptr<VarBase> new_var(new VarBase());
+  std::unique_ptr<VarBase> new_var(new VarBase("NewVarBase"));
   framework::LoDTensor* tensor =
       new_var->var_->GetMutable<framework::LoDTensor>();
   tensor->Resize(var_->Get<framework::LoDTensor>().dims());
@@ -303,7 +303,7 @@ std::vector<VarBase*> PyLayer::Apply(int func_id,
   std::vector<Variable*> outvars = CallPythonFunc(py_funcs_[func_id], invars);
   std::vector<VarBase*> ret;
   for (Variable* v : outvars) {
-    ret.push_back(new VarBase(v, new VarBase(true)));
+    ret.push_back(new VarBase(v, new VarBase("PYLAYER_XGRAD", true), ""));
   }
   return ret;
 }
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 78205486c5534ac0c61cc6d545bdafa4dfc95695..5d38c339953922565f7bb880eaea9762ae868fe2 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -103,26 +103,30 @@ class OpBase;
  */
 class VarBase {
  public:
-  VarBase() : VarBase(new framework::Variable(), new VarBase(true)) {}
+  VarBase(std::string name) : VarBase(new framework::Variable(), new VarBase(name + "XGRAD", true), name) {}
 
   // Owns `var` and `grad`
-  VarBase(framework::Variable* var, VarBase* grad)
+  VarBase(framework::Variable* var, VarBase* grad, std::string name)
       : var_desc_(nullptr),
         var_(var),
         grads_(grad),
         stop_gradient_(false),
         pre_op_(nullptr),
-        pre_op_out_idx_(-1) {}
+        pre_op_out_idx_(-1),
+        name_(name) { LOG(ERROR) << "create " << name; }
 
-  explicit VarBase(bool stop_gradient)
+  explicit VarBase(std::string name, bool stop_gradient)
       : var_desc_(nullptr),
         var_(new framework::Variable()),
-        grads_(stop_gradient ? nullptr : new VarBase(true)),
+        grads_(stop_gradient ? nullptr : new VarBase(name + "XGRAD", true)),
         stop_gradient_(stop_gradient),
         pre_op_(nullptr),
-        pre_op_out_idx_(-1) {}
+        pre_op_out_idx_(-1),
+        name_(name) { LOG(ERROR) << "create " << name; }
 
   virtual ~VarBase() {
+    LOG(ERROR) << "delete " << name_;
+
     if (var_) {
       delete var_;
     }
@@ -183,6 +187,7 @@ class VarBase {
   OpBase* pre_op_;
   std::string pre_op_out_name_;
   int pre_op_out_idx_;
+  std::string name_;
 };
 
 /* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its
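The `layer.cc`/`layer.h` changes above are leak-tracing instrumentation: every `VarBase` now carries a `name_`, and the constructors and destructor log `create <name>` / `delete <name>`, so any `create` line without a matching `delete` pinpoints an object that is never freed. A minimal Python sketch of the same pairing technique (illustrative only; `TracedVar` is a made-up name, not Paddle API):

```python
import logging

class TracedVar(object):
    """Toy analogue of the LOG(ERROR) create/delete tracing above:
    pair "create"/"delete" log lines to spot objects never freed."""

    def __init__(self, name):
        self.name = name
        logging.error("create %s", name)

    def __del__(self):
        # A leaked object never reaches __del__, so its "create" line
        # has no matching "delete" line in the log.
        logging.error("delete %s", self.name)

v = TracedVar("fc_0.w_0")  # logs: create fc_0.w_0
del v                      # logs: delete fc_0.w_0; a leak would leave it unmatched
```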
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 351513712cc4297bf7fbe67878aeba162ef66e4d..26ebacc13ff1eb36f979d1d260f855fec3fa8261 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -137,7 +137,7 @@ PYBIND11_MODULE(core, m) {
   py::class_<imperative::VarBase>(m, "VarBase", R"DOC()DOC")
       // .def(py::init<>())
-      .def(py::init<bool>(), py::arg("stop_gradient") = false)
+      .def(py::init<std::string, bool>(), py::arg("name") = "", py::arg("stop_gradient") = false)
       .def("_run_backward",
            [](imperative::VarBase &self) { self.RunBackward(); })
       .def("_grad_name", &imperative::VarBase::GradName)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 832c97c7deb49b4e118e15989ab7a34da6ce57a0..6ffb185d44df157bc3b894c2c6d7a78aed71925b 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -306,6 +306,10 @@ class Variable(object):
         if name is None:
             name = unique_name.generate('_generated_var')
 
+        # print("create var", name)
+        # import sys
+        # sys.stdout.flush()
+
         is_new_var = False
         name = cpt.to_text(name)
         self.desc = self.block.desc.find_var(cpt.to_bytes(name))
@@ -383,7 +387,7 @@ class Variable(object):
         if _in_imperative_mode():
             self._ivar = kwargs.get("ivar", None)
             if not self._ivar:
-                self._ivar = core.VarBase()
+                self._ivar = core.VarBase(name, stop_gradient)
             self._ivar.desc = self.desc
             self._ivar.stop_gradient = stop_gradient
 
@@ -1269,7 +1273,8 @@ class Block(object):
         return var
 
     def _remove_var(self, name):
-        self._sync_with_cpp()
+        if not _in_imperative_mode():
+            self._sync_with_cpp()
         self.desc._remove_var(cpt.to_bytes(name))
         del self.vars[name]
 
@@ -1353,7 +1358,8 @@ class Block(object):
         Returns:
             None
         """
-        self._sync_with_cpp()
+        if not _in_imperative_mode():
+            self._sync_with_cpp()
         self.desc._remove_op(index, index + 1)
         del self.ops[index]
 
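Taken together, the pybind and `framework.py` hunks let each Python `Variable` hand its unique name down to the C++ `VarBase`, so create/delete log lines can be matched back to Python-side names. A sketch of the intended call (hypothetical direct usage, assuming an interpreter built with this patch; the argument order follows the `core.VarBase(name, stop_gradient)` call site above):

```python
import paddle.fluid.core as core

# Mirrors what Variable.__init__ now does internally:
#     self._ivar = core.VarBase(name, stop_gradient)
ivar = core.VarBase("fc_0.tmp_0", False)  # C++ side logs "create fc_0.tmp_0"

# With stop_gradient=False the gradient VarBase is created alongside it
# with a derived name, so the log should also show "create fc_0.tmp_0XGRAD".
del ivar  # dropping the last reference should produce matching "delete" lines
```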
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index 08b155acc657c3a4a73f5b1d72ac356fc7e83a58..3823b4f81e2c7422cf079aa95d8b3dd20d1662ef 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -101,7 +101,7 @@ class MNIST(fluid.imperative.Layer):
 class TestImperativeMnist(unittest.TestCase):
     def test_mnist_float32(self):
         seed = 90
-        batch_num = 2
+        batch_num = 100000
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
@@ -125,85 +125,109 @@ class TestImperativeMnist(unittest.TestCase):
                 label = to_variable(y_data)
                 label._stop_gradient = True
 
+                print("forward start")
+
                 cost = mnist(img)
                 loss = fluid.layers.cross_entropy(cost, label)
                 avg_loss = fluid.layers.mean(loss)
-                dy_out = avg_loss._numpy()
+                # dy_out = avg_loss._numpy()
+                print("forward end")
 
-                if batch_id == 0:
-                    for param in fluid.default_main_program().global_block(
-                    ).all_parameters():
-                        dy_param_init_value[param.name] = param._numpy()
+                # if batch_id == 0:
+                #     for param in fluid.default_main_program().global_block(
+                #     ).all_parameters():
+                #         dy_param_init_value[param.name] = param._numpy()
 
                 avg_loss._backward()
-                sgd.minimize(avg_loss)
-                mnist.clear_gradients()
-
-                dy_param_value = {}
-                for param in fluid.default_main_program().global_block(
-                ).all_parameters():
-                    dy_param_value[param.name] = param._numpy()
-
-        with new_program_scope():
-            fluid.default_startup_program().random_seed = seed
-            fluid.default_main_program().random_seed = seed
-
-            exe = fluid.Executor(fluid.CPUPlace(
-            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-
-            mnist = MNIST()
-            sgd = SGDOptimizer(learning_rate=1e-3)
-            train_reader = paddle.batch(
-                paddle.dataset.mnist.train(), batch_size=128)
-
-            img = fluid.layers.data(
-                name='pixel', shape=[1, 28, 28], dtype='float32')
-            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-            cost = mnist(img)
-            loss = fluid.layers.cross_entropy(cost, label)
-            avg_loss = fluid.layers.mean(loss)
-            sgd.minimize(avg_loss)
-
-            # initialize params and fetch them
-            static_param_init_value = {}
-            static_param_name_list = []
-            for param in fluid.default_startup_program().global_block(
-            ).all_parameters():
-                static_param_name_list.append(param.name)
+                print("backward end")
 
-            out = exe.run(fluid.default_startup_program(),
-                          fetch_list=static_param_name_list)
-
-            for i in range(len(static_param_name_list)):
-                static_param_init_value[static_param_name_list[i]] = out[i]
-
-            for batch_id, data in enumerate(train_reader()):
-                if batch_id >= batch_num:
-                    break
-
-                static_x_data = np.array(
-                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    [128, 1])
-
-                fetch_list = [avg_loss.name]
-                fetch_list.extend(static_param_name_list)
-                out = exe.run(fluid.default_main_program(),
-                              feed={"pixel": static_x_data,
-                                    "label": y_data},
-                              fetch_list=fetch_list)
-
-                static_param_value = {}
-                static_out = out[0]
-                for i in range(1, len(out)):
-                    static_param_value[static_param_name_list[i - 1]] = out[i]
+                sgd.minimize(avg_loss)
 
-        for key, value in six.iteritems(static_param_init_value):
-            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
+                print("sgd end")
 
-        self.assertTrue(np.allclose(static_out, dy_out))
+                mnist.clear_gradients()
 
-        for key, value in six.iteritems(static_param_value):
-            self.assertTrue(np.allclose(value, dy_param_value[key]))
+                import gc
+                for name, var in fluid.default_main_program().global_block().vars.items():
+                    if not var.persistable:
+                        fluid.default_main_program().global_block()._remove_var(name)
+                        # var._ivar._clear_values()
+                for op in fluid.default_main_program().global_block().ops:
+                    fluid.default_main_program().global_block()._remove_op(op.idx)
+
+                assert len(gc.get_referrers(avg_loss)) == 1
+
+                print("clear end")
+                print("ivar ref ", gc.get_referrers(gc.get_referrers(avg_loss._ivar)[0])[0].__class__.__name__)
+                print("ivar ref ", gc.get_referrers(gc.get_referrers(avg_loss._ivar)[1])[0].__class__.__name__)
+
+                # dy_param_value = {}
+                # for param in fluid.default_main_program().global_block(
+                # ).all_parameters():
+                #     dy_param_value[param.name] = param._numpy()
+
+        # with new_program_scope():
+        #     fluid.default_startup_program().random_seed = seed
+        #     fluid.default_main_program().random_seed = seed
+
+        #     exe = fluid.Executor(fluid.CPUPlace(
+        #     ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+
+        #     mnist = MNIST()
+        #     sgd = SGDOptimizer(learning_rate=1e-3)
+        #     train_reader = paddle.batch(
+        #         paddle.dataset.mnist.train(), batch_size=128)
+
+        #     img = fluid.layers.data(
+        #         name='pixel', shape=[1, 28, 28], dtype='float32')
+        #     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        #     cost = mnist(img)
+        #     loss = fluid.layers.cross_entropy(cost, label)
+        #     avg_loss = fluid.layers.mean(loss)
+        #     sgd.minimize(avg_loss)
+
+        #     # initialize params and fetch them
+        #     static_param_init_value = {}
+        #     static_param_name_list = []
+        #     for param in fluid.default_startup_program().global_block(
+        #     ).all_parameters():
+        #         static_param_name_list.append(param.name)
+
+        #     out = exe.run(fluid.default_startup_program(),
+        #                   fetch_list=static_param_name_list)
+
+        #     for i in range(len(static_param_name_list)):
+        #         static_param_init_value[static_param_name_list[i]] = out[i]
+
+        #     for batch_id, data in enumerate(train_reader()):
+        #         if batch_id >= batch_num:
+        #             break
+
+        #         static_x_data = np.array(
+        #             [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
+        #         y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+        #             [128, 1])
+
+        #         fetch_list = [avg_loss.name]
+        #         fetch_list.extend(static_param_name_list)
+        #         out = exe.run(fluid.default_main_program(),
+        #                       feed={"pixel": static_x_data,
+        #                             "label": y_data},
+        #                       fetch_list=fetch_list)
+
+        #         static_param_value = {}
+        #         static_out = out[0]
+        #         for i in range(1, len(out)):
+        #             static_param_value[static_param_name_list[i - 1]] = out[i]
+
+        # for key, value in six.iteritems(static_param_init_value):
+        #     self.assertTrue(np.allclose(value, dy_param_init_value[key]))
+
+        # self.assertTrue(np.allclose(static_out, dy_out))
+
+        # for key, value in six.iteritems(static_param_value):
+        #     self.assertTrue(np.allclose(value, dy_param_value[key]))
 
 
 if __name__ == '__main__':
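The surviving test body above is a leak hunt rather than the original dynamic-vs-static comparison (hence the commented-out blocks): after each batch it removes the non-persistable variables and ops it recorded, then uses `gc.get_referrers` to see who still holds `avg_loss` and its `_ivar`. A standalone sketch of that referrer-walking technique (stdlib only; `Holder` and `cache` are made-up names):

```python
import gc

class Holder(object):
    """Stand-in for an object suspected of being kept alive."""
    pass

suspect = Holder()
cache = {"loss": suspect}  # a reference someone "forgot" to drop

# gc.get_referrers lists every container the collector knows of that
# directly refers to `suspect` (the module's globals dict shows up too,
# since `suspect` is a global here).
for ref in gc.get_referrers(suspect):
    print(type(ref).__name__)  # e.g. 'dict' -> follow that container

# Walking one level up shows who owns that container, mirroring the
# nested gc.get_referrers(gc.get_referrers(...)) calls in the test.
owners = gc.get_referrers(cache)
print([type(o).__name__ for o in owners])
```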