diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 8afef8c90e0592b902c8dc15098473108040825b..191235d89784a7beb495750440f54fa21c2a561d 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -118,19 +118,16 @@ class Autograd { while (!ready.empty()) { OpBase* ready_op = ready.front(); ready.pop_front(); - LOG(ERROR) << "ApplyGrad Start"; std::map> input_grads = ready_op->ApplyGrad(); for (auto it : input_grads) { const std::vector& ingrads = it.second; - LOG(ERROR) << "XX"; for (size_t i = 0; i < ingrads.size(); ++i) { if (!ingrads[i]) continue; if (ready_op->input_vars_[it.first][i]->IsStopGradient()) { continue; } - LOG(ERROR) << "XX"; OpBase* pre_op = ready_op->pre_ops_[it.first][i]; if (!pre_op) continue; @@ -140,13 +137,10 @@ class Autograd { if (pre_op_ready) { ready.push_back(pre_op); } - LOG(ERROR) << "XX"; } } ready_op->InvokeBackwardHooks(); - - LOG(ERROR) << "ApplyGrad End"; } } @@ -219,6 +213,7 @@ std::map> OpBase::ApplyGrad() { return {}; } + VLOG(3) << "apply op grad: " << op_desc_->Type(); std::vector grad_outputs; if (backward_id_ > 0) { VLOG(3) << "py_layer_grad"; @@ -229,10 +224,8 @@ std::map> OpBase::ApplyGrad() { grad_input_vars_[0][framework::GradVarName(PyLayer::kFwdInp)]); } else { grad_outputs.resize(grad_op_descs_.size()); - LOG(ERROR) << "ApplyGrad " << grad_op_descs_.size(); for (size_t k = 0; k < grad_op_descs_.size(); ++k) { framework::OpDesc* grad_op_desc = grad_op_descs_[k]; - LOG(ERROR) << "op grad " << grad_op_desc->Type(); VLOG(3) << "op grad " << grad_op_desc->Type(); for (auto it : grad_output_vars_[k]) { auto& outputs = grad_outputs[k][it.first]; @@ -244,16 +237,12 @@ std::map> OpBase::ApplyGrad() { } } - LOG(ERROR) << "op grad " << grad_op_desc->Type(); - framework::RuntimeContext ctx(grad_input_vars_[k], grad_outputs[k]); // No need to do compile time infer shape here. // grad_op_desc_->InferShape(*block_); grad_op_desc->InferVarType(block_); - LOG(ERROR) << "op grad " << grad_op_desc->Type(); - std::unique_ptr opbase = framework::OpRegistry::CreateOp(*grad_op_desc); framework::OperatorWithKernel* op_kernel = @@ -268,8 +257,6 @@ std::map> OpBase::ApplyGrad() { } } - LOG(ERROR) << "delete grad start "; - for (size_t k = 0; k < grad_output_vars_.size(); ++k) { for (auto it : grad_output_vars_[k]) { auto& outputs = grad_outputs[k][it.first]; @@ -289,18 +276,16 @@ std::map> OpBase::ApplyGrad() { } void OpBase::InvokeBackwardHooks() { - LOG(ERROR) << "call backward start "; + VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size(); // call backward hooks for (py::object& callable : backward_hooks_) { callable(this); } - - LOG(ERROR) << "call backward end "; } void OpBase::RegisterBackwardHooks(const py::object& callable) { - LOG(ERROR) << "Register backward hooks " << trace_id_; + VLOG(3) << "Register backward hooks " << trace_id_; // TODO(minqiyang): check the callable format backward_hooks_.push_back(callable); diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index a1078acdee11090f9033c06a7d5fe3ce9c406e7d..30010d07dc954a3cfc22b28198e4ca747e54cf03 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -134,8 +134,6 @@ class VarBase { public: virtual ~VarBase() { - LOG(ERROR) << "remove var " << name_.c_str(); - if (block_) { block_->RemoveVar(name_); } @@ -225,13 +223,9 @@ class PYBIND11_HIDDEN OpBase { delete desc; } - LOG(ERROR) << "remove op " << op_desc_->Type() << " id " << trace_id_; - if (block_) { block_->RemoveOpInternal(op_desc_); } - - LOG(ERROR) << "remove op end " << trace_id_; } std::map> ApplyGrad(); diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 2993ab309027f9306c61023b55b1c061e0ebddc0..d773497e6c7305774927b62886abf0fbd00ca86a 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -155,6 +155,7 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, op->grad_input_vars_.resize(op->grad_op_descs_.size()); op->grad_output_vars_.resize(op->grad_op_descs_.size()); + for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) { framework::OpDesc* grad_op_desc = op->grad_op_descs_[i]; for (auto it : grad_op_desc->Inputs()) { @@ -167,7 +168,6 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, PADDLE_ENFORCE(fwd_var_it != vars.end()); // Forward inputs or outputs. grad_in_vars.push_back(fwd_var_it->second->var_); - vars_saved_for_backward.insert(it.first); } else { VarBase* var = vars[var_it->second]; if (!var->grads_->var_->IsInitialized()) { @@ -177,6 +177,8 @@ std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, // Douts. grad_in_vars.push_back(var->grads_->var_); } + + vars_saved_for_backward.insert(it.first); } } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 54ab0372fc68f127050ff5d658772590243c7593..d744394022f8c40d7d5956ccc6d9f8274141985c 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -173,7 +173,6 @@ PYBIND11_MODULE(core, m) { [](const imperative::VarBase &self) { return self.name_; }, [](imperative::VarBase &self, const std::string &name) { self.name_ = name; - LOG(ERROR) << "create ivar name " << self.name_; }) .def_property("block", [](const imperative::VarBase &self) { return self.block_; }, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 132ea2c10e016aa5e0ca3e9f7b71d3542440a23f..7afbf61472a3d09ba5e34731d3a3ebbb8076e310 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import print_function + import contextlib import unittest import numpy as np @@ -146,69 +148,69 @@ class TestImperativeMnist(unittest.TestCase): for param in mnist.parameters(): dy_param_value[param.name] = param._numpy() - # with new_program_scope(): - # fluid.default_startup_program().random_seed = seed - # fluid.default_main_program().random_seed = seed - - # exe = fluid.Executor(fluid.CPUPlace( - # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) - - # mnist = MNIST("mnist") - # sgd = SGDOptimizer(learning_rate=1e-3) - # train_reader = paddle.batch( - # paddle.dataset.mnist.train(), batch_size=128, drop_last=True) - - # img = fluid.layers.data( - # name='pixel', shape=[1, 28, 28], dtype='float32') - # label = fluid.layers.data(name='label', shape=[1], dtype='int64') - # cost = mnist(img) - # loss = fluid.layers.cross_entropy(cost, label) - # avg_loss = fluid.layers.mean(loss) - # sgd.minimize(avg_loss) - - # # initialize params and fetch them - # static_param_init_value = {} - # static_param_name_list = [] - # for param in mnist.parameters(): - # static_param_name_list.append(param.name) - - # out = exe.run(fluid.default_startup_program(), - # fetch_list=static_param_name_list) - - # for i in range(len(static_param_name_list)): - # static_param_init_value[static_param_name_list[i]] = out[i] - - # for epoch in range(epoch_num): - # for batch_id, data in enumerate(train_reader()): - # static_x_data = np.array( - # [x[0].reshape(1, 28, 28) - # for x in data]).astype('float32') - # y_data = np.array( - # [x[1] for x in data]).astype('int64').reshape([128, 1]) - - # fetch_list = [avg_loss.name] - # fetch_list.extend(static_param_name_list) - # out = exe.run( - # fluid.default_main_program(), - # feed={"pixel": static_x_data, - # "label": y_data}, - # fetch_list=fetch_list) - - # static_param_value = {} - # static_out = out[0] - # for i in range(1, len(out)): - # static_param_value[static_param_name_list[i - 1]] = out[ - # i] - - # self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all())) - - # for key, value in six.iteritems(static_param_init_value): - # self.assertTrue(np.allclose(value, dy_param_init_value[key])) - - # self.assertTrue(np.allclose(static_out, dy_out)) - - # for key, value in six.iteritems(static_param_value): - # self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5)) + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) + + mnist = MNIST("mnist") + sgd = SGDOptimizer(learning_rate=1e-3) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + + img = fluid.layers.data( + name='pixel', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + cost = mnist(img) + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + sgd.minimize(avg_loss) + + # initialize params and fetch them + static_param_init_value = {} + static_param_name_list = [] + for param in mnist.parameters(): + static_param_name_list.append(param.name) + + out = exe.run(fluid.default_startup_program(), + fetch_list=static_param_name_list) + + for i in range(len(static_param_name_list)): + static_param_init_value[static_param_name_list[i]] = out[i] + + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + static_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape([128, 1]) + + fetch_list = [avg_loss.name] + fetch_list.extend(static_param_name_list) + out = exe.run( + fluid.default_main_program(), + feed={"pixel": static_x_data, + "label": y_data}, + fetch_list=fetch_list) + + static_param_value = {} + static_out = out[0] + for i in range(1, len(out)): + static_param_value[static_param_name_list[i - 1]] = out[ + i] + + self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all())) + + for key, value in six.iteritems(static_param_init_value): + self.assertTrue(np.allclose(value, dy_param_init_value[key])) + + self.assertTrue(np.allclose(static_out, dy_out)) + + for key, value in six.iteritems(static_param_value): + self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5)) if __name__ == '__main__':