Commit efb2f2ba authored by minqiyang

Fix bugs

test=develop
Parent b420ec3a
@@ -118,19 +118,16 @@ class Autograd {
     while (!ready.empty()) {
       OpBase* ready_op = ready.front();
       ready.pop_front();
-      LOG(ERROR) << "ApplyGrad Start";
       std::map<std::string, std::vector<VarBase*>> input_grads =
           ready_op->ApplyGrad();

       for (auto it : input_grads) {
         const std::vector<VarBase*>& ingrads = it.second;
-        LOG(ERROR) << "XX";
         for (size_t i = 0; i < ingrads.size(); ++i) {
           if (!ingrads[i]) continue;
           if (ready_op->input_vars_[it.first][i]->IsStopGradient()) {
             continue;
           }
-          LOG(ERROR) << "XX";
           OpBase* pre_op = ready_op->pre_ops_[it.first][i];
           if (!pre_op) continue;
@@ -140,13 +137,10 @@ class Autograd {
           if (pre_op_ready) {
             ready.push_back(pre_op);
           }
-          LOG(ERROR) << "XX";
         }
       }

       ready_op->InvokeBackwardHooks();
-      LOG(ERROR) << "ApplyGrad End";
     }
   }
@@ -219,6 +213,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     return {};
   }

+  VLOG(3) << "apply op grad: " << op_desc_->Type();
   std::vector<framework::VariableValueMap> grad_outputs;
   if (backward_id_ > 0) {
     VLOG(3) << "py_layer_grad";
@@ -229,10 +224,8 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
         grad_input_vars_[0][framework::GradVarName(PyLayer::kFwdInp)]);
   } else {
     grad_outputs.resize(grad_op_descs_.size());
-    LOG(ERROR) << "ApplyGrad " << grad_op_descs_.size();
     for (size_t k = 0; k < grad_op_descs_.size(); ++k) {
       framework::OpDesc* grad_op_desc = grad_op_descs_[k];
-      LOG(ERROR) << "op grad " << grad_op_desc->Type();
       VLOG(3) << "op grad " << grad_op_desc->Type();
       for (auto it : grad_output_vars_[k]) {
         auto& outputs = grad_outputs[k][it.first];
@@ -244,16 +237,12 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
         }
       }
-      LOG(ERROR) << "op grad " << grad_op_desc->Type();
       framework::RuntimeContext ctx(grad_input_vars_[k], grad_outputs[k]);

       // No need to do compile time infer shape here.
       // grad_op_desc_->InferShape(*block_);
       grad_op_desc->InferVarType(block_);
-      LOG(ERROR) << "op grad " << grad_op_desc->Type();
       std::unique_ptr<framework::OperatorBase> opbase =
           framework::OpRegistry::CreateOp(*grad_op_desc);
       framework::OperatorWithKernel* op_kernel =
@@ -267,8 +256,6 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     }
   }
-  LOG(ERROR) << "delete grad start ";
   for (size_t k = 0; k < grad_output_vars_.size(); ++k) {
     for (auto it : grad_output_vars_[k]) {
       auto& outputs = grad_outputs[k][it.first];
@@ -288,18 +275,16 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
 }

 void OpBase::InvokeBackwardHooks() {
-  LOG(ERROR) << "call backward start ";
+  VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size();

   // call backward hooks
   for (py::object& callable : backward_hooks_) {
     callable(this);
   }
-  LOG(ERROR) << "call backward end ";
 }

 void OpBase::RegisterBackwardHooks(const py::object& callable) {
-  LOG(ERROR) << "Register backward hooks " << trace_id_;
+  VLOG(3) << "Register backward hooks " << trace_id_;

   // TODO(minqiyang): check the callable format
   backward_hooks_.push_back(callable);
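
The two hunks above only strip temporary LOG(ERROR) tracing from Autograd's backward worklist; the control flow is unchanged: pop a ready op, apply its gradient, then schedule each predecessor once every op that consumes it has finished. A minimal, self-contained Python sketch of that pattern follows (illustrative names only, not Paddle's API; the pre_op_ready computation is elided in the hunk, so the sketch assumes a plain dependency count):

from collections import deque

def run_backward(start_op, pre_ops, dep_counts, apply_grad):
    """Worklist sketch of the Autograd loop above (hypothetical names)."""
    ready = deque([start_op])
    while ready:
        op = ready.popleft()
        apply_grad(op)                  # accumulate gradients into op's inputs
        for pre in pre_ops.get(op, ()):
            dep_counts[pre] -= 1        # one consumer of pre has finished
            if dep_counts[pre] == 0:    # all gradients delivered
                ready.append(pre)       # pre is now ready to run

# Tiny usage example on a chain a -> b -> c:
deps = {"a": 1, "b": 1, "c": 0}
run_backward("c", {"c": ["b"], "b": ["a"]}, deps, lambda op: print("grad of", op))
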
......
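
The hook-related changes above likewise swap LOG(ERROR) for VLOG(3): RegisterBackwardHooks stores a Python callable, and InvokeBackwardHooks calls every stored callable with the op once its gradients have been applied. A rough, self-contained Python analogue of that callback pattern (plain Python with made-up names, not the pybind-exposed interface, which this diff does not show):

class OpWithHooks:
    """Toy stand-in for OpBase's backward-hook bookkeeping (illustrative only)."""

    def __init__(self, name):
        self.name = name
        self._backward_hooks = []

    def register_backward_hook(self, fn):
        # mirrors OpBase::RegisterBackwardHooks: just remember the callable
        self._backward_hooks.append(fn)

    def invoke_backward_hooks(self):
        # mirrors OpBase::InvokeBackwardHooks: call each hook with the op itself
        for hook in self._backward_hooks:
            hook(self)

op = OpWithHooks("mul")
op.register_backward_hook(lambda o: print("backward finished for", o.name))
op.invoke_backward_hooks()  # prints: backward finished for mul
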
@@ -125,8 +125,6 @@ class VarBase {
  public:
   virtual ~VarBase() {
-    LOG(ERROR) << "remove var " << name_.c_str();
     if (block_) {
       block_->RemoveVar(name_);
     }
@@ -216,13 +214,9 @@ class PYBIND11_HIDDEN OpBase {
       delete desc;
     }
-    LOG(ERROR) << "remove op " << op_desc_->Type() << " id " << trace_id_;
     if (block_) {
       block_->RemoveOpInternal(op_desc_);
     }
-    LOG(ERROR) << "remove op end " << trace_id_;
   }

   std::map<std::string, std::vector<VarBase*>> ApplyGrad();
......
@@ -154,6 +154,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
     op->grad_input_vars_.resize(op->grad_op_descs_.size());
     op->grad_output_vars_.resize(op->grad_op_descs_.size());
+
     for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) {
       framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
       for (auto it : grad_op_desc->Inputs()) {
@@ -166,7 +167,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
            PADDLE_ENFORCE(fwd_var_it != vars.end());
            // Forward inputs or outputs.
            grad_in_vars.push_back(fwd_var_it->second->var_);
-           vars_saved_for_backward.insert(it.first);
          } else {
            VarBase* var = vars[var_it->second];
            if (!var->grads_->var_->IsInitialized()) {
@@ -176,6 +176,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
            // Douts.
            grad_in_vars.push_back(var->grads_->var_);
          }
+
+         vars_saved_for_backward.insert(it.first);
        }
      }
......
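
The tracer hunks above carry the actual fix in this commit: vars_saved_for_backward.insert(it.first) moves out of the forward-variable branch, so every grad-op input slot is recorded whether it maps to a forward variable or to a gradient (dout) produced later. A small, runnable Python sketch of that bookkeeping, with hypothetical names standing in for the C++ structures:

def collect_grad_inputs(grad_op_inputs, forward_vars, grad_vars):
    """Sketch of the fixed loop: every input slot is saved for backward.

    grad_op_inputs -- {slot_name: [var_name, ...]} of one grad op (hypothetical)
    forward_vars   -- names of forward inputs/outputs that already exist
    grad_vars      -- var_name -> its gradient variable (filled in later)
    """
    grad_in_vars = []
    vars_saved_for_backward = set()
    for slot, var_names in grad_op_inputs.items():
        for name in var_names:
            if name in forward_vars:
                grad_in_vars.append(name)             # a forward input/output
            else:
                grad_in_vars.append(grad_vars[name])  # a dout computed later
            # the fix: record the slot in both branches, not only the first one
            vars_saved_for_backward.add(slot)
    return grad_in_vars, vars_saved_for_backward
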
@@ -173,7 +173,6 @@ PYBIND11_MODULE(core, m) {
                     [](const imperative::VarBase &self) { return self.name_; },
                     [](imperative::VarBase &self, const std::string &name) {
                       self.name_ = name;
-                      LOG(ERROR) << "create ivar name " << self.name_;
                     })
       .def_property("block",
                     [](const imperative::VarBase &self) { return self.block_; },
......
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from __future__ import print_function
 import contextlib
 import unittest
 import numpy as np
@@ -146,69 +148,69 @@ class TestImperativeMnist(unittest.TestCase):
             for param in mnist.parameters():
                 dy_param_value[param.name] = param._numpy()

-        # with new_program_scope():
-        #     fluid.default_startup_program().random_seed = seed
-        #     fluid.default_main_program().random_seed = seed
-        #     exe = fluid.Executor(fluid.CPUPlace(
-        #     ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-        #     mnist = MNIST("mnist")
-        #     sgd = SGDOptimizer(learning_rate=1e-3)
-        #     train_reader = paddle.batch(
-        #         paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
-        #     img = fluid.layers.data(
-        #         name='pixel', shape=[1, 28, 28], dtype='float32')
-        #     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        #     cost = mnist(img)
-        #     loss = fluid.layers.cross_entropy(cost, label)
-        #     avg_loss = fluid.layers.mean(loss)
-        #     sgd.minimize(avg_loss)
-        #     # initialize params and fetch them
-        #     static_param_init_value = {}
-        #     static_param_name_list = []
-        #     for param in mnist.parameters():
-        #         static_param_name_list.append(param.name)
-        #     out = exe.run(fluid.default_startup_program(),
-        #                   fetch_list=static_param_name_list)
-        #     for i in range(len(static_param_name_list)):
-        #         static_param_init_value[static_param_name_list[i]] = out[i]
-        #     for epoch in range(epoch_num):
-        #         for batch_id, data in enumerate(train_reader()):
-        #             static_x_data = np.array(
-        #                 [x[0].reshape(1, 28, 28)
-        #                  for x in data]).astype('float32')
-        #             y_data = np.array(
-        #                 [x[1] for x in data]).astype('int64').reshape([128, 1])
-        #             fetch_list = [avg_loss.name]
-        #             fetch_list.extend(static_param_name_list)
-        #             out = exe.run(
-        #                 fluid.default_main_program(),
-        #                 feed={"pixel": static_x_data,
-        #                       "label": y_data},
-        #                 fetch_list=fetch_list)
-        #             static_param_value = {}
-        #             static_out = out[0]
-        #             for i in range(1, len(out)):
-        #                 static_param_value[static_param_name_list[i - 1]] = out[
-        #                     i]
-        # self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))
-        # for key, value in six.iteritems(static_param_init_value):
-        #     self.assertTrue(np.allclose(value, dy_param_init_value[key]))
-        # self.assertTrue(np.allclose(static_out, dy_out))
-        # for key, value in six.iteritems(static_param_value):
-        #     self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

+            mnist = MNIST("mnist")
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            train_reader = paddle.batch(
+                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

+            img = fluid.layers.data(
+                name='pixel', shape=[1, 28, 28], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            cost = mnist(img)
+            loss = fluid.layers.cross_entropy(cost, label)
+            avg_loss = fluid.layers.mean(loss)
+            sgd.minimize(avg_loss)

+            # initialize params and fetch them
+            static_param_init_value = {}
+            static_param_name_list = []
+            for param in mnist.parameters():
+                static_param_name_list.append(param.name)

+            out = exe.run(fluid.default_startup_program(),
+                          fetch_list=static_param_name_list)

+            for i in range(len(static_param_name_list)):
+                static_param_init_value[static_param_name_list[i]] = out[i]

+            for epoch in range(epoch_num):
+                for batch_id, data in enumerate(train_reader()):
+                    static_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape([128, 1])

+                    fetch_list = [avg_loss.name]
+                    fetch_list.extend(static_param_name_list)
+                    out = exe.run(
+                        fluid.default_main_program(),
+                        feed={"pixel": static_x_data,
+                              "label": y_data},
+                        fetch_list=fetch_list)

+                    static_param_value = {}
+                    static_out = out[0]
+                    for i in range(1, len(out)):
+                        static_param_value[static_param_name_list[i - 1]] = out[
+                            i]

+        self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))

+        for key, value in six.iteritems(static_param_init_value):
+            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

+        self.assertTrue(np.allclose(static_out, dy_out))

+        for key, value in six.iteritems(static_param_value):
+            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))

 if __name__ == '__main__':
......