diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 51de4c9dfb583b8e8a7fe8d92d5be07d32609c67..5709eb1a7d4d26e6cc358651c1521ebf9a279801 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -69,7 +69,7 @@ inline std::string GradVarName(const std::string& var_name) {
   return result;
 }
 
-inline std::string OriginVarName(const std::string& grad_var_name) {
+inline std::string GradOriginalVarName(const std::string& grad_var_name) {
   std::size_t pos = grad_var_name.rfind(kGradVarSuffix);
   if (pos == std::string::npos) {
     return grad_var_name;
diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc
index 3bbbda6424ca110eabe31f232207c7562d4c8fc5..fe4804ac253925c112cf7b508efc42c45868a2fa 100644
--- a/paddle/fluid/framework/operator_test.cc
+++ b/paddle/fluid/framework/operator_test.cc
@@ -294,24 +294,24 @@ TEST(VarNameTest, all) {
   std::string grad_var_name = paddle::framework::GradVarName(var_name);
   ASSERT_EQ(grad_var_name, "X@GRAD");
   std::string original_var_name =
-      paddle::framework::OriginVarName(grad_var_name);
+      paddle::framework::GradOriginalVarName(grad_var_name);
   ASSERT_EQ(original_var_name, "X");
-  original_var_name = paddle::framework::OriginVarName(original_var_name);
+  original_var_name = paddle::framework::GradOriginalVarName(original_var_name);
   ASSERT_EQ(original_var_name, "X");
 
   std::string var_name_2("XYZ");
   grad_var_name = paddle::framework::GradVarName(var_name_2);
   ASSERT_EQ(grad_var_name, "XYZ@GRAD");
-  original_var_name = paddle::framework::OriginVarName(grad_var_name);
+  original_var_name = paddle::framework::GradOriginalVarName(grad_var_name);
   ASSERT_EQ(original_var_name, "XYZ");
-  original_var_name = paddle::framework::OriginVarName(original_var_name);
+  original_var_name = paddle::framework::GradOriginalVarName(original_var_name);
   ASSERT_EQ(original_var_name, "XYZ");
 
   std::string var_name_3("");
   grad_var_name = paddle::framework::GradVarName(var_name_3);
   ASSERT_EQ(grad_var_name, "@GRAD");
-  original_var_name = paddle::framework::OriginVarName(grad_var_name);
+  original_var_name = paddle::framework::GradOriginalVarName(grad_var_name);
   ASSERT_EQ(original_var_name, "");
-  original_var_name = paddle::framework::OriginVarName(original_var_name);
+  original_var_name = paddle::framework::GradOriginalVarName(original_var_name);
   ASSERT_EQ(original_var_name, "");
 }
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 28ad829aa96de647d041e51b74602d13b6f81717..9813149865489f7c10c0f4942172d46e1bccf81f 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -32,6 +32,11 @@ using framework::Variable;
 void AddTo(Variable* src, Variable* dst) {
   framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>();
+  // FIXME(minqiyang): loss_grad op will pass a zero grad of label
+  // ugly fix for it
+  if (src_tensor->numel() == 0) {
+    return;
+  }
   PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(),
                  "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
                  src_tensor->numel());
@@ -157,13 +162,9 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     auto& outputs = grad_outputs[it.first];
     auto& origin_outputs = it.second;
 
-    auto& forward_inputs = input_vars_[framework::OriginVarName(it.first)];
-
     for (size_t i = 0; i < outputs.size(); ++i) {
-      if (!forward_inputs[i]->stop_gradient_) {
-        framework::Variable* orig_grad = origin_outputs[i];
-        AddTo(outputs[i], orig_grad);
-      }
+      framework::Variable* orig_grad = origin_outputs[i];
+      AddTo(outputs[i], orig_grad);
     }
   }
   return input_vars_;
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 85740222e8f5642bbf26c95eca3909cc0273f853..2abda933cfc983bcef2433c3be4681c51e78ff1c 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -81,7 +81,15 @@ class OpBase;
 
 class VarBase {
  public:
-  explicit VarBase(bool stop_gradient = false)
+  VarBase()
+      : pre_op_(nullptr),
+        pre_op_out_idx_(-1),
+        var_desc_(nullptr),
+        var_(new framework::Variable()),
+        grads_(new framework::Variable()),
+        stop_gradient_(false) {}
+
+  explicit VarBase(bool stop_gradient)
       : pre_op_(nullptr),
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
@@ -89,23 +97,12 @@ class VarBase {
         grads_(new framework::Variable()),
         stop_gradient_(stop_gradient) {}
 
-  virtual ~VarBase() {
-    if (var_) {
-      delete var_;
-      var_ = nullptr;
-    }
-    if (grads_) {
-      delete grads_;
-      grads_ = nullptr;
-    }
-  }
+  virtual ~VarBase() {}
 
   void RunBackward();
 
   framework::LoDTensor& Grad();
 
-  inline framework::Variable* GradVar() { return grads_; }
-
   inline std::string GradName() const {
     PADDLE_ENFORCE(
         var_desc_,
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index 420ca646e6b162a93247524861342c8bcc7cbd80..c6eff86fac7be04821e1f8718ad06356fd6adb48 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -57,7 +57,7 @@ class Tracer {
   void Trace(OpBase* op,
              const std::map<std::string, std::vector<VarBase*>>& inputs,
              const std::map<std::string, std::vector<VarBase*>>& outputs,
-             framework::BlockDesc* block, const bool stop_gradient) {
+             framework::BlockDesc* block, const bool stop_gradient = false) {
     std::map<std::string, VarBase*> vars;
 
     framework::OpDesc* op_desc = op->op_desc_;
@@ -153,6 +153,7 @@ class Tracer {
         }
       }
     }
+
     for (auto it : grad_op_desc->Outputs()) {
       auto& grad_out_vars = op->grad_output_vars_[it.first];
       for (const std::string& grad_outvar : it.second) {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 3be0d9085b1af60d65651bfe92918aacb5fcf540..3b81d59ad965b7532ca729682e7aeb8eb96194a8 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -125,7 +125,8 @@ PYBIND11_MODULE(core, m) {
   m.add_object("_cleanup",
                py::capsule([]() { ScopePool::Instance().Clear(); }));
 
-  py::class_<imperative::VarBase>(m, "VarBase", R"DOC()DOC")
+  py::class_<imperative::VarBase, std::shared_ptr<imperative::VarBase>>(
+      m, "VarBase", R"DOC()DOC")
       // .def(py::init<>())
       .def(py::init<bool>(), py::arg("stop_gradient") = false)
       .def("_run_backward",
diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py
index 80645acc8a5d103ff040c41912f75e6626dc8111..c95b89a2c48d7b69da05365a865d612befa2f808 100644
--- a/python/paddle/fluid/imperative/layers.py
+++ b/python/paddle/fluid/imperative/layers.py
@@ -24,20 +24,29 @@ __all__ = ['PyLayer']
 
 
 class PyLayer(core.Layer):
-    def __init__(self, *args, **kwargs):
-        self._once_built = True
-
+    def __init__(self,
+                 dtype=core.VarDesc.VarType.FP32,
+                 param_attr=None,
+                 bias_attr=None,
+                 name=None):
         from ..layer_helper import LayerHelper
-        self._helper = LayerHelper(type(self).__name__, **kwargs)
-        self._dtype = kwargs.get("dtype", core.VarDesc.VarType.FP32)
+        self._helper = LayerHelper(
+            type(self).__name__,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            dtype=dtype,
+            name=name)
+
+        self._once_built = False
+        self._dtype = dtype
 
     def _build_once(self, inputs):
         pass
 
     def __call__(self, *inputs):
-        if self._once_built:
+        if not self._once_built:
             self._build_once(*inputs)
-            self._once_built = False
+            self._once_built = True
 
         outputs = self.forward(*inputs)
         return outputs
diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py
index f44009a05da437d6e6db9cc6e471611d0c103211..ea9953f5814207c569e799c2ffa11758e31699aa 100644
--- a/python/paddle/fluid/layer_helper.py
+++ b/python/paddle/fluid/layer_helper.py
@@ -314,11 +314,9 @@ class LayerHelper(object):
             WeightNormParamAttr.params_with_weight_norm.append(param)
             return param
         if _in_imperative_mode():
-            self.main_program.global_block().create_parameter(
-                dtype=dtype, shape=shape, **attr._to_kwargs())
             # In imperative mode, we want the returned parameter to be
             # initialized so that it can be used imperatively.
-            return self.startup_program.global_block().create_parameter(
+            return self.main_program.global_block().create_parameter(
                 dtype=dtype,
                 shape=shape,
                 **attr._to_kwargs(with_initializer=True))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
index 802db5d1e034219ab7b3e818ec905ea045d66e05..bda9f0e410506c0a4bca5e0fc790834d0aa1639a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
@@ -111,8 +111,6 @@ class TestImperativeMnist(unittest.TestCase):
                 predict = mnist(img)
                 out = fluid.layers.cross_entropy(predict, label)
                 out._backward()
-                filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
-                )
                 sgd.minimize(out)
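Usage sketch (illustrative, not part of the patch): the snippet below shows how the imperative-mode pieces touched by this diff fit together: a PyLayer subclass whose _build_once now runs exactly once because _once_built starts as False, and a backward pass through the shared_ptr-held VarBase. The fluid.imperative.guard() context, fluid.imperative.base.to_variable() helper, and the module path fluid.imperative.PyLayer are assumptions about the surrounding code base, not things introduced by this change.

import numpy as np
import paddle.fluid as fluid


class MyLayer(fluid.imperative.PyLayer):
    # Relies on the new __init__(dtype, param_attr, bias_attr, name) defaults;
    # no **kwargs are forwarded to LayerHelper anymore.
    def _build_once(self, inputs):
        # Called on the first __call__ only (_once_built flips False -> True).
        pass

    def forward(self, inputs):
        x = fluid.layers.relu(inputs)
        return [fluid.layers.reduce_sum(x)]


with fluid.imperative.guard():
    inp = fluid.imperative.base.to_variable(
        np.ones([2, 2], dtype=np.float32))
    out = MyLayer()(inp)[0]
    # AddTo() now skips empty gradients (e.g. the zero-size label grad that
    # cross_entropy's backward produces), so _backward() no longer trips the
    # numel equality check.
    out._backward()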