From d7df4e5e5b36ae58e69b78b48953ea917e484a18 Mon Sep 17 00:00:00 2001
From: Jiabin Yang
Date: Fri, 17 May 2019 14:09:11 +0800
Subject: [PATCH] Fix/Fix memory leak in dygraph (#17394)

* test=develop, add gradient sort backward strategy
* test=develop, fix test by add FLAGS_cudnn_deterministic on new tests
* test=develop, fix memory leak in dygraph mode
* test=develop, fix memory leak in dygraph mode
* test=develop, polish code
* test=develop, polish code
* test=develop, polish code
---
 paddle/fluid/imperative/layer.cc  | 52 +++++++++++++++----------------
 paddle/fluid/imperative/layer.h   | 46 +++++++++++++--------------
 paddle/fluid/imperative/tracer.cc | 13 ++++----
 paddle/fluid/pybind/imperative.h  |  6 ++--
 paddle/fluid/pybind/pybind.cc     | 10 +++---
 5 files changed, 61 insertions(+), 66 deletions(-)

diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 9698d250c99..ad459a76a19 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -112,8 +112,8 @@ void AddGradBySort(BackwardSumMap* bck_map, VarBase* target) {
               return a.first > b.first;
             });
   for (auto& var_pair : current.second) {
-    Variable* origin_grad = target->var_;
-    Variable* grad_to_add = var_pair.second->var_;
+    Variable* origin_grad = target->var_.get();
+    Variable* grad_to_add = var_pair.second->var_.get();
     VLOG(2) << "add origin_grad: " << target->Name();
     VLOG(2) << "added grad: " << var_pair.second->Name()
             << " trace id is: " << var_pair.first;
@@ -132,19 +132,19 @@ class Autograd {
       return;
     }
     VLOG(3) << "start autograd";
-    bck_map = new BackwardSumMap();
-    grad_ref = new GradientRef();
+    BackwardSumMap bck_map;
+    GradientRef grad_ref;
     std::deque<OpBase*> ready;
     ready.push_back(var->PreOp());

     std::map<OpBase*, int> dep_counts =
-        ComputeDepCounts(var->PreOp(), bck_stratedy);
+        ComputeDepCounts(var->PreOp(), bck_stratedy, &grad_ref);

     while (!ready.empty()) {
       OpBase* ready_op = ready.front();
       ready.pop_front();
       std::map<std::string, std::vector<VarBase*>> input_grads =
-          ready_op->ApplyGrad(bck_map, grad_ref, bck_stratedy);
+          ready_op->ApplyGrad(&bck_map, &grad_ref, bck_stratedy);

       for (auto it = input_grads.rbegin(); it != input_grads.rend(); ++it) {
         const std::vector<VarBase*>& ingrads = it->second;
@@ -171,7 +171,13 @@ class Autograd {

  private:
   std::map<OpBase*, int> ComputeDepCounts(
-      OpBase* op, const detail::BackwardStrategy& bck_stratedy) {
+      OpBase* op, const detail::BackwardStrategy& bck_stratedy,
+      GradientRef* grad_ref) {
+    if (bck_stratedy.sorted_sum_gradient_) {
+      PADDLE_ENFORCE_NOT_NULL(grad_ref,
+                              "grad_ref should not be null when "
+                              "using sorted grad backward strategy");
+    }
     std::map<OpBase*, int> ret;

     std::deque<OpBase*> queue;
@@ -185,13 +191,7 @@ class Autograd {
         for (const auto& map : candidate->grad_output_vars_) {
           for (const auto& it : map) {
             for (const auto& vb : it.second) {
-              if (grad_ref->find(vb) == grad_ref->end()) {
-                grad_ref->insert(std::make_pair(vb, 1));
-              } else {
-                // add ref count by 1 when we find grad_var can be generated by
-                // one grad_op
-                grad_ref->at(vb) += 1;
-              }
+              ++(*grad_ref)[vb];
             }
           }
         }
@@ -212,9 +212,6 @@ class Autograd {
     }
     return ret;
   }
-
-  BackwardSumMap* bck_map;
-  GradientRef* grad_ref;
 };

 std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
@@ -324,7 +321,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
         PADDLE_ENFORCE_NOT_NULL(grad_inp->var_, "op %s input %s nullptr",
                                 grad_op_desc->Type(), grad_inp->Name());

-        grad_invars.emplace_back(grad_inp->var_);
+        grad_invars.emplace_back(grad_inp->var_.get());
       }
     }
@@ -335,7 +332,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
         PADDLE_ENFORCE_NOT_NULL(grad_out->var_, "op %s output %s nullptr",
                                 grad_op_desc->Type(), grad_out->Name());

-        grad_outvars.emplace_back(grad_out->var_);
+        grad_outvars.emplace_back(grad_out->var_.get());
       }
     }
@@ -394,8 +391,8 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad(
           grad_ref->at(origin_outputs[i])--;
         }
       } else {
-        framework::Variable* grad = outputs[i]->var_;
-        framework::Variable* orig_grad = origin_outputs[i]->var_;
+        framework::Variable* grad = outputs[i]->var_.get();
+        framework::Variable* orig_grad = origin_outputs[i]->var_.get();
         VLOG(2) << "AddTo Called with orig_grad is: "
                 << origin_outputs[i]->name_ << " Grad to be added is "
                 << outputs[i]->name_;
@@ -451,7 +448,7 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {

 int PyLayer::NumFuncs() { return py_funcs_.size(); }

-std::vector<framework::Variable*> PyLayer::Apply(
+std::vector<std::unique_ptr<framework::Variable>> PyLayer::Apply(
     int func_id, const std::vector<VarBase*>& inputs) {
   PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
   return CallPythonFunc(py_funcs_[func_id], inputs);
@@ -468,13 +465,13 @@ std::vector<VarBase*> PyLayer::ApplyGrad(int func_id,
     outs.emplace_back(new VarBase(
         string::Sprintf("%s_out_%d", framework::GradVarName(PyLayer::kFwdOut),
                         i),
-        rets[i], nullptr, true));
+        std::move(rets[i]), nullptr, true));
   }

   return outs;
 }

-std::vector<framework::Variable*> PyLayer::CallPythonFunc(
+std::vector<std::unique_ptr<framework::Variable>> PyLayer::CallPythonFunc(
     const py::object& callable, const std::vector<VarBase*>& ins) {
   py::gil_scoped_acquire guard;
   py::tuple in_args(ins.size());
@@ -488,7 +485,7 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc(
   auto ret = callable(in_args);
   auto ret_tuple = py::cast<py::tuple>(ret);
   size_t ret_num = py::len(ret_tuple);
-  std::vector<framework::Variable*> outs;
+  std::vector<std::unique_ptr<framework::Variable>> outs;
   outs.reserve(ret_num);
   VLOG(3) << "pyfunc out " << ret_num;
   for (size_t i = 0; i < ret_num; ++i) {
@@ -496,11 +493,12 @@ std::vector<framework::Variable*> PyLayer::CallPythonFunc(
       auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
       PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
                               "Output tensor %d should not be nullptr", i);
-      auto* var = new framework::Variable();
+      auto var =
+          std::unique_ptr<framework::Variable>(new framework::Variable());
       auto* tensor = var->GetMutable<framework::LoDTensor>();
       tensor->ShareDataWith(*py_out_tensor);
       tensor->set_lod(py_out_tensor->lod());
-      outs.emplace_back(var);
+      outs.emplace_back(std::move(var));
     } catch (py::cast_error&) {
       PADDLE_THROW("The %d-th output must be LoDTensor", i);
     }
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 76d98640af3..e7e0a692e31 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -14,15 +14,16 @@
 #pragma once

-// clang-format off
-#include "paddle/fluid/framework/python_headers.h"
-// clang-format on
-
 #include   // NOLINT
-#include   // NOLINT
-#include   // NOLINT
 #include   // NOLINT
+#include   // NOLINT
 #include   // NOLINT
+#include 
+#include   // NOLINT
+
+// clang-format off
+#include "paddle/fluid/framework/python_headers.h"
+// clang-format on

 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
@@ -115,12 +116,14 @@ class OpBase;
 class VarBase {
  public:
   // Internal interface, create VarBase from exist variable
-  VarBase(const std::string& name, framework::Variable* var, VarBase* grad,
-          bool stop_gradient)
+  VarBase(const std::string& name, std::unique_ptr<framework::Variable> var,
+          VarBase* grad, bool stop_gradient)
       : VarBase(name, var->Get<framework::LoDTensor>().type(),
                 var->Get<framework::LoDTensor>().dims(),
-                var->Get<framework::LoDTensor>().place(), var, grad,
-                stop_gradient, false) {}
+                var->Get<framework::LoDTensor>().place(), nullptr, grad,
+                stop_gradient, false) {
+    var_ = std::move(var);
+  }

   // Python interface
   VarBase(const std::string& name, const framework::proto::VarType::Type dtype,
@@ -140,11 +143,11 @@ class VarBase {
   // TODO(minqiyang): need support SelectedRows
   VarBase(const std::string& name, framework::proto::VarType::Type dtype,
           const framework::DDim& shape, const platform::Place& place,
-          framework::Variable* var, VarBase* grad, bool stop_gradient,
-          bool persistable)
+          std::unique_ptr<framework::Variable> var, VarBase* grad,
+          bool stop_gradient, bool persistable)
       : name_(name),
         type_(framework::proto::VarType::LOD_TENSOR),
-        var_(var),
+        var_(std::move(var)),
         grads_(grad),
         stop_gradient_(stop_gradient),
         persistable_(persistable),
@@ -152,7 +155,7 @@ class VarBase {
         pre_op_out_name_(),
         pre_op_out_idx_(-1) {
     if (!var_) {
-      var_ = new framework::Variable();
+      var_.reset(new framework::Variable());
     }
     auto tensor = var_->GetMutable<framework::LoDTensor>();
     tensor->Resize(shape);
@@ -163,11 +166,6 @@ class VarBase {
  public:
   virtual ~VarBase() {
-    if (var_) {
-      delete var_;
-      var_ = nullptr;
-    }
-
     if (grads_) {
       delete grads_;
       grads_ = nullptr;
     }
@@ -261,7 +259,7 @@ class VarBase {
   framework::proto::VarType::Type type_;
   platform::Place place_;

-  framework::Variable* var_;
+  std::unique_ptr<framework::Variable> var_;
   VarBase* grads_;

 private:
@@ -369,8 +367,8 @@ class Layer {
  public:
   virtual ~Layer() {}

-  virtual std::vector Forward(const std::vector& inputs) {
-    std::vector vars;
+  virtual std::vector Forward(const std::vector& inputs) {
+    std::vector vars;
     return vars;
   }
 };
@@ -386,14 +384,14 @@ class PyLayer {

   static int NumFuncs();

-  static std::vector<framework::Variable*> Apply(
+  static std::vector<std::unique_ptr<framework::Variable>> Apply(
       int func_id, const std::vector<VarBase*>& inputs);

   static std::vector<VarBase*> ApplyGrad(int func_id,
                                          const std::vector<VarBase*>& inputs);

  private:
-  static std::vector<framework::Variable*> CallPythonFunc(
+  static std::vector<std::unique_ptr<framework::Variable>> CallPythonFunc(
       const py::object& callable, const std::vector<VarBase*>& ins);
 };
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index 7c495ddd682..bda1ae283e3 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "paddle/fluid/framework/var_type_inference.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -153,7 +154,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr", op->Type(),
                               inp->Name());

-      invars.emplace_back(inp->var_);
+      invars.emplace_back(inp->var_.get());
       if (!stop_gradient) {
         current_vars_map[inp->Name()] = inp;
       }
@@ -171,7 +172,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
     outvars.reserve(outputs.size());
     for (size_t i = 0U; i < outputs.size(); ++i) {
       VarBase* out = outputs[i];
-      outvars.emplace_back(out->var_);
+      outvars.emplace_back(out->var_.get());
       out->TrackPreOp(op, it.first, i, stop_gradient);
       if (!stop_gradient) {
         current_vars_map[out->Name()] = out;
@@ -294,17 +295,15 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,

   op->input_vars_[PyLayer::kFwdInp] = inputs;

-  std::vector<framework::Variable*> ret_vars =
+  std::vector<std::unique_ptr<framework::Variable>> ret_vars =
       PyLayer::Apply(op->forward_id_, inputs);
-  op->TrackPreOp(PyLayer::kFwdInp, inputs);

   std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut];
   outputs.reserve(ret_vars.size());
   for (size_t i = 0U; i != ret_vars.size(); ++i) {
-    framework::Variable* v = ret_vars[i];
-    VarBase* out = new VarBase(string::Sprintf("%s_out_%d", op->Type(), i), v,
-                               nullptr, stop_gradient);
+    VarBase* out = new VarBase(string::Sprintf("%s_out_%d", op->Type(), i),
+                               std::move(ret_vars[i]), nullptr, stop_gradient);
     outputs.emplace_back(out);
     out->TrackPreOp(op, PyLayer::kFwdOut, i, stop_gradient);
   }
diff --git a/paddle/fluid/pybind/imperative.h b/paddle/fluid/pybind/imperative.h
index f9d4a7c990e..5c5556501ff 100644
--- a/paddle/fluid/pybind/imperative.h
+++ b/paddle/fluid/pybind/imperative.h
@@ -28,9 +28,9 @@ class Layer : public imperative::Layer {
  public:
   using imperative::Layer::Layer;  // Inherit constructors

-  std::vector Forward(
-      const std::vector& inputs) override {
-    PYBIND11_OVERLOAD(std::vector, Layer, Forward,
+  std::vector Forward(
+      const std::vector& inputs) override {
+    PYBIND11_OVERLOAD(std::vector, Layer, Forward,
                       inputs);  // NOLINT
   }
 };
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index e0ab615bb5d..a792b24be3c 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -237,7 +237,8 @@ PYBIND11_MODULE(core, m) {
             return new_var.release();
           },
           py::return_value_policy::take_ownership)
-      .def("value", [](const imperative::VarBase &self) { return self.var_; },
+      .def("value",
+           [](const imperative::VarBase &self) { return self.var_.get(); },
           py::return_value_policy::reference)
       .def_property("name", &imperative::VarBase::Name,
                     &imperative::VarBase::SetName)
@@ -285,7 +286,7 @@ PYBIND11_MODULE(core, m) {

   py::class_ layer(m, "Layer");
   layer.def(py::init<>())
       .def("forward", [](imperative::Layer &self,
-                         const std::vector &inputs) {
+                         const std::vector &inputs) {
         return self.Forward(inputs);
       });
@@ -299,10 +300,9 @@ PYBIND11_MODULE(core, m) {
             std::vector<imperative::VarBase *> outputs;
             outputs.reserve(ret_vars.size());
             for (size_t i = 0U; i != ret_vars.size(); ++i) {
-              framework::Variable *v = ret_vars[i];
               // TODO(minqiyang): use unique_name generator to set a name
-              outputs.emplace_back(
-                  new imperative::VarBase("", v, nullptr, true));
+              outputs.emplace_back(new imperative::VarBase(
+                  "", std::move(ret_vars[i]), nullptr, true));
             }
             return outputs;
--
GitLab
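
Editor's note on the pattern: the core of this patch is an ownership change. Before, VarBase held a raw framework::Variable* that it deleted in its own destructor, and PyLayer::CallPythonFunc returned raw pointers, so any dropped result or early exit could leak the Variable. The patch moves ownership into std::unique_ptr, threads it through the constructors with std::move, and switches borrowing call sites to var_.get(). The sketch below is a minimal, self-contained illustration of that pattern only; Variable, VarHolder, and MakeOutputs are simplified stand-ins invented for this note, not the actual Paddle types or APIs.

#include <memory>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-in for framework::Variable (hypothetical, for illustration).
struct Variable {
  std::string data;
};

// After-the-patch shape: the holder owns its Variable through std::unique_ptr,
// so the payload is freed exactly once, with no manual delete in a destructor.
// (Before the patch, the equivalent member was a raw `Variable*` plus a
// `delete var_;` in the destructor, which is the leak-prone shape removed here.)
class VarHolder {
 public:
  // Ownership is transferred in; a null argument means "allocate internally",
  // mirroring the VarBase constructor in the diff.
  explicit VarHolder(std::unique_ptr<Variable> var) : var_(std::move(var)) {
    if (!var_) {
      var_.reset(new Variable());
    }
  }

  // Borrowing callers (code that only reads or writes the payload) get a
  // non-owning raw pointer, like the var_.get() calls in the diff.
  Variable* raw() const { return var_.get(); }

 private:
  std::unique_ptr<Variable> var_;
};

// Factory returning unique_ptrs, mirroring the new return type of
// PyLayer::CallPythonFunc: results the caller drops are still freed.
std::vector<std::unique_ptr<Variable>> MakeOutputs(size_t n) {
  std::vector<std::unique_ptr<Variable>> outs;
  outs.reserve(n);
  for (size_t i = 0; i < n; ++i) {
    outs.emplace_back(new Variable());
  }
  return outs;
}

int main() {
  auto ret_vars = MakeOutputs(3);
  std::vector<VarHolder> holders;
  holders.reserve(ret_vars.size());
  for (auto& v : ret_vars) {
    // Ownership moves from the factory result into the holder, just as the
    // patch does with std::move(ret_vars[i]) in Tracer::PyTrace.
    holders.emplace_back(std::move(v));
  }
  return 0;
}

As in the diff, the owning class no longer needs any explicit delete, and everything that merely consumes the variable keeps working with the raw, non-owning pointer obtained from .get().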